In [None]:
import orjson, os
# Copy the "Values" section of the local settings file into the process
# environment so the cells below can read their connection strings and keys
# through os.environ.
with open("local.settings.json") as settings_file:
    settings_values = orjson.loads(settings_file.read())["Values"]
    os.environ.update(settings_values)

# Database

In [None]:
from sqlalchemy import create_engine
# Schema-qualified table that the query cell reflects and reads.
schema, table_name = 'keystone', 'Audience'

# Starts empty; the query cell assigns the list of columns it selects.
columns = list()

# The database URL is taken from the environment rather than hardcoded.
engine = create_engine(os.environ['DATABIND_SQL_KEYSTONE'])

In [None]:
from sqlalchemy import MetaData, Table, select
import pandas as pd

# Reflect the table definition from the database so its columns can be
# looked up by name.
metadata = MetaData()
table = Table(table_name, metadata, schema=schema, autoload_with=engine)

# Build a SELECT restricted to the columns listed here.
columns = ["id", "status"]
selected_columns = [table.c[col] for col in columns]
stmt = select(*selected_columns)

# Execute the query; mappings() yields dict-like rows keyed by column name.
with engine.connect() as conn:
    result = conn.execute(stmt)
    rows = list(result.mappings())

# Pass the column list explicitly: a query that returns no rows would
# otherwise produce a frame with no columns at all, and any later
# audiences["status"] / audiences["id"] lookup would raise KeyError.
audiences = pd.DataFrame(rows, columns=columns)

# Table

In [None]:
import os
import json
from azure.data.tables import TableClient
import pandas as pd

def get_partition_keys_from_connection_string(
    conn_str: str,
    name_filter: str = "orchestrator_esquire_audience"
) -> pd.DataFrame:
    """
    Fetch the entities of the ``productionInstances`` table whose ``Name``
    equals ``name_filter`` and return them as a DataFrame.

    The ``Name`` filter is sent to the service as a parameterized OData
    query, so quotes inside ``name_filter`` cannot break the filter
    expression. Sorting by ``CreatedTime`` (ascending, oldest first) is done
    locally with pandas once all matching entities have been fetched.

    :param conn_str: Azure Storage connection string
    :param name_filter: The value to match in the 'Name' field
    :return: pandas DataFrame with the columns PartitionKey, RuntimeStatus,
        CustomStatus and CreatedTime. 'Name' is only filtered on, not
        selected. The four columns are present even when nothing matches.
        CustomStatus values that are strings are JSON-decoded.
    """
    # Select minimal columns to reduce payload
    select_fields = ["PartitionKey", "RuntimeStatus", "CustomStatus", "CreatedTime"]

    # The context manager closes the client's transport when the query is
    # done. The result is paged lazily, so it has to be materialised with
    # list() while the client is still open.
    with TableClient.from_connection_string(conn_str, table_name="productionInstances") as table_client:
        entities = list(
            table_client.query_entities(
                # OData property names are case-sensitive; @name is bound
                # from `parameters` by the SDK instead of being interpolated.
                query_filter="Name eq @name",
                select=select_fields,
                parameters={"name": name_filter},
            )
        )

    # Fixing the columns keeps the frame's shape stable for callers that
    # index by column name, even for an empty result.
    df = pd.DataFrame(entities, columns=select_fields)

    # Decode JSON only for string values; anything else is left untouched.
    if not df.empty:
        df["CustomStatus"] = df["CustomStatus"].apply(lambda x: json.loads(x) if isinstance(x, str) else x)

    # Sort by CreatedTime (ascending, oldest first)
    df = df.sort_values(by="CreatedTime", ascending=True).reset_index(drop=True)

    return df


# Compare

In [None]:
# Orchestration instances read from table storage. os.environ[...] is used
# (as in the other cells) so a missing setting fails right here with a
# KeyError instead of handing an empty connection string to the client.
instances = get_partition_keys_from_connection_string(os.environ["AzureWebJobsStorage"])
instance_ids = set(instances["PartitionKey"].to_list())

# Ids of the audiences whose `status` is set.
# NOTE(review): assumes `status` is a boolean column, since it is used
# directly as a row mask; confirm against the table schema.
audience_ids = set(audiences[audiences["status"]]["id"].to_list())

# Instances that have no matching audience id.
absent_audiences = instance_ids - audience_ids
display(len(instance_ids & audience_ids))
display(len(absent_audiences))

# Guard the ratio: with no instances at all the division would raise
# ZeroDivisionError, and there is nothing to remove anyway.
removal_share = len(absent_audiences) / len(instance_ids) if instance_ids else 0.0
display(f"{removal_share:.2%} set to be removed")

# Delete

In [None]:
import os, requests

# Upper bound (seconds) on how long a single request may wait, so a stalled
# connection cannot hang the cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 240

# instance id -> HTTP status code, for every DELETE that was not answered
# with a success status.
failed_deletes = {}

for instance_id in absent_audiences:
    print(instance_id)
    resp = requests.delete(
        url=f"https://esquire-auto-audience.azurewebsites.net/runtime/webhooks/durabletask/instances/{instance_id}",
        params={
            "taskHub": "production",
            "connection": "Storage",
            "code": os.environ['AZFUNC_MASTER_CODE'],
        },
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Record only the status code. The request URL carries the master key as
    # a query parameter, so the URL (and exception messages that embed it)
    # must not end up in the notebook output.
    if not resp.ok:
        failed_deletes[instance_id] = resp.status_code

print(f"{len(failed_deletes)} deletion(s) not confirmed: {failed_deletes}")


In [None]:
import os, requests

# Audience ids whose orchestration is removed and then re-submitted.
failed_ids = [
    "cm91qfd6x001gvgynef466kwo",
    "cm9jyq6s5000f52duspcainyn",
]

# Upper bound (seconds) on how long a single request may wait, so a stalled
# connection cannot hang the cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 240

# (audience id, step, HTTP status code) for every request that was not
# answered with a success status.
rerun_failures = []

for audience_id in failed_ids:
    print(audience_id)

    # Step 1: remove the existing orchestration instance for this id.
    delete_resp = requests.delete(
        url=f"https://esquire-auto-audience.azurewebsites.net/runtime/webhooks/durabletask/instances/{audience_id}",
        params={
            "taskHub": "production",
            "connection": "Storage",
            "code": os.environ["AZFUNC_MASTER_CODE"],
        },
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Only status codes are recorded: the request URLs carry the master key
    # as a query parameter and must not be echoed into the notebook output.
    if not delete_resp.ok:
        rerun_failures.append((audience_id, "delete", delete_resp.status_code))

    # Step 2: submit the audience again with force=1. As before, this is
    # sent regardless of how the DELETE above was answered.
    post_resp = requests.post(
        url=f"https://esquire-auto-audience.azurewebsites.net/api/audiences/{audience_id}",
        params={
            "force": 1,
            "code": os.environ["AZFUNC_MASTER_CODE"],
        },
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    if not post_resp.ok:
        rerun_failures.append((audience_id, "post", post_resp.status_code))

print(f"{len(rerun_failures)} request(s) not confirmed: {rerun_failures}")