# Create Data Quality Monitors Using the Arize GraphQL API

### Step 1: Initialize the GraphQL Client using your developer API Key

In [None]:
!pip install gql[all]

from gql import Client, gql
from gql.transport.requests import RequestsHTTPTransport

### Get your API key
First, make sure you have developer permissions. If you are able to visit the [API explorer](https://app.arize.com/graphql), you have developer permissions; if not, please ask your Account Admin to provide you with access.

The API key can be retrieved from the [API explorer](https://app.arize.com/graphql) page. Click the button on the top right called "Get Your API Key." A modal will pop up with your key; copy it into the `API_KEY` constant below.

NOTE: This key is different from the SDK key used to send data to Arize.

In [None]:
# Developer API key copied from the API explorer (this is not the SDK key)
API_KEY = "YOUR_API_KEY"

# HTTP transport aimed at the GraphQL endpoint; the key is sent with every
# request in the x-api-key header
auth_headers = {"x-api-key": API_KEY}
transport = RequestsHTTPTransport(
    url="https://app.arize.com/graphql/",
    headers=auth_headers,
)

# GraphQL client bound to that transport; fetch_schema_from_transport=True asks
# the client to obtain the schema through the transport
client = Client(
    transport=transport,
    fetch_schema_from_transport=True,
)

### Step 2: Execute a GraphQL query to get all your model's features

In [None]:
# We start this query from your model. Models have globally unique IDs. You can get your modelId by visiting app.arize.com.
# The url will be in this format: https://app.arize.com/organizations/:orgId/spaces/:spaceId/models/:modelId

MODEL_ID = "YOUR_MODEL_ID"


# A re-usable query for fetching your model's features, a page at a time.
# Each request asks for up to 20 features after $cursor and excludes features
# whose data type is EMBEDDING.
features_query = gql(
    """
    query getFeatures($modelId: ID!, $cursor: String) {
        model: node(id: $modelId) {
            ... on Model {
                name
                modelSchema {
                    features(first: 20, after: $cursor, filter: { exclude:{ dataTypes:EMBEDDING }}) {
                        edges {
                            feature: node {
                                dimension {
                                    name
                                }
                            }
                        }
                        pageInfo {
                            endCursor
                        }
                    }
                }
            }
        }
    }

"""
)

# Base query parameters for fetching features; "cursor" is added once the
# first page has been read
params = {"modelId": MODEL_ID}
# The feature edges collected from every page
features = []
model_name = ""

# Execute the query on the transport, one page per request, until the server
# stops handing back a cursor or returns an empty page.
while True:
    paged_response = client.execute(features_query, variable_values=params)
    model = paged_response.get("model")
    if not model:
        # node(id:) resolved to nothing, or to an object that is not a Model,
        # so none of the fields selected under "... on Model" are present.
        raise ValueError(
            f"No model found for MODEL_ID {MODEL_ID!r}; "
            "check the ID copied from the model URL."
        )
    model_name = model["name"]
    feature_page = model["modelSchema"]["features"]
    page_edges = feature_page["edges"]
    # Append this page's features to the running list
    features.extend(page_edges)
    end_cursor = feature_page["pageInfo"]["endCursor"]
    print(f"pageInfo end_cursor {end_cursor}")
    # If there is another page of information, point the cursor to the next
    # page and fetch more. An empty page also ends the loop, so a cursor that
    # is echoed back with no data can never keep it spinning.
    if end_cursor and page_edges:
        print("There is another page of features. Loading more.")
        params["cursor"] = end_cursor
    else:
        print("No more features to pull. The list is complete!")
        break

print(f"Retrieved {len(features)} features")

#### Step 2a: Print out some of the features to check that the results are what is expected

In [None]:
import pandas as pd

# Every entry in `features` is a nested dict (feature -> dimension -> name).
# Flattening turns each nested path into one column whose name joins the keys
# with ".".
features_df = pd.json_normalize(data=features, sep=".")
# Preview the first five rows
features_df.head(n=5)

### Step 3: Iterate over features and create Data Quality Monitors

In [None]:
# Now we can create data quality monitors for all of our model's features.
# The mutation takes a single $input object and returns the fields of the
# monitor it created, which are collected below and tabulated at the end.
create_data_quality_monitor_query = gql(
    """
        mutation CreateDataQualityMonitor(
            $input: CreateDataQualityMonitorMutationInput!
        ) {
            createDataQualityMonitor(input: $input) {
                monitor {
                    id
                    name
                    monitorCategory
                    dataQualityMetric
                    threshold
                    operator
                }
            }
        }
    """
)

created_monitors = []

# One mutation request per feature retrieved in Step 2
for feature_edge in features:
    feature = feature_edge["feature"]["dimension"]["name"]
    monitor_name = f"{feature} Percent Empty"
    # Called `variables` rather than `input` so the builtin input() is not
    # shadowed for the rest of the notebook session.
    variables = {
        "input": {
            "name": monitor_name,
            "modelId": MODEL_ID,
            "dimensionName": feature,
            "dimensionCategory": "featureLabel",
            # NOTE(review): presumably fires when more than 10% of the
            # feature's values are empty - confirm units against the Arize docs.
            "dataQualityMetric": "percentEmpty",
            "threshold": 10,
            "operator": "greaterThan",
        }
    }
    print(f"Creating Data Quality Monitor: {monitor_name}")
    create_monitor_response = client.execute(
        create_data_quality_monitor_query, variable_values=variables
    )
    created_monitors.append(create_monitor_response["createDataQualityMonitor"]["monitor"])

print(f"Created {len(created_monitors)} Data Quality Monitors")

# One row per created monitor, one column per returned field
monitors_df = pd.json_normalize(created_monitors, sep=".")

### Step 4: Export the resulting monitors to a spreadsheet

In [None]:
!pip install gspread
# src: https://colab.research.google.com/notebooks/snippets/sheets.ipynb
from google.colab import auth

auth.authenticate_user()

import gspread
from google.auth import default

creds, _ = default()

gc = gspread.authorize(creds)

sheet_name = f"{model_name} Data Quality Monitors"
sh = gc.create(sheet_name)

# Open our new sheet and add some data.
worksheet = gc.open(sheet_name).sheet1

# Let's now write the dataframe to google sheets
worksheet.update([monitors_df.columns.values.tolist()] + monitors_df.values.tolist())

# print the URL
print(sh.url)