# Create Performance Monitors Using the Arize GraphQL API

### Step 1: Initialize the GraphQL Client using your developer API Key

In [None]:
!pip install gql[all]
from gql import Client, gql
from gql.transport.requests import RequestsHTTPTransport

### Get your API key
First, make sure you have developer permissions. If you are able to visit the [API explorer](https://app.arize.com/graphql), you have developer permissions; if not, please ask your Account Admin to provide you with access. 

The API key can be retrieved from the [API explorer](https://app.arize.com/graphql) page. Click the button on the top right called "Get Your API Key." A modal will pop up with your key, copy that into the `API_KEY` constant below. 

NOTE: this key is different than the SDK key used to send data to Arize. 

In [None]:
# Developer API key, copied from the "Get Your API Key" modal in the API explorer
API_KEY = "YOUR_API_KEY"

# HTTP transport aimed at the Arize GraphQL endpoint; the key travels in the x-api-key header
auth_headers = {"x-api-key": API_KEY}
transport = RequestsHTTPTransport(
    url="https://app.arize.com/graphql/",
    headers=auth_headers,
)

# GraphQL client bound to that transport, with the schema fetched from the transport
client = Client(fetch_schema_from_transport=True, transport=transport)

### Step 2: Execute a GraphQL query to get all your models and monitors

In [None]:
# The query is rooted at your space, which is looked up by its globally unique ID.
# To find the ID, visit app.arize.com; the URL has the form
#   https://app.arize.com/organizations/:orgId/spaces/:spaceId
# NOTE: this is not the same as the space key used to send data using the SDK
SPACE_ID = "YOUR_SPACE_ID"

# A reusable, paginated query. Each execution returns the space name plus up to
# 50 models (id and name) positioned after $cursor, together with the endCursor
# that is fed back in as $cursor to request the following page.
models_query = gql(
    """
    query getModels($spaceId: ID!, $cursor: String) {
        space: node(id: $spaceId) {
            ... on Space {
                name
                models (first: 50, after: $cursor) {
                    pageInfo {
                        endCursor
                    }
                    edges {
                        model: node {
                            id
                            name
                        }
                    }
                }
            }
        }
    }
"""
)

# Query variables sent with every page request; "cursor" is added after the first page
params = {"spaceId": SPACE_ID}
# Accumulates the model edges collected from every page
models = []
space_name = ""


# Execute the query page by page until no end cursor is returned
while True:
    paged_response = client.execute(models_query, params)
    space = paged_response["space"]
    space_name = space["name"]
    model_connection = space["models"]
    # Add this page's models to the running list
    models.extend(model_connection["edges"])
    end_cursor = model_connection["pageInfo"]["endCursor"]
    print("pageInfo end_cursor %s" % (end_cursor))
    if not end_cursor:
        # No more models to pull. The list is complete!
        break
    # Another page exists: continue from where this one ended
    print("There is another page of models. Loading more.")
    params["cursor"] = end_cursor

print(f"Retrieved {len(models)} models")

#### Step 2a: Print out some of the models to check that the export looks as expected

In [None]:
import pandas as pd

# Each entry in `models` is nested JSON; flatten it into dot-separated columns
models_df = pd.json_normalize(data=models, sep=".")
# Show the first rows as a quick sanity check
models_df.head()

### Important: Step 3 involves exporting, manipulating, and importing modified data using Google Sheets (recommended). If you prefer to use a standard .csv file instead, consult the following cell. Otherwise, ignore it and proceed to Step 3.

In [None]:
# only consult this section if using csv, then skip ahead to step 4.
# uncomment the following code to create a monitors.csv file in your current directory,
# modify the csv by adding column headers and rows (consult monitor_columns in step 3a.),
# and convert back to a pandas dataframe

# monitors_csv = models_df.to_csv('monitors.csv')  # models_df is the dataframe built in Step 2a
# perf_monitors_df = pd.read_csv('monitors.csv')
# perf_monitors_df.head()

### Step 3: Save models in a spreadsheet to reference for modelId

In [None]:
!pip install gspread
# src: https://colab.research.google.com/notebooks/snippets/sheets.ipynb
from google.colab import auth

# Authenticate the current user through the Colab auth helper
auth.authenticate_user()

import gspread
from google.auth import default

# Authorize a gspread client with the default Google credentials
creds, _ = default()
gc = gspread.authorize(creds)

# Create a new spreadsheet named after the space
sheet_name = f"{space_name} models"
sh = gc.create(sheet_name)

# Write to the first worksheet of the spreadsheet we just created. Going through
# `sh` (instead of re-opening by title) ensures the data lands in this new
# spreadsheet even if an earlier run left another spreadsheet with the same
# title, and it keeps the printed URL pointing at the sheet that holds the data.
worksheet = sh.sheet1

# Let's now write the dataframe to google sheets: header row, then one row per model
worksheet.update([models_df.columns.values.tolist()] + models_df.values.tolist())

# print the URL
print(sh.url)

#### 3a. Create a second sheet within the spreadsheet to populate with monitor inputs

We want to create a second sheet for creating performance monitors. Reference the first sheet to populate the modelId column for the creation of performance monitors

In [None]:
# Column headers for the monitor-input sheet; Step 4 reads each row by these names.
# consult the documentation for examples about these fields and their optionality
# optional fields may be left blank in the sheet
monitor_columns = [
    "modelId",  # taken from the models sheet written in Step 3
    "name",
    "performanceMetric",
    "operator",
    "threshold",
    "filters",  # set to 1 in cell if there is a filter
    # The filters.* columns describe the single filter; Step 4 reads them only
    # when the "filters" cell is set.
    "filters.dimensionType",
    "filters.operator",
    "filters.name",
    "filters.values",
    "evaluationWindowLengthSeconds",
    "contacts",  # Step 4 sends this value as the emailAddress of an email contact
]

In [None]:
# Run once: adds a second worksheet to the spreadsheet and writes the header row.
create_worksheet_name = "example_inputs"
sh.add_worksheet(cols="20", rows="100", title=create_worksheet_name)
worksheet = sh.worksheet(create_worksheet_name)
# A single-row table holding only the column headers
header_rows = [monitor_columns]
worksheet.update(header_rows)

#### Step 3b. Import the data back into a dataframe

In [None]:
# Re-fetch the input worksheet by name so this cell can be run on its own
worksheet = sh.worksheet(create_worksheet_name)

# Read every filled-in row as a record and load the records into a dataframe
monitor_records = worksheet.get_all_records()
perf_monitors_df = pd.DataFrame(monitor_records)
perf_monitors_df.head()

### Step 4: Create performance monitors

Create a performance monitor for each row in the sheet with explicit thresholds. The below example is specifically for creating monitors with one filtered dimension with either an explicit threshold or dynamicAutoThreshold.

Tip: Exclude threshold from the mutation to enable the dynamicAutoThreshold (set above/below 2
standard deviations of the 14-day average of each datapoint). 

Include dynamicAutoThreshold in the mutation input INSTEAD of threshold AND in the worksheet as a column to specify a multiple of the standard deviation to set the threshold.

In [None]:
# Mutation that creates one performance monitor. modelId, name,
# performanceMetric and operator are required variables; threshold, filters,
# evaluationWindowLengthSeconds and contacts are optional. filters and contacts
# are declared as lists. The response returns the new monitor's id, name and
# threshold.
create_performance_monitor_mutation = gql(
    """
    mutation createPerformanceMonitorMutation(
      $modelId: ID!, 
      $name: String!, 
      $performanceMetric: PerformanceMetric!, 
      $operator: ComparisonOperator!, 
      $threshold: Float, 
      $filters: [DimensionFilterInput!], 
      $evaluationWindowLengthSeconds: Float, 
      $contacts: [MonitorContactInput!]
    ) {
      createPerformanceMonitor(
        input: { 
          modelId: $modelId, 
          name: $name, 
          performanceMetric: $performanceMetric, 
          operator: $operator, 
          threshold: $threshold, 
          filters: $filters, 
          evaluationWindowLengthSeconds: $evaluationWindowLengthSeconds, 
          contacts: $contacts
        }
      ) {
        monitor { id, name, threshold }
      }
    }
"""
)

import math

# Evaluation window applied when a row leaves evaluationWindowLengthSeconds blank.
# This constant used to be called "seven_days", but 259200 seconds is 3 days.
# The value is kept so existing behaviour does not change; only the misleading
# name is corrected. NOTE(review): if 7 days was really intended, use 604800.
DEFAULT_EVALUATION_WINDOW_SECONDS = 3 * 24 * 60 * 60  # 259200

# Variables the mutation declares as non-null
REQUIRED_PARAM_KEYS = ("modelId", "name", "performanceMetric", "operator")


def _is_blank(value):
    """Return True when a sheet or CSV cell holds no value.

    Blank cells arrive as empty strings from the Google Sheets import and as
    NaN when the inputs were read with ``pd.read_csv``; None is treated the
    same way. A numeric 0 is NOT blank.
    """
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    return isinstance(value, float) and math.isnan(value)


def _build_monitor_params(row):
    """Translate one row of monitor inputs into the mutation's variables.

    Args:
        row: a mapping-like row (pandas Series or dict) keyed by the
            ``monitor_columns`` headers.

    Returns:
        dict of GraphQL variables for ``create_performance_monitor_mutation``.
    """
    params = {key: row[key] for key in REQUIRED_PARAM_KEYS if key in row}

    # threshold is optional: leaving it out (rather than sending "" or NaN)
    # is what lets a blank cell work. An explicit 0 is still sent.
    threshold = row.get("threshold")
    if not _is_blank(threshold):
        params["threshold"] = threshold

    # "filters" is a flag column: any non-blank, truthy value enables the filter.
    filter_flag = row.get("filters")
    if not _is_blank(filter_flag) and filter_flag:
        filter_params = {
            "dimensionType": row["filters.dimensionType"],
            "operator": row["filters.operator"],
        }
        filter_name = row.get("filters.name")
        if not _is_blank(filter_name):
            filter_params["name"] = filter_name
        filter_params["values"] = [str(row["filters.values"])]
        # $filters is declared as a list, so send a one-element list.
        params["filters"] = [filter_params]

    # Blank or 0 falls back to the default window.
    window = row.get("evaluationWindowLengthSeconds")
    if not _is_blank(window) and window:
        params["evaluationWindowLengthSeconds"] = window
    else:
        params["evaluationWindowLengthSeconds"] = DEFAULT_EVALUATION_WINDOW_SECONDS

    contact = row.get("contacts")
    if not _is_blank(contact):
        # $contacts is declared as a list, so send a one-element list.
        params["contacts"] = [
            {"notificationChannelType": "email", "emailAddress": contact}
        ]

    return params


# Number of monitors created so far (stays 0 when the sheet has no rows)
n = 0

for n, (_, row) in enumerate(perf_monitors_df.iterrows(), start=1):
    params = _build_monitor_params(row)
    # A failed mutation raises and stops the loop, so rows are never skipped silently.
    result = client.execute(create_performance_monitor_mutation, params)
    print(f"{n} created performance monitor with {params}")