# GraphQL endpoint performance testing notebook

This notebook helps you run performance tests against your GraphQL artifact. You can use this notebook to test a single GraphQL endpoint.

What you will need:
1. A GraphQL artifact attached to one or more data sources
2. Your own values filled into the "variables" section below
3. You are good to go!

#### STEP 1: Fill out the variables

In [8]:
'''
This cell defines the variables used by the test setup.
TODO: replace the values here with the correct values for your testing!
'''

# Change this to your GraphQL endpoint!
# To find the correct value, go to your GraphQL artifact in the Fabric portal, and, in the top menu
# ribbon, you will find a "Copy Endpoint" button
graphql_endpoint = '[ENDPOINT]'
# Change this to where you want your results file to live
# You can also leave it as default - the code will create the results file for you if it does not exist.
results_file_path = './results_test.csv'

# Change this to the query you wish to run in your GraphQL artifact during the performance experiment!
# NOTE: the default below is an empty "query { }" placeholder - fill in the fields you want to select.
query_gql = """
    query {
} 
  
"""

# Change this to the variables you wish to run with your GraphQL query!
# You can also leave this value as default (if it makes sense for your query) or use an empty dictionary
# if you don't wish to use variables.
# eg:
# variables = {
# }

variables = {
    "variable_name": "variable_value"
}


#### STEP 2: Run the experiment!

In [9]:
# Importing all libraries needed
from azure.identity import InteractiveBrowserCredential
import requests
import time
import requests
import os
import numpy as np
import pandas as pd

In [None]:
'''
Run this cell to run the performance test!

As soon as you run this cell, you will be prompted to log in to your account. Make sure to log in with an account
that has access to both the GRAPHQL artifact you wish to use AND the DATASOURCE(S) your GraphQL artifact has attached.

The experiment sends one request every "seconds_between_requests" seconds for "experiment_duration_seconds"
(1 hour by default) and appends every measurement to the file defined in the variable "results_file_path".
Interrupt the kernel to stop earlier - the measurements collected so far are kept.
'''

# How long the experiment runs and how long to wait between two consecutive requests.
experiment_duration_seconds = 60 * 60
seconds_between_requests = 2
# Upper bound for a single request, so that a hung connection cannot stall the experiment forever.
request_timeout_seconds = 120


def _to_number(raw_value):
    """Return raw_value converted to float, or None when it is not a plain number."""
    try:
        return float(raw_value)
    except ValueError:
        return None


# Setting up the results file: 'a+' creates it when missing, and the header is written only once.
with open(results_file_path, 'a+') as fd:
    if os.stat(results_file_path).st_size == 0:
        fd.write("Root Activity ID, Date, Total elapsed time (ms), Overhead (ms), Data Source Query Execution (ms)" + "\n")

# GraphQL setup
app = InteractiveBrowserCredential()
scp = 'https://analysis.windows.net/powerbi/api/user_impersonation'
result = app.get_token(scp)

# Stop here instead of sending unauthenticated requests for the whole experiment.
if not result.token:
    raise RuntimeError("Could not get access token")

headers = {
    'Authorization': f'Bearer {result.token}',
    'Content-Type': 'application/json'
}

# Samples are stored as numbers so the results cell can compute statistics on them.
overhead_times = []
db_query_execution_times = []
total_times = []

# Use a session to re-use the connection instead of creating a new one for each request;
# the "with" block closes it when the experiment ends.
with requests.Session() as session:
    deadline = time.monotonic() + experiment_duration_seconds
    try:
        while time.monotonic() < deadline:
            # Running the request; only the round trip itself is timed.
            start = time.perf_counter()
            response = session.post(
                graphql_endpoint,
                json={'query': query_gql, 'variables': variables},
                headers=headers,
                timeout=request_timeout_seconds,
            )
            end = time.perf_counter()
            response.raise_for_status()

            # Parsed only to make sure the body is valid JSON; the payload itself is not used.
            response.json()

            # -- RAID (a missing header must not abort the whole experiment)
            root_activity_id = response.headers.get('x-ms-root-activity-id', "N/A")
            # -- Date: keep what follows the day name, e.g. " 21 Oct 2015 07:28:00 GMT"
            date = response.headers['Date'].split(",")[1]
            # -- Performance
            overhead = "N/A"
            db_time = "N/A"
            # Header names are looked up on the headers mapping itself (not on its values).
            latency = response.headers.get('x-ms-latency')
            if latency is not None:
                # The header is split on ";": first part is the overhead, second the data source time.
                latency_parts = latency.split(";")
                overhead = latency_parts[0]
                overhead_value = _to_number(overhead)
                if overhead_value is not None:
                    overhead_times.append(overhead_value)

                if len(latency_parts) > 1:
                    db_time = latency_parts[1]
                    db_value = _to_number(db_time)
                    if db_value is not None:
                        db_query_execution_times.append(db_value)

            performance_total = (end - start) * 1000
            total_times.append(performance_total)

            # Saving data to file (the raw header text is written, even when it is not numeric)
            with open(results_file_path, 'a') as fd:
                fd.write(",".join([root_activity_id, date, str(performance_total), overhead, db_time]) + "\n")

            time.sleep(seconds_between_requests)

    except KeyboardInterrupt:
        print("Experiment interrupted; the measurements collected so far are kept.")
    except Exception as error:
        print(f"Query failed with error: {error}")
    else:
        print(f"Experiment finished: {len(total_times)} requests recorded in {results_file_path}")

##### See results from latest run!

In [None]:
def _build_stats_table(samples, caption):
    """Return a captioned one-row table with mean, standard deviation, P90, P95 and P99 of samples.

    samples may hold numbers or numeric strings; they are converted to floats
    before the statistics are computed.
    """
    values = np.asarray(samples, dtype=float)
    stats_df = pd.DataFrame(data={
        "Average": [np.mean(values)],
        "Standard Deviation": [np.std(values)],
        "P90": [np.percentile(values, 90)],
        "P95": [np.percentile(values, 95)],
        "P99": [np.percentile(values, 99)]
    })
    return stats_df.style.set_caption(caption)


# Total round-trip time measured by the notebook for every request.
if len(total_times) > 0:
    total_df = _build_stats_table(total_times, "Total Time Statistics")
    display(total_df)
else:
    print("No measurements were collected - run the experiment cell first.")

# The two tables below are shown only when the experiment collected the corresponding samples.
if len(overhead_times) > 0:
    overhead_df = _build_stats_table(overhead_times, "Overhead Time Statistics")
    display(overhead_df)

if len(db_query_execution_times) > 0:
    db_query_execution_df = _build_stats_table(
        db_query_execution_times, "Datasource Query Execution Time Statistics"
    )
    # Show the data source table itself (the total-time table is already displayed above).
    display(db_query_execution_df)