To export this notebook to PDF, install pandoc first:
```bash
apt-get install pandoc 
```

Install the Python dependencies needed for the interactive Bokeh plots:
```bash
pip install bokeh
pip install jupyter_bokeh
```


In [32]:

import pandas as pd
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import re

from numpy import NaN

# Load-test CSV export analysed by this notebook.
results = "./full-stack-test/scorer_load_test_1000vus_45m_increased_timeout.csv"
df = pd.read_csv(results, low_memory=False)

# When the export contains rows tagged "tracked=true", keep only those rows;
# otherwise the whole export is analysed.
tracked_mask = df["extra_tags"] == "tracked=true"
if tracked_mask.any():
    # Explicit copy: a column is assigned below, and assigning on a filtered
    # slice triggers pandas' SettingWithCopyWarning.
    df = df[tracked_mask].copy()

# `timestamp` is read as seconds since the epoch; convert it to datetimes so
# the frame can be resampled per second later on.
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")

# Render Bokeh figures inline in the notebook.
output_notebook()

df

Unnamed: 0,metric_name,timestamp,metric_value,check,error,error_code,expected_response,group,method,name,proto,scenario,service,status,subproto,tls_version,url,extra_tags,metadata
0,data_sent,2024-05-13 20:53:46,0.000000,,,,,::setup,,,,,,,,,,,
1,data_received,2024-05-13 20:53:46,0.000000,,,,,::setup,,,,,,,,,,,
2,iteration_duration,2024-05-13 20:53:46,0.001375,,,,,::setup,,,,,,,,,,,
3,vus,2024-05-13 20:53:46,1000.000000,,,,,,,,,,,,,,,,
4,vus_max,2024-05-13 20:53:46,1000.000000,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2967883,data_sent,2024-05-13 21:39:19,10662.000000,,,,,,,,,default,,,,,,,
2967884,data_received,2024-05-13 21:39:19,3958.000000,,,,,,,,,default,,,,,,,
2967885,data_sent,2024-05-13 21:39:19,0.000000,,,,,::teardown,,,,,,,,,,,
2967886,data_received,2024-05-13 21:39:19,0.000000,,,,,::teardown,,,,,,,,,,,


In [33]:

def extract_url_prefix(url):
    """Collapse per-address URLs into one key per endpoint.

    Args:
        url: Value of the ``url`` column. May be a string or a missing value
            (NaN / None) for rows that carry no URL.

    Returns:
        ``None`` when ``url`` is not a string or contains ``'localhost'``.
        Otherwise, if the URL contains a ``0x``-prefixed 40-hex-digit address,
        everything before the last such address; if it contains none, the URL
        unchanged.
    """
    # Type check instead of `url is NaN`: the identity test only recognises the
    # NumPy singleton, so any other missing value (float('nan'), None, pd.NA)
    # reached the `in` test below and raised TypeError.
    if not isinstance(url, str) or 'localhost' in url:
        return None
    # Greedy `.*` makes group(1) the prefix up to the last address in the URL.
    pattern = r"(.*)(0x[a-fA-F0-9]{40})"
    match = re.match(pattern, url)
    return match.group(1) if match else url

# Attach the address-stripped endpoint to every row. `assign` builds a new
# frame, so no column is set on a possibly filtered slice of the raw data.
df = df.assign(normalized_url=df['url'].apply(extract_url_prefix))

# Drop rows for which no normalized URL was produced.
df = df[df['normalized_url'].notna()]

In [34]:
# Distinct metric names present in the filtered frame.
df["metric_name"].unique()

array(['http_reqs', 'http_req_duration', 'http_req_blocked',
       'http_req_connecting', 'http_req_tls_handshaking',
       'http_req_sending', 'http_req_waiting', 'http_req_receiving',
       'http_req_failed'], dtype=object)

In [35]:
# Column inventory of the working frame (now includes `normalized_url`).
df.columns

Index(['metric_name', 'timestamp', 'metric_value', 'check', 'error',
       'error_code', 'expected_response', 'group', 'method', 'name', 'proto',
       'scenario', 'service', 'status', 'subproto', 'tls_version', 'url',
       'extra_tags', 'metadata', 'normalized_url'],
      dtype='object')

In [36]:
# Non-null value counts per column for every (status, endpoint, method)
# combination among the `http_reqs` rows.
(
    df.loc[df["metric_name"] == "http_reqs"]
    .groupby(by=["status", "normalized_url", "method"])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric_name,timestamp,metric_value,check,error,error_code,expected_response,group,name,proto,scenario,service,subproto,tls_version,url,extra_tags,metadata
status,normalized_url,method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0.0,https://api.staging.scorer.gitcoin.co/account/nonce,GET,3392,3392,3392,0,3392,3392,3392,0,3392,0,3392,0,0,0,3392,0,0
0.0,https://api.staging.scorer.gitcoin.co/ceramic-cache/score/,GET,9349,9349,9349,0,9349,9349,9349,0,9349,0,9349,0,0,0,9349,0,0
0.0,https://api.staging.scorer.gitcoin.co/ceramic-cache/stamp?address=,GET,3730,3730,3730,0,3730,3730,3730,0,3730,0,3730,0,0,0,3730,0,0
0.0,https://api.staging.scorer.gitcoin.co/ceramic-cache/stamps/bulk,PATCH,9335,9335,9335,0,9335,9335,9335,0,9335,0,9335,0,0,0,9335,0,0
0.0,https://api.staging.scorer.gitcoin.co/ceramic-cache/weights,GET,2874,2874,2874,0,2874,2874,2874,0,2874,0,2874,0,0,0,2874,0,0
0.0,https://api.staging.scorer.gitcoin.co/passport-admin/banners,GET,2523,2523,2523,0,2523,2523,2523,0,2523,0,2523,0,0,0,2523,0,0
200.0,https://api.staging.scorer.gitcoin.co/account/nonce,GET,17447,17447,17447,0,0,0,17447,0,17447,17447,17447,0,0,17447,17447,0,0
200.0,https://api.staging.scorer.gitcoin.co/ceramic-cache/score/,GET,79590,79590,79590,0,0,0,79590,0,79590,79590,79590,0,0,79590,79590,0,0
200.0,https://api.staging.scorer.gitcoin.co/ceramic-cache/stamp?address=,GET,20534,20534,20534,0,0,0,20534,0,20534,20534,20534,0,0,20534,20534,0,0
200.0,https://api.staging.scorer.gitcoin.co/ceramic-cache/stamps/bulk,PATCH,73978,73978,73978,0,0,0,73978,0,73978,73978,73978,0,0,73978,73978,0,0


In [37]:
# Disabled alternative normalisation, kept for reference: it takes whatever
# follows the "http://localhost:8080" prefix of `url` as the normalized URL.
# df['normalized_url'] = df.url.str.extract(r'(?<=http://localhost:8080)(.*)')
# Column inventory of the working frame.
df.columns

Index(['metric_name', 'timestamp', 'metric_value', 'check', 'error',
       'error_code', 'expected_response', 'group', 'method', 'name', 'proto',
       'scenario', 'service', 'status', 'subproto', 'tls_version', 'url',
       'extra_tags', 'metadata', 'normalized_url'],
      dtype='object')

In [38]:
# Plot the per-second mean of `http_req_duration` for every endpoint, using
# only the samples whose status is 200.
df_duration = df[df.metric_name == "http_req_duration"]

dft = df_duration.set_index("timestamp")

# One row per (endpoint, status, second) with min / max / mean / count of the
# metric value inside that second.
dfts = dft.groupby(by=["normalized_url", "status"]).resample("s").agg({
    "metric_value": ["min", "max", "mean", "count"]
})

# Turn the group keys back into columns and index by timestamp for plotting.
dfts = dfts.reset_index().set_index("timestamp")

colors = [
    "blue",
    "red",
    "green",
    "purple",
    "orange",
    "teal",
    "pink",
    "yellow",
    "cyan",
    "maroon",
    "olive",
    "navy",
    "magenta",
    "brown",
    "lavender",
    "coral",
    "turquoise",
    "gold"
]

p = figure(title="Duration (status == 200)", x_axis_label='x', y_axis_label='y', frame_width=1500,
           tools="pan,wheel_zoom,box_zoom,reset,hover,crosshair",
           x_axis_type="datetime")

# Colours are handed out in palette order to the plotted series only. The
# modulo wraps the palette instead of raising once there are more series than
# colours (the previous iter()/next() raised StopIteration in that case).
series_idx = 0
for (url, status), group in dfts.groupby(by=["normalized_url", "status"]):
    if int(status) != 200:
        continue
    # `dfts` also carries "min" and "max" columns if those need plotting too.
    p.line(group.index, group["metric_value"]["mean"], legend_label=f"mean - {url}",
           line_width=2, color=colors[series_idx % len(colors)])
    series_idx += 1


show(p)


In [39]:

# Plot the number of `http_req_duration` samples per second (presumably one
# sample per request -- confirm against the exporter) for status-200 traffic.
df_duration = df[df.metric_name == "http_req_duration"]

dft = df_duration.set_index("timestamp")

# One row per (endpoint, status, second) with min / max / mean / count of the
# metric value inside that second.
dfts = dft.groupby(by=["normalized_url", "status"]).resample("s").agg({
    "metric_value": ["min", "max", "mean", "count"]
})

# Turn the group keys back into columns and index by timestamp for plotting.
dfts = dfts.reset_index().set_index("timestamp")

# NOTE(review): "brown" and "red" appear twice, so two series can share a colour.
colors = [
    "blue",
    "red",
    "green",
    "purple",
    "orange",
    "teal",
    "pink",
    "yellow",
    "cyan",
    "maroon",
    "olive",
    "navy",
    "magenta",
    "brown",
    "brown",
    "red",
    "lavender",
    "coral",
    "turquoise",
    "gold",
]


p = figure(title="Req / second (status == 200)", x_axis_label='x', y_axis_label='y', frame_width=1500,
           tools="pan,wheel_zoom,box_zoom,reset,hover,crosshair",
           x_axis_type="datetime")

# `idx` counts every (endpoint, status) group, not only the plotted ones, so it
# can exceed the palette length; wrap with modulo instead of raising IndexError.
for idx, ((url, status), group) in enumerate(dfts.groupby(by=["normalized_url", "status"])):
    if int(status) != 200:
        continue
    # Debug aid: shows which palette slot each plotted series received.
    print(idx)
    p.line(group.index, group["metric_value"]["count"], legend_label=f"{int(status)} - {url}",
           line_width=2, color=colors[idx % len(colors)])


show(p)

1
4
7
10
14
17


In [40]:

from math import pi
from bokeh.models import DatetimeTickFormatter
from collections import defaultdict

# Summarise success / failure counts per endpoint and plot the per-second
# count of samples whose status is not 200.
df_duration = df[df.metric_name == "http_req_duration"]

dft = df_duration.set_index("timestamp")

# One row per (endpoint, status, second) with min / max / mean / count of the
# metric value inside that second.
dfts = dft.groupby(by=["normalized_url", "status"]).resample("s").agg({
    "metric_value": ["min", "max", "mean", "count"]
})

# Turn the group keys back into columns and index by timestamp for plotting.
dfts = dfts.reset_index().set_index("timestamp")

# Base palette. Lookups below wrap with modulo, so the list no longer has to
# be duplicated to cover every group index; colours per index are unchanged.
colors = [
    "blue",
    "red",
    "green",
    "purple",
    "orange",
    "teal",
    "pink",
    "yellow",
    "cyan",
    "maroon",
    "olive",
    "navy",
    "magenta",
    "brown",
    "brown",
    "red",
    "lavender",
    "coral",
    "turquoise",
    "gold",
]

p = figure(title="Req / second (status != 200)", x_axis_label='x', y_axis_label='y', frame_width=1500,
           tools="pan,wheel_zoom,box_zoom,reset,hover,crosshair",
           x_axis_type="datetime")

p.xaxis.major_label_orientation = pi/4

# The auth step could not be made to return 200 during the test, so a 400 from
# this endpoint is the expected outcome and is counted as a success.
auth_url = 'https://api.staging.scorer.gitcoin.co/ceramic-cache/authenticate'

# Per-endpoint totals of successful and failed samples.
url_200_sums = defaultdict(int)
url_non_200_sums = defaultdict(int)

for (url, status), group in dfts.groupby(by=["normalized_url", "status"]):
    total_count = group["metric_value"]["count"].sum()

    # Each group is counted exactly once. Previously the authenticate/400 case
    # was added to the success sum and then also to the non-200 sum.
    expected_auth_400 = url == auth_url and status == 400
    if int(status) == 200 or expected_auth_400:
        url_200_sums[url] += total_count
    else:
        url_non_200_sums[url] += total_count

# Print the summary in sorted order so the output is stable between kernel
# restarts (iterating the raw set union has no guaranteed order).
for url in sorted(url_200_sums.keys() | url_non_200_sums.keys()):
    sum_200 = url_200_sums[url]
    sum_non_200 = url_non_200_sums[url]
    total = sum_200 + sum_non_200
    success_rate = (float(sum_200) / total if total > 0 else 0) * 100

    print(f"URL: {url}")
    print(f"   200       {sum_200}")
    print(f"   non-200   {sum_non_200}")
    print(f"   success {success_rate:.2f}%\n")

    # Number of Passports Created
    # For each VU 4 requests are made to /ceramic-cache/stamps/bulk and /ceramic-cache/score/
    if "stamps/bulk" in url:
        print(f"   Passports Created: {sum_200 / 4}\n")

    if "ceramic-cache/score/" in url:
        print(f"   Passports Scored: {sum_200 / 4}\n")


# Plot every non-200 series. `idx` counts all (endpoint, status) groups, so the
# palette lookup wraps with modulo rather than relying on the list being long enough.
for idx, ((url, status), group) in enumerate(dfts.groupby(by=["normalized_url", "status"])):
    # Debug aid: index of every group, matching the palette slot used below.
    print(idx)
    if int(status) != 200:
        p.line(group.index, group["metric_value"]["count"], legend_label=f"{int(status)} - {url}",
               line_width=2, color=colors[idx % len(colors)])

show(p)

URL: https://api.staging.scorer.gitcoin.co/ceramic-cache/stamps/bulk
   200       73978
   non-200   20944
   success 77.94%

   Passports Created: 18494.5

URL: https://api.staging.scorer.gitcoin.co/ceramic-cache/stamp?address=
   200       20534
   non-200   3764
   success 84.51%

URL: https://api.staging.scorer.gitcoin.co/ceramic-cache/score/
   200       79590
   non-200   15013
   success 84.13%

   Passports Scored: 19897.5

URL: https://api.staging.scorer.gitcoin.co/account/nonce
   200       17447
   non-200   6852
   success 71.80%

URL: https://api.staging.scorer.gitcoin.co/passport-admin/banners
   200       17871
   non-200   6426
   success 73.55%

URL: https://api.staging.scorer.gitcoin.co/ceramic-cache/weights
   200       21401
   non-200   2898
   success 88.07%

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
