# Performance Analysis on CanDIGv2 Playwright API

## Import packages

In [None]:
import pandas as pd
import altair as alt
import zipfile
import os
import shutil

# Switch Altair to the "vegafusion" data transformer for every chart below.
# NOTE(review): presumably needed so the full report dataset is not rejected by
# Altair's default row limit; needs the vegafusion package installed - confirm.
alt.data_transformers.enable("vegafusion")

## Set environment variables

In [6]:
# Folder holding the playwright-runbook checkout; change it to match your
# local environment.
path_to_playwright = "/path/to/folder/playwright-runbook"

# Text a step title must contain to be kept in the analysis (for tyk report).
filter_value = "http://localhost:5080/"

## Copy html report from playwright to folder playwright-html

In [7]:
def copy_file(src_file, dest_folder):
    """Copy ``src_file`` into ``dest_folder``, creating the folder if needed.

    The copy is best-effort: a file-system failure (missing source, missing
    permission, ``dest_folder`` already existing as a regular file, ...) is
    reported on stdout instead of being raised, so the rest of the notebook
    keeps running.

    Args:
        src_file: Path of the file to copy.
        dest_folder: Directory that receives the copy; created when missing.
    """
    try:
        # exist_ok=True replaces the check-then-create sequence (no race) and
        # fails when dest_folder is an existing file, so that file is never
        # silently overwritten by the copy below.
        os.makedirs(dest_folder, exist_ok=True)
        shutil.copy(src_file, dest_folder)
    except OSError as e:
        # shutil.Error derives from OSError, so copy failures land here too,
        # while programming errors (e.g. a wrong argument type) stay visible.
        print(f"Error occurred: {e}")

# Destination: a "playwright-html" folder next to this notebook's working dir.
dest_folder = os.path.join(os.getcwd(), "playwright-html")
# Source: the index.html found under playwright-report in the runbook folder.
src_file = f'{path_to_playwright}/playwright-report/index.html'
copy_file(src_file=src_file, dest_folder=dest_folder)

## Unzip report.zip from playwright to playwright-data

In [None]:
def unzip_file(zip_file, extract_to=None):
    """Extract every member of ``zip_file`` into a target directory.

    Args:
        zip_file: Path of the zip archive to read.
        extract_to: Directory that receives the extracted files. When omitted,
            ``playwright-data`` under the current working directory is used.
            The directory is created if it does not exist yet.

    Raises:
        FileNotFoundError: If ``zip_file`` does not exist.
        zipfile.BadZipFile: If ``zip_file`` is not a valid zip archive.
    """
    if extract_to is None:
        # Historical default, kept so existing one-argument calls still work.
        extract_to = os.path.join(os.getcwd(), "playwright-data")
    os.makedirs(extract_to, exist_ok=True)

    with zipfile.ZipFile(zip_file, "r") as zip_ref:
        zip_ref.extractall(extract_to)

# The archive is expected at blob-report/report.zip inside the runbook folder.
zip_file_path = f'{path_to_playwright}/blob-report/report.zip'
unzip_file(zip_file=zip_file_path)

## Load data into dataframe

In [9]:
# report.jsonl holds one JSON record per line, hence lines=True.
df_original = pd.read_json(
    path_or_buf="./playwright-data/report.jsonl",
    lines=True,
)
# The first 4 rows are metadata that the step analysis does not need; drop
# them and restart the index at 0.
df_without_metadata = df_original.iloc[4:, :].reset_index(drop=True)
# df_without_metadata.head()

# Get metadata

In [10]:
# Split the report by event type: "onEnd" rows are used later for the total
# run time, "onProject" rows for the testId -> title lookup.
df_onEnd = df_original[df_original["method"].eq("onEnd")]
df_metadata = df_original[df_original["method"].eq("onProject")]
# df_metadata.head()

# Get title with testID

In [11]:
# Flatten project -> suites -> entries of one "onProject" record into a table
# with one row per entry.
# NOTE(review): position 1 selects the second "onProject" record, presumably
# the performance project - confirm against the playwright configuration.
df_metadata_perf = pd.json_normalize(
    df_metadata["params"].iloc[1],
    record_path=["project", "suites", "entries"],
)
# Only the id/title pair is needed to label the measurements.
df_title_with_id = df_metadata_perf.loc[:, ["testId", "title"]]
# df_title_with_id.head()

## Link duration for each test

In [12]:
# Flatten the nested "params" dicts into dotted columns ("step.id", ...).
df_params = pd.json_normalize(df_without_metadata["params"])

# Keep the rows whose step title contains filter_value (plain substring
# match; rows without a title are excluded through na=False).
df_filtered_value = df_params.loc[
    df_params["step.title"].str.contains(filter_value, na=False, regex=False)
]
df_start_time_with_id = df_filtered_value[["step.id", "step.startTime", "testId"]]

# Rows that carry a duration value.
df_duration = df_params.loc[df_params["step.duration"].notna()]

# Attach the duration to each matching step through the shared step id; the
# left join keeps matching steps even when no duration row exists for them.
df_start_time_with_id_and_duration = pd.merge(
    df_start_time_with_id,
    df_duration[["step.id", "step.duration"]],
    how="left",
    on="step.id",
)
df_duration_with_id = df_start_time_with_id_and_duration[
    ["step.id", "step.duration", "testId"]
]
# df_duration_with_id.head()

## Link testId with title for a readable dataset

In [13]:
# Look up the human-readable title of every measurement via its testId.
df_duration_with_id = pd.merge(
    df_duration_with_id,
    df_title_with_id,
    how="left",
    on="testId",
    suffixes=("", "_y"),
)

# Keep only what the statistics and charts need, under a shorter column name.
df_duration_with_title = df_duration_with_id[["title", "step.duration"]].rename(
    columns={"step.duration": "duration"}
)
# df_duration_with_title.head()

## Basic analysis

In [None]:
# Total run time, read from the "onEnd" record.
# NOTE(review): dividing by 1000 assumes result.duration is in milliseconds.
df_normalized = pd.json_normalize(df_onEnd['params'])
total_time = df_normalized['result.duration'].div(1000)
print(f"Total time: {total_time.iloc[0]:.2f} s")

# Percentile helpers, usable as aggregation functions on a pandas Series.
def get_p90(x):
    """Return the 90th percentile of ``x``."""
    p90 = x.quantile(q=0.90)
    return p90


def get_p95(x):
    """Return the 95th percentile of ``x``."""
    p95 = x.quantile(q=0.95)
    return p95

# One row of summary statistics per title: extremes, median, p90/p95 and the
# number of measurements.
result = df_duration_with_title.groupby("title")["duration"].agg(
    min="min",
    max="max",
    median="median",
    p90=get_p90,
    p95=get_p95,
    count="count",
).reset_index()
# Lowest median first; left as the last expression so the notebook shows it.
result.sort_values("median")

## Boxplot

In [None]:
# Horizontal box plot: one box per title, duration on the x axis.
box_plot = alt.Chart(df_duration_with_title, title="API timings").mark_boxplot(size=30)
box_plot = box_plot.encode(
    y=alt.Y("title:N", title="Endpoints"),
    x=alt.X("duration:Q", title="Duration (ms)"),
    # Colour only distinguishes the boxes; the y axis already names them.
    color=alt.Color("title:N", legend=None),
    tooltip=["duration:Q"],
)
box_plot = box_plot.properties(width=800, height=400)
box_plot.show()

## Histogram

In [None]:
# Binned duration counts, drawn as one small chart per title.
duration_bars = alt.Chart(df_duration_with_title).mark_bar().encode(
    x=alt.X("duration:Q", bin=True, title="Duration (ms)"),
    y=alt.Y("count():Q", title="Count"),
    color=alt.Color("title:N", legend=None),
    tooltip=[alt.Tooltip("count():Q", title="Count")],
)
# Three facets per row; every facet gets its own x axis.
histogram = duration_bars.facet(
    facet="title:N", columns=3, title="API timings"
).resolve_axis(x='independent')

histogram.show()

## Timeline

In [17]:
# Same testId -> title lookup as for the statistics, but the step start time
# is kept so measurements can be placed on a time axis.
df_with_timestamp = pd.merge(
    df_start_time_with_id_and_duration,
    df_title_with_id,
    how='left',
    on='testId',
    suffixes=('', '_y'),
)
df_timeline = df_with_timestamp[['title', 'step.duration', 'step.startTime']].rename(
    columns={'step.duration': 'duration'}
)
# df_timeline.head()

# Line chart

In [None]:
# Offset of every step from the earliest recorded step start.
df_timeline['relativeTime'] = df_timeline['step.startTime'].sub(
    df_timeline['step.startTime'].min()
)

# One line (with point markers) per title: duration against the offset.
chart = (
    alt.Chart(df_timeline)
    .mark_line(point=True)
    .encode(
        x=alt.X('relativeTime:Q', title='Period'),
        y=alt.Y('duration:Q', title='Response time (ms)'),
        color='title:N',
        tooltip=['title', 'duration', 'relativeTime'],
    )
    .properties(title='Response time over period', width=800, height=500)
)

chart.display()