# Pipeclean Test Analysis

## Setup

Please run the following cell, enter the path to the performance test results, and click the Run button. The default value matches the `.env.example` file in the root of this repository. If the path you enter contains multiple sets of results (one per subdirectory), this notebook will analyze the most recent set of test results.

In [49]:
import ipywidgets as widgets
from IPython.display import display, HTML
import pandas as pd

from edfi_perf_test_analysis.ui_helpers import log_error, log_info, markdown, log_warning
from os import scandir, path
        

from typing import Optional
def display_df(df: pd.DataFrame, max_rows: Optional[int] = None) -> None:
    """Render a DataFrame as a left-aligned HTML table in the cell output.

    Missing values are replaced with empty strings and the Styler is asked to
    format with a precision of 2. When the frame has no rows, a warning is
    logged instead and nothing is rendered.

    Parameters
    ----------
    df
        The data to show.
    max_rows
        Forwarded to ``Styler.to_html`` as its ``max_rows`` argument.
    """
    row_count, _ = df.shape
    if row_count == 0:
        log_warning("No data to display")
        return

    cell_alignment = {"text-align": "left"}
    header_alignment = {"selector": "th", "props": [("text-align", "left")]}

    styler = df.fillna("").style
    styler = styler.format(precision=2)
    styler = styler.set_properties(**cell_alignment)
    styler = styler.set_table_styles([header_alignment])

    # NOTE(review): `index` is not a documented Styler.to_html parameter; it is
    # accepted only as an extra keyword argument. Confirm whether hiding the
    # index was actually intended here.
    table_html = styler.to_html(index=False, max_rows=max_rows)
    display(HTML(table_html))



# Input controls: a text box holding the results location and the button that
# triggers the analysis.
results_path = widgets.Text(
    value="../../testResults",
    description="Results Path:",
)
run = widgets.Button(description="Run", button_style="primary")

# Output area that the click handler clears and writes into.
output: widgets.Output = widgets.Output()

display(results_path, run, output)

def on_run(button) -> None:
    """Handle a click on the Run button: locate the results and render them.

    Reads the path currently entered in ``results_path``. If that directory
    contains sub-directories, the one whose path sorts last is analyzed;
    otherwise the directory itself is used. Three CSV files are then read from
    the chosen directory (``pipeclean_exceptions.csv``,
    ``pipeclean_failures.csv`` and ``pipeclean_stats.csv``) and rendered as
    tables inside ``output``.

    Parameters
    ----------
    button
        The argument passed by the ``on_click`` callback; it is not used.
    """
    with output:
        output.clear_output()
        file_path = results_path.value

        # Use isdir rather than exists: a path to a regular file passes an
        # existence check but would make scandir raise NotADirectoryError.
        if not path.isdir(file_path):
            log_error(f"Directory `{file_path}` does not exist or could not be read.")
            return

        # The context manager releases the directory handle promptly.
        with scandir(file_path) as entries:
            directories = sorted(entry.path for entry in entries if entry.is_dir())

        # No sub-directories: assume this directory itself has the test results.
        # NOTE(review): picking the last sorted path as "newest" assumes the
        # sub-directory names sort chronologically - confirm against the tool
        # that writes them.
        results_dir: str = directories[-1] if directories else file_path

        log_info(results_dir)

        exceptions = pd.read_csv(
            path.join(results_dir, "pipeclean_exceptions.csv")
        ).set_index("Message")

        failures = pd.read_csv(
            path.join(results_dir, "pipeclean_failures.csv")
        ).set_index("Name")

        stats = pd.read_csv(path.join(results_dir, "pipeclean_stats.csv"))

        # Remove the "Aggregated" row so it does not skew the per-request figures.
        stats = stats[stats["Name"] != "Aggregated"]

        # Create a new column combining Type and Name, then index on it.
        stats = stats.assign(
            Request=stats["Type"] + " " + stats["Name"]
        ).set_index("Request")

        stats = stats[
            ["Average Response Time", "Request Count", "Failure Count"]
        ].rename(columns={"Average Response Time": "Response Time"})

        markdown("### Exceptions")
        markdown("First five exceptions")
        display_df(exceptions, 5)

        markdown("### Failures")
        markdown("First five failures")
        display_df(failures, 5)

        markdown("### Summary Stats")
        # Guard on the number of rows (shape[0]); the column count is fixed by
        # the selection above, so testing shape[1] could never be false.
        if stats.shape[0] > 0:
            summary_stats = stats.aggregate(
                {
                    "Request Count": ["sum"],
                    "Failure Count": ["sum"],
                    "Response Time": ["mean", "min", "max"],
                }
            )
            display_df(summary_stats)
        else:
            # display_df logs a warning when it is handed a frame with no rows.
            display_df(stats)

        markdown("### Ten Worst Average Response Times")
        display_df(stats.sort_values(by=["Response Time"], ascending=False), 10)

        markdown("### Ten Best Average Response Times")
        display_df(stats.sort_values(by=["Response Time"], ascending=True), 10)
        

        
# Register on_run as the click handler of the Run button.
run.on_click(on_run)

Text(value='../../testResults', description='Results Path:')

Button(button_style='primary', description='Run', style=ButtonStyle())

Output()

In [53]:
from edfi_perf_test_analysis.ui_helpers import select_dir_and_run

def pipeclean_analysis(results_dir: str) -> None:
    """Load the pipeclean CSV files from a directory and render the analysis.

    Reads ``pipeclean_exceptions.csv``, ``pipeclean_failures.csv`` and
    ``pipeclean_stats.csv`` from ``results_dir`` and renders, in order: the
    exceptions, the failures, summary statistics, and the requests sorted by
    worst and by best average response time.

    Parameters
    ----------
    results_dir
        Directory that contains the three CSV files.
    """
    exceptions = pd.read_csv(
        path.join(results_dir, "pipeclean_exceptions.csv")
    ).set_index("Message")

    failures = pd.read_csv(
        path.join(results_dir, "pipeclean_failures.csv")
    ).set_index("Name")

    stats = pd.read_csv(path.join(results_dir, "pipeclean_stats.csv"))

    # Remove the "Aggregated" row so it does not skew the per-request figures.
    stats = stats[stats["Name"] != "Aggregated"]

    # Create a new column combining Type and Name, then index on it.
    stats = stats.assign(
        Request=stats["Type"] + " " + stats["Name"]
    ).set_index("Request")

    stats = stats[
        ["Average Response Time", "Request Count", "Failure Count"]
    ].rename(columns={"Average Response Time": "Response Time"})

    markdown("### Exceptions")
    markdown("First five exceptions")
    display_df(exceptions, 5)

    markdown("### Failures")
    markdown("First five failures")
    display_df(failures, 5)

    markdown("### Summary Stats")
    # Guard on the number of rows (shape[0]); the column count is fixed by the
    # selection above, so testing shape[1] could never be false.
    if stats.shape[0] > 0:
        summary_stats = stats.aggregate(
            {
                "Request Count": ["sum"],
                "Failure Count": ["sum"],
                "Response Time": ["mean", "min", "max"],
            }
        )
        display_df(summary_stats)
    else:
        # Hand the empty frame to display_df, as the other sections do.
        display_df(stats)

    markdown("### Ten Worst Average Response Times")
    display_df(stats.sort_values(by=["Response Time"], ascending=False), 10)

    markdown("### Ten Best Average Response Times")
    display_df(stats.sort_values(by=["Response Time"], ascending=True), 10)
    
# Pass the analysis function to the shared UI helper.
# NOTE(review): presumably select_dir_and_run shows the path/Run widgets and
# calls the function with the chosen results directory - confirm against
# edfi_perf_test_analysis.ui_helpers.
select_dir_and_run(pipeclean_analysis)

Text(value='../../testResults', description='Results Path:')

Button(button_style='primary', description='Run', style=ButtonStyle())

Output()