# Introduction

The load test process generates a JSON file that contains:
* Some metadata about the test itself.
* The results of the test, in CSV format.

The CSV results contain the following columns:

* **Id**: A unique incremental id for every row in the file.
* **Elapsed**: The load test time that has elapsed (in milliseconds).
* **Total workers**: The number of active workers executing a flow.
* **State**: **Success** when the test has been executed successfully; otherwise, the state's value is **Failure**.
* **Duration**: The time that has been spent to execute the test (in milliseconds).

Finally, the data frame (df) is sorted by elapsed time, and the Elapsed and Duration fields are also mapped to seconds.

In [None]:
# Path to the JSON report produced by the load test. It must be set before the
# notebook is run: the cell that builds `results` raises if this is still None.
results_file_path = None

In [5]:
%pip install pandas matplotlib
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import os.path
import glob
import json
import io

class LoadTestResults:
    """Load a load-test JSON report and plot its results.

    The report is a JSON object with two entries that are read here:
    ``"metadata"`` (kept as-is in ``self.metadata``) and ``"content"``, the
    test results as CSV text. The CSV is expected to provide the ``Id``,
    ``Elapsed``, ``Total workers``, ``State`` and ``Duration`` columns used by
    the plotting methods.

    Attributes set by ``__init__``:
        metadata: The ``"metadata"`` entry of the report.
        df: The parsed results, sorted by ``Elapsed``, with the extra columns
            ``Elapsed in seconds`` and ``Duration in seconds``.
        plot_kwargs: Keyword arguments forwarded to every ``DataFrame.plot``
            call (currently only the figure size).
    """

    def __init__(self, file_path):
        """Read the report at *file_path* and prepare the data frame.

        Raises:
            ValueError: If the CSV content has no data rows.
        """
        # Use a context manager so the file handle is closed deterministically.
        with open(file_path, encoding="utf-8") as report_file:
            report = json.load(report_file)

        df = pd.read_csv(io.StringIO(report["content"]))
        if df.empty:
            raise ValueError("input file is empty")

        # Order rows by elapsed time; a stable sort keeps rows with the same
        # timestamp in file order.
        df = df.sort_values("Elapsed", kind="stable")

        # Source values are divided by 1000 and rounded to whole seconds so
        # that rows can be bucketed per second.
        df["Elapsed in seconds"] = (df["Elapsed"] / 1000).round(0)
        df["Duration in seconds"] = (df["Duration"] / 1000).round(0)

        self.metadata = report["metadata"]
        self.df = df
        self.plot_kwargs = {
            "figsize": (15, 5),
        }

    def _plot_line(self, df, y, title, **kwargs):
        """Draw column *y* of *df* against ``Elapsed in seconds`` as a line.

        Extra keyword arguments are passed to ``DataFrame.plot`` on top of
        ``self.plot_kwargs``. Returns the axes returned by ``DataFrame.plot``.
        """
        return df.plot(
            kind="line",
            x="Elapsed in seconds",
            y=y,
            title=title,
            **{**self.plot_kwargs, **kwargs},
        )

    def _per_second(self, df, column, how):
        """Aggregate *column* of *df* with *how* for each elapsed second."""
        return df.groupby(["Elapsed in seconds"]).agg({column: how}).reset_index()

    def plot_active_workers_over_time(self):
        """Plot the mean ``Total workers`` value for each elapsed second."""
        workers = self._per_second(self.df, "Total workers", "mean")
        self._plot_line(workers, "Total workers", "Number of active workers")
        plt.show()

    def plot_test_result_over_time(self):
        """Plot, per elapsed second, the row count of each test state."""
        ax = None
        for state in ["Success", "Failure"]:
            matching = self.df.loc[self.df["State"] == state]
            counts = self._per_second(matching, "Id", "count")
            counts = counts.rename(columns={"Id": state})
            # The first call creates the axes; later calls draw onto them so
            # both states share one chart.
            this_ax = self._plot_line(counts, state, "Test results", ax=ax)
            if ax is None:
                ax = this_ax
        plt.show()

    def plot_flow_duration_over_time(self):
        """Plot the mean ``Duration`` (divided by 1000) per elapsed second."""
        durations = self._per_second(self.df, "Duration", "mean")
        # Recomputed from the unrounded mean, unlike the rounded column that
        # ``__init__`` stores on ``self.df``.
        durations["Duration in seconds"] = durations["Duration"] / 1000
        self._plot_line(
            durations,
            "Duration in seconds",
            "Average duration of each flow",
        )
        plt.show()

    def plot_flows_per_second_over_time(self):
        """Plot the number of result rows recorded in each elapsed second."""
        series_name = "Speed (Test/Second)"
        flows = self._per_second(self.df, "Id", "count")
        # rename() maps old name -> new name; the count lives in 'Id'.
        flows = flows.rename(columns={"Id": series_name})
        self._plot_line(flows, series_name, "Flows/second", ylabel="Count")
        plt.show()

    def plot_secrets_per_second_over_time(self):
        """Plot ``metadata['secret_count']`` summed per elapsed second.

        The chart title depends on ``metadata['kind']``; an unrecognised kind
        yields an empty title.
        """
        titles = {
            "StoreValues": "Stored values/second",
            "RetrieveValue": "Retrieved values/second",
            "Compute": "Compute secrets / second",
        }
        title = titles.get(self.metadata["kind"], "")

        df = self.df.copy()
        # Every row contributes the same per-test secret count.
        df["Secrets"] = self.metadata["secret_count"]
        secrets = self._per_second(df, "Secrets", "sum")
        self._plot_line(secrets, "Secrets", title, ylabel="Secret count")
        plt.show()

    def plot_throughput_over_time(self):
        """Plot ``metadata['secrets_size'] / 1024`` summed per elapsed second.

        The chart title depends on ``metadata['kind']``; an unrecognised kind
        yields an empty title.
        """
        titles = {
            "StoreValues": "Storage throughput",
            "Compute": "Storage throughput",
            "RetrieveValue": "Retrieval throughput",
        }
        title = titles.get(self.metadata["kind"], "")

        df = self.df.copy()
        # Every row contributes the same per-test size, scaled by 1/1024.
        df["Kilobytes/s"] = self.metadata["secrets_size"] / 1024
        throughput = self._per_second(df, "Kilobytes/s", "sum")
        self._plot_line(
            throughput,
            "Kilobytes/s",
            title,
            ylabel="Throughput (Kilobytes/s)",
        )
        plt.show()

    def plot_flow_duration_histogram(self):
        """Plot one ``Duration`` histogram per distinct ``Total workers`` value."""
        column = "Duration"
        # Group by a scalar key (not a one-element list) so the group name is
        # the plain worker count on every pandas version.
        for worker_count, group in self.df.groupby("Total workers"):
            values = group[column]
            group.plot(
                kind="hist",
                range=(values.min(), values.max()),
                xlabel="Operation duration (ms)",
                column=column,
                title=f"Duration for {worker_count} workers",
                **self.plot_kwargs,
            )
        plt.show()
        
# Fail early with an explicit message if the notebook parameter was left unset.
if results_file_path is None:
    raise Exception("results_file_path parameter not set")

# Parse the report once; the plotting cells below all use this `results` object.
results = LoadTestResults(results_file_path)

SyntaxError: '[' was never closed (2448118494.py, line 57)

## Workers running throughout the testing process

This shows how the number of workers changed as the test was running.

In [None]:
# Line chart of the mean 'Total workers' value for each elapsed second.
results.plot_active_workers_over_time()

## Test results

This shows the test results split (success/failure) over time.

In [None]:
# One line per state ('Success', 'Failure'): number of rows per elapsed second.
results.plot_test_result_over_time()

## Average flow duration

This shows the average duration of flows over the duration of the test.

In [None]:
# Line chart of the mean 'Duration' per elapsed second, divided by 1000 (ms -> s).
results.plot_flow_duration_over_time()

## Flows per second

This shows the flows per second over the duration of the test.

In [None]:
# Line chart of the number of result rows recorded in each elapsed second.
results.plot_flows_per_second_over_time()

## Secrets per second

This shows the stored / retrieved secrets per second, depending on the type of test that was run.

In [None]:
# Line chart of metadata['secret_count'] summed over the rows of each elapsed second.
results.plot_secrets_per_second_over_time()

## Throughput

This shows the stored / retrieved secrets throughput.

In [None]:
# Line chart of metadata['secrets_size'] / 1024 summed over the rows of each elapsed second.
results.plot_throughput_over_time()

## Number of flows grouped by duration

This shows a histogram of the duration of each flow grouped by the number of workers that were active.

In [None]:
# One histogram figure per distinct 'Total workers' value.
results.plot_flow_duration_histogram()