In [1]:
import json
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

# Directory that holds the benchmark result json files. The default is the path used
# when this notebook was written; set the NWB_BENCHMARKS_RESULT_DIR environment
# variable to analyze results stored elsewhere without editing this cell.
result_dir = os.environ.get(
    "NWB_BENCHMARKS_RESULT_DIR",
    "/Users/oruebel/Devel/nwb/nwb_benchmarks/results/",
)
# Results file to analyze (the name encodes a timestamp, machine, and environment id)
result_filename = os.path.join(
    result_dir,
    "results_timestamp-2024-04-24-13-52-49_machine-f0-2f-4b-15-53-89.dhcp.lbnl.us_environment-4a85d57591fe6e0e05654aa72b456df5dab8ecd5.json",
)

# Define helper functions for parsing the results

In [2]:
def parse_results(result_filename: str):
    """
    Parse a results json file and its matching machine info json file.

    The machine info file is looked up in the same directory as the results file
    under the name ``info_machine-<machine_hash>.json``, where ``<machine_hash>``
    is the value of the ``machine_hash`` key stored in the results file.

    :param result_filename: Path to the results json file
    :returns: Tuple ``(raw_results, raw_machine_info)`` with the parsed json content
              of the results file and of the machine info file
    :raises KeyError: If the results file does not contain a ``machine_hash`` key
    """
    with open(file=result_filename, mode="r") as io:
        raw_results = json.load(fp=io)
    # Resolve the machine file relative to the results file itself instead of a
    # notebook-level global, so the function works for any results path it is given
    machine_filename = os.path.join(
        os.path.dirname(result_filename),
        f"info_machine-{raw_results['machine_hash']}.json",
    )
    with open(file=machine_filename, mode="r") as io:
        raw_machine_info = json.load(fp=io)
    return raw_results, raw_machine_info


def get_network_tracking_tests(raw_results: dict):
    """
    Collect all network tracking test results in a pandas DataFrame.

    Only the entries of ``raw_results['results']`` whose name starts with
    ``network_tracking`` are used. Each row in the dataframe represents a particular
    test configuration in terms of the test parameters used, and the columns are the
    names of the test cases that were run with those parameters.
    """
    selected_tests = {}
    for test_name, test_result in raw_results['results'].items():
        if test_name.startswith('network_tracking'):
            selected_tests[test_name] = test_result
    return pd.DataFrame.from_records(selected_tests)

def get_time_remote_tests(raw_results: dict):
    """
    Collect all time_remote test results in a pandas DataFrame.

    Only the entries of ``raw_results['results']`` whose name starts with
    ``time_remote`` are used. Each row in the dataframe represents a particular
    test configuration in terms of the test parameters used, and the columns are the
    names of the test cases that were run with those parameters.
    """
    selected_tests = {}
    for test_name, test_result in raw_results['results'].items():
        if test_name.startswith('time_remote'):
            selected_tests[test_name] = test_result
    return pd.DataFrame.from_records(selected_tests)

def get_networktest_row_as_dataframe(results_df: pd.DataFrame, index: int, drop_missing=True):
    """
    From a DataFrame generated by get_network_tracking_tests, get a single row
    (corresponding to a particular parameter setting) as a DataFrame where each row
    corresponds to a particular test case that was run with those parameters and the
    columns are the different metrics that were tracked.

    :param results_df: DataFrame whose cells hold one dict of metrics per
                       (parameter setting, test case), or a missing value
    :param index: Positional index of the row (parameter setting) to extract
    :param drop_missing: If True, leave out test cases that have no result for the
                         selected row. If False, keep them as rows of missing values.
    :returns: DataFrame indexed by test case name with one column per metric. The
              result is empty if the selected row has no results at all.
    :raises IndexError: If ``index`` is outside the rows of ``results_df``
    """
    # Read from the DataFrame that was passed in, not from a notebook-level global
    single_test_case = results_df.iloc[index]
    if drop_missing:
        single_test_case = single_test_case.dropna()
    # Missing cells (only present when drop_missing is False) become empty records so
    # that they show up as rows of missing values instead of breaking the conversion
    records = [
        {} if is_missing else metrics
        for metrics, is_missing in zip(single_test_case.tolist(), single_test_case.isna().tolist())
    ]
    # The plain constructor always uses ``index`` as row labels; DataFrame.from_records
    # would first try to interpret the labels as field names of the records
    single_test_case_df = pd.DataFrame(records, index=single_test_case.index)
    return single_test_case_df

# Load the results

In [3]:
# Parse the results file together with the matching machine info file
raw_results, raw_machine_info = parse_results(result_filename)
# Split the raw results into one table per group of tests
network_tracking_results = get_network_tracking_tests(raw_results)
time_remote_results = get_time_remote_tests(raw_results)

# Show the loaded results

In [4]:
# Render the full table of network tracking results
display(network_tracking_results)

In [5]:
# Render the full table of time_remote results
display(time_remote_results)

# Plot the network tracking results for a single parameter setting (i.e., file and slice)

In [6]:
# Position of the row (parameter setting) of network_tracking_results to inspect
rowindex = 1
# Row label of the selected parameter setting
test_case_params = network_tracking_results.index[rowindex]
# Tracked metrics of every test case that has a result for the selected row
single_test_case = get_networktest_row_as_dataframe(
    network_tracking_results, rowindex, drop_missing=True
)

In [7]:
# Shorten the row labels by dropping the redundant module prefix and method suffix
single_test_case.index = [
    label.replace("network_tracking_remote_file_reading.", "").replace(
        ".track_network_activity_during_read", ""
    )
    for label in single_test_case.index
]

In [8]:
# Print which parameter setting is shown, then render the table of its metrics
print(test_case_params)
display(single_test_case)

In [9]:
# Draw one bar chart per column, passing an empty title for each subplot
single_test_case.plot.bar(
    subplots=True,
    figsize=(5, 15),
    title=[''] * len(single_test_case.columns),
)
plt.show()

# Plot the timing results for a single parameter setting (i.e., file and slice)

In [10]:
# Position of the row (parameter setting) of time_remote_results to inspect
rowindex = 1
# Row label of the selected parameter setting
test_case_params = time_remote_results.index[rowindex]
# Results of every test case that has a value for the selected row
single_time_result = time_remote_results.iloc[rowindex].dropna()

In [11]:
# Simplify the index by removing the redundant module prefix. Any "." left at the
# start of a label after removing the prefix is stripped as well, so that labels do
# not begin with a separator (matching how the network tracking labels are shortened)
single_time_result.index = [
    name.replace("time_remote_file_reading", "").lstrip(".")
    for name in single_time_result.index
]

In [12]:
# Summarize the values recorded for each test case by their maximum, median, and
# minimum (one row per test case), then show the summary as a grouped bar chart
temp = pd.DataFrame(
    {
        'max': [np.max(samples) for samples in single_time_result],
        'median': [np.median(samples) for samples in single_time_result],
        'min': [np.min(samples) for samples in single_time_result],
    },
    index=single_time_result.index,
)
temp.plot.bar(title=f"Times for: {test_case_params}")
plt.show()