# Setup

In [None]:
import pandas as pd
import seaborn as sns
import os
import linecache

from run_profile import load_pkl
from viz import plot_grouped_barchart, convert_shortened_bytes_to_int, bytes_to_readable_fmt
import tracemalloc
import warnings
import yappi
# Silence all Python warnings for the rest of the notebook session.
warnings.filterwarnings("ignore")
# Use seaborn's "whitegrid" style for every plot drawn in this notebook.
sns.set_style("whitegrid")

# Analyze Overall Results

In [None]:
# Names (without the ".pkl" suffix) of the profiling result files to load
# from ./profiles/. Alternative selection kept for convenience:
# files_to_load = ['profiling_4', 'profiling_5', 'profiling_6']

files_to_load = ['profiling_3']

# Concatenate the records returned by load_pkl for every selected file.
results = []
for file in files_to_load:
    results += load_pkl(f"{os.getcwd()}/profiles/{file}.pkl")


df = pd.DataFrame.from_records(results)

# Add minute-denominated copies of the runtime columns for easier plotting.
for col in ['total_runtime_seconds', 'setup_runtime_seconds', 'evaluation_runtime_seconds']:
    df[col.replace('_seconds', '_minutes')] = df[col] / 60

# parse docker output into bytes
for col in ['pgvalor_disk_space']:
    # Split "<used> (virtual <virtual>)"-style strings on "(" into two columns
    # (format presumed from the clean-up below -- confirm against the profiler).
    split_df = df.loc[:, col].str.split('(', expand=True)
    # Raw string: '\)' in a normal string literal is an invalid escape sequence.
    split_df[1] = split_df[1].replace(to_replace=r'\)', value="", regex=True).replace(to_replace='virtual ', value="", regex=True)
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use whichever this pandas version provides.
    elementwise = split_df.map if hasattr(split_df, 'map') else split_df.applymap
    split_df = elementwise(convert_shortened_bytes_to_int)
    split_df.columns = [f'{col}_used', f'{col}_virtual']
    split_df[f'{col}_total'] = split_df.sum(axis=1)
    df = pd.concat([df, split_df], axis=1)

# Convert "NN%" strings into fractions.
for col in ['pgvalor_cpu_util', 'pgvalor_mem_util']:
    df[col] = df[col].str.rstrip('%').astype('float') / 100.0


# combine tracemalloc columns: the integer-named columns 0..9 are folded into
# one dict per row and then dropped.
tracemalloc_cols = list(range(10))
df['client_tracemalloc_top10'] = df[tracemalloc_cols].apply(dict, axis=1)
df.drop(tracemalloc_cols, axis=1, inplace=True)
df

In [None]:
# Show the column labels of the assembled results frame.
df.columns

## Total Runtime

In [None]:
# Bar chart of total_runtime_minutes against n_images (no hue grouping).
plot_grouped_barchart(
    df=df,
    x="n_images",
    y="total_runtime_minutes",
    hue=None,
)


In [None]:
# Bar chart of setup_runtime_minutes against n_images (no hue grouping).
plot_grouped_barchart(
    df=df,
    x="n_images",
    y="setup_runtime_minutes",
    hue=None,
)


In [None]:
# Bar chart of evaluation_runtime_minutes against n_images (no hue grouping).
plot_grouped_barchart(
    df=df,
    x="n_images",
    y="evaluation_runtime_minutes",
    hue=None,
)


In [None]:
# Bar chart of total_runtime_minutes against n_images, with n_annotations as hue.
plot_grouped_barchart(
    df=df,
    x="n_images",
    y="total_runtime_minutes",
    hue='n_annotations',
)


## Docker Stats

### postgis

In [None]:
# Bar chart of pgvalor_disk_space_total against n_images, with byte conversion enabled.
plot_grouped_barchart(
    df=df,
    x="n_images",
    y="pgvalor_disk_space_total",
    hue=None,
    y_axis_label='total disk space used by postgresql',
    convert_bytes=True,
)


In [None]:
# NOTE: mem_util is the memory usage at the time the snapshot was taken,
# not the overall peak memory usage.
plot_grouped_barchart(
    df=df,
    x="n_images",
    y="pgvalor_mem_util",
    hue=None,
    y_axis_label='memory used by pgvalor after profiling function call',
    convert_perc=True,
)

# Analyze Backend Profiles

## cprofile

Use these commands to view the cProfile reports in snakeviz:

```
snakeviz utils/profiles/create_groundtruths.cprofile
snakeviz utils/profiles/create_predictions.cprofile
snakeviz utils/profiles/create_detection_metrics.cprofile

```

## tracemalloc

### create_groundtruths

In [None]:
# Name of the profiled function; the profile file paths below are built from it.
function_to_analyze = 'create_groundtruths'

def _print_tracemalloc_peaks(dct: dict) -> None:
    """Print the first and second sizes stored in ``dct`` and their relative change.

    Args:
        dct: Mapping with numeric ``"first_size"`` and ``"second_size"``
            entries (presumably byte counts, since both are passed to
            ``bytes_to_readable_fmt`` -- confirm against the profiler output).

    Raises:
        KeyError: If either size entry is missing from ``dct``.
    """
    first_size = dct["first_size"]
    second_size = dct["second_size"]

    print(f'Original size: {bytes_to_readable_fmt(first_size, 0)}')
    print(f'Final size: {bytes_to_readable_fmt(second_size, 0)}')
    if first_size:
        size_pct = (second_size - first_size) / first_size
        # ".2%" gives two decimal places. The former "2%" spec only set a
        # minimum field width, so six decimals were printed ("50.000000%").
        print(f'Percent Change: {size_pct:.2%}')
    else:
        # A zero baseline has no defined relative change; report that
        # instead of raising ZeroDivisionError.
        print('Percent Change: n/a')
    print('')

# Load the saved tracemalloc snapshot and the companion pickle stored next to it.
tracemalloc_path = f'profiles/{function_to_analyze}.tracemalloc'
snapshot = tracemalloc.Snapshot.load(tracemalloc_path)
tracemalloc_dct = load_pkl(f'{tracemalloc_path}.pkl')

# Print the original size, final size and percent change from the pickle.
_print_tracemalloc_peaks(tracemalloc_dct)

In [None]:

def _display_top_tracemalloc(snapshot:tracemalloc.Snapshot, key_type:str='lineno', limit:int=10):
    """Print the leading allocation statistics of a tracemalloc snapshot.

    The snapshot's traces are filtered first: ``<frozen importlib._bootstrap>``
    and ``<unknown>`` are excluded, and only filenames matching ``*/valor/*``
    are included. The remaining statistics are grouped by ``key_type``; the
    first ``limit`` entries are printed with their size in KiB (plus the
    source line when ``linecache`` can find it), followed by a summary of the
    remaining entries and the overall total.

    Args:
        snapshot: Snapshot to report on.
        key_type: Grouping key passed to ``Snapshot.statistics``.
        limit: Number of entries to print individually.
    """
    kib = 1024
    trace_filters = (
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
        tracemalloc.Filter(True, '*/valor/*'),
    )
    ranked = snapshot.filter_traces(trace_filters).statistics(key_type)
    leaders = ranked[:limit]
    remainder = ranked[limit:]

    print("Top %s lines" % limit)
    for rank, entry in enumerate(leaders, start=1):
        # First frame of the entry's traceback.
        top_frame = entry.traceback[0]
        filename, lineno = top_frame.filename, top_frame.lineno
        print("#%s: %s:%s: %.1f KiB"
              % (rank, filename, lineno, entry.size / kib))
        source_line = linecache.getline(filename, lineno).strip()
        if not source_line:
            # linecache returned nothing for this location; skip the detail lines.
            continue
        print('    loc: %s:%s' % (filename, lineno))
        print('    func: %s' % source_line)

    if remainder:
        remainder_size = sum(entry.size for entry in remainder)
        print("%s other: %.1f KiB" % (len(remainder), remainder_size / kib))
    grand_total = sum(entry.size for entry in ranked)
    print("Total allocated size: %.1f KiB" % (grand_total / kib))

# Report the top allocations for the snapshot loaded above (default key type and limit).
_display_top_tracemalloc(snapshot)

### create_predictions

In [None]:
# Name of the profiled function whose saved profiles are analyzed in this section.
function_to_analyze = 'create_predictions'

# Load the saved tracemalloc snapshot and the companion pickle stored next to it.
tracemalloc_path = f'profiles/{function_to_analyze}.tracemalloc'
snapshot = tracemalloc.Snapshot.load(tracemalloc_path)
tracemalloc_dct = load_pkl(f'{tracemalloc_path}.pkl')

# Print the original size, final size and percent change from the pickle.
_print_tracemalloc_peaks(tracemalloc_dct)

In [None]:
# Report the top allocations for the snapshot loaded above (default key type and limit).
_display_top_tracemalloc(snapshot)

### create_detection_metrics

In [None]:
# Name of the profiled function whose saved profiles are analyzed in this section.
function_to_analyze = 'create_detection_metrics'

# Load the saved tracemalloc snapshot and the companion pickle stored next to it.
tracemalloc_path = f'profiles/{function_to_analyze}.tracemalloc'
snapshot = tracemalloc.Snapshot.load(tracemalloc_path)
tracemalloc_dct = load_pkl(f'{tracemalloc_path}.pkl')

# Print the original size, final size and percent change from the pickle.
_print_tracemalloc_peaks(tracemalloc_dct)

In [None]:
# Report the top allocations for the snapshot loaded above (default key type and limit).
_display_top_tracemalloc(snapshot)

## yappi

NOTE: yappi output is difficult to visualize without KCachegrind. Prefer cProfile for now.

In [None]:
# Take the function-stats object yappi returns for this session and add the
# saved create_groundtruths profile to it.
stats = yappi.get_func_stats()
stats.add("profiles/create_groundtruths.yappi")

# Print every entry, sorted by the "tsub" key in descending order.
stats.sort("tsub", "desc").print_all()

In [None]:
# Take the function-stats object yappi returns for this session and add the
# saved create_predictions profile to it.
stats = yappi.get_func_stats()
stats.add("profiles/create_predictions.yappi")

# Print every entry, sorted by the "tsub" key in descending order.
stats.sort("tsub", "desc").print_all()

In [None]:
# Take the function-stats object yappi returns for this session and add the
# saved create_detection_metrics profile to it.
stats = yappi.get_func_stats()
stats.add("profiles/create_detection_metrics.yappi")

# Print every entry, sorted by the "tsub" key in descending order.
stats.sort("tsub", "desc").print_all()