In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import plotly.express as px
import os
import matplotlib.pyplot as plt

In [None]:
# Root directory holding the experiment data. It defaults to the original
# absolute location; set the BOTTLENECK_DATA_DIR environment variable to point
# the notebook at a different copy of the data without editing this cell.
DATA_ROOT = Path(
    os.environ.get(
        "BOTTLENECK_DATA_DIR",
        "/data/toulouse/bicycle/notebooks/experiments/bottleneck/data",
    )
)
MODELS_PATH = DATA_ROOT / "models"
PLOTS_PATH = DATA_ROOT / "plots"
ANALYSIS_PATH = DATA_ROOT / "analysis"
# Entry names under ANALYSIS_PATH that the profile-loading loops skip.
exclude = ["test_run_00013", "test_run_00014", "figures"]


In [None]:
# Column layout noted for parameters.csv:
# run_id        profile scale compile full dynamic mode     name

# Runs compared in this notebook and the display label for each of them
# (the two lists are matched by position).
wrapper_keys = [
    "test_run_00081",
    "test_run_00082",
    "test_run_00084",
    "test_run_00085",
]
wrapper_names = [
    "wrapper_non_compiled",
    "wrapper_compiled",
    "wrapper_compiled_with_count",
    "no_wrapper_non_compiled",
]

# Read the parameter table, index it by run id and keep only the selected runs.
parameters = (
    pd.read_csv(ANALYSIS_PATH / "parameters.csv")
    .set_index("run_id")
    .sort_index()
    .loc[wrapper_keys]
)
# Write np.nan into every cell that pandas reports as missing.
parameters[parameters.isna()] = np.nan

# Attach the display labels in the same order as wrapper_keys.
parameters["name"] = wrapper_names
parameters

In [None]:
# Add one extra run to the comparison; its parameters are read from
# manual_params.csv instead of parameters.csv.
compiled_key = "test_run_00060"
manual_params = (
    pd.read_csv(ANALYSIS_PATH / "manual_params.csv")
    .set_index("run_id")
    .loc[compiled_key]
)
# Adds (or overwrites) the row for this run; values are aligned on column names.
parameters.loc[compiled_key] = manual_params
# Guard the append so that re-running this cell does not register the run
# twice, which would make later per-run lists longer than the table.
if compiled_key not in wrapper_keys:
    wrapper_keys.append(compiled_key)

In [None]:
# Display the current parameter table.
parameters

## Summary profiles

In [None]:
# Load the summary training profile of every selected run.
# Keys are run directory names, values are the parsed training_profile.csv.
training_profiles = {}
for run_dir in ANALYSIS_PATH.iterdir():  # not named `dir`: keeps the builtin usable
    if run_dir.name in exclude:
        continue
    # Only directories that belong to the runs under comparison are read.
    if run_dir.is_dir() and run_dir.name in wrapper_keys:
        training_profiles[run_dir.name] = pd.read_csv(run_dir / "training_profile.csv")


In [None]:
# Total profiled time per run: the sum of the "Time" column of each summary
# profile, collected in the order of wrapper_keys.
times = []
for run_id in wrapper_keys:
    print(run_id)
    run_profile = training_profiles[run_id]
    times.append(run_profile["Time"].sum())
# Assigned by position, so the row order of `parameters` has to match wrapper_keys.
parameters["Profile_Time"] = times

In [None]:
# Bar chart of the total profiled time per run, in ascending Profile_Time order.
runtime_table = parameters.sort_values("Profile_Time").reset_index()
fig = px.bar(
    runtime_table,
    x="run_id",
    y="Profile_Time",
    color="compile",
    text="name",
    title="Comparison of profiled time with different .to()-wrapper options",
)
fig.show()
# Export the chart as a PDF into the "figures" folder below ANALYSIS_PATH.
figure_file = ANALYSIS_PATH / "figures" / "Wrapper_model_runtime.pdf"
fig.write_image(figure_file, scale=10)

In [None]:
def _first_value(values):
    """Return the first element of a group (positional)."""
    return values.iloc[0]


# Per-column aggregation rules used when grouping the full profiles:
# descriptive columns keep the first value of the group, counters and times
# are summed, and the per-call columns are averaged.
aggregator = {
    **dict.fromkeys(
        [
            "Class",
            "Function",
            "Class_Function_etc",
            "Class_Function",
            "Summary_index",
            "filename_lineno(function)",
            "is_callback",
        ],
        _first_value,
    ),
    **dict.fromkeys(
        ["Call_num", "Primitive_Call_num", "Time", "ncalls", "tot_time"],
        "sum",
    ),
    "tot_percall": "mean",
    "cum_time": "sum",
    "cum_percall": "mean",
}

In [None]:
# Load the full training profile of every selected run and collapse all rows
# that share the same "Class_Function_etc" value into one row (see `aggregator`
# for the per-column rule). Keys of the result are run directory names.
full_profiles = dict()
for run_dir in ANALYSIS_PATH.iterdir():
    if run_dir.name in exclude:
        continue
    # wrapper_keys is a plain Python list, so membership is tested directly
    # (a list has no .to_list() method).
    if run_dir.is_dir() and run_dir.name in wrapper_keys:
        df = pd.read_csv(run_dir.joinpath("full_training_profile.csv")).drop(columns=["Rank"])
        df = df.groupby(["Class_Function_etc"], as_index=False).agg(aggregator).reset_index()
        # Case-insensitive flags: does the identifier mention model.py / bicycle?
        df["in_model"] = df["Class_Function_etc"].apply(lambda x: "model.py" in str(x).casefold())
        df["in_bicycle"] = df["Class_Function_etc"].apply(lambda x: "bicycle" in str(x).casefold())
        # Order rows by source location and renumber the index 0..len-1.
        df = df.sort_values("filename_lineno(function)").set_index(pd.Index(np.arange(len(df))), drop=True)
        # Disabled normalisation of the numeric columns, kept for reference:
        #numericals = ["Call_num","Primitive_Call_num","Time","ncalls","tot_time","tot_percall","cum_time","cum_percall"]
        #df[numericals] = df[numericals]/sum(df[numericals], axis=0)
        full_profiles[run_dir.name] = df

In [None]:
# For every metric and every run, pick the TOP_N profile rows that satisfy
# `condition` (largest metric value first) and append that run's parameters to
# each row. All rows are gathered first and the frame is built once at the end.
metrics = ["ncalls", "tot_time", "tot_percall", "cum_time", "cum_percall"]
condition = "in_bicycle"
TOP_N = 10

if not full_profiles:
    raise ValueError("full_profiles is empty - no run profiles were loaded")

# Column layout is taken from the loaded profiles themselves rather than from
# a variable left over from an earlier cell.
profile_columns = list(full_profiles.values())[-1].columns
top_columns = profile_columns.append(parameters.columns)

top_rows = []
for metric in metrics:
    for run_id, profile in full_profiles.items():
        best = (
            profile.query(condition)
            .sort_values(metric, ignore_index=True, ascending=False)
            .iloc[:TOP_N]
        )
        run_params = parameters.loc[run_id].to_list()
        # Profile values followed by the run parameters, matched by position
        # to top_columns.
        top_rows.extend(
            list(values) + run_params
            for values in best.itertuples(index=False, name=None)
        )

top_df = pd.DataFrame(top_rows, columns=top_columns)

In [None]:
# One grouped bar chart per metric: bars are placed by source location,
# coloured by run name and labelled with the function name.
bar_options = dict(
    x="filename_lineno(function)",
    color="name",
    text="Function",
    barmode="group",
    # log_y=True,
)
for metric in metrics:
    title = f"Compiled {metric} of functions with {condition}"
    ranked = top_df.sort_values(metric)
    fig = px.bar(ranked, y=metric, title=title, **bar_options)
    fig.show()