# Analyze Model Results

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from MemoryAutoScaling import analysis

# Remove the cap on how many DataFrame columns pandas shows when displaying
# results (None means no limit), so wide result tables are not truncated.
pd.set_option('display.max_columns', None)

In [None]:
def build_model_result_CDFs(model_results_df, model_name):
    """Plots a 2x2 grid of CDFs of the result statistics for `model_name`.

    One subplot is drawn per statistic. Each statistic is read from the column
    of `model_results_df` named `<statistic>_<model_name>`, so every CDF is
    built from all the rows of the DataFrame.

    Parameters
    ----------
    model_results_df: pd.DataFrame
        A pandas DataFrame of model results. It must contain the columns
        `test_mase_<model_name>`, `under_mase_<model_name>`,
        `prop_under_preds_<model_name>` and `max_under_pred_<model_name>`.
    model_name: str
        A string representing the model name used as the column suffix.

    Returns
    -------
    None

    """
    # Each statistic is paired with the color passed to the plotting helper.
    stat_colors = [
        ("test_mase", "blue"),
        ("under_mase", "black"),
        ("prop_under_preds", "green"),
        ("max_under_pred", "red"),
    ]
    _, axes = plt.subplots(2, 2, figsize=(20, 20))
    for position, (stat_name, color) in enumerate(stat_colors):
        # Fill the grid left to right, top to bottom.
        grid_row, grid_col = divmod(position, 2)
        column = "{0}_{1}".format(stat_name, model_name)
        analysis.plot_cumulative_distribution_function(
            model_results_df[column].values,
            axes[grid_row, grid_col],
            column,
            color,
            "CDF")

In [None]:
def get_model_results(data_dir, model_name):
    """Loads the results of `model_name` from `data_dir` and summarizes them.

    The results are read from the file `<model_name>_results.csv` inside
    `data_dir`. Before returning, the summary statistics of the results are
    printed and `build_model_result_CDFs` is called to plot their CDFs.

    Parameters
    ----------
    data_dir: str
        A string representing the directory that holds the results file.
    model_name: str
        A string representing the name of the model whose results are loaded.

    Returns
    -------
    pd.DataFrame
        A pandas DataFrame with the contents of the results file.

    """
    results_file = "{}_results.csv".format(model_name)
    results = pd.read_csv(os.path.join(data_dir, results_file))
    summary = results.describe()
    print(summary)
    build_model_result_CDFs(results, model_name)
    return results

### Maximum Memory Usage - 3-Period Aggregation

In [None]:
# Directory handed to `get_model_results` as `data_dir`; the results file
# `<model_name>_results.csv` is read from it.
# NOTE(review): this is an absolute path on one user's machine - adjust it
# before running the notebook anywhere else.
max_mem_3_dir = "/Users/mattb/Desktop/Courses/MemoryAutoScaling/output_data/max_mem_3"

# Load the "ma" results (presumably a moving-average model - confirm). The call
# also prints summary statistics and plots the CDFs of the result statistics.
ma_df = get_model_results(max_mem_3_dir, "ma")