In [None]:
import ast
import glob
import json
import os
import shutil

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from IPython.display import display, HTML

# Plain white seaborn background for all figures
sns.set_style("white")

# Show DataFrames in full: no truncation of cell text, rows or columns
for display_option in ('display.max_colwidth', 'display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Let the notebook container use the whole browser width
display(HTML("<style>.container { width:100% !important; }</style>"))

# Font type 42 (TrueType) for PDF/PS export
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

In [None]:
# Build one record per experiment run by merging its hyperparameters with its
# use-case results, then collect all records into the DataFrame `df`.

# Root directory; runs are expected two levels below it (<root>/<group>/<run>/)
root_dir = "../../models/experiments"

# Sorted so that the row order of `df` is the same on every execution
experiment_dirs = sorted(glob.glob(f"{root_dir}/*/*/"))

# One dictionary per successfully loaded run
experiments_data = []

for exp_dir in experiment_dirs:
    config_file = f"{exp_dir}hyperparameters.json"
    result_file = f"{exp_dir}results/use_case_results.json"

    # os.path.isfile tests the literal path. glob.glob would interpret
    # characters such as '[' or '*' in a folder name as a pattern and could
    # report an existing file as missing.
    if not os.path.isfile(config_file):
        print(f"config file does not exist: {config_file}")
        continue

    if not os.path.isfile(result_file):
        print(f"result file not found: {result_file}")
        continue

    with open(config_file, 'r') as f:
        config_data = json.load(f)

    with open(result_file, 'r') as f:
        use_case_result_data = json.load(f)

    # Result keys win over hyperparameter keys if both files share a key
    experiment_data = {**config_data, **use_case_result_data}

    # Run folder without its first path component and without the trailing slash
    # NOTE(review): this drops the leading '..' of root_dir - confirm that is intended
    experiment_data["results_folder"] = '/'.join(exp_dir.split('/')[1:-1])

    experiments_data.append(experiment_data)

# Convert list of dictionaries to pandas DataFrame
df = pd.DataFrame(experiments_data)


In [None]:
# Inspect the assembled experiments table
df

In [None]:
# Cache the collected results as CSV (without the index) so the analysis below can start from the file
df.to_csv('experimental_results.csv', index=False)

## Start here 

In [None]:
# Reload the cached results; index_col=False keeps the first CSV column as data instead of using it as the index
df = pd.read_csv('experimental_results.csv', index_col=False)

In [None]:
# Keep every non-'loss' run, and of the 'loss' runs only those with lambda_soc == 1
is_other_method = df['method'] != 'loss'
has_unit_lambda = df['lambda_soc'] == 1
df = df[is_other_method | has_unit_lambda]

In [None]:
# Internal method identifiers, in the order they should appear in tables and plots
custom_order = ['data_baseline', 'loss', 'pretrain', 'architecture', 'hybrid', 'residual']

# Display name for every internal identifier
mapping = {'data_baseline': 'Data Baseline', 'loss': 'Loss', 'pretrain': 'Initialization', 'architecture': 'Architecture', 'hybrid': 'Hybrid', 'residual': 'Residual'}

# Translate identifiers to display names before building the categorical.
# Labels that are already display names pass through unchanged, so running
# this cell a second time does not wipe the column. This also avoids
# Series.replace on a categorical column, which recent pandas deprecates.
method_labels = df['method'].astype(object).map(lambda label: mapping.get(label, label))

# Ordered categorical whose categories follow custom_order; any label outside
# the mapping becomes NaN, as it did with the original categorical conversion
df['method'] = pd.Categorical(
    method_labels,
    categories=[mapping[identifier] for identifier in custom_order],
    ordered=True,
)


In [None]:
# Check which method labels are present after renaming
df['method'].unique()

In [None]:
# List the available hyperparameter and result columns
df.columns

In [None]:
# Runs of the loss-based method. The 'method' column holds display names at
# this point ('loss' was renamed to 'Loss' above), so the comparison must use
# 'Loss'; comparing against 'loss' selects no rows.
loss_df = df[df['method'] == 'Loss']

# Mean RMSE of the three use cases for every (lambda_soc, n_features, n_lstm_layers, n_epochs) setting
grouped_loss = (
    loss_df
    .groupby(['lambda_soc', 'n_features', 'n_lstm_layers', 'n_epochs'])
    [['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse']]
    .mean()
)

In [None]:
# Distinct lambda_soc values that remain in the data
df['lambda_soc'].unique().tolist()

In [None]:
# # Filter data for n_epochs = 20 and the 'loss' method
# filtered_df = df[(df['n_epochs'] == 20) & (df['method'] == 'loss')]

# # Group by 'lambda_soc' and calculate the mean RMSE
# grouped_lambda = filtered_df.groupby('lambda_soc').mean()

# # Find the lambda with the lowest mean RMSE
# best_lambda = grouped_lambda['avg_rmse'].idxmin()

# print(f"The best performing lambda  is: {best_lambda}")

In [None]:
# # Filter data for n_features = 1 and the 'residual' method (you can adjust this filter as needed)
# filtered_df = df[(df['n_features'] == 1) & (df['method'] == 'residual')]

# # Group by 'n_lstm_layers' and calculate the mean RMSE
# grouped_lstm = filtered_df.groupby('n_lstm_layers').mean()

# # Find the n_lstm_layers value with the lowest mean RMSE
# best_lstm_layers = grouped_lstm['avg_rmse'].idxmin()

# print(f"The best performing n_lstm_layers value is: {best_lstm_layers}")

In [None]:
# Wide table of the loss-method means: rows are (n_features, lambda_soc),
# columns are the RMSE metric split by (n_lstm_layers, n_epochs)
loss_means_long = grouped_loss.reset_index()
results_loss = loss_means_long.pivot(
    index=['n_features', 'lambda_soc'],
    columns=['n_lstm_layers', 'n_epochs'],
)

In [None]:
from itertools import product

# Fixed RMSE values of the theory baseline; they do not depend on any hyperparameter
method = 'Theory Baseline'
use_case_1_rmse = 4.4
use_case_2_rmse = 13.4
use_case_3_rmse = 15.7
avg_rmse = 11.2  # Average of the three use cases

# Hyperparameter grid the baseline is replicated over
n_features_values = [1, 2, 3, 4]
n_lstm_layers_values = [1, 2]
n_epochs_values = [10, 20, 50]
combinations = list(product(n_features_values, n_lstm_layers_values, n_epochs_values))

columns = ['method', 'n_features', 'n_lstm_layers', 'n_epochs', 'use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']

# One identical baseline row per (n_features, n_lstm_layers, n_epochs) combination
data = [
    [method, *setting, use_case_1_rmse, use_case_2_rmse, use_case_3_rmse, avg_rmse]
    for setting in combinations
]

theory_baseline_df = pd.DataFrame(data, columns=columns)


In [None]:
# Rank labels in the order the ranking columns are emitted; 'Worst' is appended last
RANK_LABELS = ['Best', '2nd best', '3rd best', '4th best', '5th best', '6th best']


def determine_performance(group, metric='avg_rmse'):
    """Rank the methods of one hyperparameter setting by ascending `metric`.

    Parameters
    ----------
    group : pd.DataFrame
        Rows of a single (n_features, n_lstm_layers, n_epochs) group; must
        contain the columns 'method' and `metric`.
    metric : str
        Name of the RMSE column to rank by (lower is better).

    Returns
    -------
    pd.Series
        Method names keyed by 'Best', '2nd best', ..., '6th best', 'Worst',
        in exactly that order. Ranks beyond the number of methods in the
        group are None.
    """
    ranked_methods = group.sort_values(metric)['method'].tolist()
    ranking = {
        label: ranked_methods[position] if position < len(ranked_methods) else None
        for position, label in enumerate(RANK_LABELS)
    }
    ranking['Worst'] = ranked_methods[-1]
    return pd.Series(ranking)


# The theory baseline competes with the learned methods in every setting
combined_df = pd.concat([theory_baseline_df, df])

performance_list = []
for rmse in ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']:

    # Mean of the current metric per method and hyperparameter setting
    grouped_means = combined_df.groupby(['method', 'n_features', 'n_lstm_layers', 'n_epochs'])[rmse].mean().reset_index()

    # Rank the methods inside every hyperparameter setting
    performance_df = (
        grouped_means
        .groupby(['n_features', 'n_lstm_layers', 'n_epochs'])
        .apply(determine_performance, metric=rmse)
        .reset_index()
    )

    # Lift the columns to a two-level index with an empty second level. The
    # labels are derived from the existing column names, not from a separate
    # positional list, so each label is guaranteed to stay on its own data.
    performance_df.columns = pd.MultiIndex.from_tuples([(name, '') for name in performance_df.columns])

    performance_list.append(performance_df)


In [None]:
# For every metric, count how often each method is ranked best and worst.
method_counts_list = []

for performance_df in performance_list:
    # Occurrences of each method in the 'Best' and 'Worst' ranking columns
    best_counts = performance_df['Best'].dropna().value_counts()
    worst_counts = performance_df['Worst'].dropna().value_counts()

    # Building the frame aligns both counts on the method name. A method that
    # appears in only one of them gets NaN in the other, so fill with 0 and
    # cast afterwards to keep integer counts.
    method_counts_df = (
        pd.DataFrame({'Best Count': best_counts, 'Worst Count': worst_counts})
        .fillna(0)
        .astype(int)
    )
    method_counts_df.index.name = 'Method'

    method_counts_list.append(method_counts_df)

# Print the best/worst counts of every performance DataFrame
for idx, method_counts_df in enumerate(method_counts_list, 1):
    print(f"Performance DataFrame {idx}:")
    print(method_counts_df)
    print()

In [None]:

# Total best/worst counts per method, summed over all performance DataFrames.
best_counts = []
worst_counts = []

for performance_df in performance_list:
    # Occurrences of each method in the 'Best' and 'Worst' ranking columns
    best_counts.append(performance_df['Best'].dropna().value_counts())
    worst_counts.append(performance_df['Worst'].dropna().value_counts())

# Align the per-metric counts on the method name and add them up
best_counts_df = pd.concat(best_counts, axis=1).fillna(0).sum(axis=1).astype(int)
worst_counts_df = pd.concat(worst_counts, axis=1).fillna(0).sum(axis=1).astype(int)

# A method that is never best (or never worst) is absent from one of the two
# Series; fill the resulting gap with 0 so the counts stay integers
method_counts_df = (
    pd.DataFrame({'Best Count': best_counts_df, 'Worst Count': worst_counts_df})
    .fillna(0)
    .astype(int)
)
method_counts_df.index.name = 'Method'

method_counts_df

In [None]:
# Ranking table for one metric; indices 0-3 follow the loop order
# use_case_1_rmse, use_case_2_rmse, use_case_3_rmse, avg_rmse
performance_list[0] # 0-2: Reproduction, Abstraction, Generalization; 3: average

In [None]:
# Mean RMSE of the three use cases per method and hyperparameter setting
setting_keys = ['method', 'n_features', 'n_lstm_layers', 'n_epochs']
use_case_columns = ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse']
grouped_df = df.groupby(setting_keys)[use_case_columns].mean()
# Alternative: aggregate only the average RMSE
#grouped_df = df.groupby(['method', 'n_features', 'n_lstm_layers', 'n_epochs'])[['avg_rmse']].mean()

In [None]:
# Inspect the per-setting mean RMSE table
grouped_df

In [None]:
# Wide view: one row per (method, n_features), columns split by (n_lstm_layers, n_epochs)
grouped_long = grouped_df.reset_index()
results = grouped_long.pivot(
    index=['method', 'n_features'],
    columns=['n_lstm_layers', 'n_epochs'],
)
results

In [None]:
# Wide view with one row per method; all hyperparameters move to the column index.
# Previous layout, kept for reference:
#results = grouped_df.reset_index().pivot(index=['method', 'n_features'], columns=['n_lstm_layers', 'n_epochs'])
grouped_long = grouped_df.reset_index()
results = grouped_long.pivot(
    index=['method'],
    columns=['n_lstm_layers', 'n_epochs', 'n_features'],
)

In [None]:
# Inspect the pivoted results table
results

In [None]:

# Font sizes shared by all figures
SMALL_SIZE = 14
MEDIUM_SIZE = 14
BIGGER_SIZE = 20

plt.rc('font', size=SMALL_SIZE)
plt.rc('axes', titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)
plt.rc('xtick', labelsize=SMALL_SIZE)
plt.rc('ytick', labelsize=SMALL_SIZE)
plt.rc('legend', fontsize=12)
plt.rc('figure', titlesize=BIGGER_SIZE)

# Panel captions, one per use case
titles = ['Reproduction', 'Abstraction', 'Generalization']

# Long format: one row per (run, use case). Despite its name, the
# 'number_of_features' column holds the use-case column name.
melted_df = df.melt(
    id_vars=['method', 'n_features'],
    value_vars=['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse'],
    var_name='number_of_features',
    value_name='rmse',
)

# Theory-baseline RMSE per use case (the fourth entry is not used by the loop below)
theory_baseline = [4.4, 13.4, 15.7, 11.2]

# One figure per use case: RMSE box plots per method, split by number of features
for panel_no, feature in enumerate(melted_df['number_of_features'].unique(), 1):
    first_panel = panel_no == 1
    baseline_rmse = theory_baseline[panel_no - 1]

    fig, ax = plt.subplots(figsize=(10, 2.5), dpi=140)

    # Dashed reference line at the theory baseline, labelled above the line in
    # the first panel and below it in the others
    ax.axhline(y=baseline_rmse, color='red', linestyle='--', linewidth=0.6)
    theory_label_y = baseline_rmse + 2 if first_panel else baseline_rmse - 7
    ax.text(-0.8, theory_label_y, "Theory", fontsize=14)

    # Hand-placed legend for the hue levels, drawn only in the first panel
    if first_panel:
        ax.text(-0.34, 40, "Number of Features:", fontsize=12)
        for x_position, hue_label in [(-0.34, "1"), (-0.14, "2"), (0.06, "3"), (0.24, "4")]:
            ax.text(x_position, 32, hue_label, fontsize=12)

    panel_data = melted_df[melted_df['number_of_features'] == feature]
    sns.boxplot(data=panel_data, x='method', y='rmse', hue='n_features', fill=None, legend=False, showbox=True, showfliers=False, linewidth=0.7, ax=ax)
    #plt.setp(ax.artists, edgecolor = 'k', facecolor='w')
    plt.setp(ax.lines, color='k')

    # Method names are shown only under the third panel
    if panel_no != 3:
        ax.set_xticklabels([])

    ax.text(5.4, 45, titles[panel_no - 1], fontsize=14, horizontalalignment='right')
    ax.set_xlabel(None)
    ax.set_ylabel('RMSE')
    ax.set_ylim(0, 60)
    ax.set_xlim(-0.9, 5.5)
    ax.tick_params(bottom=True, top=False, left=True, right=False)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    fig.tight_layout()
    plt.show()
    #fig.savefig(f'boxplot_results_{panel_no}.pdf', format='pdf')

In [None]:
# Use-case-3 RMSE and source folder of every Residual run
is_residual = df['method'] == 'Residual'
df.loc[is_residual, ['use_case_3_rmse', 'results_folder']]

In [None]:
# Lowest RMSE any single run reached, per method and metric.
# Only the RMSE columns are aggregated: taking the minimum of every column
# would also process free-text columns such as results_folder, which is
# wasted work and has no meaningful result.
rmse_columns = ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']

# reset_index turns 'method' back into a regular column
lowest_rmse_per_method = df.groupby('method')[rmse_columns].min().reset_index()

print("Lowest RMSE per use case and avg_rmse per method:")
lowest_rmse_per_method

In [None]:
# For every metric, find the hyperparameter setting with the lowest mean RMSE
# per method and report that setting's mean and standard deviation.
best = []

for rmse in ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']:
    # Mean and std of the metric per method, n_features, n_lstm_layers and n_epochs
    df_grouped = df.groupby(['method', 'n_features', 'n_lstm_layers', 'n_epochs']).agg({rmse: ['mean', 'std']}).reset_index()

    # Settings without any run have no mean and cannot be the best one
    mean_column = (rmse, 'mean')
    df_sorted = df_grouped[df_grouped[mean_column].notna()].sort_values(mean_column)

    # Keep the complete first (lowest-mean) row of every method.
    # GroupBy.first() is not used here: it returns the first non-null value of
    # each column separately, so a NaN std on the best row (a setting with a
    # single run) would be replaced by the std of a different setting.
    is_best_row = ~df_sorted[('method', '')].duplicated(keep='first')
    best_rows = df_sorted[is_best_row].sort_values(('method', '')).reset_index(drop=True)

    best.append(best_rows)

In [None]:
# Best setting per method for the first metric (use_case_1_rmse)
best[0]

## Add std to the table

In [None]:
# Individual runs of the Data Baseline setting (1 feature, 1 LSTM layer, 20 epochs)
run_filter = (
    (df['method'] == 'Data Baseline')
    & (df['n_features'] == 1)
    & (df['n_lstm_layers'] == 1)
    & (df['n_epochs'] == 20)
)
df.loc[run_filter, ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse', 'results_folder']]

In [None]:
# Individual runs of the Hybrid setting (1 feature, 1 LSTM layer, 50 epochs)
run_filter = (
    (df['method'] == 'Hybrid')
    & (df['n_features'] == 1)
    & (df['n_lstm_layers'] == 1)
    & (df['n_epochs'] == 50)
)
df.loc[run_filter, ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse', 'results_folder']]

In [None]:
# Individual runs of the Residual setting (3 features, 2 LSTM layers, 50 epochs)
run_filter = (
    (df['method'] == 'Residual')
    & (df['n_features'] == 3)
    & (df['n_lstm_layers'] == 2)
    & (df['n_epochs'] == 50)
)
df.loc[run_filter, ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse', 'results_folder']]

In [None]:
# Best setting per method for the last metric (avg_rmse)
best[3]

In [None]:
# Columns 0, 4 and 5 of the avg_rmse table, rounded to one decimal
# (presumably method, mean and std - verify against the column order of best[3])
best[3].iloc[:, [0, 4, 5]].round(1)

In [None]:
# Mean and std of every metric for all settings that use four features.
# Note: no "best" selection happens here; every 4-feature setting is kept.
filtered_df = df[df['n_features'] == 4]
setting_keys = ['method', 'n_features', 'n_lstm_layers', 'n_epochs']

best = []
for rmse in ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']:
    per_setting_stats = filtered_df.groupby(setting_keys).agg({rmse: ['mean', 'std']})
    best.append(per_setting_stats.reset_index())


In [None]:
# Table for the first metric (use_case_1_rmse)
best[0]

In [None]:
# Table for the second metric (use_case_2_rmse)
best[1]

In [None]:
# Table for the third metric (use_case_3_rmse)
best[2]

In [None]:
# Table for the fourth metric (avg_rmse)
best[3]

In [None]:
# For every metric, keep the setting(s) of each method whose mean RMSE equals
# that method's minimum (ties are all kept).
setting_keys = ['method', 'n_features', 'n_lstm_layers', 'n_epochs']

best = []
for rmse in ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']:
    # Mean of the metric per method and hyperparameter setting
    grouped_avg_rmse = df.groupby(setting_keys)[rmse].mean().reset_index()

    def keep_lowest(method_rows, metric=rmse):
        """Return the rows of one method whose `metric` equals the method's minimum."""
        lowest_value = method_rows[metric].min()
        return method_rows[method_rows[metric] == lowest_value]

    best_performing = grouped_avg_rmse.groupby('method').apply(keep_lowest)
    best.append(best_performing)

In [None]:
# Lowest-mean setting(s) per method for use_case_1_rmse
best[0]

In [None]:
# Lowest-mean setting(s) per method for use_case_2_rmse
best[1]

In [None]:
# Lowest-mean setting(s) per method for use_case_3_rmse
best[2]

In [None]:
# Lowest-mean setting(s) per method for avg_rmse
best[3]

In [None]:
# Stack the per-metric tables. Each table carries only its own metric column,
# so after stacking the other metric columns are NaN in those rows.
best_df = pd.concat(best).reset_index(drop=True)

# Select and reorder columns
best_df = best_df[['method', 'use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']]

# Show the table under its heading (the heading used to be printed on its own)
print("Best performing RMSE per method:")
best_df


In [None]:
# Collapse the stacked table to one row per method; the mean ignores the NaN
# cells, so each cell is the mean of the non-missing values of that metric
metric_columns = ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']
best_by_method = best_df.groupby('method')
best_combinations = best_by_method[metric_columns].mean()
best_combinations

In [None]:
# Single-row frame with the fixed theory-baseline RMSE values
theory_baseline_row = pd.DataFrame({
    'method': ['Theory Baseline'],
    'use_case_1_rmse': [4.4],
    'use_case_2_rmse': [13.4],
    'use_case_3_rmse': [15.7],
    'avg_rmse': [11.2],
})

# Baseline first, then the best result of every method, rounded to one decimal
learned_rows = best_combinations.reset_index()
print_result = pd.concat([theory_baseline_row, learned_rows], ignore_index=True).round(1)
print_result

In [None]:
# Emit columns 1-4 (the four RMSE columns; the method column at position 0 is
# left out) as a LaTeX table with one decimal place and horizontal rules
print(print_result.iloc[:, 1:5].style.format('{:.1f}'.format).to_latex(hrules=True, multirow_align='t'))



In [None]:
# Same selection as the per-method best settings, restricted to runs with four features.
filtered_df = df[df['n_features'] == 4]
setting_keys = ['method', 'n_features', 'n_lstm_layers', 'n_epochs']

best_4_features = []
for rmse in ['use_case_1_rmse', 'use_case_2_rmse', 'use_case_3_rmse', 'avg_rmse']:
    # Mean of the metric per method and hyperparameter setting
    grouped_avg_rmse_4 = filtered_df.groupby(setting_keys)[rmse].mean().reset_index()

    def keep_lowest_4(method_rows, metric=rmse):
        """Return the rows of one method whose `metric` equals the method's minimum."""
        lowest_value = method_rows[metric].min()
        return method_rows[method_rows[metric] == lowest_value]

    best_performing_4 = grouped_avg_rmse_4.groupby('method').apply(keep_lowest_4)
    best_4_features.append(best_performing_4)

In [None]:
# Lowest-mean 4-feature setting(s) per method for use_case_1_rmse
best_4_features[0]

In [None]:
# Lowest-mean 4-feature setting(s) per method for use_case_2_rmse
best_4_features[1]

In [None]:
# Lowest-mean 4-feature setting(s) per method for use_case_3_rmse
best_4_features[2]

In [None]:
# Lowest-mean 4-feature setting(s) per method for avg_rmse
best_4_features[3]