In [1]:
#Imports
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.ticker as ticker
from matplotlib.transforms import ScaledTranslation
import matplotlib.patches as mpatches

In [2]:
# Base directory for all result files: the parent of the current working directory
cwd = os.path.normpath(os.path.dirname(os.getcwd()))

def load_csv(file_path):
    """
    Read a CSV file into a pandas DataFrame, using its first column as the index.

    Parameters
    ----------
    file_path : str
        Path of the CSV file. It is joined onto the module-level ``cwd``;
        following ``os.path.join`` semantics, an absolute path is used unchanged.

    Returns
    -------
    pandas.DataFrame
        The parsed table, indexed by column 0 of the file.
    """
    csv_location = os.path.join(cwd, file_path)
    table = pd.read_csv(csv_location, index_col=0)
    return table

In [3]:
# Define file paths for each CSV file.
# Keys have the form '<FL|LL>_<target>'. Every value lists the three Benchmark
# result files followed by the four Moe result files, all relative paths.
_forecast_targets = ['Grossload', 'Prosumption', 'PV', 'Totalload']

# (key prefix, top-level folder, result-file stems grouped by experiment suite)
_learning_setups = [
    ('FL', 'Federatedlearning', {
        'Benchmark': ['global_bilstm', 'global_cnn', 'global_transformer'],
        'Moe': ['global_soft_bilstm', 'global_topk_bilstm',
                'global_soft_dense', 'global_topk_dense'],
    }),
    ('LL', 'Locallearning', {
        'Benchmark': ['bilstm', 'cnn', 'transformer'],
        'Moe': ['lstm_soft_moe', 'lstm_topk_moe',
                'dense_soft_moe', 'dense_topk_moe'],
    }),
]

file_paths = {
    f'{prefix}_{target}': [
        f'{folder}/TS_{prefix}_{target}_Forecasting_{suite}/wandb/{stem}_all_results.csv'
        for suite, stems in suites.items()
        for stem in stems
    ]
    for target in _forecast_targets
    for prefix, folder, suites in _learning_setups
}


In [4]:
# Cluster label of every user, read as integers from the clustering result file
y = np.loadtxt('../../../data/3final_data/Clusters_KMeans10_dtw.csv', delimiter=',').astype(int)
num_clusters = 10

# Map each cluster label to the 1-based positions in `y` that carry that label
cluster_users = {
    label: np.where(y == label)[0] + 1
    for label in range(num_clusters)
}
cluster_users

{0: array([ 7, 14, 18, 22, 23, 25, 29], dtype=int64),
 1: array([6], dtype=int64),
 2: array([ 3,  4,  9, 13, 15, 19, 20, 30], dtype=int64),
 3: array([1], dtype=int64),
 4: array([21], dtype=int64),
 5: array([ 2, 28], dtype=int64),
 6: array([ 5, 10, 11, 12, 24, 26, 27], dtype=int64),
 7: array([8], dtype=int64),
 8: array([17], dtype=int64),
 9: array([16], dtype=int64)}

In [24]:
def get_time_stats_df_clusterwise(df, cluster_users, scenario, cluster_id):
    """
    Summarise the timing columns per architecture for one cluster of users.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table; must contain the columns 'user', 'architecture',
        'train_time' and 'avg_time_epoch'.
    cluster_users : iterable
        Values of the 'user' column that belong to the cluster.
    scenario : object
        Label written to the 'model_type' column of every output row.
    cluster_id : object
        Label written to the 'cluster' column of every output row.

    Returns
    -------
    pandas.DataFrame
        One row per architecture found among the cluster's rows. Columns are
        two-level: ('train_time' | 'avg_time_epoch') x ('mean' | 'std'), plus
        'architecture', 'model_type' and 'cluster'.
    """
    # Rows whose user belongs to the requested cluster
    in_cluster = df['user'].isin(cluster_users)

    # Mean and standard deviation for both timing columns
    stat_spec = {column: ['mean', 'std'] for column in ('train_time', 'avg_time_epoch')}
    per_architecture = df[in_cluster].groupby('architecture').agg(stat_spec).reset_index()

    # Tag every row with the scenario and the cluster it was computed for
    return per_architecture.assign(model_type=scenario, cluster=cluster_id)


# Load and process all DataFrames.
# Each result file is read from disk exactly once and reused for every cluster;
# the load does not depend on the cluster, so re-reading it per cluster is wasted I/O.
# load_csv() already resolves paths against `cwd`, so the relative path is passed as-is.
loaded_results = {
    path: load_csv(path)
    for paths in file_paths.values()
    for path in paths
}

all_results = []
for cluster_id, users in cluster_users.items():
    # Presumably the 'user' column holds labels of the form 'user<N>' -- verify against the CSVs
    user_identifiers = [f'user{user_id}' for user_id in users]
    for category, paths in file_paths.items():
        learning_type = 'Federated' if 'FL' in category else 'Local'
        data_name = category.split('_')[-1]
        # The model identity is taken from each file's 'architecture' column below,
        # so the scenario label only has to carry the learning type and the data set.
        full_scenario = f"{learning_type}_{data_name}"
        for path in paths:
            all_results.append(
                get_time_stats_df_clusterwise(
                    loaded_results[path], user_identifiers, full_scenario, cluster_id
                )
            )

# Concatenate all results into one DataFrame (one row per cluster/file/architecture)
combined_average_metrics = pd.concat(all_results, ignore_index=True)

# Harmonise the architecture names used by the federated and local result files
model_name_mapping = {
    'global_bilstm': 'Lstm',
    'global_cnn': 'Cnn',
    'global_transformer': 'Transformer',
    'global_soft_bilstm': 'Soft_Lstm',
    'global_topk_bilstm': 'Topk_Lstm',
    'global_soft_dense': 'Soft_Dense',
    'global_topk_dense': 'Topk_Dense',
    'bilstm': 'Lstm',
    'cnn': 'Cnn',
    'transformer': 'Transformer',
    'lstm_soft_moe': 'Soft_Lstm',
    'lstm_topk_moe': 'Topk_Lstm',
    'dense_soft_moe': 'Soft_Dense',
    'dense_topk_moe': 'Topk_Dense'
}
# NOTE: an architecture missing from the mapping becomes NaN here, and rows with a
# NaN group key are left out by the groupby below.
combined_average_metrics['model'] = combined_average_metrics['architecture'].map(model_name_mapping)

# Split the scenario label '<learning>_<data>' into its two parts
combined_average_metrics[['learning', 'data']] = combined_average_metrics['model_type'].str.rsplit('_', n=1, expand=True)

# Drop the helper columns that are no longer needed. The columns are a two-level
# MultiIndex (from the multi-function agg), so the level is named explicitly; this
# avoids the PerformanceWarning raised when dropping without `level`.
combined_average_metrics = combined_average_metrics.drop(columns=['architecture', 'model_type'], level=0)

# Average the per-cluster statistics for every (data, learning, model) combination.
# Each cluster counts equally regardless of its size. The *_std columns are the mean
# of the per-cluster standard deviations, not a pooled standard deviation; a group
# with a single row has a NaN std, which the mean skips.
agg_results = combined_average_metrics.groupby(['data', 'learning', 'model']).agg(
    train_time_mean=pd.NamedAgg(column=('train_time', 'mean'), aggfunc='mean'),
    train_time_std=pd.NamedAgg(column=('train_time', 'std'), aggfunc='mean'),
    avg_time_epoch_mean=pd.NamedAgg(column=('avg_time_epoch', 'mean'), aggfunc='mean'),
    avg_time_epoch_std=pd.NamedAgg(column=('avg_time_epoch', 'std'), aggfunc='mean')
).reset_index()
agg_results.head(5)

  combined_average_metrics.drop(columns=['architecture'], inplace=True)


Unnamed: 0,data,learning,model,train_time_mean,train_time_std,avg_time_epoch_mean,avg_time_epoch_std
0,Grossload,Federated,Cnn,1.56817,0.014896,1.466463,0.014697
1,Grossload,Federated,Lstm,6.488047,0.645943,6.323969,0.640483
2,Grossload,Federated,Soft_Dense,1.925815,0.018928,1.815167,0.018764
3,Grossload,Federated,Soft_Lstm,4.56565,0.02646,4.413585,0.025721
4,Grossload,Federated,Topk_Dense,2.753776,0.020035,2.601665,0.019648


In [29]:
# Row order of the models in the summary tables
custom_order = ['Lstm', 'Cnn', 'Transformer', 'Soft_Dense', 'Topk_Dense', 'Soft_Lstm', 'Topk_Lstm']

# Keep only the rows belonging to the 'PV' data set
filtered_results = agg_results.loc[agg_results['data'] == 'PV']

# One row per model, one column per (metric, learning) pair
metric_columns = ['train_time_mean', 'train_time_std', 'avg_time_epoch_mean', 'avg_time_epoch_std']
reshaped_results = filtered_results.pivot(index='model', columns='learning', values=metric_columns)

# Flatten the two-level column labels into '<metric>_<learning>'
reshaped_results.columns = [
    f'{metric}_{learning}' for metric, learning in reshaped_results.columns
]

# Column layout: for each quantity, Local (mean, std) followed by Federated (mean, std)
column_order = [
    f'{quantity}_{stat}_{learning}'
    for quantity in ('train_time', 'avg_time_epoch')
    for learning in ('Local', 'Federated')
    for stat in ('mean', 'std')
]

# Apply the row and column order, then round for display
reshaped_results = reshaped_results.loc[custom_order, column_order].round(4)

# DataFrame with 'train_time' values
train_time_df = reshaped_results[[name for name in column_order if 'train_time' in name]].copy()

# DataFrame with 'avg_time_epoch' values
avg_time_epoch_df = reshaped_results[[name for name in column_order if 'avg_time_epoch' in name]].copy()


In [30]:
# Display the train_time columns (mean/std, Local and Federated) of the PV summary, one row per model
train_time_df

Unnamed: 0_level_0,train_time_mean_Local,train_time_std_Local,train_time_mean_Federated,train_time_std_Federated
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Lstm,88.8032,36.3722,6.5961,1.0481
Cnn,14.1349,3.9835,1.5511,0.0173
Transformer,520.8173,298.5608,47.2864,1.1556
Soft_Dense,21.6787,6.3153,1.8848,0.0142
Topk_Dense,34.5001,11.3911,2.7265,0.021
Soft_Lstm,72.9347,25.5898,4.535,0.0348
Topk_Lstm,101.1475,34.5978,5.1313,0.5021


In [31]:
# Display the avg_time_epoch columns (mean/std, Local and Federated) of the PV summary, one row per model
avg_time_epoch_df

Unnamed: 0_level_0,avg_time_epoch_mean_Local,avg_time_epoch_std_Local,avg_time_epoch_mean_Federated,avg_time_epoch_std_Federated
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Lstm,2.3219,0.0662,6.433,1.0418
Cnn,0.7288,0.019,1.4507,0.0158
Transformer,14.865,3.0132,45.07,1.1025
Soft_Dense,0.7463,0.0124,1.775,0.0157
Topk_Dense,1.2291,0.0224,2.5754,0.0205
Soft_Lstm,2.3708,0.1442,4.3853,0.0334
Topk_Lstm,3.1749,0.0781,4.9194,0.504
