In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Activation, Dense, LeakyReLU
from tensorflow.keras.activations import relu, sigmoid, tanh  
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from keras.initializers import HeNormal


import AC_Import_CG_AE_Test as chase_ae_cg_tune
import AD_Import_FG_AE_Tune as chase_ae_fg_tune

2024-05-10 13:42:06.777152: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-10 13:42:06.777190: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-10 13:42:06.778602: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-10 13:42:06.906253: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data Import and Prep

In [2]:
# Load and concatenate dataframes
# NOTE(review): load_and_concatenate is defined in AC_Import_CG_AE_Test; what the
# two string arguments select is not visible from this notebook — confirm there.
wt_f = chase_ae_cg_tune.load_and_concatenate('wt', 'wt')
D132H_f = chase_ae_cg_tune.load_and_concatenate('D132H', 'D132H')
# Relabel the D132H rows with consecutive integers starting at 40000.
# Presumably this keeps them from colliding with the wt row labels — TODO confirm
# that the wt frame really has fewer than 40000 rows / uses labels below 40000.
D132H_f.index = range(40000, 40000 + len(D132H_f))

# Preprocessing
# Produces the train/validation feature (X) and target (y) splits used below.
# The four shapes shown in this cell's output are presumably printed inside
# preprocessing_alt — verify against the helper module.
X_train_f, X_valid_f, y_train_f, y_valid_f = chase_ae_cg_tune.preprocessing_alt(wt_f, D132H_f)

(64000, 153)
(16000, 153)
(64000, 1)
(16000, 1)


# Autoencoder Models
1. 3 layers (672: ReLU, 672, LeReLU:672, LeReLU:672) LR: 5E-5, BS: 765
2. 6 layers (608: LeReLU, 640:ReLU, 608: ReLU, 256:TanH, 192:ReLU, 480:Sigmoid) LR:1.96048E-5, BS:765
3. 5 layers (672: ReLU, 64: LeReLU, 640: TanH, 480: LeReLU, 128: ReLU) LR:3.07378E-5, BS:459
4. 6 layers (672: LeReLU, 672: ReLU, 576: ReLU, 32: LeReLU, 352: ReLU, 608: ReLU) LR:1E-5, BS:765
5. 6 layers (672: LeReLU, 672: ReLU, 672: ReLU, 32: LeReLU, 32: ReLU, 672: TanH) LR:1E-5, BS:765
6. 5 layers (256: LeReLU, 128: ReLU, 416: ReLU, 256: ReLU, 32:LeReLU) LR:1.22209E-5, BS:765
7. 4 layers (672: LeReLU, 32: LeReLU, 672: LeReLU, 672: LeReLU) LR:1E-5, BS:612
8. 6 layers (672: ReLU, 32: LeReLU, 672: LeReLU, 352: LeReLU, 32: ReLU, 32: LeReLU) LR:1E-5, BS:153
9. 6 layers (672: LeReLU, 672: ReLU, 352: ReLU, 32: LeReLU, 32: ReLU, 224: TanH) LR:1E-5, BS:765

# Trial Analysis:

### Random Seed 1

In [16]:
# Root folder holding one "Model_<id>" sub-folder (with its Training_History.csv) per model
base_directory = "AE_Testing_RS_1"

# Comparison checkpoints: every 200th epoch from 200 up to and including 2000
epochs_of_interest = list(range(200, 2001, 200))

# Function to load a model's training history and sample one column at chosen epochs
def get_validation_losses(model_id, column='val_loss', base_dir=None, epochs=None):
    """Return the value of one history column at selected epochs for a model.

    Reads ``<base_dir>/Model_<model_id>/Training_History.csv`` and looks up
    ``column`` at each requested epoch. Epochs are 1-based: epoch ``e`` is taken
    from row ``e - 1`` of the CSV.

    Parameters
    ----------
    model_id : int or str
        Identifier used to build the ``Model_<model_id>`` folder name.
    column : str, optional
        History column to sample. Defaults to ``'val_loss'``.
    base_dir : str, optional
        Directory containing the model folders. Defaults to the notebook-level
        ``base_directory``.
    epochs : iterable of int, optional
        Epochs to sample. Defaults to the notebook-level ``epochs_of_interest``.

    Returns
    -------
    dict or None
        Mapping ``epoch -> value``. A value is ``None`` when the history does
        not contain that epoch or the stored entry is NaN. ``None`` is returned
        (after printing a message) when the history file does not exist.
    """
    if base_dir is None:
        base_dir = base_directory
    if epochs is None:
        epochs = epochs_of_interest

    history_path = os.path.join(base_dir, f"Model_{model_id}", 'Training_History.csv')

    # Check if the history file exists
    if not os.path.exists(history_path):
        print(f"No history data found for Model_{model_id}")
        return None

    # Work on a plain array so the bounds check and the lookup are both positional.
    values = pd.read_csv(history_path)[column].to_numpy()

    sampled = {}
    for epoch in epochs:
        row = epoch - 1
        # Out-of-range epochs (including epoch <= 0) and NaN entries are reported
        # as None so callers can sort them last instead of comparing against NaN.
        if 0 <= row < len(values) and not pd.isna(values[row]):
            sampled[epoch] = values[row]
        else:
            sampled[epoch] = None
    return sampled

# Gather (model_id, {epoch: val_loss}) pairs for models 1-9, dropping models without a history
history_lookups = ((model_id, get_validation_losses(model_id)) for model_id in range(1, 10))
rankings = [(model_id, val_losses) for model_id, val_losses in history_lookups if val_losses]

# For every checkpoint, order the models by validation loss (missing values last) and print them
for epoch in epochs_of_interest:
    print(f"Random Seed Ranking at Epoch {epoch}:")
    ordered = sorted(
        rankings,
        key=lambda entry: float('inf') if entry[1][epoch] is None else entry[1][epoch],
    )
    for rank, (model_id, losses) in enumerate(ordered, start=1):
        loss_value = losses[epoch]
        if loss_value is None:
            print(f"  Rank {rank}: Model {model_id} - No Data Available")
            continue
        print(f"  Rank {rank}: Model {model_id} with Validation Loss: {loss_value}")
    print("\n")

Random Seed Ranking at Epoch 200:
  Rank 1: Model 4 with Validation Loss: 0.0009515552665106
  Rank 2: Model 2 with Validation Loss: 0.0009639551280997
  Rank 3: Model 9 with Validation Loss: 0.0009740320965647
  Rank 4: Model 5 with Validation Loss: 0.000990278320387
  Rank 5: Model 1 with Validation Loss: 0.0009984056232497
  Rank 6: Model 3 with Validation Loss: 0.0010074815945699
  Rank 7: Model 7 with Validation Loss: 0.0010449995752424
  Rank 8: Model 8 with Validation Loss: 0.0010664011351764
  Rank 9: Model 6 with Validation Loss: 0.0010801778407767


Random Seed Ranking at Epoch 400:
  Rank 1: Model 4 with Validation Loss: 0.0008682572515681
  Rank 2: Model 9 with Validation Loss: 0.0009103764896281
  Rank 3: Model 1 with Validation Loss: 0.0009271960007026
  Rank 4: Model 2 with Validation Loss: 0.0009314069175161
  Rank 5: Model 3 with Validation Loss: 0.0009323523845523
  Rank 6: Model 5 with Validation Loss: 0.0009427060140296
  Rank 7: Model 7 with Validation Loss: 0.0009

In [17]:
# Root folder holding one "Model_<id>" sub-folder (with its Training_History.csv) per model
base_directory = "AE_Testing_RS_1"

# Comparison checkpoints: every 200th epoch from 200 up to and including 2000
epochs_of_interest = list(range(200, 2001, 200))

# Function to load a model's training history and sample the training loss at chosen epochs
def get_losses(model_id, column='loss', base_dir=None, epochs=None):
    """Return the value of one history column at selected epochs for a model.

    Reads ``<base_dir>/Model_<model_id>/Training_History.csv`` and looks up
    ``column`` at each requested epoch. Epochs are 1-based: epoch ``e`` is taken
    from row ``e - 1`` of the CSV.

    Parameters
    ----------
    model_id : int or str
        Identifier used to build the ``Model_<model_id>`` folder name.
    column : str, optional
        History column to sample. Defaults to ``'loss'`` (the training loss).
    base_dir : str, optional
        Directory containing the model folders. Defaults to the notebook-level
        ``base_directory``.
    epochs : iterable of int, optional
        Epochs to sample. Defaults to the notebook-level ``epochs_of_interest``.

    Returns
    -------
    dict or None
        Mapping ``epoch -> value``. A value is ``None`` when the history does
        not contain that epoch or the stored entry is NaN. ``None`` is returned
        (after printing a message) when the history file does not exist.
    """
    if base_dir is None:
        base_dir = base_directory
    if epochs is None:
        epochs = epochs_of_interest

    history_path = os.path.join(base_dir, f"Model_{model_id}", 'Training_History.csv')

    # Check if the history file exists
    if not os.path.exists(history_path):
        print(f"No history data found for Model_{model_id}")
        return None

    # Work on a plain array so the bounds check and the lookup are both positional.
    values = pd.read_csv(history_path)[column].to_numpy()

    sampled = {}
    for epoch in epochs:
        row = epoch - 1
        # Out-of-range epochs (including epoch <= 0) and NaN entries are reported
        # as None so callers can sort them last instead of comparing against NaN.
        if 0 <= row < len(values) and not pd.isna(values[row]):
            sampled[epoch] = values[row]
        else:
            sampled[epoch] = None
    return sampled

# Gather (model_id, {epoch: loss}) pairs for models 1-9, dropping models without a history
history_lookups = ((model_id, get_losses(model_id)) for model_id in range(1, 10))
rankings = [(model_id, losses) for model_id, losses in history_lookups if losses]

# For every checkpoint, order the models by training loss (missing values last) and print them
for epoch in epochs_of_interest:
    print(f"Random Seed Ranking at Epoch {epoch}:")
    ordered = sorted(
        rankings,
        key=lambda entry: float('inf') if entry[1][epoch] is None else entry[1][epoch],
    )
    for rank, (model_id, losses) in enumerate(ordered, start=1):
        loss_value = losses[epoch]
        if loss_value is None:
            print(f"  Rank {rank}: Model {model_id} - No Data Available")
            continue
        print(f"  Rank {rank}: Model {model_id} with Loss: {loss_value}")
    print("\n")

Random Seed Ranking at Epoch 200:
  Rank 1: Model 4 with Loss: 0.0009231267031282
  Rank 2: Model 9 with Loss: 0.0009415397653356
  Rank 3: Model 2 with Loss: 0.0009472523233853
  Rank 4: Model 5 with Loss: 0.0009682255913503
  Rank 5: Model 1 with Loss: 0.0009788372553884
  Rank 6: Model 3 with Loss: 0.0009950962848961
  Rank 7: Model 7 with Loss: 0.0010329589713364
  Rank 8: Model 6 with Loss: 0.0010580122470855
  Rank 9: Model 8 with Loss: 0.0010600409004837


Random Seed Ranking at Epoch 400:
  Rank 1: Model 4 with Loss: 0.0008337060571648
  Rank 2: Model 9 with Loss: 0.0008517408859916
  Rank 3: Model 1 with Loss: 0.000896344485227
  Rank 4: Model 2 with Loss: 0.0009131194092333
  Rank 5: Model 3 with Loss: 0.0009193171863444
  Rank 6: Model 5 with Loss: 0.0009473410318605
  Rank 7: Model 7 with Loss: 0.0009581284830346
  Rank 8: Model 6 with Loss: 0.0009661793010309
  Rank 9: Model 8 with Loss: 0.0009707977878861


Random Seed Ranking at Epoch 600:
  Rank 1: Model 4 with Loss: 0.

### He Weight Initialization

In [18]:
# Root folder holding one "Model_<id>" sub-folder (with its Training_History.csv) per model
base_directory = "AE_Testing_RS_He"

# Comparison checkpoints: every 200th epoch from 200 up to and including 2000
epochs_of_interest = list(range(200, 2001, 200))

# Function to load a model's training history and sample one column at chosen epochs
def get_validation_losses(model_id, column='val_loss', base_dir=None, epochs=None):
    """Return the value of one history column at selected epochs for a model.

    Reads ``<base_dir>/Model_<model_id>/Training_History.csv`` and looks up
    ``column`` at each requested epoch. Epochs are 1-based: epoch ``e`` is taken
    from row ``e - 1`` of the CSV.

    Parameters
    ----------
    model_id : int or str
        Identifier used to build the ``Model_<model_id>`` folder name.
    column : str, optional
        History column to sample. Defaults to ``'val_loss'``.
    base_dir : str, optional
        Directory containing the model folders. Defaults to the notebook-level
        ``base_directory``.
    epochs : iterable of int, optional
        Epochs to sample. Defaults to the notebook-level ``epochs_of_interest``.

    Returns
    -------
    dict or None
        Mapping ``epoch -> value``. A value is ``None`` when the history does
        not contain that epoch or the stored entry is NaN. ``None`` is returned
        (after printing a message) when the history file does not exist.
    """
    if base_dir is None:
        base_dir = base_directory
    if epochs is None:
        epochs = epochs_of_interest

    history_path = os.path.join(base_dir, f"Model_{model_id}", 'Training_History.csv')

    # Check if the history file exists
    if not os.path.exists(history_path):
        print(f"No history data found for Model_{model_id}")
        return None

    # Work on a plain array so the bounds check and the lookup are both positional.
    values = pd.read_csv(history_path)[column].to_numpy()

    sampled = {}
    for epoch in epochs:
        row = epoch - 1
        # Out-of-range epochs (including epoch <= 0) and NaN entries are reported
        # as None so callers can sort them last instead of comparing against NaN.
        if 0 <= row < len(values) and not pd.isna(values[row]):
            sampled[epoch] = values[row]
        else:
            sampled[epoch] = None
    return sampled

# Gather (model_id, {epoch: val_loss}) pairs for models 1-9, dropping models without a history
history_lookups = ((model_id, get_validation_losses(model_id)) for model_id in range(1, 10))
rankings = [(model_id, val_losses) for model_id, val_losses in history_lookups if val_losses]

# For every checkpoint, order the models by validation loss (missing values last) and print them
for epoch in epochs_of_interest:
    print(f"He Weight Initialization Ranking at Epoch {epoch}:")
    ordered = sorted(
        rankings,
        key=lambda entry: float('inf') if entry[1][epoch] is None else entry[1][epoch],
    )
    for rank, (model_id, losses) in enumerate(ordered, start=1):
        loss_value = losses[epoch]
        if loss_value is None:
            print(f"  Rank {rank}: Model {model_id} - No Data Available")
            continue
        print(f"  Rank {rank}: Model {model_id} with Validation Loss: {loss_value}")
    print("\n")

He Weight Initialization Ranking at Epoch 200:
  Rank 1: Model 4 with Validation Loss: 0.0009397438261657
  Rank 2: Model 2 with Validation Loss: 0.0009763558628037
  Rank 3: Model 5 with Validation Loss: 0.0010077318875119
  Rank 4: Model 3 with Validation Loss: 0.0010276496177539
  Rank 5: Model 9 with Validation Loss: 0.0010441904887557
  Rank 6: Model 8 with Validation Loss: 0.001055012922734
  Rank 7: Model 1 with Validation Loss: 0.00106415245682
  Rank 8: Model 7 with Validation Loss: 0.0010659013641998
  Rank 9: Model 6 with Validation Loss: 0.0010865200310945


He Weight Initialization Ranking at Epoch 400:
  Rank 1: Model 4 with Validation Loss: 0.0008798991912044
  Rank 2: Model 2 with Validation Loss: 0.00094357249327
  Rank 3: Model 3 with Validation Loss: 0.0009581069462001
  Rank 4: Model 5 with Validation Loss: 0.0009632827714085
  Rank 5: Model 8 with Validation Loss: 0.0009969380917027
  Rank 6: Model 7 with Validation Loss: 0.0010129819856956
  Rank 7: Model 6 with V

In [19]:
# Root folder holding one "Model_<id>" sub-folder (with its Training_History.csv) per model
base_directory = "AE_Testing_RS_He"

# Comparison checkpoints: every 200th epoch from 200 up to and including 2000
epochs_of_interest = list(range(200, 2001, 200))

# Function to load a model's training history and sample the training loss at chosen epochs
def get_losses(model_id, column='loss', base_dir=None, epochs=None):
    """Return the value of one history column at selected epochs for a model.

    Reads ``<base_dir>/Model_<model_id>/Training_History.csv`` and looks up
    ``column`` at each requested epoch. Epochs are 1-based: epoch ``e`` is taken
    from row ``e - 1`` of the CSV.

    Parameters
    ----------
    model_id : int or str
        Identifier used to build the ``Model_<model_id>`` folder name.
    column : str, optional
        History column to sample. Defaults to ``'loss'`` (the training loss).
    base_dir : str, optional
        Directory containing the model folders. Defaults to the notebook-level
        ``base_directory``.
    epochs : iterable of int, optional
        Epochs to sample. Defaults to the notebook-level ``epochs_of_interest``.

    Returns
    -------
    dict or None
        Mapping ``epoch -> value``. A value is ``None`` when the history does
        not contain that epoch or the stored entry is NaN. ``None`` is returned
        (after printing a message) when the history file does not exist.
    """
    if base_dir is None:
        base_dir = base_directory
    if epochs is None:
        epochs = epochs_of_interest

    history_path = os.path.join(base_dir, f"Model_{model_id}", 'Training_History.csv')

    # Check if the history file exists
    if not os.path.exists(history_path):
        print(f"No history data found for Model_{model_id}")
        return None

    # Work on a plain array so the bounds check and the lookup are both positional.
    values = pd.read_csv(history_path)[column].to_numpy()

    sampled = {}
    for epoch in epochs:
        row = epoch - 1
        # Out-of-range epochs (including epoch <= 0) and NaN entries are reported
        # as None so callers can sort them last instead of comparing against NaN.
        if 0 <= row < len(values) and not pd.isna(values[row]):
            sampled[epoch] = values[row]
        else:
            sampled[epoch] = None
    return sampled

# Gather (model_id, {epoch: loss}) pairs for models 1-9, dropping models without a history
history_lookups = ((model_id, get_losses(model_id)) for model_id in range(1, 10))
rankings = [(model_id, losses) for model_id, losses in history_lookups if losses]

# For every checkpoint, order the models by training loss (missing values last) and print them
for epoch in epochs_of_interest:
    print(f"He Weight Initialization Ranking at Epoch {epoch}:")
    ordered = sorted(
        rankings,
        key=lambda entry: float('inf') if entry[1][epoch] is None else entry[1][epoch],
    )
    for rank, (model_id, losses) in enumerate(ordered, start=1):
        loss_value = losses[epoch]
        if loss_value is None:
            print(f"  Rank {rank}: Model {model_id} - No Data Available")
            continue
        print(f"  Rank {rank}: Model {model_id} with Loss: {loss_value}")
    print("\n")

He Weight Initialization Ranking at Epoch 200:
  Rank 1: Model 2 with Loss: 0.0008984691812656
  Rank 2: Model 4 with Loss: 0.0009245949331671
  Rank 3: Model 5 with Loss: 0.0009739966480992
  Rank 4: Model 1 with Loss: 0.0009793866192921
  Rank 5: Model 3 with Loss: 0.0010151712922379
  Rank 6: Model 9 with Loss: 0.0010275864042341
  Rank 7: Model 8 with Loss: 0.001036527683027
  Rank 8: Model 7 with Loss: 0.0010586376301944
  Rank 9: Model 6 with Loss: 0.0010794787667691


He Weight Initialization Ranking at Epoch 400:
  Rank 1: Model 4 with Loss: 0.0008342639775946
  Rank 2: Model 2 with Loss: 0.0008412000024691
  Rank 3: Model 5 with Loss: 0.0008857836364768
  Rank 4: Model 1 with Loss: 0.0009265818516723
  Rank 5: Model 3 with Loss: 0.0009342361008748
  Rank 6: Model 8 with Loss: 0.000967348634731
  Rank 7: Model 7 with Loss: 0.0009723188122734
  Rank 8: Model 6 with Loss: 0.0010002328781411
  Rank 9: Model 9 with Loss: 0.0010336514096707


He Weight Initialization Ranking at Epoc

## Model Analysis:

In [8]:
# Roots of the two experiment trees whose histories are overlaid
base_path_rs_1, base_path_rs_he = 'AE_Testing_RS_1', 'AE_Testing_RS_He'

# Line colours per experiment, ordered as (loss, val_loss)
colors_rs_1 = 'darkred', 'lightcoral'
colors_rs_he = 'darkblue', 'lightblue'

# How many Model_<n> folders to process, and how many history rows to keep per model
num_models, epoch_limit = 9, 2000

# Destination folder for the figures; created up front if it is missing
output_dir = 'AE_Testing_Overlaid_Plots_0_2000'
os.makedirs(output_dir, exist_ok=True)

# Load one training history and keep only its first `epoch_limit` rows
def get_training_data(file_path):
    """Return the ``loss`` and ``val_loss`` columns of a history CSV.

    The CSV at ``file_path`` is read with pandas and truncated to the first
    ``epoch_limit`` rows (notebook-level setting) before the two columns are
    returned as a ``(loss, val_loss)`` tuple of Series.
    """
    history = pd.read_csv(file_path).iloc[:epoch_limit]
    return history['loss'], history['val_loss']

# Plotting for each model: overlay training/validation loss of the RS_1 and He runs
for model_num in range(1, num_models + 1):
    model_folder_name = f'Model_{model_num}'
    file_rs_1 = os.path.join(base_path_rs_1, model_folder_name, 'Training_History.csv')
    file_rs_he = os.path.join(base_path_rs_he, model_folder_name, 'Training_History.csv')

    # Fetch data for each file
    loss_rs_1, val_loss_rs_1 = get_training_data(file_rs_1)
    loss_rs_he, val_loss_rs_he = get_training_data(file_rs_he)

    # (series, legend label, colour) for every curve drawn on the figure
    curves = (
        (loss_rs_1, 'RS 1 Loss', colors_rs_1[0]),
        (val_loss_rs_1, 'RS 1 Val Loss', colors_rs_1[1]),
        (loss_rs_he, 'He Loss', colors_rs_he[0]),
        (val_loss_rs_he, 'He Val Loss', colors_rs_he[1]),
    )

    # Plot data
    fig, ax = plt.subplots(figsize=(10, 6))
    for series, label, color in curves:
        # Size the x-axis from each curve's own length: reusing the RS_1 length
        # for the He curves makes matplotlib raise when the two histories
        # contain a different number of epochs.
        ax.plot(range(len(series)), series, label=label, color=color)

    # Adding plot details
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'Loss & Validation Loss Comparison for {model_folder_name}')
    ax.legend()
    ax.grid(True)

    # Save the plot to the output directory, then release the figure's memory
    fig.savefig(os.path.join(output_dir, f'Loss_Comparison_{model_folder_name}.png'))
    plt.close(fig)

In [9]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Roots of the two experiment trees whose histories are overlaid
base_path_rs_1, base_path_rs_he = 'AE_Testing_RS_1', 'AE_Testing_RS_He'

# Line colours per experiment, ordered as (loss, val_loss)
colors_rs_1 = 'darkred', 'lightcoral'
colors_rs_he = 'darkblue', 'lightblue'

# How many Model_<n> folders to process
num_models = 9

# Row window of each history to plot: rows [epoch_start, epoch_limit)
epoch_start, epoch_limit = 50, 2000

# Destination folder for the figures; created up front if it is missing
output_dir = 'AE_Testing_Overlaid_Plots_51_2000'
os.makedirs(output_dir, exist_ok=True)

# Load one training history and return a row window of its two loss columns
def get_training_data(file_path, start_epoch, max_epochs):
    """Return ``(loss, val_loss)`` for rows ``start_epoch`` up to ``max_epochs``.

    The CSV at ``file_path`` is read with pandas. The window is positional,
    starts at row ``start_epoch`` and stops before row ``max_epochs`` or at the
    end of the file, whichever comes first. The returned Series keep the row
    labels of the original frame.
    """
    history = pd.read_csv(file_path)
    # Clamp the upper bound to the number of rows actually present
    stop = min(max_epochs, len(history))
    window = history.iloc[start_epoch:stop]
    return window['loss'], window['val_loss']

# Plotting for each model: overlay training/validation loss of the RS_1 and He runs
for model_num in range(1, num_models + 1):
    model_folder_name = f'Model_{model_num}'
    file_rs_1 = os.path.join(base_path_rs_1, model_folder_name, 'Training_History.csv')
    file_rs_he = os.path.join(base_path_rs_he, model_folder_name, 'Training_History.csv')

    # Fetch data for each file, skipping the first `epoch_start` rows
    loss_rs_1, val_loss_rs_1 = get_training_data(file_rs_1, epoch_start, epoch_limit)
    loss_rs_he, val_loss_rs_he = get_training_data(file_rs_he, epoch_start, epoch_limit)

    # (series, legend label, colour) for every curve drawn on the figure
    curves = (
        (loss_rs_1, 'RS 1 Loss', colors_rs_1[0]),
        (val_loss_rs_1, 'RS 1 Val Loss', colors_rs_1[1]),
        (loss_rs_he, 'He Loss', colors_rs_he[0]),
        (val_loss_rs_he, 'He Val Loss', colors_rs_he[1]),
    )

    # Plot data
    fig, ax = plt.subplots(figsize=(10, 6))
    for series, label, color in curves:
        # Size the x-axis from each curve's own length: reusing the RS_1 length
        # for the He curves makes matplotlib raise when the two histories
        # contain a different number of epochs.
        epochs = range(epoch_start, epoch_start + len(series))
        ax.plot(epochs, series, label=label, color=color)

    # Adding plot details
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'Loss Comparison for {model_folder_name} (Epochs {epoch_start}-{epoch_limit - 1})')
    ax.legend()
    ax.grid(True)

    # Save the plot to the output directory, then release the figure's memory
    fig.savefig(os.path.join(output_dir, f'Loss_Comparison_{model_folder_name}.png'))
    plt.close(fig)


In [12]:
# Roots of the two experiment trees being compared
base_path_rs_1, base_path_rs_he = 'AE_Testing_RS_1', 'AE_Testing_RS_He'

# How many Model_<n> folders to scan under each root
num_models = 9

# Function to read the entire CSV and return training history data
def get_training_data(file_path):
    """Load a training-history CSV and return its two loss columns.

    Parameters
    ----------
    file_path : str
        Path of the CSV file; it must contain ``loss`` and ``val_loss`` columns.

    Returns
    -------
    tuple of pandas.Series
        ``(loss, val_loss)`` covering every row of the file.
    """
    # Parse the file once and take both columns from the same frame instead of
    # reading and parsing the CSV separately for each column.
    history = pd.read_csv(file_path)
    return history['loss'], history['val_loss']

# Report the smallest training and validation loss of one run
def print_min_values(loss, val_loss, label_prefix):
    """Print the minimum of ``loss`` and of ``val_loss`` and where each occurs.

    The "epoch" shown is the Series index label returned by ``idxmin()``, i.e.
    the row label of the first occurrence of the minimum. Each line is prefixed
    with ``label_prefix``. Nothing is returned.
    """
    # Evaluate all four quantities before printing anything
    best_loss, best_loss_epoch = loss.min(), loss.idxmin()
    best_val_loss, best_val_loss_epoch = val_loss.min(), val_loss.idxmin()

    print(f"{label_prefix} Minimum Loss: {best_loss:.6f} at epoch {best_loss_epoch}")
    print(f"{label_prefix} Minimum Validation Loss: {best_val_loss:.6f} at epoch {best_val_loss_epoch}")

# For every model, load both histories in full and report their minima
for model_num in range(1, num_models + 1):
    model_folder_name = f'Model_{model_num}'
    file_rs_1, file_rs_he = (
        os.path.join(root, model_folder_name, 'Training_History.csv')
        for root in (base_path_rs_1, base_path_rs_he)
    )

    # Both files are read before anything is printed for this model
    loss_rs_1, val_loss_rs_1 = get_training_data(file_rs_1)
    loss_rs_he, val_loss_rs_he = get_training_data(file_rs_he)

    print(f"\nResults for {model_folder_name}:")
    runs = (
        (loss_rs_1, val_loss_rs_1, "RS_1"),
        (loss_rs_he, val_loss_rs_he, "He_1"),
    )
    for run_loss, run_val_loss, run_label in runs:
        print_min_values(run_loss, run_val_loss, run_label)



Results for Model_1:
RS_1 Minimum Loss: 0.000714 at epoch 1988
RS_1 Minimum Validation Loss: 0.000859 at epoch 1833
He_1 Minimum Loss: 0.000750 at epoch 1988
He_1 Minimum Validation Loss: 0.001013 at epoch 887

Results for Model_2:
RS_1 Minimum Loss: 0.000690 at epoch 1971
RS_1 Minimum Validation Loss: 0.000764 at epoch 1978
He_1 Minimum Loss: 0.000638 at epoch 1995
He_1 Minimum Validation Loss: 0.000914 at epoch 1002

Results for Model_3:
RS_1 Minimum Loss: 0.000768 at epoch 1957
RS_1 Minimum Validation Loss: 0.000788 at epoch 1928
He_1 Minimum Loss: 0.000780 at epoch 1970
He_1 Minimum Validation Loss: 0.000813 at epoch 1980

Results for Model_4:
RS_1 Minimum Loss: 0.000671 at epoch 1948
RS_1 Minimum Validation Loss: 0.000717 at epoch 1999
He_1 Minimum Loss: 0.000666 at epoch 1991
He_1 Minimum Validation Loss: 0.000743 at epoch 1995

Results for Model_5:
RS_1 Minimum Loss: 0.000681 at epoch 1974
RS_1 Minimum Validation Loss: 0.000753 at epoch 1974
He_1 Minimum Loss: 0.000675 at epoch

In [14]:
import os
import pandas as pd
import numpy as np

# Roots of the two experiment trees being compared
base_path_rs_1, base_path_rs_he = 'AE_Testing_RS_1', 'AE_Testing_RS_He'

# How many Model_<n> folders to scan under each root
num_models = 9

# Inclusive (first_row, last_row) windows of 200 rows each, covering rows 0-1999
epoch_ranges = [(start, start + 199) for start in range(0, 2000, 200)]

# Load a complete training history
def get_training_data(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame."""
    history = pd.read_csv(file_path)
    return history

# Report the median training and validation loss inside one row window
def print_median_values(df, range_start, range_end, label_prefix):
    """Print the median ``loss`` / ``val_loss`` over rows ``range_start..range_end``.

    The window is positional and inclusive on both ends. Next to each median
    the function prints the index label of the row whose value lies closest to
    that median (the first such row on ties). Nothing is returned.
    """
    window = df.iloc[range_start:range_end + 1]
    train, validation = window['loss'], window['val_loss']

    median_loss = np.median(train)
    median_val_loss = np.median(validation)

    # The median need not be an observed value (even-sized windows), so report
    # the row that is nearest to it instead of looking for an exact match.
    median_loss_epoch = (train - median_loss).abs().idxmin()
    median_val_loss_epoch = (validation - median_val_loss).abs().idxmin()

    print(f"{label_prefix} Range {range_start}-{range_end}: Median Loss {median_loss:.6f} at epoch {median_loss_epoch}, Median Validation Loss {median_val_loss:.6f} at epoch {median_val_loss_epoch}")

# For every model, load both histories in full and report the medians per row window
for model_num in range(1, num_models + 1):
    model_folder_name = f'Model_{model_num}'
    file_rs_1, file_rs_he = (
        os.path.join(root, model_folder_name, 'Training_History.csv')
        for root in (base_path_rs_1, base_path_rs_he)
    )

    # Both files are read before anything is printed for this model
    data_rs_1 = get_training_data(file_rs_1)
    data_rs_he = get_training_data(file_rs_he)

    # (header line, history frame, line prefix) for each of the two runs
    reports = (
        (f"\nResults for {model_folder_name} (RS_1):", data_rs_1, "RS_1"),
        (f"Results for {model_folder_name} (He_1):", data_rs_he, "He_1"),
    )
    for header, history, label in reports:
        print(header)
        for start, end in epoch_ranges:
            print_median_values(history, start, end, label)



Results for Model_1 (RS_1):
RS_1 Range 0-199: Median Loss 0.001060 at epoch 99, Median Validation Loss 0.001059 at epoch 99
RS_1 Range 200-399: Median Loss 0.000932 at epoch 301, Median Validation Loss 0.000956 at epoch 296
RS_1 Range 400-599: Median Loss 0.000868 at epoch 501, Median Validation Loss 0.000914 at epoch 493
RS_1 Range 600-799: Median Loss 0.000828 at epoch 697, Median Validation Loss 0.000892 at epoch 739
RS_1 Range 800-999: Median Loss 0.000800 at epoch 893, Median Validation Loss 0.000880 at epoch 900
RS_1 Range 1000-1199: Median Loss 0.000779 at epoch 1098, Median Validation Loss 0.000872 at epoch 1132
RS_1 Range 1200-1399: Median Loss 0.000762 at epoch 1287, Median Validation Loss 0.000868 at epoch 1221
RS_1 Range 1400-1599: Median Loss 0.000746 at epoch 1489, Median Validation Loss 0.000865 at epoch 1463
RS_1 Range 1600-1799: Median Loss 0.000733 at epoch 1704, Median Validation Loss 0.000864 at epoch 1733
RS_1 Range 1800-1999: Median Loss 0.000720 at epoch 1897, M

In [15]:
import os
import pandas as pd
import numpy as np

# Roots of the two experiment trees being compared
base_path_rs_1, base_path_rs_he = 'AE_Testing_RS_1', 'AE_Testing_RS_He'

# How many Model_<n> folders to scan under each root
num_models = 9

# Inclusive (first_row, last_row) windows of 200 rows each, covering rows 0-1999
epoch_ranges = [(start, start + 199) for start in range(0, 2000, 200)]

# Load a complete training history
def get_training_data(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame."""
    history = pd.read_csv(file_path)
    return history

# Report median and spread of training and validation loss inside one row window
def print_statistics(df, range_start, range_end, label_prefix):
    """Print median and standard deviation of ``loss`` / ``val_loss`` for a window.

    The window covers rows ``range_start..range_end`` (positional, inclusive).
    Next to each median the function prints the index label of the row whose
    value lies closest to it (the first such row on ties). The standard
    deviation is obtained with ``np.std`` using its default ``ddof=0``.
    Nothing is returned.
    """
    window = df.iloc[range_start:range_end + 1]
    train, validation = window['loss'], window['val_loss']

    median_loss = np.median(train)
    median_val_loss = np.median(validation)

    # The median need not be an observed value (even-sized windows), so report
    # the row that is nearest to it instead of looking for an exact match.
    median_loss_epoch = (train - median_loss).abs().idxmin()
    median_val_loss_epoch = (validation - median_val_loss).abs().idxmin()

    # Spread of each column within the window
    std_loss = np.std(train)
    std_val_loss = np.std(validation)

    print(f"{label_prefix} Range {range_start}-{range_end}:")
    print(f"- Median Loss {median_loss:.6f} at epoch {median_loss_epoch}, Std Dev {std_loss:.6f}")
    print(f"- Median Validation Loss {median_val_loss:.6f} at epoch {median_val_loss_epoch}, Std Dev {std_val_loss:.6f}")

# For every model, load both histories in full and report the statistics per row window
for model_num in range(1, num_models + 1):
    model_folder_name = f'Model_{model_num}'
    file_rs_1, file_rs_he = (
        os.path.join(root, model_folder_name, 'Training_History.csv')
        for root in (base_path_rs_1, base_path_rs_he)
    )

    # Both files are read before anything is printed for this model
    data_rs_1 = get_training_data(file_rs_1)
    data_rs_he = get_training_data(file_rs_he)

    # (header line, history frame, line prefix) for each of the two runs
    reports = (
        (f"\nResults for {model_folder_name} (RS_1):", data_rs_1, "RS_1"),
        (f"\nResults for {model_folder_name} (He_1):", data_rs_he, "He_1"),
    )
    for header, history, label in reports:
        print(header)
        for start, end in epoch_ranges:
            print_statistics(history, start, end, label)


Results for Model_1 (RS_1):
RS_1 Range 0-199:
- Median Loss 0.001060 at epoch 99, Std Dev 0.000244
- Median Validation Loss 0.001059 at epoch 99, Std Dev 0.000116
RS_1 Range 200-399:
- Median Loss 0.000932 at epoch 301, Std Dev 0.000024
- Median Validation Loss 0.000956 at epoch 296, Std Dev 0.000017
RS_1 Range 400-599:
- Median Loss 0.000868 at epoch 501, Std Dev 0.000014
- Median Validation Loss 0.000914 at epoch 493, Std Dev 0.000009
RS_1 Range 600-799:
- Median Loss 0.000828 at epoch 697, Std Dev 0.000010
- Median Validation Loss 0.000892 at epoch 739, Std Dev 0.000005
RS_1 Range 800-999:
- Median Loss 0.000800 at epoch 893, Std Dev 0.000007
- Median Validation Loss 0.000880 at epoch 900, Std Dev 0.000004
RS_1 Range 1000-1199:
- Median Loss 0.000779 at epoch 1098, Std Dev 0.000005
- Median Validation Loss 0.000872 at epoch 1132, Std Dev 0.000003
RS_1 Range 1200-1399:
- Median Loss 0.000762 at epoch 1287, Std Dev 0.000005
- Median Validation Loss 0.000868 at epoch 1221, Std Dev 0.0

## Training Time:
- Model 1: 30s/epoch
- Model 2: 52s/epoch
- Model 3: 15.5s/epoch
- Model 4: 33s/epoch
- Model 5: 30s/epoch
- Model 6: 8s/epoch
- Model 7: 17s/epoch
- Model 8: 12s/epoch
- Model 9: (not recorded) s/epoch