# Libraries

In [None]:
import numpy as np
import chardet
import joblib
from collections import defaultdict
import os
import re
import json

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt 
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
import plotly.colors as pc
%matplotlib inline  
import psutil
from pathlib import Path
#from Functions import *
import glob
from tqdm import tqdm
import time
import psutil
from sklearn.metrics import mean_squared_error, r2_score



# Test error accumulation over Data Length

In [None]:
# Loading Data
# Read the ground-truth ("true") and predicted ("pred") test series that each
# model wrote to 'Results/testing for comparison/'.
# NOTE(review): a different number of trailing rows is dropped per model
# (5, 10, 2, 0) - presumably to bring all series to a common length; confirm
# against the notebooks that exported these files.
# ARX (one file pair per degree of freedom)
ARX_testing_heave_true=pd.read_csv('Results/testing for comparison/ARX_true_heave.csv')[:-5]
ARX_testing_heave_pred=pd.read_csv('Results/testing for comparison/ARX_pred_heave.csv')[:-5]

ARX_testing_pitch_true=pd.read_csv('Results/testing for comparison/ARX_true_pitch.csv')[:-5]
ARX_testing_pitch_pred=pd.read_csv('Results/testing for comparison/ARX_pred_pitch.csv')[:-5]

ARX_testing_pendulum_true=pd.read_csv('Results/testing for comparison/ARX_true_pendulum.csv')[:-10]
ARX_testing_pendulum_pred=pd.read_csv('Results/testing for comparison/ARX_pred_pendulum.csv')[:-10]

#XGB (a single file pair; last 2 rows dropped)
XGB_testing_true=pd.read_csv('Results/testing for comparison/xgb_true.csv')[:-2]
XGB_testing_pred=pd.read_csv('Results/testing for comparison/xgb_pred.csv')[:-2]

#LSTM (a single file pair; used untrimmed)
LSTM_testing_true=pd.read_csv('Results/testing for comparison/lstm_true.csv')
LSTM_testing_pred=pd.read_csv('Results/testing for comparison/lstm_pred.csv')


In [None]:

# Print the row count of every loaded frame so that length mismatches between
# the models are visible before the series are compared.
frame_lengths = [
    ("ARX Heave True:", ARX_testing_heave_true),
    ("ARX Heave Pred:", ARX_testing_heave_pred),
    ("ARX Pitch True:", ARX_testing_pitch_true),
    ("ARX Pitch Pred:", ARX_testing_pitch_pred),
    ("ARX Pendulum True:", ARX_testing_pendulum_true),
    ("ARX Pendulum Pred:", ARX_testing_pendulum_pred),
    ("XGB True:", XGB_testing_true),
    ("XGB Pred:", XGB_testing_pred),
    ("LSTM True:", LSTM_testing_true),
    ("LSTM Pred:", LSTM_testing_pred),
]

for label, frame in frame_lengths:
    print(label, len(frame))

In [None]:
# Validate that the ground-truth pendulum series is identical across the three
# model exports (ARX, XGB, LSTM), i.e. all models were scored on the same data.

a = ARX_testing_pendulum_true['pendulum'].to_numpy()
b = XGB_testing_true['pendulum'].to_numpy()
c = LSTM_testing_true['pendulum'].to_numpy()

# np.array_equal compares the shapes first and then every element, so series
# of different lengths simply report False instead of failing to broadcast.
are_all_equal = np.array_equal(a, b) and np.array_equal(b, c)

print("All equal:", are_all_equal)


In [None]:
# Point-wise absolute error |true - predicted| for every model and DoF.

# ARX: one true/pred frame pair per DoF
errors_ARX_heave = (ARX_testing_heave_true['heave'] - ARX_testing_heave_pred['heave']).abs()
errors_ARX_pitch = (ARX_testing_pitch_true['pitch'] - ARX_testing_pitch_pred['pitch']).abs()
errors_ARX_pendulum = (ARX_testing_pendulum_true['pendulum'] - ARX_testing_pendulum_pred['pendulum']).abs()

# XGB: all DoFs share one frame pair with identical column names
errors_XGB_heave = (XGB_testing_true['heave'] - XGB_testing_pred['heave']).abs()
errors_XGB_pitch = (XGB_testing_true['pitch'] - XGB_testing_pred['pitch']).abs()
errors_XGB_pendulum = (XGB_testing_true['pendulum'] - XGB_testing_pred['pendulum']).abs()

# LSTM: prediction columns carry a '_pred' suffix
errors_LSTM_heave = (LSTM_testing_true['heave'] - LSTM_testing_pred['heave_pred']).abs()
errors_LSTM_pitch = (LSTM_testing_true['pitch'] - LSTM_testing_pred['pitch_pred']).abs()
errors_LSTM_pendulum = (LSTM_testing_true['pendulum'] - LSTM_testing_pred['pendulum_pred']).abs()

# Sample indices for the x-axis, sized from the ARX heave error series
time_steps = np.arange(errors_ARX_heave.shape[0])


In [None]:
# Display the shape of the x-axis index vector (its length equals the number of ARX heave error samples)
time_steps.shape

In [None]:

# Plot the absolute error of every model over the test record, one figure per
# degree of freedom (DoF), and save each figure as a PNG.

# Degrees of freedom (DoF) for which plots will be generated
dofs = ['heave', 'pitch', 'pendulum']

# Error series per DoF and model. They are referenced explicitly rather than
# through globals()[f"errors_..."] so that a missing or misspelled variable
# fails here, at definition time, and the data flow is visible to readers.
abs_errors = {
    'heave': {'ARX': errors_ARX_heave, 'XGB': errors_XGB_heave, 'LSTM': errors_LSTM_heave},
    'pitch': {'ARX': errors_ARX_pitch, 'XGB': errors_XGB_pitch, 'LSTM': errors_LSTM_pitch},
    'pendulum': {'ARX': errors_ARX_pendulum, 'XGB': errors_XGB_pendulum, 'LSTM': errors_LSTM_pendulum},
}

# (model key, legend name, line colour, dash style) in drawing order
error_traces = [
    ('ARX', 'ARX', 'blue', 'dash'),
    ('XGB', 'XGBoost-NARX', 'red', 'dot'),
    ('LSTM', 'LSTM', 'green', 'dashdot'),
]

# Create the output directory once, before any figure is written
output_dir = "Results/error vs time/"
os.makedirs(output_dir, exist_ok=True)

# Loop over each DoF
for dof in dofs:
    # Create the figure for absolute errors over time for the current DoF
    fig = make_subplots(rows=1, cols=1)

    # One line per model (lines only, no markers)
    for model_key, legend_name, colour, dash in error_traces:
        errors = abs_errors[dof][model_key]
        fig.add_trace(go.Scatter(
            # The series are trimmed differently per model and DoF, so the
            # x-axis is sized from the series actually being plotted instead
            # of reusing one vector derived from the ARX heave length.
            x=np.arange(len(errors)),
            y=errors,
            mode="lines",
            name=legend_name,
            line=dict(color=colour, dash=dash),
        ))

    # Update layout and display
    fig.update_layout(
        title_text=f"Absolute Error for {dof.capitalize()} Over Time - Tp = 6.8 s, Hs = 1 m",
        template="plotly_white",
        height=420,
        showlegend=True,
        plot_bgcolor="white",
    )

    # Heave is labelled in metres; pitch and pendulum are labelled in degrees
    if dof == 'heave':
        fig.update_yaxes(title_text="Absolute Error (m)", row=1, col=1)
    else:
        fig.update_yaxes(title_text="Absolute Error (°)", row=1, col=1)

    # Set the x-axis label
    fig.update_xaxes(title_text="Time Step")

    # Show the figure, then save it next to the other results
    fig.show()
    fig.write_image(f"{output_dir}{dof}_absolute_error_plot.png")


# Sensitivity to initial conditions

In [None]:
# R² scores for the "original" runs: one row per (model, dataset split),
# one column per degree of freedom.
_models = ["ARX", "XGBoost-NARX", "LSTM"]
_splits = ["Training", "Validation", "Testing"]

original_r2 = {
    # every model name repeated once per split ...
    "Model": [model for model in _models for _ in _splits],
    # ... and the split sequence repeated once per model
    "Dataset": _splits * len(_models),
    "Heave": [0.989, 0.980, 0.972, 0.959, 0.965, 0.969, 0.994, 0.994, 0.994],
    "Pitch": [0.991, 0.959, 0.978, 0.892, 0.893, 0.901, 0.985, 0.980, 0.982],
    "Pendulum": [0.944, 0.889, 0.935, 0.840, 0.841, 0.846, 0.989, 0.987, 0.976],
}

# Tabulate the scores and display them
df_r2_original = pd.DataFrame(original_r2)

df_r2_original

In [None]:
# R² scores for the "trimmed" runs: one row per (model, dataset split),
# one column per degree of freedom.
_models = ["ARX", "XGBoost-NARX", "LSTM"]
_splits = ["Training", "Validation", "Testing"]

trimmed_r2 = {
    # every model name repeated once per split ...
    "Model": [model for model in _models for _ in _splits],
    # ... and the split sequence repeated once per model
    "Dataset": _splits * len(_models),
    "Heave": [0.989, 0.989, 0.991, 0.959, 0.966, 0.970, 0.994, 0.994, 0.995],
    "Pitch": [0.991, 0.991, 0.992, 0.893, 0.894, 0.897, 0.985, 0.984, 0.986],
    "Pendulum": [0.960, 0.962, 0.965, 0.841, 0.846, 0.845, 0.989, 0.988, 0.991],
}

# Tabulate the scores and display them
df_r2_trimmed = pd.DataFrame(trimmed_r2)

df_r2_trimmed

In [None]:
# Compare original and trimmed R² scores: one subplot per DoF, one colour per
# model, circles for the original scores and crosses for the trimmed ones.

# Marker colour of each model (also fixes the drawing order of the models)
colors = {
    "ARX": "blue",
    "XGBoost-NARX": "red",
    "LSTM": "green"
}

dof_columns = ["Heave", "Pitch", "Pendulum"]

# One subplot column per DoF
fig = make_subplots(rows=1, cols=3, subplot_titles=dof_columns)

for idx, dof in enumerate(dof_columns, start=1):
    # Legend entries are emitted only by the last subplot (Pendulum)
    in_last_subplot = idx == 3

    for model in ['ARX', 'XGBoost-NARX', 'LSTM']:
        # (legend suffix, score table, marker symbol), original first
        variants = (
            ("Original", df_r2_original, "circle"),
            ("Trimmed", df_r2_trimmed, "x"),
        )
        for tag, table, symbol in variants:
            # Scores of this model for the current DoF, in dataset order
            scores = table.loc[table['Model'] == model, dof]
            fig.add_trace(
                go.Scatter(
                    x=table['Dataset'].unique(),
                    y=scores,
                    mode='markers',
                    name=f'{model} - {tag}',
                    marker=dict(color=colors[model], symbol=symbol, size=10),
                    legendgroup=model,
                    showlegend=in_last_subplot,
                ),
                row=1,
                col=idx,
            )

# Figure-level layout; axis titles are given per subplot axis
fig.update_layout(
    title="",
    xaxis_title="Dataset",
    yaxis_title="R² Score",
    template="plotly_white",
    height=600,
    legend_title="Model and Dataset",
    showlegend=True,
    xaxis2=dict(title="Dataset"),
    xaxis3=dict(title=""),
)

# Blank the x-axis title of the first subplot (Heave) again
fig.update_xaxes(title="", row=1, col=1)

fig.show()


# Noise handling

In [None]:
# Load the prepared test data and keep only the rows belonging to one test case
df_test_full = pd.read_csv('prepared_data/test_data.csv')
# define test case
# NOTE(review): the name presumably encodes Tp = 6.8 s and Hs = 2 m - confirm against the data set
trainin_case='Tp6p8s_Hs2m'
# .copy() so that later column edits do not touch df_test_full
df_case_test = df_test_full[df_test_full['test_name'] == trainin_case].copy()
# Summary statistics of the selected case
df_case_test.describe()

In [None]:
def fourier(signal, f_samp=None):
    """Return the one-sided amplitude spectrum of a real-valued signal.

    The FFT is scaled by 2/N so that the magnitude of a bin equals the
    amplitude of the sinusoid at that frequency. Bins without a
    negative-frequency twin are scaled by 1/N instead: the constant (f = 0)
    bin and, when N is even, the Nyquist bin (f = f_samp / 2). Doubling
    those bins would overstate their amplitude by a factor of two.

    Usage: ``FT, freq = fourier(signal, f_samp)`` or ``FT = fourier(signal)``.

    Parameters
    ----------
    signal : array-like
        Signal samples; flattened to a 1-D array.
    f_samp : float, optional
        Sampling frequency in Hz. Required to obtain the frequency vector.

    Returns
    -------
    FT : complex numpy.ndarray
        The floor(N/2) + 1 Fourier coefficients for non-negative frequencies.
    freq : numpy.ndarray
        Frequencies in Hz matching ``FT``. Only returned when ``f_samp`` is
        given, in which case the result is the tuple ``(FT, freq)``.

    Raises
    ------
    ValueError
        If ``signal`` is empty.
    """
    # Work on a flat 1-D array regardless of the input container
    signal = np.asarray(signal).flatten()
    N = len(signal)
    if N == 0:
        raise ValueError("fourier() requires a non-empty signal")

    # Apply FFT with 2/N normalization
    FT = 2 * np.fft.fft(signal) / N

    # Constant (f=0) mode has no mirrored partner: undo the doubling
    FT[0] = FT[0] / 2

    # Keep only the non-negative frequencies
    N_last = N // 2 + 1
    FT = FT[:N_last]

    # For even N the last kept bin is the Nyquist bin, which is also unpaired
    if N % 2 == 0:
        FT[-1] = FT[-1] / 2

    if f_samp is None:
        return FT

    # Record duration in seconds; bin k lies at k / T_d Hz
    T_d = N / f_samp
    freq = np.arange(N_last) / T_d
    return FT, freq

In [None]:
# Function to scale noise to a fraction (5% by default) of the clean signal's RMS
def scale_noise_to_5_percent(clean_signal, white_noise, fraction=0.05):
    """Rescale ``white_noise`` so its RMS is ``fraction`` of the clean signal's RMS.

    Parameters
    ----------
    clean_signal : array-like
        Reference signal whose RMS level sets the target noise level.
    white_noise : numpy.ndarray
        Noise samples to rescale; the input itself is not modified.
    fraction : float, optional
        Target ratio RMS(noise) / RMS(clean_signal). Defaults to 0.05 (5%).

    Returns
    -------
    numpy.ndarray
        ``white_noise`` multiplied by a single scalar factor.

    Raises
    ------
    ValueError
        If ``white_noise`` has zero RMS, since no finite factor can then
        reach the target level.
    """
    # Root-mean-square level of both inputs
    rms_clean_signal = np.sqrt(np.mean(np.square(clean_signal)))
    rms_white_noise = np.sqrt(np.mean(np.square(white_noise)))

    if rms_white_noise == 0:
        raise ValueError("white_noise has zero RMS and cannot be scaled")

    # Factor that brings the noise RMS to fraction * RMS(clean signal)
    scaling_factor = fraction * (rms_clean_signal / rms_white_noise)

    scaled_white_noise = white_noise * scaling_factor

    # Report the clean-signal RMS (kept from the original notebook output)
    print(rms_clean_signal)
    return scaled_white_noise

In [None]:
# Build noisy copies of the three wave-elevation inputs of the selected case.

# Extract clean signals from the dataframe
eta = df_case_test['eta'].values
eta_velocity = df_case_test['eta_velocity'].values
eta_acceleration = df_case_test['eta_acceleration'].values

# Length of the signal
length = len(eta)  # Assuming all signals have the same length

# Sampling frequency (Hz)
# NOTE(review): fs is not used anywhere in this cell
fs = 800

# Generate white noise (broad spectrum)
# NOTE(review): no random seed is set, so every run draws different noise and
# the noisy data set is not reproducible - confirm whether that is intended
white_noise = np.random.randn(length)


# Scale the white noise for each signal
# NOTE(review): the same noise realisation is reused for all three signals
# (only the scale differs), so the three added noise series are identical up
# to a factor - confirm this is the intended noise model
scaled_noise_eta = scale_noise_to_5_percent(eta, white_noise)
scaled_noise_eta_velocity = scale_noise_to_5_percent(eta_velocity, white_noise)
scaled_noise_eta_acceleration = scale_noise_to_5_percent(eta_acceleration, white_noise)

# Add the scaled white noise to each clean signal
noisy_eta = eta + scaled_noise_eta
noisy_eta_velocity = eta_velocity + scaled_noise_eta_velocity
noisy_eta_acceleration = eta_acceleration + scaled_noise_eta_acceleration


In [None]:
# Overlay every clean signal with its noisy counterpart in one figure.
fig = go.Figure()

# Common sample-index axis for all traces
sample_axis = np.arange(length)

# (signal label, clean samples, noisy samples) in drawing order
signal_pairs = [
    ('eta', eta, noisy_eta),
    ('eta_velocity', eta_velocity, noisy_eta_velocity),
    ('eta_acceleration', eta_acceleration, noisy_eta_acceleration),
]

for signal_label, clean_values, noisy_values in signal_pairs:
    # Clean trace first, then the noisy trace (always orange)
    fig.add_trace(go.Scatter(
        x=sample_axis,
        y=clean_values,
        mode='lines',
        name=f'Clean {signal_label}',
    ))
    fig.add_trace(go.Scatter(
        x=sample_axis,
        y=noisy_values,
        mode='lines',
        name=f'Noisy {signal_label} (5% White Noise)',
        line=dict(color='orange'),
    ))

# Figure-level titles and size
fig.update_layout(
    title="Clean and Noisy Signals with 5% White Noise",
    xaxis_title="Time (samples)",
    yaxis_title="Amplitude",
    template="plotly_white",
    height=800,
    showlegend=True,
)

fig.show()


In [None]:
# Copy of the test case in which the three wave-elevation columns are replaced
# by their noisy versions; all other columns are carried over unchanged.
df_case_test_noisy = df_case_test.assign(
    eta=noisy_eta,
    eta_velocity=noisy_eta_velocity,
    eta_acceleration=noisy_eta_acceleration,
)
df_case_test_noisy

In [None]:
# Three stacked subplots (eta, eta_velocity, eta_acceleration), each showing
# the clean signal in red underneath its noisy version in black.

# Define the length of the signal
length = len(eta)

# Create a subplot with 3 rows and 1 column (for eta, eta_velocity, eta_acceleration)
fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.1,
                    subplot_titles=['η ', 'η̇ ', 'η̈ '])

# Plot for eta (Row 1)
fig.add_trace(go.Scatter(x=np.arange(length), y=eta, mode='lines', name='Clean η (eta)', line=dict(color='red', width=5)),  # Thick red line (width 5)
              row=1, col=1)
fig.add_trace(go.Scatter(x=np.arange(length), y=df_case_test_noisy['eta'], mode='lines', name='Noisy η (eta) (5% White Noise)', line=dict(color='black', width=3)),  # Thinner black line (width 3) drawn on top
              row=1, col=1)

# Plot for eta_velocity (Row 2) -> Use η̇ (eta_dot)
fig.add_trace(go.Scatter(x=np.arange(length), y=eta_velocity, mode='lines', name='Clean η̇ (eta_dot)', line=dict(color='red', width=5)),  # Thick red line (width 5)
              row=2, col=1)
fig.add_trace(go.Scatter(x=np.arange(length), y=df_case_test_noisy['eta_velocity'], mode='lines', name='Noisy η̇ (eta_dot) (5% White Noise)', line=dict(color='black', width=3)),  # Thinner black line (width 3) drawn on top
              row=2, col=1)

# Plot for eta_acceleration (Row 3) -> Use η̈ (eta_ddot)
# These two traces carry generic names because they are the only ones left in the legend
fig.add_trace(go.Scatter(x=np.arange(length), y=eta_acceleration, mode='lines', name='Clean ', line=dict(color='red', width=5)),  # Thick red line (width 5)
              row=3, col=1)
fig.add_trace(go.Scatter(x=np.arange(length), y=df_case_test_noisy['eta_acceleration'], mode='lines', name='Noisy (5% White Noise)', line=dict(color='black', width=3)),  # Thinner black line (width 3) drawn on top
              row=3, col=1)

# Update layout for the overall figure
fig.update_layout(
    title="Clean and Noisy Signals",
    xaxis_title="Time Steps",  # NOTE(review): presumably titles the first subplot's x-axis; row 3 is titled explicitly below
    yaxis_title="Amplitude",  # Overridden for row 1 by the per-row y-axis titles below
    template="plotly_white",
    height=600,  # Figure height in pixels
    width=1000,  # Figure width in pixels
    showlegend=True,
    title_x=0.5  # Center the main title
)

# Update y-axis titles with units for each subplot
fig.update_yaxes(title_text="η (m)", row=1, col=1)
fig.update_yaxes(title_text="η̇ (m/s)", row=2, col=1)
fig.update_yaxes(title_text="η̈ (m/s²)", row=3, col=1)

# Give the last subplot (Row 3) an explicit x-axis title
fig.update_xaxes(title_text="Time Steps", row=3, col=1)

# Hide the x-axis tick labels of the first two subplots
fig.update_xaxes(showticklabels=False, row=1, col=1)  # No tick labels on row 1
fig.update_xaxes(showticklabels=False, row=2, col=1)  # No tick labels on row 2
# Keep legend entries only for the traces of the last subplot (Row 3)
fig.update_traces(showlegend=False, row=1, col=1)  # Hide legend entries of row 1
fig.update_traces(showlegend=False, row=2, col=1)  # Hide legend entries of row 2
# Zoom the x-axis of the last subplot (Row 3) to samples 2500-2750
fig.update_xaxes(range=[2500, 2750], row=3, col=1)

fig.show()


In [None]:
# save the noisy data to a CSV file in the current working directory
# (index= is not passed, so pandas also writes the DataFrame index as the first column)
df_case_test_noisy.to_csv('df_case_test_noisy.csv')

Now we load the true and predicted data obtained by running the models on the noisy data.

In [None]:
# Loading Data
# Read the ground-truth and predicted series from 'Results/noisy test/'.
# NOTE(review): the first 5 rows and a model-specific number of trailing rows
# (33, 38, 30) are dropped for ARX and XGBoost while LSTM is used untrimmed -
# presumably to align the series with the LSTM output; confirm against the
# notebooks that exported these files.
# ARX (one file pair per degree of freedom)
ARX_heave_true=pd.read_csv('Results/noisy test/ARX_heave_true.csv')[5:-33]
ARX_heave_pred=pd.read_csv('Results/noisy test/ARX_heave_pred.csv')[5:-33]


ARX_pitch_true=pd.read_csv('Results/noisy test/ARX_pitch_true.csv')[5:-33]
ARX_pitch_pred=pd.read_csv('Results/noisy test/ARX_pitch_pred.csv')[5:-33]

ARX_pendulum_true=pd.read_csv('Results/noisy test/ARX_pendulum_true.csv')[5:-38]
ARX_pendulum_pred=pd.read_csv('Results/noisy test/ARX_pendulum_pred.csv')[5:-38]


# XGBoost (a single file pair)
XGB_true=pd.read_csv('Results/noisy test/XGB_true.csv')[5:-30]
XGB_pred=pd.read_csv('Results/noisy test/XGB_pred.csv')[5:-30]


#LSTM (a single file pair; used untrimmed)
LSTM_true=pd.read_csv('Results/noisy test/LSTM_true.csv')
LSTM_pred=pd.read_csv('Results/noisy test/LSTM_pred.csv')



In [None]:
# Validate that the ground-truth pendulum series is identical across the three
# model exports (ARX, XGB, LSTM), i.e. all models were scored on the same data.

a = ARX_pendulum_true['pendulum'].to_numpy()
b = XGB_true['pendulum'].to_numpy()
c = LSTM_true['pendulum'].to_numpy()

# np.array_equal compares the shapes first and then every element, so series
# of different lengths simply report False instead of failing to broadcast.
are_all_equal = np.array_equal(a, b) and np.array_equal(b, c)

print("All equal:", are_all_equal)


In [None]:
# ARX predictions on the noisy test case, keyed by degree of freedom.
# ARX keeps one prediction frame per DoF, so this mapping lets the plotting
# loops fetch the right column with a single arx_preds[dof] lookup.
arx_preds = {
    'heave': ARX_heave_pred['heave'],
    'pitch': ARX_pitch_pred['pitch'],
    'pendulum': ARX_pendulum_pred['pendulum'],
}


In [None]:
# Plot ground truth against the ARX, XGBoost-NARX and LSTM predictions on the
# noisy test case, one figure per DoF, and save each figure as a PNG.

# Define DoFs and corresponding y-axis units
dofs = ['heave', 'pitch', 'pendulum']
y_units = {
    'heave': 'm',
    'pitch': '°',
    'pendulum': '°'
}

# Create the output directory once, before any figure is written
output_dir = "Results/noisy test/"
os.makedirs(output_dir, exist_ok=True)

# Loop through each DoF
for dof in dofs:
    fig = go.Figure()

    # Ground Truth (solid black)
    fig.add_trace(go.Scatter(
        y=LSTM_true[dof],
        mode='lines',
        name=f'{dof.capitalize()} - Ground Truth',
        line=dict(color='black', dash='solid')
    ))

    # ARX Prediction (dashed blue)
    fig.add_trace(go.Scatter(
        y=arx_preds[dof],
        mode='lines',
        name='ARX Prediction',
        line=dict(color='blue', dash='dash')
    ))

    # XGBoost Prediction (dashed red)
    fig.add_trace(go.Scatter(
        y=XGB_pred[dof],
        mode='lines',
        name='XGBoost-NARX Prediction',
        line=dict(color='red', dash='dash')
    ))

    # LSTM Prediction (dashed green)
    fig.add_trace(go.Scatter(
        y=LSTM_pred[f'{dof}_pred'],
        mode='lines',
        name='LSTM Prediction',
        line=dict(color='green', dash='dash')
    ))

    # y-axis limits follow the ground truth only, so a diverging prediction
    # cannot stretch the axis
    y_min = LSTM_true[dof].min()
    y_max = LSTM_true[dof].max()

    # Pad both ends by 5% of the data range. The margin is computed once:
    # padding y_max from the already lowered y_min would make the top margin
    # larger than the bottom one.
    padding = 0.05
    margin = (y_max - y_min) * padding
    y_min -= margin
    y_max += margin

    # Layout with the padded y-axis range and a fixed x-axis window
    fig.update_layout(
        title=f'Noisy Data Predictions - {dof.capitalize()}',
        xaxis_title='Time Step',
        yaxis_title=f'{dof.capitalize()} [{y_units[dof]}]',
        yaxis=dict(range=[y_min, y_max]),
        xaxis=dict(range=[0, 400]),  # Show only the first 400 time steps
        template='plotly_white',
    )

    # NOTE(review): the file name says "absolute_error" although the figure
    # shows predictions; it is kept so existing references to the file still work
    fig.write_image(f"{output_dir}{dof}_absolute_error_plot.png")


In [None]:
import plotly.graph_objects as go
import os

# Plot ground truth against the ARX, XGBoost-NARX and LSTM predictions on the
# noisy test case, one figure per DoF, and display each figure inline.

# Define DoFs and corresponding y-axis units
dofs = ['heave', 'pitch', 'pendulum']
y_units = {
    'heave': 'm',
    'pitch': '°',
    'pendulum': '°'
}

# Loop through each DoF
for dof in dofs:
    fig = go.Figure()

    # Ground Truth (solid black)
    fig.add_trace(go.Scatter(
        y=LSTM_true[dof],
        mode='lines',
        name=f'{dof.capitalize()} - Ground Truth',
        line=dict(color='black', dash='solid')
    ))

    # ARX Prediction (dashed blue)
    fig.add_trace(go.Scatter(
        y=arx_preds[dof],
        mode='lines',
        name='ARX Prediction',
        line=dict(color='blue', dash='dash')
    ))

    # XGBoost Prediction (dashed red)
    fig.add_trace(go.Scatter(
        y=XGB_pred[dof],
        mode='lines',
        name='XGBoost-NARX Prediction',
        line=dict(color='red', dash='dash')
    ))

    # LSTM Prediction (dashed green)
    fig.add_trace(go.Scatter(
        y=LSTM_pred[f'{dof}_pred'],
        mode='lines',
        name='LSTM Prediction',
        line=dict(color='green', dash='dash')
    ))

    # y-axis limits follow the ground truth only, so a diverging prediction
    # cannot stretch the axis
    y_min = LSTM_true[dof].min()
    y_max = LSTM_true[dof].max()

    # Pad both ends by 5% of the data range. The margin is computed once:
    # padding y_max from the already lowered y_min would make the top margin
    # larger than the bottom one.
    padding = 0.05
    margin = (y_max - y_min) * padding
    y_min -= margin
    y_max += margin

    # Layout with the padded y-axis range and a fixed x-axis window
    fig.update_layout(
        title=f'Noisy Data Predictions - {dof.capitalize()}',
        xaxis_title='Time Step',
        yaxis_title=f'{dof.capitalize()} [{y_units[dof]}]',
        yaxis=dict(range=[y_min, y_max]),
        xaxis=dict(range=[0, 400]),  # Show only the first 400 time steps
        template='plotly_white',
    )

    fig.show()


# Data size sensitivity 

In [None]:
# load files

# Folder holding the result CSVs of the data-size sensitivity runs
path = 'Results/data_tests'

# Find all CSV files in the folder. glob returns them in arbitrary order, so
# they are sorted to make the load (and print) order reproducible.
csv_files = sorted(glob.glob(os.path.join(path, '*.csv')))

# One DataFrame per CSV file, keyed by the file name without its extension
dfs = {}

for csv_file in csv_files:
    # splitext removes only the final extension; str.replace('.csv', '')
    # would also delete a '.csv' occurring in the middle of a file name
    file_name = os.path.splitext(os.path.basename(csv_file))[0]
    dfs[file_name] = pd.read_csv(csv_file)


print(dfs.keys())


In [None]:
# Display the LSTM data-size results to inspect their column names
dfs['df_data_test_results_lstm_final_ver2']

In [None]:
# Rename the LSTM result columns to the names the plotting cell reads
# ('lenght' and 'r2_<split>_<dof>').
old_name = [
    'data_lenght',
    'train_heave_r2', 'train_pitch_r2', 'train_pendulum_r2',
    'test_heave_r2', 'test_pitch_r2', 'test_pendulum_r2',
]
new_name = [
    'lenght',
    'r2_train_heave', 'r2_train_pitch', 'r2_train_pendulum',
    'r2_test_heave', 'r2_test_pitch', 'r2_test_pendulum',
]

# Old column name -> new column name, paired by position
rename_dict = {old: new for old, new in zip(old_name, new_name)}

# Store the renamed frame back under the same key and display it
lstm_results_key = 'df_data_test_results_lstm_final_ver2'
dfs[lstm_results_key] = dfs[lstm_results_key].rename(columns=rename_dict)
dfs[lstm_results_key]

In [None]:
# The XGBoost result files label their scores 'r2_val_*'; rename them to
# 'r2_test_*' so both frames expose the column names the plotting cell reads.
old_name = ['r2_val_heave', 'r2_val_pitch', 'r2_val_pendulum']
new_name = ['r2_test_heave', 'r2_test_pitch', 'r2_test_pendulum']

# Old column name -> new column name, paired by position
rename_dict = {old: new for old, new in zip(old_name, new_name)}

# Apply the same renaming in place to the parallel and the series results
xgb_result_keys = (
    'metrics_df_Xgboost_3dof_data_test_new_parralel_ver2',
    'metrics_df_Xgboost_3dof_data_test_series_ver2',
)
for xgb_key in xgb_result_keys:
    dfs[xgb_key].rename(columns=rename_dict, inplace=True)

In [None]:
# Order every result table by increasing 'lenght' so the curves are drawn from
# left to right; each table is replaced by its sorted copy under the same key.
for file_name in list(dfs):
    print(file_name)
    dfs[file_name] = dfs[file_name].sort_values('lenght')


In [None]:
# ARX metric tables of the data-size test, keyed by degree of freedom.
# ARX has one result file per DoF; the keys below must match the CSV file
# names exactly, including their misspellings and the trailing '-'.

# Parallel-mode results
arx_parralel = {
    'heave': dfs['metrics_df_ARX_heave_data_test_parralel-'],
    'pitch': dfs['metrics_df_ARX_picth_data_test_parralel-'],
    'pendulum': dfs['metrics_df_ARX_pendulum_data_test_parallel-'],
}

# Series-mode results
arx_series = {
    'heave': dfs['metrics_df_ARX_heave_data_test_series-'],
    'pitch': dfs['metrics_df_ARX_picth_data_test_seiries-'],
    'pendulum': dfs['metrics_df_ARX_pendulum_data_test_pseries-'],
}

In [None]:
# Test R² as a function of the training-data length: one figure per DoF with
# one curve per model, displayed and saved as a PNG.

# Line colour, marker symbol and dash style of each model
model_info = {
    "ARX": {"color": "blue", "symbol": "x", "dash": "dash"},
    "XGB": {"color": "red", "symbol": "circle", "dash": "dot"},
    "LSTM": {"color": "green", "symbol": "square", "dash": "solid"},
}

# List of degrees of freedom (DOFs)
dofs = ['heave', 'pitch', 'pendulum']

for dof in dofs:
    # A single plot per DoF (no subplots)
    fig = go.Figure()

    # Column holding the test score of the current DoF
    score_column = f'r2_test_{dof}'

    # (style key, legend name, metrics table) in drawing order; ARX has one
    # table per DoF while XGBoost and LSTM hold all DoFs in one table
    curves = [
        ("ARX", 'ARX Test R²', arx_series[dof]),
        ("XGB", 'XGBoost-NARX Test R²', dfs['metrics_df_Xgboost_3dof_data_test_series_ver2']),
        ("LSTM", 'LSTM Test R²', dfs['df_data_test_results_lstm_final_ver2']),
    ]

    for style_key, legend_name, metrics in curves:
        style = model_info[style_key]
        fig.add_trace(go.Scatter(
            x=metrics['lenght'],
            y=metrics[score_column],
            name=legend_name,
            mode='lines+markers',
            line=dict(color=style["color"], dash=style["dash"]),
            marker=dict(symbol=style["symbol"]),
        ))

    # Fixed y-axis window of -0.1 to 1.1 so the figures of all DoFs are comparable
    fig.update_layout(
        yaxis=dict(range=[-0.1, 1.1]),
        width=850,
        height=350,
        title=f"Performance Sensitivity to Training Data Size for {dof.capitalize()}",
        xaxis_title="Training Data Length",
        yaxis_title="Test R² ",
        showlegend=True,
        template='plotly_white',
    )

    # Make sure the output folder exists, save the figure, then display it
    output_dir = "Results/data_sesitivity/"
    os.makedirs(output_dir, exist_ok=True)
    fig.write_image(f"{output_dir}{dof}_data.png")
    fig.show()


# Sensitivity to dt

In [None]:
# load files

# Folder holding the result CSVs of the dt sensitivity runs
path = 'Results/dt_tests'

# Find all CSV files in the folder. glob returns them in arbitrary order, so
# they are sorted to make the load (and print) order reproducible.
csv_files = sorted(glob.glob(os.path.join(path, '*.csv')))

# One DataFrame per CSV file, keyed by the file name without its extension
dfs = {}

for csv_file in csv_files:
    # splitext removes only the final extension; str.replace('.csv', '')
    # would also delete a '.csv' occurring in the middle of a file name
    file_name = os.path.splitext(os.path.basename(csv_file))[0]
    dfs[file_name] = pd.read_csv(csv_file)


print(dfs.keys())


In [None]:
# List the names of the loaded dt-test tables, one per line
for file_name in dfs:
    print(file_name)


In [None]:
# Rename the LSTM dt-test columns to the 'r2_<split>_<dof>' names that the
# plotting cell reads.
old_name = [
    'train_heave_r2', 'train_pitch_r2', 'train_pendulum_r2',
    'test_heave_r2', 'test_pitch_r2', 'test_pendulum_r2',
]
new_name = [
    'r2_train_heave', 'r2_train_pitch', 'r2_train_pendulum',
    'r2_test_heave', 'r2_test_pitch', 'r2_test_pendulum',
]

# Old column name -> new column name, paired by position
rename_dict = {old: new for old, new in zip(old_name, new_name)}

# Store the renamed frame back under the same key and display it
lstm_dt_key = 'df_dt_test_results_lstm'
dfs[lstm_dt_key] = dfs[lstm_dt_key].rename(columns=rename_dict)
dfs[lstm_dt_key]

In [None]:
# ARX metric tables of the dt sensitivity test, keyed by degree of freedom.
# ARX has one result file per DoF; the keys below must match the CSV file
# names exactly, including their misspellings. Note that the pendulum
# parallel key has no trailing '-', unlike the others.

# Parallel-mode results
arx_parallel = {
    'heave': dfs['metrics_df_ARX_heave_dt_senstivery_test_parralel-'],
    'pitch': dfs['metrics_df_ARX_picth_dt_senstivery_test_parralel-'],
    'pendulum': dfs['metrics_df_ARX_pendulum_dt_senstivery_test_parralel'],
}

# Series-mode results
arx_series = {
    'heave': dfs['metrics_df_ARX_heave_dt_senstivery_test-'],
    'pitch': dfs['metrics_df_ARX_picth_dt_senstivery_test-'],
    'pendulum': dfs['metrics_df_ARX_pendulum_dt_senstivery_test-'],
}

In [None]:
# Test R² as a function of the time step dt: one figure per DoF with series
# and parallel curves for ARX and XGBoost and a single curve for LSTM.
# Relies on `dofs` defined in an earlier cell.

# Colour, marker symbol and dash style (series / parallel) of each model
model_info = {
    "ARX":  {"color": "blue",  "symbol": "circle", "dash_series": "solid", "dash_parallel": "dash"},
    "XGB":  {"color": "red",   "symbol": "circle", "dash_series": "solid", "dash_parallel": "dash"},
    "LSTM": {"color": "green", "symbol": "square", "dash_series": "solid", "dash_parallel": None},  # no parallel
}

# Hover text shared by all traces
hover_template = 'dt: %{x:.3f}s<br>R²: %{y:.3f}<extra></extra>'

for dof in dofs:
    fig = go.Figure()

    # Column holding the test score of the current DoF
    score_column = f'r2_test_{dof}'

    # (legend name, model key, metrics table, dash key, marker symbol, marker size)
    # in drawing order. The model key doubles as the legend group. Parallel
    # curves always use an "x" marker; only the ARX parallel marker has size 8.
    trace_specs = [
        ('ARX (Series)', 'ARX', arx_series[dof],
         'dash_series', model_info["ARX"]["symbol"], 6),
        ('ARX (Parallel)', 'ARX', arx_parallel[dof],
         'dash_parallel', "x", 8),
        ('XGB (Series)', 'XGB', dfs['metrics_df_XGBOOST_3dof_dt_test_nb0_nf10_new'],
         'dash_series', model_info["XGB"]["symbol"], 6),
        ('XGB (Parallel)', 'XGB', dfs['metrics_df_XGBOOST_3dof_dt_test_nb0_nf10_new_parralel'],
         'dash_parallel', "x", 6),
        ('LSTM', 'LSTM', dfs['df_dt_test_results_lstm'],
         'dash_series', model_info["LSTM"]["symbol"], 6),
    ]

    for legend_name, model_key, metrics, dash_key, symbol, size in trace_specs:
        style = model_info[model_key]
        fig.add_trace(go.Scatter(
            x=metrics['dt'],
            y=metrics[score_column],
            name=legend_name,
            legendgroup=model_key,
            mode='lines+markers',
            line=dict(color=style["color"], dash=style[dash_key], width=2),
            marker=dict(symbol=symbol, size=size),
            hovertemplate=hover_template,
        ))

    # Fixed y-axis window so the figures of all DoFs are comparable
    fig.update_layout(
        yaxis=dict(range=[-0.1, 1.1]),
        width=850,
        height=350,
        title=f"Performance Sensitivity to Training Reselution for {dof.capitalize()}",
        xaxis_title="dt [s]",
        yaxis_title="Test R²",
        template='plotly_white',
    )

    # Make sure the output folder exists, save the figure, then display it
    output_dir = "Results/dt_sesitivity/"
    os.makedirs(output_dir, exist_ok=True)
    fig.write_image(f"{output_dir}{dof}_dt.png")
    fig.show()

    


# Environmental Impact

In [None]:
# load files

# Folder holding the emissions CSVs
path = 'Results/co2'

# Find all CSV files in the folder. glob returns them in arbitrary order, so
# they are sorted to make the load order (and the row order of tables built
# from this dictionary) reproducible.
csv_files = sorted(glob.glob(os.path.join(path, '*.csv')))

# One DataFrame per CSV file, keyed by the file name without its extension
dfs = {}

for csv_file in csv_files:
    # splitext removes only the final extension; str.replace('.csv', '')
    # would also delete a '.csv' occurring in the middle of a file name
    file_name = os.path.splitext(os.path.basename(csv_file))[0]
    dfs[file_name] = pd.read_csv(csv_file)


dfs.keys()


In [None]:
# Display the column names of one emissions file to see which fields are available
dfs['arx_heave_parralel_pred'].columns

In [None]:
# Summarise every emissions file by the values in its last row.
# Units in the files are kg CO2eq, kg CO2eq/s and kWh.
metric_columns = ('emissions', 'emissions_rate', 'energy_consumed')

# One record per file: the file name followed by the three metrics
emissions_data = []
for file_name, df in dfs.items():
    record = {'file_name': file_name}
    # Only the final row of each column is taken
    record.update({column: df[column].iloc[-1] for column in metric_columns})
    emissions_data.append(record)

# Tabulate the records, one row per file
emissions_df = pd.DataFrame(emissions_data)

# NOTE(review): 146 is presumably a grid carbon intensity per kWh - its unit
# and source are not stated here; confirm before comparing this column with
# 'emissions' (which the files report in kg CO2eq)
emissions_df['calculted_emisions'] = emissions_df['energy_consumed'] * 146

emissions_df