In [216]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import glob
import plotly.express as px
import plotly.graph_objects as go
import os
import re

from plotly.subplots import make_subplots

# ARX

In [217]:
dfs = dict()  # table name (CSV file name without extension) -> loaded DataFrame


In [218]:

# Read every heave lag-study CSV into `dfs`, keyed by file name without extension
folder = "ARX/choosen heros/heave/lags/*.csv"
csv_files = sorted(glob.glob(folder))  # sorted so the load order is deterministic

for file_path in csv_files:
    # e.g. ".../heave_case1.csv" -> "heave_case1"
    df_name, _extension = os.path.splitext(os.path.basename(file_path))
    dfs[df_name] = pd.read_csv(file_path)


In [219]:

# Read every pitch lag-study CSV into `dfs`, keyed by file name without extension
folder = "ARX/choosen heros/pitch/lags/*.csv"
csv_files = sorted(glob.glob(folder))  # sorted so the load order is deterministic

for file_path in csv_files:
    # e.g. ".../pitch_case1.csv" -> "pitch_case1"
    df_name, _extension = os.path.splitext(os.path.basename(file_path))
    dfs[df_name] = pd.read_csv(file_path)
    


In [220]:
# Read every pendulum lag-study CSV into `dfs`, keyed by file name without extension
folder = "ARX/choosen heros/pendulum/lags/*.csv"
csv_files = sorted(glob.glob(folder))  # sorted so the load order is deterministic

for file_path in csv_files:
    # e.g. ".../pendulum_case1.csv" -> "pendulum_case1"
    df_name, _extension = os.path.splitext(os.path.basename(file_path))
    dfs[df_name] = pd.read_csv(file_path)

In [221]:
# Sanity check: list every loaded table together with its (rows, columns) shape
for name, df in dfs.items():
    print(name, df.shape, sep=": ")

metrics_df_test_heave_na2_nb2_nf6_val: (20, 4)
metrics_df_test_heave_na2_nb2_nf7_val: (10, 4)
metrics_df_test_heave_na2_nb2_nf8_val: (10, 4)
metrics_df_test_pitch_na2_nb2_nf2_val: (10, 4)
metrics_df_test_pitch_na2_nb2_nf5_val: (10, 4)
metrics_df_test_pitch_na2_nb2_nf6_val: (10, 4)
metrics_df_test_pitch_na2_nb2_nf7_val: (10, 4)
metrics_df_test_pitch_na2_nb2_nf8_val: (10, 4)
metrics_df_test_pendulum_na2_nb2_nf2_val: (10, 4)
metrics_df_test_pendulum_na2_nb2_nf7_val: (10, 4)
metrics_df_test_pendulum_na2_nb3_nf2_val: (10, 4)
metrics_df_test_pendulum_na2_nb4_nf2_val: (10, 4)


In [222]:
# Display label for every raw test-case id, in the order used on the plots' x-axis.
# NOTE(review): the id prefixes are not uniform ("T4p5s", "Tp6p8s", "Tp10p2");
# presumably they must match the values stored in the CSV files verbatim --
# confirm before normalising them.
test_case_pretty_map = dict((
    ('T4p5s_Hs1m', 'Tp = 4.5 s, Hs = 1 m'),
    ('T4p5s_Hs2m', 'Tp = 4.5 s, Hs = 2 m'),
    ('Tp6p8s_Hs1m', 'Tp = 6.8 s, Hs = 1 m'),
    ('Tp6p8s_Hs2m', 'Tp = 6.8 s, Hs = 2 m'),
    ('Tp6p8s_Hs4m', 'Tp = 6.8 s, Hs = 4 m'),
    ('Tp6p8s_Hs6m', 'Tp = 6.8 s, Hs = 6 m'),
    ('Tp6p8s_Hs8m', 'Tp = 6.8 s, Hs = 8 m'),
    ('T10p2s_Hs1m', 'Tp = 10.2 s, Hs = 1 m'),
    ('Tp10p2_Hs2m', 'Tp = 10.2 s, Hs = 2 m'),
    ('T10p2s_Hs4m', 'Tp = 10.2 s, Hs = 4 m'),
))


In [223]:
# Train/validation R² of each ARX heave lag combination (values entered by hand).
# Rows for nb=3 were listed here earlier and are currently disabled.
_summary_columns = ('nd', 'nb', 'na', 'r2_train', 'r2_val')
_summary_rows = (
    (-6, 2, 2, 0.9866, 0.9749),
    (-7, 2, 2, 0.9889, 0.9794),
    (-8, 2, 2, 0.9890, 0.9795),
)
data_heave = [dict(zip(_summary_columns, row)) for row in _summary_rows]

# One row per lag combination
df_summary_heave = pd.DataFrame(data_heave)

# Show the table
print(df_summary_heave)



   nd  nb  na  r2_train  r2_val
0  -6   2   2    0.9866  0.9749
1  -7   2   2    0.9889  0.9794
2  -8   2   2    0.9890  0.9795


In [224]:
# Keep only the heave metric tables
heave_dfs = {k: v for k, v in dfs.items() if k.startswith('metrics_df_test_heave')}

# x-axis order of the test cases. It is taken from test_case_pretty_map (dicts
# keep insertion order) so the raw ids and their display labels are maintained
# in a single place instead of being retyped in every plotting cell.
custom_order = list(test_case_pretty_map)
pretty_custom_order = [test_case_pretty_map[tc] for tc in custom_order]

# comment_value -> colour, so all traces of one lag combination share a colour
color_map = {}

# Left: test R² per test case. Right: train/validation R² per lag combination.
# The two panels keep independent y-axes.
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

# For every heave table: one test-R² line on the left panel and, when the lag
# combination is listed in df_summary_heave, its train/validation R² markers on
# the right panel.
palette = px.colors.qualitative.Plotly
dashed_combinations = ('na=2, nb=3, nd=-6', 'na=2, nb=3, nd=-7')

for df_name, original_df in heave_dfs.items():
    df = original_df.copy()  # work on a copy so the table stored in dfs stays raw

    # Lag orders are parsed from the table name ("..._na<A>_nb<B>_nf<F>...").
    # The nf number is shown, and matched against the summary table, as a
    # negative nd.
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na_val, nb_val, nf_val = match.groups()  # regex groups are strings
        comment_value = f'na={na_val}, nb={nb_val}, nd=-{nf_val}'
        summary_key = (int(na_val), int(nb_val), -int(nf_val))
    else:
        # No lag orders in the name: label the trace with the raw name and skip
        # the summary lookup rather than reusing values from a previous table
        # (or failing with a NameError on the first one).
        comment_value = df_name
        summary_key = None

    # Assign one colour per lag combination
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]
    trace_color = color_map[comment_value]

    # The test-case column is spelled with either a space or an underscore
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Replace raw ids by display labels and sort the rows in x-axis order.
    # Ids that are missing from test_case_pretty_map become NaN.
    df[test_case_col] = pd.Categorical(
        df[test_case_col].map(test_case_pretty_map),
        categories=pretty_custom_order,
        ordered=True,
    )
    df = df.sort_values(test_case_col)

    # Selected combinations are drawn dashed
    line_style = dict(color=trace_color)
    if comment_value in dashed_combinations:
        line_style['dash'] = '6,10'

    # Test R² line (left panel)
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_heave'],
            mode='lines+markers',
            name=comment_value,
            legendgroup=comment_value,
            line=line_style
        ),
        row=1, col=1
    )

    # Train/validation markers (right panel)
    if summary_key is None:
        continue
    na_order, nb_order, nd_delay = summary_key
    summary_row = df_summary_heave[
        (df_summary_heave['na'] == na_order)
        & (df_summary_heave['nb'] == nb_order)
        & (df_summary_heave['nd'] == nd_delay)
    ]
    if summary_row.empty:
        continue

    r2_train = summary_row['r2_train'].iloc[0]
    r2_val = summary_row['r2_val'].iloc[0]

    for symbol, split_name, r2_value in (('x', 'Train', r2_train), ('square', 'Val', r2_val)):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[r2_value],
                mode='markers',
                marker=dict(symbol=symbol, size=8, color=trace_color),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,  # toggles together with the line trace
                showlegend=False
            ),
            row=1, col=2
        )

# Grey, data-less traces whose only job is to explain the marker symbols in the legend
for legend_symbol, legend_label in (('x', 'Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=legend_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )

# Titles, figure size and axis configuration, then render
fig.update_layout(
    title="Test R² Scores for Heave Prediction Across Sea Conditions and Lags Combinations - ARX Heave Model",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    # Left panel: test cases on x, forced into the custom order
    xaxis=dict(
        title="Test Case",
        categoryorder='array',
        categoryarray=pretty_custom_order
    ),
    yaxis=dict(title="R² Score"),
    # Right panel: one x position per lag combination, y fixed to [0, 1]
    xaxis2=dict(title="Model Combination"),
    yaxis2=dict(
        title="Train/Validation R² Score",
        range=[0, 1],
        showgrid=True
    )
)

fig.show()


In [225]:
# Train/validation R² of each ARX pitch lag combination (values entered by hand)
_summary_columns = ('nd', 'nb', 'na', 'r2_train', 'r2_val')
_summary_rows = (
    # nb = 2
    (-2, 2, 2, 0.9569, 0.9351),
    (-5, 2, 2, 0.9730, 0.9424),
    (-6, 2, 2, 0.9773, 0.9450),
    (-7, 2, 2, 0.9908, 0.9574),
    (-8, 2, 2, 0.9908, 0.9572),
    # nb = 3
    (-2, 3, 2, 0.9590, 0.9279),
    (-5, 3, 2, 0.9730, 0.9424),
    (-6, 3, 2, 0.9776, 0.9450),
)
data_pitch = [dict(zip(_summary_columns, row)) for row in _summary_rows]

# One row per lag combination
df_summary_pitch = pd.DataFrame(data_pitch)

# Show the table
print(df_summary_pitch)



   nd  nb  na  r2_train  r2_val
0  -2   2   2    0.9569  0.9351
1  -5   2   2    0.9730  0.9424
2  -6   2   2    0.9773  0.9450
3  -7   2   2    0.9908  0.9574
4  -8   2   2    0.9908  0.9572
5  -2   3   2    0.9590  0.9279
6  -5   3   2    0.9730  0.9424
7  -6   3   2    0.9776  0.9450


In [226]:
# Keep only the pitch metric tables
pitch_dfs = {k: v for k, v in dfs.items() if k.startswith('metrics_df_test_pitch')}

# x-axis order of the test cases. It is taken from test_case_pretty_map (dicts
# keep insertion order) so the raw ids and their display labels are maintained
# in a single place instead of being retyped in every plotting cell.
custom_order = list(test_case_pretty_map)
pretty_custom_order = [test_case_pretty_map[tc] for tc in custom_order]

# comment_value -> colour, so all traces of one lag combination share a colour
color_map = {}

# Left: test R² per test case. Right: train/validation R² per lag combination.
# The two panels keep independent y-axes.
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

# For every pitch table: one test-R² line on the left panel and, when the lag
# combination is listed in df_summary_pitch, its train/validation R² markers on
# the right panel.
palette = px.colors.qualitative.Plotly

for df_name, original_df in pitch_dfs.items():
    df = original_df.copy()  # work on a copy so the table stored in dfs stays raw

    # Lag orders are parsed from the table name ("..._na<A>_nb<B>_nf<F>...").
    # The nf number is shown, and matched against the summary table, as a
    # negative nd.
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na_val, nb_val, nf_val = match.groups()  # regex groups are strings
        comment_value = f'na={na_val}, nb={nb_val}, nd=-{nf_val}'
        summary_key = (int(na_val), int(nb_val), -int(nf_val))
    else:
        # No lag orders in the name: label the trace with the raw name and skip
        # the summary lookup rather than reusing values from a previous table
        # (or failing with a NameError on the first one).
        comment_value = df_name
        summary_key = None

    # Assign one colour per lag combination
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]
    trace_color = color_map[comment_value]

    # The test-case column is spelled with either a space or an underscore
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Replace raw ids by display labels and sort the rows in x-axis order.
    # Ids that are missing from test_case_pretty_map become NaN.
    df[test_case_col] = pd.Categorical(
        df[test_case_col].map(test_case_pretty_map),
        categories=pretty_custom_order,
        ordered=True,
    )
    df = df.sort_values(test_case_col)

    # One combination is drawn dashed
    line_style = dict(color=trace_color)
    if comment_value == 'na=2, nb=3, nd=-5':
        line_style['dash'] = '6,10'

    # Test R² line (left panel)
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_pitch'],
            mode='lines+markers',
            name=comment_value,
            legendgroup=comment_value,
            line=line_style
        ),
        row=1, col=1
    )

    # Train/validation markers (right panel), looked up in df_summary_pitch
    if summary_key is None:
        continue
    na_order, nb_order, nd_delay = summary_key
    summary_row = df_summary_pitch[
        (df_summary_pitch['na'] == na_order)
        & (df_summary_pitch['nb'] == nb_order)
        & (df_summary_pitch['nd'] == nd_delay)
    ]
    if summary_row.empty:
        continue

    r2_train = summary_row['r2_train'].iloc[0]
    r2_val = summary_row['r2_val'].iloc[0]

    for symbol, split_name, r2_value in (('x', 'Train', r2_train), ('square', 'Val', r2_val)):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[r2_value],
                mode='markers',
                marker=dict(symbol=symbol, size=8, color=trace_color),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,  # toggles together with the line trace
                showlegend=False
            ),
            row=1, col=2
        )
# Grey, data-less traces whose only job is to explain the marker symbols in the legend
for legend_symbol, legend_label in (('x', ' Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=legend_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )


# Titles, figure size and axis configuration, then render
fig.update_layout(
    title="Test R² Scores for Pitch Prediction Across Sea Conditions and Lags Combinations - ARX Pitch Model",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    # Left panel: test cases on x, forced into the custom order
    xaxis=dict(
        title="Test Case",
        categoryorder='array',
        categoryarray=pretty_custom_order
    ),
    yaxis=dict(title="R² Score"),
    # Right panel: one x position per lag combination, y fixed to [0, 1]
    xaxis2=dict(title="Model Combination"),
    yaxis2=dict(
        title="Train/Validation R² Score",
        range=[0, 1],
        showgrid=True
    )
)

fig.show()


In [229]:
# Train/validation R² of each ARX pendulum lag combination (values entered by hand)
_summary_columns = ('nd', 'nb', 'na', 'r2_train', 'r2_val')
_summary_rows = (
    # nd = -2
    (-2, 2, 2, 0.9064, 0.8908),
    (-2, 3, 2, 0.9231, 0.8907),
    (-2, 4, 2, 0.9435, 0.8893),
    # nd = -5
    (-5, 2, 2, 0.9185, 0.8800),
    (-5, 3, 2, 0.9185, 0.8800),
    # nd = -7
    (-7, 2, 2, 0.9689, 0.9435),
    (-7, 3, 2, 0.9689, 0.9435),
)
data_pendulum = [dict(zip(_summary_columns, row)) for row in _summary_rows]

# One row per lag combination
df_summary_pendulum = pd.DataFrame(data_pendulum)

# Show the table
print(df_summary_pendulum)



   nd  nb  na  r2_train  r2_val
0  -2   2   2    0.9064  0.8908
1  -2   3   2    0.9231  0.8907
2  -2   4   2    0.9435  0.8893
3  -5   2   2    0.9185  0.8800
4  -5   3   2    0.9185  0.8800
5  -7   2   2    0.9689  0.9435
6  -7   3   2    0.9689  0.9435


In [231]:
# Keep only the pendulum metric tables
pendulum_dfs = {k: v for k, v in dfs.items() if k.startswith('metrics_df_test_pendulum')}

# x-axis order of the test cases. It is taken from test_case_pretty_map (dicts
# keep insertion order) so the raw ids and their display labels are maintained
# in a single place instead of being retyped in every plotting cell.
custom_order = list(test_case_pretty_map)
pretty_custom_order = [test_case_pretty_map[tc] for tc in custom_order]

# comment_value -> colour, so all traces of one lag combination share a colour
color_map = {}

# Left: test R² per test case. Right: train/validation R² per lag combination.
# The two panels keep independent y-axes.
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

# For every pendulum table: one test-R² line on the left panel and, when the
# lag combination is listed in df_summary_pendulum, its train/validation R²
# markers on the right panel.
palette = px.colors.qualitative.Plotly

for df_name, original_df in pendulum_dfs.items():
    df = original_df.copy()  # work on a copy so the table stored in dfs stays raw

    # Lag orders are parsed from the table name ("..._na<A>_nb<B>_nf<F>...").
    # The nf number is shown, and matched against the summary table, as a
    # negative nd.
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na_val, nb_val, nf_val = match.groups()  # regex groups are strings
        comment_value = f'na={na_val}, nb={nb_val}, nd=-{nf_val}'
        summary_key = (int(na_val), int(nb_val), -int(nf_val))
    else:
        # No lag orders in the name: label the trace with the raw name and skip
        # the summary lookup rather than reusing values from a previous table
        # (or failing with a NameError on the first one).
        comment_value = df_name
        summary_key = None

    # Assign one colour per lag combination
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]
    trace_color = color_map[comment_value]

    # The test-case column is spelled with either a space or an underscore
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Replace raw ids by display labels and sort the rows in x-axis order.
    # Ids that are missing from test_case_pretty_map become NaN.
    df[test_case_col] = pd.Categorical(
        df[test_case_col].map(test_case_pretty_map),
        categories=pretty_custom_order,
        ordered=True,
    )
    df = df.sort_values(test_case_col)

    # One combination is drawn dashed
    line_style = dict(color=trace_color)
    if comment_value == 'na=2, nb=3, nd=-7':
        line_style['dash'] = '6,10'

    # Test R² line (left panel)
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_pendulum'],
            mode='lines+markers',
            name=comment_value,
            legendgroup=comment_value,
            line=line_style
        ),
        row=1, col=1
    )

    # Train/validation markers (right panel), looked up in df_summary_pendulum
    if summary_key is None:
        continue
    na_order, nb_order, nd_delay = summary_key
    summary_row = df_summary_pendulum[
        (df_summary_pendulum['na'] == na_order)
        & (df_summary_pendulum['nb'] == nb_order)
        & (df_summary_pendulum['nd'] == nd_delay)
    ]
    if summary_row.empty:
        continue

    r2_train = summary_row['r2_train'].iloc[0]
    r2_val = summary_row['r2_val'].iloc[0]

    for symbol, split_name, r2_value in (('x', 'Train', r2_train), ('square', 'Val', r2_val)):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[r2_value],
                mode='markers',
                marker=dict(symbol=symbol, size=8, color=trace_color),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,  # toggles together with the line trace
                showlegend=False
            ),
            row=1, col=2
        )

# Grey, data-less traces whose only job is to explain the marker symbols in the legend
for legend_symbol, legend_label in (('x', ' Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=legend_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )

# Titles, figure size and axis configuration, then render
fig.update_layout(
    title="Test R² Scores for Pendulum Prediction Across Sea Conditions and Lags Combinations - ARX Pendulum Model",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    # Left panel: test cases on x in the custom order, y fixed to [0, 1]
    # ("yaxis" is the same layout entry as "yaxis1")
    xaxis=dict(
        title="Test Case",
        categoryorder='array',
        categoryarray=pretty_custom_order
    ),
    yaxis=dict(
        title="R² Score",
        showgrid=True,
        range=[0, 1]
    ),
    # Right panel: one x position per lag combination, y range left to autoscale
    xaxis2=dict(title="Model Combination"),
    yaxis2=dict(
        title="Train/Validation R² Score",
        showgrid=True
    )
)

fig.show()


# XGBoost-NARX - One Model for All DoFs

In [92]:
dfs_xgb_3dof = dict()  # table name (CSV file name without extension) -> loaded DataFrame

In [None]:

# Read every metrics CSV of the tested XGBoost models into `dfs_xgb_3dof`,
# keyed by file name without extension
folder = "Xgboost/results/3dof_model/test_best_models/metrics/*.csv"
csv_files = sorted(glob.glob(folder))  # sorted so the load order is deterministic

for file_path in csv_files:
    # only the final ".csv" is stripped from the file name
    df_name, _extension = os.path.splitext(os.path.basename(file_path))
    dfs_xgb_3dof[df_name] = pd.read_csv(file_path)


In [94]:
# Sanity check: list every loaded table together with its (rows, columns) shape
for name, df in dfs_xgb_3dof.items():
    print(name, df.shape, sep=": ")

Xgboost_3dof_ver2_extra_na2_nb0_nf10.joblib_metrics: (10, 12)
Xgboost_3dof_ver2_extra_na2_nb1_nf9.joblib_metrics: (10, 12)
Xgboost_3dof_ver2_extra_na2_nb2_nf8.joblib_metrics: (10, 12)
Xgboost_3dof_ver2_na2_nb3_nf7.joblib_metrics: (10, 12)
Xgboost_3dof_ver2_na2_nb4_nf6.joblib_metrics: (10, 12)


In [95]:
# Heave train/validation R² of the XGBoost model for each lag combination
# (values entered by hand)
_summary_columns = ('nd', 'nb', 'na', 'r2_train', 'r2_val')
_summary_rows = (
    (-10, 0, 2, 0.959, 0.965),
    (-9, 1, 2, 0.956, 0.962),
    (-8, 2, 2, 0.954, 0.956),
    (-7, 3, 2, 0.943, 0.952),
    (-6, 4, 2, 0.940, 0.945),
)
data_heave_xgb_3dof = [dict(zip(_summary_columns, row)) for row in _summary_rows]

# One row per lag combination
df_summary_heave_xgb_3dof = pd.DataFrame(data_heave_xgb_3dof)

# Show the table
print(df_summary_heave_xgb_3dof)



   nd  nb  na  r2_train  r2_val
0 -10   0   2     0.959   0.965
1  -9   1   2     0.956   0.962
2  -8   2   2     0.954   0.956
3  -7   3   2     0.943   0.952
4  -6   4   2     0.940   0.945


In [96]:
# Pitch train/validation R² of the XGBoost model for each lag combination
# (values entered by hand)
_summary_columns = ('nd', 'nb', 'na', 'r2_train', 'r2_val')
_summary_rows = (
    (-10, 0, 2, 0.892, 0.893),
    (-9, 1, 2, 0.897, 0.897),
    (-8, 2, 2, 0.900, 0.886),
    (-7, 3, 2, 0.885, 0.881),
    (-6, 4, 2, 0.886, 0.874),
)
data_pitch_xgb_3dof = [dict(zip(_summary_columns, row)) for row in _summary_rows]

# One row per lag combination
df_summary_pitch_xgb_3dof = pd.DataFrame(data_pitch_xgb_3dof)

# Show the table
print(df_summary_pitch_xgb_3dof)

   nd  nb  na  r2_train  r2_val
0 -10   0   2     0.892   0.893
1  -9   1   2     0.897   0.897
2  -8   2   2     0.900   0.886
3  -7   3   2     0.885   0.881
4  -6   4   2     0.886   0.874


In [97]:
# Pendulum train/validation R² of the XGBoost model for each lag combination
# (values entered by hand)
_summary_columns = ('nd', 'nb', 'na', 'r2_train', 'r2_val')
_summary_rows = (
    (-10, 0, 2, 0.840, 0.841),
    (-9, 1, 2, 0.853, 0.846),
    (-8, 2, 2, 0.855, 0.841),
    (-7, 3, 2, 0.852, 0.812),
    (-6, 4, 2, 0.855, 0.839),
)
data_pendulum_xgb_3dof = [dict(zip(_summary_columns, row)) for row in _summary_rows]

# One row per lag combination
df_summary_pendulum_xgb_3dof = pd.DataFrame(data_pendulum_xgb_3dof)

# Show the table
print(df_summary_pendulum_xgb_3dof)

   nd  nb  na  r2_train  r2_val
0 -10   0   2     0.840   0.841
1  -9   1   2     0.853   0.846
2  -8   2   2     0.855   0.841
3  -7   3   2     0.852   0.812
4  -6   4   2     0.855   0.839


In [233]:
# x-axis order of the test cases. It is taken from test_case_pretty_map (dicts
# keep insertion order) so the raw ids and their display labels are maintained
# in a single place instead of being retyped in every plotting cell.
custom_order = list(test_case_pretty_map)
pretty_custom_order = [test_case_pretty_map[tc] for tc in custom_order]

# comment_value -> colour, so all traces of one lag combination share a colour
color_map = {}

# Left: test R² per test case. Right: train/validation R² per lag combination.
# The two panels keep independent y-axes.
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

# For every XGBoost metrics table: the heave test-R² line on the left panel
# and, when the lag combination is listed in df_summary_heave_xgb_3dof, its
# train/validation R² markers on the right panel.
palette = px.colors.qualitative.Plotly

for df_name, original_df in dfs_xgb_3dof.items():
    df = original_df.copy()  # work on a copy so the stored table stays raw

    # Lag orders are parsed from the table name ("..._na<A>_nb<B>_nf<F>...").
    # The nf number is shown, and matched against the summary table, as a
    # negative nd.
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na_val, nb_val, nf_val = match.groups()  # regex groups are strings
        comment_value = f'na={na_val}, nb={nb_val}, nd=-{nf_val}'
        summary_key = (int(na_val), int(nb_val), -int(nf_val))
    else:
        # No lag orders in the name: label the trace with the raw name and skip
        # the summary lookup rather than reusing values from a previous table
        # (or failing with a NameError on the first one).
        comment_value = df_name
        summary_key = None

    # Assign one colour per lag combination
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]
    trace_color = color_map[comment_value]

    # The test-case column is spelled with either a space or an underscore
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Replace raw ids by display labels and sort the rows in x-axis order.
    # Ids that are missing from test_case_pretty_map become NaN.
    df[test_case_col] = pd.Categorical(
        df[test_case_col].map(test_case_pretty_map),
        categories=pretty_custom_order,
        ordered=True,
    )
    df = df.sort_values(test_case_col)

    # Heave test R² line (left panel)
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_heave'],
            mode='lines+markers',
            name=comment_value,
            legendgroup=comment_value,
            line=dict(color=trace_color)
        ),
        row=1, col=1
    )

    # NOTE: an earlier "highlight nb == 2" branch was removed here. It compared
    # the regex string with the integer 2 (never true) and the width/size/symbol
    # it computed were not passed to any trace, so it had no effect on the figure.

    # Train/validation markers (right panel), looked up in df_summary_heave_xgb_3dof
    if summary_key is None:
        continue
    na_order, nb_order, nd_delay = summary_key
    summary_row = df_summary_heave_xgb_3dof[
        (df_summary_heave_xgb_3dof['na'] == na_order)
        & (df_summary_heave_xgb_3dof['nb'] == nb_order)
        & (df_summary_heave_xgb_3dof['nd'] == nd_delay)
    ]
    if summary_row.empty:
        continue

    r2_train = summary_row['r2_train'].iloc[0]
    r2_val = summary_row['r2_val'].iloc[0]

    for symbol, split_name, r2_value in (('x', 'Train', r2_train), ('square', 'Val', r2_val)):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[r2_value],
                mode='markers',
                marker=dict(symbol=symbol, size=8, color=trace_color),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,  # toggles together with the line trace
                showlegend=False
            ),
            row=1, col=2
        )
# Grey, data-less traces whose only job is to explain the marker symbols in the legend
for legend_symbol, legend_label in (('x', ' Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=legend_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )
# Titles, figure size and axis configuration, then render
fig.update_layout(
    title="Test R² Scores for Heave Prediction Across Sea Conditions and Lags Combinations - Combined XGBoost Model for All DoFs",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    # Left panel: test cases on x, forced into the custom order
    xaxis=dict(
        title="Test Case",
        categoryorder='array',
        categoryarray=pretty_custom_order
    ),
    yaxis=dict(title="R² Score"),
    # Right panel: one x position per lag combination, y fixed to [0, 1]
    xaxis2=dict(title="Model Combination"),
    yaxis2=dict(
        title="Train/Validation R² Score",
        range=[0, 1],
        showgrid=True
    )
)

fig.show()


In [234]:
# x-axis order of the test cases. It is taken from test_case_pretty_map (dicts
# keep insertion order) so the raw ids and their display labels are maintained
# in a single place instead of being retyped in every plotting cell.
custom_order = list(test_case_pretty_map)
pretty_custom_order = [test_case_pretty_map[tc] for tc in custom_order]

# comment_value -> colour, so all traces of one lag combination share a colour
color_map = {}

# Left: test R² per test case. Right: train/validation R² per lag combination.
# The two panels keep independent y-axes.
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

# For every XGBoost metrics table: the pitch test-R² line on the left panel
# and, when the lag combination is listed in df_summary_pitch_xgb_3dof, its
# train/validation R² markers on the right panel.
palette = px.colors.qualitative.Plotly

for df_name, original_df in dfs_xgb_3dof.items():
    df = original_df.copy()  # work on a copy so the stored table stays raw

    # Lag orders are parsed from the table name ("..._na<A>_nb<B>_nf<F>...").
    # The nf number is shown, and matched against the summary table, as a
    # negative nd.
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na_val, nb_val, nf_val = match.groups()  # regex groups are strings
        comment_value = f'na={na_val}, nb={nb_val}, nd=-{nf_val}'
        summary_key = (int(na_val), int(nb_val), -int(nf_val))
    else:
        # No lag orders in the name: label the trace with the raw name and skip
        # the summary lookup rather than reusing values from a previous table
        # (or failing with a NameError on the first one).
        comment_value = df_name
        summary_key = None

    # Assign one colour per lag combination
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]
    trace_color = color_map[comment_value]

    # The test-case column is spelled with either a space or an underscore
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Replace raw ids by display labels and sort the rows in x-axis order.
    # Ids that are missing from test_case_pretty_map become NaN.
    df[test_case_col] = pd.Categorical(
        df[test_case_col].map(test_case_pretty_map),
        categories=pretty_custom_order,
        ordered=True,
    )
    df = df.sort_values(test_case_col)

    # Pitch test R² line (left panel). The mode is stated explicitly, as in the
    # sibling plots, so the trace style does not depend on the number of points.
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_pitch'],
            mode='lines+markers',
            name=comment_value,
            legendgroup=comment_value,
            line=dict(color=trace_color)
        ),
        row=1, col=1
    )

    # Train/validation markers (right panel), looked up in df_summary_pitch_xgb_3dof
    if summary_key is None:
        continue
    na_order, nb_order, nd_delay = summary_key
    summary_row = df_summary_pitch_xgb_3dof[
        (df_summary_pitch_xgb_3dof['na'] == na_order)
        & (df_summary_pitch_xgb_3dof['nb'] == nb_order)
        & (df_summary_pitch_xgb_3dof['nd'] == nd_delay)
    ]
    if summary_row.empty:
        continue

    r2_train = summary_row['r2_train'].iloc[0]
    r2_val = summary_row['r2_val'].iloc[0]

    for symbol, split_name, r2_value in (('x', 'Train', r2_train), ('square', 'Val', r2_val)):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[r2_value],
                mode='markers',
                marker=dict(symbol=symbol, size=8, color=trace_color),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,  # toggles together with the line trace
                showlegend=False
            ),
            row=1, col=2
        )
# Legend-only placeholder traces: empty grey markers that tell the reader which
# symbol stands for the training score and which for the validation score.
for marker_symbol, legend_label in (('x', ' Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=marker_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )

# Left panel: keep the sea states in the hand-picked order
test_case_axis = dict(
    categoryorder='array',
    categoryarray=pretty_custom_order
)
# Right panel: fixed R² window for the train/validation markers
train_val_axis = dict(
    title="Train/Validation R² Score",
    range=[0, 1],  # widen if any score falls outside this window
    showgrid=True
)

# Titles, size, and axis settings for the whole figure
fig.update_layout(
    title="Test R² Scores for Pitch Prediction Across Sea Conditions and Lags Combinations - Combined XGBoost Model for All DoFs",
    xaxis_title="Test Case",
    xaxis2_title="Model Combination",
    yaxis_title="R² Score",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    xaxis=test_case_axis,
    yaxis2=train_val_axis
)

fig.show()


In [237]:
# Pendulum R² for the combined (3-DoF) XGBoost models: test R² per sea state on
# the left panel, train/validation R² per lag combination on the right panel.

# Raw test-case labels in the order they should appear on the x-axis
custom_order = [
    'T4p5s_Hs1m',
    'T4p5s_Hs2m',
    'Tp6p8s_Hs1m',
    'Tp6p8s_Hs2m',
    'Tp6p8s_Hs4m',
    'Tp6p8s_Hs6m',
    'Tp6p8s_Hs8m',
    'T10p2s_Hs1m',
    'Tp10p2_Hs2m',
    'T10p2s_Hs4m'
]
# Display labels in the same order; a case missing from the map keeps its raw name
pretty_custom_order = [test_case_pretty_map.get(tc, tc) for tc in custom_order]

# One colour per lag combination, shared by its line and its train/val markers
color_map = {}
palette = px.colors.qualitative.Plotly

# Two side-by-side panels with independent y-axes
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

for df_name, original_df in dfs_xgb_3dof.items():
    df = original_df.copy()  # never modify the frame stored in the dictionary

    # Recover the lag orders from the file-derived name
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na, nb, nf = (int(group) for group in match.groups())
        nd = -nf  # the summary table is looked up with nd = -nf
        comment_value = f'na={na}, nb={nb}, nd={nd}'  # avoids printing "nd=-0"
    else:
        comment_value = df_name

    # Cycle through the palette as new combinations appear
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]

    # The metrics files spell the test-case column in one of two ways
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Relabel with the same fallback used for pretty_custom_order, so a case that
    # has no pretty name is kept instead of silently becoming NaN; labels outside
    # pretty_custom_order still become NaN. Then sort in the custom order.
    pretty_labels = df[test_case_col].map(lambda tc: test_case_pretty_map.get(tc, tc))
    df[test_case_col] = pd.Categorical(pretty_labels, categories=pretty_custom_order, ordered=True)
    df = df.sort_values(test_case_col)

    # Left panel: pendulum test R² across sea states
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_pendulum'],
            name=comment_value,
            legendgroup=comment_value,
            line=dict(color=color_map[comment_value])
        ),
        row=1, col=1
    )

    # Without parsed lag orders there is nothing to look up. Previously this
    # reused the values of the preceding frame (or raised NameError on the first).
    if match is None:
        continue

    # Right panel: train/validation R² of the matching summary row, if any
    lag_mask = (
        (df_summary_pendulum_xgb_3dof['na'] == na)
        & (df_summary_pendulum_xgb_3dof['nb'] == nb)
        & (df_summary_pendulum_xgb_3dof['nd'] == nd)
    )
    summary_row = df_summary_pendulum_xgb_3dof[lag_mask]
    if summary_row.empty:
        continue

    for marker_symbol, split_name, score in (
        ('x', 'Train', summary_row['r2_train'].values[0]),
        ('square', 'Val', summary_row['r2_val'].values[0]),
    ):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[score],
                mode='markers',
                marker=dict(symbol=marker_symbol, size=8, color=color_map[comment_value]),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,
                showlegend=False
            ),
            row=1, col=2
        )

# Legend-only placeholder traces explaining the two marker symbols
for marker_symbol, legend_label in (('x', ' Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=marker_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )

# Final layout adjustments
fig.update_layout(
    title="Test R² Scores for Pendulum Prediction Across Sea Conditions and Lags Combinations - Combined XGBoost Model for All DoFs",
    xaxis_title="Test Case",
    xaxis2_title="Model Combination",
    yaxis_title="R² Score",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    xaxis=dict(
        categoryorder='array',
        categoryarray=pretty_custom_order
    ),
    # The train/validation panel is the second subplot, hence yaxis2. The old
    # 'yaxis1' key retitled and clamped the left test-R² panel instead.
    yaxis2=dict(
        title="Train/Validation R² Score",
        range=[0, 1],  # widen if any score falls outside this window
        showgrid=True
    )
)

fig.show()


# XGBoost-NARX: One Model for Each DoF


In [240]:
dfs_xgb_1dof = dict()  # metrics DataFrames of the single-DoF XGBoost models, keyed by file name

In [241]:
# Load the heave-only model metrics: one DataFrame per CSV file
folder = "Xgboost/results/1dof_model/heave/test_best_models/metrics/*.csv"
csv_files = sorted(glob.glob(folder))  # sorted so the load order is reproducible

for file_path in csv_files:
    # Key = file name with only its last extension removed, so a name such as
    # "x.joblib.csv" is stored under "x.joblib"
    df_name, _extension = os.path.splitext(os.path.basename(file_path))
    dfs_xgb_1dof[df_name] = pd.read_csv(file_path)


In [242]:
# Load the pitch-only model metrics: one DataFrame per CSV file
folder = "Xgboost/results/1dof_model/pitch/test_best_models/metrics/*.csv"
csv_files = sorted(glob.glob(folder))  # sorted so the load order is reproducible

for file_path in csv_files:
    # Key = file name with only its last extension removed, so a name such as
    # "x.joblib.csv" is stored under "x.joblib"
    df_name, _extension = os.path.splitext(os.path.basename(file_path))
    dfs_xgb_1dof[df_name] = pd.read_csv(file_path)


In [243]:
# Load the pendulum-only model metrics: one DataFrame per CSV file
folder = "Xgboost/results/1dof_model/pendulum/test_best_models/metrics/*.csv"
csv_files = sorted(glob.glob(folder))  # sorted so the load order is reproducible

for file_path in csv_files:
    # Key = file name with only its last extension removed, so a name such as
    # "x.joblib.csv" is stored under "x.joblib"
    df_name, _extension = os.path.splitext(os.path.basename(file_path))
    dfs_xgb_1dof[df_name] = pd.read_csv(file_path)


In [244]:
# Sanity check: list every loaded metrics frame together with its (rows, columns) shape
for frame_name, frame in dfs_xgb_1dof.items():
    print(f"{frame_name}: {frame.shape}")

metrics_Xgboost_heave_eta_only_na2_nb0_nf10.joblib: (10, 8)
metrics_Xgboost_heave_eta_only_na2_nb0_nf11.joblib: (10, 8)
metrics_Xgboost_heave_eta_only_na2_nb0_nf12.joblib: (10, 8)
metrics_Xgboost_heave_eta_only_na2_nb0_nf13.joblib: (10, 8)
metrics_Xgboost_heave_eta_only_na2_nb0_nf8.joblib: (10, 8)
metrics_Xgboost_heave_eta_only_na2_nb0_nf9.joblib: (10, 8)
metrics_pitch_Xgboost_pitch_ver2__na2_nb0_nf8.joblib: (10, 8)
metrics_pitch_Xgboost_pitch_ver2__na2_nb4_nf4.joblib: (10, 8)
metrics_pitch_Xgboost_pitch_ver2__na2_nb5_nf3.joblib: (10, 8)
metrics_pitch_Xgboost_pitch_ver2__na2_nb6_nf2.joblib: (10, 8)
metrics_pitch_Xgboost_pitch_ver2__na2_nb7_nf1.joblib: (10, 8)
metrics_pendulum_Xgboost_pendulum_ver1__na2_nb9_nf1.joblib: (10, 8)
metrics_pendulum_Xgboost_pendulum_ver2__na2_nb10_nf0.joblib: (10, 8)
metrics_pendulum_Xgboost_pendulum_ver2__na2_nb8_nf2.joblib: (10, 8)


In [245]:
# Train/validation R² of the heave-only XGBoost models, entered by hand
# (one record per na/nb/nd lag combination)
data_heave_xgb_1dof = [
    dict(nd=-8, nb=0, na=2, r2_train=0.922, r2_val=0.928),
    dict(nd=-9, nb=0, na=2, r2_train=0.925, r2_val=0.930),
    dict(nd=-10, nb=0, na=2, r2_train=0.929, r2_val=0.935),
    dict(nd=-11, nb=0, na=2, r2_train=0.926, r2_val=0.934),
    dict(nd=-12, nb=0, na=2, r2_train=0.931, r2_val=0.940),
    dict(nd=-13, nb=0, na=2, r2_train=0.929, r2_val=0.938),
]

# Tabulate the records and show the result
df_summary_heave_xgb_1dof = pd.DataFrame(data_heave_xgb_1dof)
print(df_summary_heave_xgb_1dof)



   nd  nb  na  r2_train  r2_val
0  -8   0   2     0.922   0.928
1  -9   0   2     0.925   0.930
2 -10   0   2     0.929   0.935
3 -11   0   2     0.926   0.934
4 -12   0   2     0.931   0.940
5 -13   0   2     0.929   0.938


In [246]:
# Train/validation R² of the pitch-only XGBoost models, entered by hand
# (one record per na/nb/nd lag combination)
# NOTE(review): the loaded pitch metrics include a file tagged "na2_nb0_nf8",
# which the plotting cell looks up as nb=0, nd=-8. No record below matches that
# (the first one is nb=8, nd=0) -- confirm whether the file name or this row is right.
data_pitch_xgb_1dof = [
    dict(nd=0, nb=8, na=2, r2_train=0.781, r2_val=0.793),
    dict(nd=-1, nb=7, na=2, r2_train=0.803, r2_val=0.785),
    dict(nd=-2, nb=6, na=2, r2_train=0.801, r2_val=0.790),
    dict(nd=-3, nb=5, na=2, r2_train=0.802, r2_val=0.788),
    dict(nd=-4, nb=4, na=2, r2_train=0.794, r2_val=0.772),
]

# Tabulate the records and show the result
df_summary_pitch_xgb_1dof = pd.DataFrame(data_pitch_xgb_1dof)
print(df_summary_pitch_xgb_1dof)

   nd  nb  na  r2_train  r2_val
0   0   8   2     0.781   0.793
1  -1   7   2     0.803   0.785
2  -2   6   2     0.801   0.790
3  -3   5   2     0.802   0.788
4  -4   4   2     0.794   0.772


In [247]:
# Train/validation R² of the pendulum-only XGBoost models, entered by hand
# (one record per na/nb/nd lag combination)
data_pendulum_xgb_1dof = [
    dict(nd=-2, nb=8, na=2, r2_train=0.632, r2_val=0.631),
    dict(nd=-1, nb=9, na=2, r2_train=0.650, r2_val=0.632),
    dict(nd=0, nb=10, na=2, r2_train=0.643, r2_val=0.619),
]

# Tabulate the records and show the result
df_summary_pendulum_xgb_1dof = pd.DataFrame(data_pendulum_xgb_1dof)
print(df_summary_pendulum_xgb_1dof)

   nd  nb  na  r2_train  r2_val
0  -2   8   2     0.632   0.631
1  -1   9   2     0.650   0.632
2   0  10   2     0.643   0.619


In [248]:
# Heave R² for the heave-only XGBoost models: test R² per sea state on the left
# panel, train/validation R² per lag combination on the right panel.

# Keep only the metrics frames of the heave-only models
heave_dfs_xgb = {k: v for k, v in dfs_xgb_1dof.items() if k.startswith('metrics_Xgboost_heave')}

# Raw test-case labels in the order they should appear on the x-axis
custom_order = [
    'T4p5s_Hs1m',
    'T4p5s_Hs2m',
    'Tp6p8s_Hs1m',
    'Tp6p8s_Hs2m',
    'Tp6p8s_Hs4m',
    'Tp6p8s_Hs6m',
    'Tp6p8s_Hs8m',
    'T10p2s_Hs1m',
    'Tp10p2_Hs2m',
    'T10p2s_Hs4m'
]
# Display labels in the same order; a case missing from the map keeps its raw name
pretty_custom_order = [test_case_pretty_map.get(tc, tc) for tc in custom_order]

# One colour per lag combination, shared by its line and its train/val markers
color_map = {}
palette = px.colors.qualitative.Plotly

# Two side-by-side panels with independent y-axes
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

for df_name, original_df in heave_dfs_xgb.items():
    df = original_df.copy()  # never modify the frame stored in the dictionary

    # Recover the lag orders from the file-derived name
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na, nb, nf = (int(group) for group in match.groups())
        nd = -nf  # the summary table stores this order as nd = -nf
        comment_value = f'na={na}, nb={nb}, nd={nd}'  # avoids printing "nd=-0"
    else:
        comment_value = df_name

    # Cycle through the palette as new combinations appear
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]

    # The metrics files spell the test-case column in one of two ways
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Relabel with the same fallback used for pretty_custom_order, so a case that
    # has no pretty name is kept instead of silently becoming NaN; labels outside
    # pretty_custom_order still become NaN. Then sort in the custom order.
    pretty_labels = df[test_case_col].map(lambda tc: test_case_pretty_map.get(tc, tc))
    df[test_case_col] = pd.Categorical(pretty_labels, categories=pretty_custom_order, ordered=True)
    df = df.sort_values(test_case_col)

    # Left panel: heave test R² across sea states
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_heave'],
            mode='lines+markers',
            name=comment_value,
            legendgroup=comment_value,
            line=dict(color=color_map[comment_value])
        ),
        row=1, col=1
    )

    # Without parsed lag orders there is nothing to look up. Previously this
    # reused the values of the preceding frame (or raised NameError on the first).
    if match is None:
        continue

    # Right panel: train/validation R² of the matching summary row, if any
    lag_mask = (
        (df_summary_heave_xgb_1dof['na'] == na)
        & (df_summary_heave_xgb_1dof['nb'] == nb)
        & (df_summary_heave_xgb_1dof['nd'] == nd)
    )
    summary_row = df_summary_heave_xgb_1dof[lag_mask]
    if summary_row.empty:
        continue

    for marker_symbol, split_name, score in (
        ('x', 'Train', summary_row['r2_train'].values[0]),
        ('square', 'Val', summary_row['r2_val'].values[0]),
    ):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[score],
                mode='markers',
                marker=dict(symbol=marker_symbol, size=8, color=color_map[comment_value]),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,
                showlegend=False
            ),
            row=1, col=2
        )

# Legend-only placeholder traces explaining the two marker symbols
for marker_symbol, legend_label in (('x', ' Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=marker_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )

# Final layout adjustments
fig.update_layout(
    title="Test R² Scores for Heave Prediction Across Sea Conditions and Lags Combinations - XGBoost Heave Only Model",
    xaxis_title="Test Case",
    xaxis2_title="Model Combination",
    yaxis_title="R² Score",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    xaxis=dict(
        categoryorder='array',
        categoryarray=pretty_custom_order
    ),
    # The train/validation panel is the second subplot, hence yaxis2. The old
    # 'yaxis1' key retitled and clamped the left test-R² panel instead.
    yaxis2=dict(
        title="Train/Validation R² Score",
        range=[0, 1],  # widen if any score falls outside this window
        showgrid=True
    )
)

fig.show()


In [249]:
# Pitch R² for the pitch-only XGBoost models: test R² per sea state on the left
# panel, train/validation R² per lag combination on the right panel.

# Keep only the metrics frames of the pitch-only models
pitch_dfs_xgb = {k: v for k, v in dfs_xgb_1dof.items() if k.startswith('metrics_pitch')}

# Raw test-case labels in the order they should appear on the x-axis
custom_order = [
    'T4p5s_Hs1m',
    'T4p5s_Hs2m',
    'Tp6p8s_Hs1m',
    'Tp6p8s_Hs2m',
    'Tp6p8s_Hs4m',
    'Tp6p8s_Hs6m',
    'Tp6p8s_Hs8m',
    'T10p2s_Hs1m',
    'Tp10p2_Hs2m',
    'T10p2s_Hs4m'
]
# Display labels in the same order; a case missing from the map keeps its raw name
pretty_custom_order = [test_case_pretty_map.get(tc, tc) for tc in custom_order]

# One colour per lag combination, shared by its line and its train/val markers
color_map = {}
palette = px.colors.qualitative.Plotly

# Two side-by-side panels with independent y-axes
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

for df_name, original_df in pitch_dfs_xgb.items():
    df = original_df.copy()  # never modify the frame stored in the dictionary

    # Recover the lag orders from the file-derived name
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na, nb, nf = (int(group) for group in match.groups())
        nd = -nf  # the summary table stores this order as nd = -nf
        comment_value = f'na={na}, nb={nb}, nd={nd}'  # avoids printing "nd=-0"
    else:
        comment_value = df_name

    # Cycle through the palette as new combinations appear
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]

    # The metrics files spell the test-case column in one of two ways
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Relabel with the same fallback used for pretty_custom_order, so a case that
    # has no pretty name is kept instead of silently becoming NaN; labels outside
    # pretty_custom_order still become NaN. Then sort in the custom order.
    pretty_labels = df[test_case_col].map(lambda tc: test_case_pretty_map.get(tc, tc))
    df[test_case_col] = pd.Categorical(pretty_labels, categories=pretty_custom_order, ordered=True)
    df = df.sort_values(test_case_col)

    # Left panel: pitch test R² across sea states
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_pitch'],
            mode='lines+markers',
            name=comment_value,
            legendgroup=comment_value,
            line=dict(color=color_map[comment_value])
        ),
        row=1, col=1
    )

    # Without parsed lag orders there is nothing to look up. Previously this
    # reused the values of the preceding frame (or raised NameError on the first).
    if match is None:
        continue

    # Right panel: train/validation R² of the matching summary row, if any
    lag_mask = (
        (df_summary_pitch_xgb_1dof['na'] == na)
        & (df_summary_pitch_xgb_1dof['nb'] == nb)
        & (df_summary_pitch_xgb_1dof['nd'] == nd)
    )
    summary_row = df_summary_pitch_xgb_1dof[lag_mask]
    if summary_row.empty:
        continue

    for marker_symbol, split_name, score in (
        ('x', 'Train', summary_row['r2_train'].values[0]),
        ('square', 'Val', summary_row['r2_val'].values[0]),
    ):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[score],
                mode='markers',
                marker=dict(symbol=marker_symbol, size=8, color=color_map[comment_value]),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,
                showlegend=False
            ),
            row=1, col=2
        )

# Legend-only placeholder traces explaining the two marker symbols
for marker_symbol, legend_label in (('x', ' Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=marker_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )

# Final layout adjustments
fig.update_layout(
    # The title previously read "Heave Prediction" although this figure plots pitch
    title="Test R² Scores for Pitch Prediction Across Sea Conditions and Lags Combinations - XGBoost Pitch Only Model",
    xaxis_title="Test Case",
    xaxis2_title="Model Combination",
    yaxis_title="R² Score",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    xaxis=dict(
        categoryorder='array',
        categoryarray=pretty_custom_order
    ),
    # Fixed R² window for the train/validation panel (second subplot)
    yaxis2=dict(
        title="Train/Validation R² Score",
        range=[0, 1],  # widen if any score falls outside this window
        showgrid=True
    )
)

fig.show()


In [None]:
# Pendulum R² for the pendulum-only XGBoost models: test R² per sea state on the
# left panel, train/validation R² per lag combination on the right panel.

# Keep only the metrics frames of the pendulum-only models
pendulum_dfs_xgb = {k: v for k, v in dfs_xgb_1dof.items() if k.startswith('metrics_pendulum')}

# Raw test-case labels in the order they should appear on the x-axis
custom_order = [
    'T4p5s_Hs1m',
    'T4p5s_Hs2m',
    'Tp6p8s_Hs1m',
    'Tp6p8s_Hs2m',
    'Tp6p8s_Hs4m',
    'Tp6p8s_Hs6m',
    'Tp6p8s_Hs8m',
    'T10p2s_Hs1m',
    'Tp10p2_Hs2m',
    'T10p2s_Hs4m'
]
# Display labels in the same order; a case missing from the map keeps its raw name
pretty_custom_order = [test_case_pretty_map.get(tc, tc) for tc in custom_order]

# One colour per lag combination, shared by its line and its train/val markers
color_map = {}
palette = px.colors.qualitative.Plotly

# Two side-by-side panels with independent y-axes
fig = make_subplots(
    rows=1, cols=2,
    column_widths=[0.75, 0.25],
    subplot_titles=("Test Cases R²", "Train/Validation R² Scores"),
    shared_yaxes=False
)

for df_name, original_df in pendulum_dfs_xgb.items():
    df = original_df.copy()  # never modify the frame stored in the dictionary

    # Recover the lag orders from the file-derived name
    match = re.search(r'na(\d+)_nb(\d+)_nf(\d+)', df_name)
    if match:
        na, nb, nf = (int(group) for group in match.groups())
        nd = -nf  # the summary table stores this order as nd = -nf
        comment_value = f'na={na}, nb={nb}, nd={nd}'  # avoids printing "nd=-0"
    else:
        comment_value = df_name

    # Cycle through the palette as new combinations appear
    if comment_value not in color_map:
        color_map[comment_value] = palette[len(color_map) % len(palette)]

    # The metrics files spell the test-case column in one of two ways
    if 'test case' in df.columns:
        test_case_col = 'test case'
    elif 'test_case' in df.columns:
        test_case_col = 'test_case'
    else:
        raise ValueError(f"No valid 'test_case' column found in {df_name}")

    # Relabel with the same fallback used for pretty_custom_order, so a case that
    # has no pretty name is kept instead of silently becoming NaN; labels outside
    # pretty_custom_order still become NaN. Then sort in the custom order.
    pretty_labels = df[test_case_col].map(lambda tc: test_case_pretty_map.get(tc, tc))
    df[test_case_col] = pd.Categorical(pretty_labels, categories=pretty_custom_order, ordered=True)
    df = df.sort_values(test_case_col)

    # Left panel: pendulum test R² across sea states
    fig.add_trace(
        go.Scatter(
            x=df[test_case_col],
            y=df['r2_test_pendulum'],
            mode='lines+markers',
            name=comment_value,
            legendgroup=comment_value,
            line=dict(color=color_map[comment_value])
        ),
        row=1, col=1
    )

    # Without parsed lag orders there is nothing to look up. Previously this
    # reused the values of the preceding frame (or raised NameError on the first).
    if match is None:
        continue

    # Right panel: train/validation R² of the matching summary row, if any
    lag_mask = (
        (df_summary_pendulum_xgb_1dof['na'] == na)
        & (df_summary_pendulum_xgb_1dof['nb'] == nb)
        & (df_summary_pendulum_xgb_1dof['nd'] == nd)
    )
    summary_row = df_summary_pendulum_xgb_1dof[lag_mask]
    if summary_row.empty:
        continue

    for marker_symbol, split_name, score in (
        ('x', 'Train', summary_row['r2_train'].values[0]),
        ('square', 'Val', summary_row['r2_val'].values[0]),
    ):
        fig.add_trace(
            go.Scatter(
                x=[comment_value],
                y=[score],
                mode='markers',
                marker=dict(symbol=marker_symbol, size=8, color=color_map[comment_value]),
                name=f'{comment_value} {split_name}',
                legendgroup=comment_value,
                showlegend=False
            ),
            row=1, col=2
        )

# Legend-only placeholder traces explaining the two marker symbols
for marker_symbol, legend_label in (('x', ' Train R²'), ('square', 'Validation R²')):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(symbol=marker_symbol, size=7, color='grey'),
            name=legend_label,
            showlegend=True
        ),
        row=1, col=2
    )

# Final layout adjustments
fig.update_layout(
    # The title previously read "Heave Prediction" although this figure plots the pendulum
    title="Test R² Scores for Pendulum Prediction Across Sea Conditions and Lags Combinations - XGBoost Pendulum Only Model",
    xaxis_title="Test Case",
    xaxis2_title="Model Combination",
    yaxis_title="R² Score",
    template="plotly_white",
    height=600,
    width=1200,
    legend_title="na + nb + nd combination",
    xaxis=dict(
        categoryorder='array',
        categoryarray=pretty_custom_order
    ),
    # Fixed R² window for the train/validation panel (second subplot)
    yaxis2=dict(
        title="Train/Validation R² Score",
        range=[0, 1],  # widen if any score falls outside this window
        showgrid=True
    )
)

fig.show()
