In [None]:
import numpy as np
import chardet
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt 
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import plotly.colors as pc
%matplotlib inline  
import psutil
from pathlib import Path
from scipy.io import loadmat
import glob
from scipy.signal import savgol_filter
from tqdm import tqdm
import os



### Load data

In [None]:
# Folder containing the .mat files (one file per test case)
folder_path = "PeWEC_data/Bpto_2e6"

# os.listdir() returns entries in arbitrary, platform-dependent order.
# Sorting the .mat names makes the load order, and therefore the row order of
# the table assembled from them, reproducible across machines and runs.
file_names = sorted(f for f in os.listdir(folder_path) if f.endswith('.mat'))

print("mat File Names:")
print(file_names)

In [None]:
# Load every .mat file into a dictionary keyed by the file name with its
# extension stripped ("X.mat" -> "X").
loaded_data = {
    os.path.splitext(mat_file)[0]: loadmat(os.path.join(folder_path, mat_file))
    for mat_file in file_names
}

In [None]:
# List the variables stored in each loaded .mat file, one test at a time.
for test_key in loaded_data:
    print(f"Data from {test_key}:")
    print(loaded_data[test_key].keys())  # keys of the loaded .mat dictionary
    print()

In [None]:
# Column layout of the assembled table: one row per time sample of one test.
df_columns = ['test_name', 'time', 'eta', 'heave', 'heaveVel',
              'pitch', 'pitchVel', 'pendulum', 'pendulumVel']

# DataFrame column -> variable name inside the .mat file.
signal_keys = {
    'time': 't',
    'heave': 'heave',
    'heaveVel': 'heaveVel',
    'pitch': 'pitch',
    'pitchVel': 'pitchVel',
    'pendulum': 'pendulum',
    'pendulumVel': 'pendulumVel',
}

# Collect one frame per test and concatenate once at the end. Growing `df`
# with pd.concat inside the loop copies all previous rows on every iteration,
# and starting from an empty object-dtype frame can leave the numeric columns
# as object dtype (or trigger a FutureWarning), depending on the pandas version.
test_frames = []
for name, data in loaded_data.items():
    # Flatten so that every signal becomes a 1-D column.
    df_load = pd.DataFrame(
        {column: data[key].flatten() for column, key in signal_keys.items()}
    )

    # Free-decay files are given a zero free-surface elevation instead of
    # reading an 'eta' variable; 0.0 keeps the column float like the others.
    if 'FreeDecay' in name:
        df_load['eta'] = 0.0
    else:
        df_load['eta'] = data['eta'].flatten()

    # Tag every row with the test it came from.
    df_load['test_name'] = name

    test_frames.append(df_load[df_columns])

# pd.concat() rejects an empty list, so fall back to an empty table when no
# .mat files were loaded.
if test_frames:
    df = pd.concat(test_frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=df_columns)

# Display the final DataFrame
print(df)

# Helper Functions

In [None]:
import numpy as np


def fourier(signal, f_samp=None):
    """
    One-sided amplitude spectrum of a signal.

    The FFT is scaled by 2/N so that, for a real-valued signal, the magnitude
    of each bin equals the amplitude of the corresponding sinusoid. Bins that
    have no mirrored negative-frequency partner are scaled by 1/N instead:
    the constant (f = 0) bin always, and the Nyquist bin when N is even.

    Input arguments:
    signal : array-like, signal data (flattened to 1-D)
    f_samp : float, sampling frequency in Hz (optional, required to evaluate freq)

    Outputs, expressed between f=0 and f=f_samp/2:
    FT : complex numpy array, Fourier transform (contains floor(N/2)+1 points)
    freq : numpy array, frequency vector in Hz (only returned if f_samp is given)

    Raises:
    ValueError : if the signal contains no samples
    """

    # Flatten to a 1-D numpy array
    signal = np.asarray(signal).flatten()
    N = len(signal)
    if N == 0:
        raise ValueError("fourier() requires a signal with at least one sample")

    # Apply FFT with 2/N normalization
    FT = 2 * np.fft.fft(signal) / N

    # Keep only the non-negative frequencies
    N_last = N // 2 + 1
    FT = FT[:N_last]

    # Constant (f=0) mode has no negative-frequency twin: undo the doubling
    FT[0] = FT[0] / 2

    # Same for the Nyquist mode, which exists only when N is even and is then
    # the last retained bin
    if N % 2 == 0:
        FT[-1] = FT[-1] / 2

    # Handle frequency vector output
    if f_samp is not None:
        # Record duration; bins are spaced 1/T_d apart
        T_d = N / f_samp
        freq = np.arange(N_last) / T_d
        return FT, freq
    else:
        return FT

In [None]:
import numpy as np
from scipy.signal import welch, get_window

def spectral_analysis(method, f_samp, signal, T_win):
    """
    Estimate the one-sided power spectral density of a signal.

    Parameters:
    - method: 'welch' or 'fft' (case-insensitive)
    - f_samp: Sampling frequency in Hz
    - signal: 1D numpy array of the signal
    - T_win: Window duration in seconds (used by 'welch' only; ignored by 'fft')

    Returns:
    - S_F: One-sided power spectral density
    - freq: Frequency vector in Hz

    Raises:
    - ValueError: if `method` is neither 'welch' nor 'fft'
    """
    overlap_ratio = 0.5
    method_key = method.lower()

    if method_key == 'welch':
        # Round instead of truncating: f_samp is usually computed as 1/dt, so
        # T_win * f_samp can land a hair below the intended integer and int()
        # alone would then drop a sample from the window.
        nperseg = int(min(round(T_win * f_samp), len(signal)))
        noverlap = int(overlap_ratio * nperseg)
        window = get_window('blackmanharris', nperseg)
        freq, S_F = welch(signal, fs=f_samp, window=window, nperseg=nperseg, noverlap=noverlap)

    elif method_key == 'fft':
        L = len(signal)
        freq = np.fft.rfftfreq(L, d=1/f_samp)
        F_T = np.fft.rfft(signal)
        S_F = (1 / (f_samp * L)) * np.abs(F_T)**2
        # Fold the negative frequencies into the positive ones. DC never has a
        # mirror bin. The last bin is the Nyquist bin (no mirror) only when L
        # is even; for odd L it is an ordinary bin and must be doubled too.
        if L % 2 == 0:
            S_F[1:-1] *= 2
        else:
            S_F[1:] *= 2

    else:
        raise ValueError(f"Method '{method}' not implemented. Use 'welch' or 'fft'.")

    return S_F, freq


# Visualize All Data in the Time Domain

In [None]:
# Hex colours used to tell the test cases apart in the figures.
muted_palette = [
    "#1f77b4",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
    "#17becf",
    "#2c2c2c",
    "#393b79",
    "#637939",
    "#8c6d31",
    "#843c39",
    "#7b4173",
]

In [None]:
# Known test-case identifiers, kept in sorted order so that the
# position of each name is stable when colours are assigned by index.
test_names = [
    'FreeDecay_All', 'FreeDecay_Heave', 'FreeDecay_Pendulum', 'FreeDecay_Pitch',
    'T10p2s_Hs1m', 'T10p2s_Hs4m',
    'T4p5s_Hs1m', 'T4p5s_Hs2m',
    'Tp10p2_Hs2m',
    'Tp6p8s_Hs1m', 'Tp6p8s_Hs2m', 'Tp6p8s_Hs4m', 'Tp6p8s_Hs6m', 'Tp6p8s_Hs8m',
]
test_names.sort()

In [None]:
# Give each test the palette colour at its position in `test_names`,
# wrapping around if there are more tests than colours.
palette_size = len(muted_palette)
color_map = {
    test: muted_palette[position % palette_size]
    for position, test in enumerate(test_names)
}


In [None]:

# Human-readable titles for the raw test identifiers.
# NOTE(review): the keys are not spelled uniformly ("T10p2s", "Tp10p2",
# "Tp6p8s"); presumably they mirror the .mat file names verbatim, so do not
# normalize them without checking the data folder.
name_prettify_map = {
    # Free-decay tests
    'FreeDecay_All': 'Free Decay - All Modes',
    'FreeDecay_Heave': 'Free Decay - Heave',
    'FreeDecay_Pendulum': 'Free Decay - Pendulum',
    'FreeDecay_Pitch': 'Free Decay - Pitch',

    # Sea states
    'T10p2s_Hs1m': 'Tp = 10.2 s, Hs = 1 m',
    'T10p2s_Hs4m': 'Tp = 10.2 s, Hs = 4 m',
    'T4p5s_Hs1m': 'Tp = 4.5 s, Hs = 1 m',
    'T4p5s_Hs2m': 'Tp = 4.5 s, Hs = 2 m',
    'Tp10p2_Hs2m': 'Tp = 10.2 s, Hs = 2 m',
    'Tp6p8s_Hs1m': 'Tp = 6.8 s, Hs = 1 m',
    'Tp6p8s_Hs2m': 'Tp = 6.8 s, Hs = 2 m',
    'Tp6p8s_Hs4m': 'Tp = 6.8 s, Hs = 4 m',
    'Tp6p8s_Hs6m': 'Tp = 6.8 s, Hs = 6 m',
    'Tp6p8s_Hs8m': 'Tp = 6.8 s, Hs = 8 m',
}

In [None]:
# Free surface elevation: one time-series figure per test case, displayed
# inline and exported as PNG.
# Ensure export directory exists
export_dir = "Annex/figures/EDA/free_surface_elevation"
os.makedirs(export_dir, exist_ok=True)
test_names = df['test_name'].unique()

for name in test_names:
    df_subset = df[df['test_name'] == name]

    # Tests that are missing from the lookup tables fall back to their raw
    # name and a neutral grey instead of aborting the loop with a KeyError.
    prettified_title = name_prettify_map.get(name, name)
    line_color = color_map.get(name, "#7f7f7f")

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df_subset["time"],
        y=df_subset["eta"],
        mode="lines",
        name="Free Surface Elevation",
        line=dict(color=line_color)
    ))

    fig.update_layout(
        title=f"Free Surface Elevation - {prettified_title}",
        xaxis_title="Time [s]",
        yaxis_title="η [m]",
        template="plotly_white",
        xaxis=dict(range=[0, 600]),
        height=400
    )
    print(f"{name} → {line_color}")

    fig.show()

    # File name derived from the title: spaces -> "_", "," and "=" dropped,
    # "." -> "p"
    safe_filename = f"{prettified_title.replace(' ', '_').replace(',', '').replace('=', '').replace('.', 'p')}.png"

    # Save figure as PNG (higher scale for better resolution)
    fig.write_image(os.path.join(export_dir, safe_filename), scale=2)



In [None]:
# Heave: one time-series figure per test case, displayed inline and exported
# as PNG.
# Ensure export directory exists
export_dir = "Annex/figures/EDA/heave"
os.makedirs(export_dir, exist_ok=True)
test_names = df['test_name'].unique()

for name in test_names:
    df_subset = df[df['test_name'] == name]

    # Tests that are missing from the lookup tables fall back to their raw
    # name and a neutral grey instead of aborting the loop with a KeyError.
    prettified_title = name_prettify_map.get(name, name)
    line_color = color_map.get(name, "#7f7f7f")

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df_subset["time"],
        y=df_subset["heave"],
        mode="lines",
        name="Heave",
        line=dict(color=line_color)
    ))

    fig.update_layout(
        title=f"Heave - {prettified_title}",
        xaxis_title="Time [s]",
        yaxis_title="Heave [m]",
        template="plotly_white",
        xaxis=dict(range=[0, 600]),
        height=400
    )
    print(f"{name} → {line_color}")

    fig.show()

    # File name derived from the title: spaces -> "_", "," and "=" dropped,
    # "." -> "p"
    safe_filename = f"{prettified_title.replace(' ', '_').replace(',', '').replace('=', '').replace('.', 'p')}.png"

    # Save figure as PNG (higher scale for better resolution)
    fig.write_image(os.path.join(export_dir, safe_filename), scale=2)



In [None]:
# Pitch: one time-series figure per test case, displayed inline and exported
# as PNG.
# Ensure export directory exists
export_dir = "Annex/figures/EDA/Pitch"
os.makedirs(export_dir, exist_ok=True)
test_names = df['test_name'].unique()

for name in test_names:
    df_subset = df[df['test_name'] == name]

    # Tests that are missing from the lookup tables fall back to their raw
    # name and a neutral grey instead of aborting the loop with a KeyError.
    prettified_title = name_prettify_map.get(name, name)
    line_color = color_map.get(name, "#7f7f7f")

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df_subset["time"],
        y=df_subset["pitch"],
        mode="lines",
        name="Pitch",
        line=dict(color=line_color)
    ))

    fig.update_layout(
        title=f"Pitch - {prettified_title}",
        xaxis_title="Time [s]",
        yaxis_title="Pitch [°]",
        template="plotly_white",
        xaxis=dict(range=[0, 600]),
        height=400
    )
    print(f"{name} → {line_color}")

    fig.show()

    # File name derived from the title: spaces -> "_", "," and "=" dropped,
    # "." -> "p"
    safe_filename = f"{prettified_title.replace(' ', '_').replace(',', '').replace('=', '').replace('.', 'p')}.png"

    # Save figure as PNG (higher scale for better resolution)
    fig.write_image(os.path.join(export_dir, safe_filename), scale=2)



In [None]:
# Pendulum: one time-series figure per test case, displayed inline and
# exported as PNG.
# Ensure export directory exists
export_dir = "Annex/figures/EDA/pendulum"
os.makedirs(export_dir, exist_ok=True)
test_names = df['test_name'].unique()

for name in test_names:
    df_subset = df[df['test_name'] == name]

    # Tests that are missing from the lookup tables fall back to their raw
    # name and a neutral grey instead of aborting the loop with a KeyError.
    prettified_title = name_prettify_map.get(name, name)
    line_color = color_map.get(name, "#7f7f7f")

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df_subset["time"],
        y=df_subset["pendulum"],
        mode="lines",
        name="Pendulum",
        line=dict(color=line_color)
    ))

    fig.update_layout(
        title=f"Pendulum - {prettified_title}",
        xaxis_title="Time [s]",
        yaxis_title="Pendulum [°]",
        template="plotly_white",
        xaxis=dict(range=[0, 600]),
        height=400
    )
    print(f"{name} → {line_color}")

    fig.show()

    # File name derived from the title: spaces -> "_", "," and "=" dropped,
    # "." -> "p"
    safe_filename = f"{prettified_title.replace(' ', '_').replace(',', '').replace('=', '').replace('.', 'p')}.png"

    # Save figure as PNG (higher scale for better resolution)
    fig.write_image(os.path.join(export_dir, safe_filename), scale=2)



# Free Decay Analysis

In [None]:


# Fixed color scheme
fixed_color_map = {
    'heave': 'blue',
    'pitch': 'green',
    'pendulum': 'red'
}

# Map test names to DOFs: each free-decay test is analysed through the one
# signal listed here.
free_decay_cases = {
    'FreeDecay_Heave': 'heave',
    'FreeDecay_Pitch': 'pitch',
    'FreeDecay_Pendulum': 'pendulum'
}

for test_name, dof in free_decay_cases.items():
    df_subset = df[df['test_name'] == test_name]

    # The sampling period is taken from the first two samples, so a test that
    # was not loaded (or has a single row) cannot be analysed.
    if len(df_subset) < 2:
        print(f"Skipping {test_name}: fewer than two samples available")
        continue

    # Cast explicitly: the columns of `df` may be stored as object dtype, and
    # the FFT needs plain floats.
    time = df_subset["time"].astype(float).values
    signal = df_subset[dof].astype(float).values

    # Sampling frequency. The period is not named `dt` because that would
    # overwrite the `datetime as dt` alias imported at the top of the notebook.
    sample_period = time[1] - time[0]
    fs = 1 / sample_period

    # Use the Fourier function
    FT, freq = fourier(signal, fs)

    # Get magnitude for plotting
    magnitude = np.abs(FT)

    # Create subplots: time series on the left, spectrum on the right
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=("Time Domain", "Frequency Domain (Fourier Transform)"),
        horizontal_spacing=0.15
    )

    # Time domain
    fig.add_trace(go.Scatter(
        x=time,
        y=signal,
        mode="lines",
        name=dof.capitalize(),
        line=dict(color=fixed_color_map[dof])
    ), row=1, col=1)

    # Frequency domain (Fourier Transform)
    fig.add_trace(go.Scatter(
        x=freq,
        y=magnitude,
        mode="lines",
        name=f"{dof} |FT|",
        line=dict(color=fixed_color_map[dof])
    ), row=1, col=2)

    # Y-axis units
    y_units = "m" if dof == "heave" else "°"

    # Layout
    fig.update_layout(
        title={
        "text": f"{dof.capitalize()} Free Decay Test",
        "x": 0.5,  # center horizontally
        "xanchor": "center"},
        template="plotly_white",
        width=900,
        height=350,
        showlegend=False,
        xaxis1=dict(range=[0, 40]),
        xaxis2=dict(range=[0, 1])  # Adjust based on your expected frequency range
    )

    fig.update_xaxes(title_text="Time [s]", row=1, col=1)
    fig.update_yaxes(title_text=f"{dof.capitalize()} [{y_units}]", row=1, col=1)

    fig.update_xaxes(title_text="Frequency [Hz]", row=1, col=2)
    fig.update_yaxes(title_text=f"Amplitude [{y_units}]", row=1, col=2)

    fig.show()

# Training Sea State Analysis

In [None]:
# --- Define which signals to plot ---
test_name = 'Tp6p8s_Hs2m'
df_subset = df[df['test_name'] == test_name]

# Fixed color scheme
fixed_color_map = {
    'eta': 'grey',
    'heave': 'blue',
    'pitch': 'green',
    'pendulum': 'red'
}

# Reference frequencies [Hz] drawn as dashed vertical lines on every spectrum:
# legend label -> (frequency, line colour). Defined once, outside the loop.
nat_freqs = {
    "Heave Natural Frequency": (0.18, "blue"),
    "Pitch Natural Frequency": (0.19, "green"),
    "Pendulum Natural Frequency": (0.17, "red"),
}

# Define folder for the exported figures
output_dir = "Results/testing_case"
os.makedirs(output_dir, exist_ok=True)

# The time axis and sampling frequency are shared by all signals of this test.
# The period is not named `dt` because that would overwrite the
# `datetime as dt` alias imported at the top of the notebook.
time = df_subset["time"].astype(float).values
sample_period = time[1] - time[0]
fs = 1 / sample_period

# Loop through each signal to plot separately
for dof, color in fixed_color_map.items():
    # Cast explicitly: the columns of `df` may be stored as object dtype.
    signal = df_subset[dof].astype(float).values

    # Welch estimate with 120 s windows
    S_F, freq = spectral_analysis(method='welch', f_samp=fs, signal=signal, T_win=120)
    # Keep only the real part, as a plain float array
    S_F = np.asarray(np.real(S_F), dtype=float)

    # Define proper label for η
    label = "η" if dof == "eta" else dof.capitalize()

    # Define units for time and frequency plots
    y_units_time = "m" if dof in ["heave", "eta"] else "°"
    y_units_freq = "m²/Hz" if dof in ["heave", "eta"] else "°²/Hz"

    # Sanity checks on the spectrum before plotting
    print(f"{dof} → freq dtype: {freq.dtype}, S_F dtype: {S_F.dtype}")
    print(f"{dof} → any complex? freq: {np.iscomplexobj(freq)}, S_F: {np.iscomplexobj(S_F)}")
    print(f"{dof} → any NaN? freq: {np.isnan(freq).any()}, S_F: {np.isnan(S_F).any()}")

    # Create subplots: time series on the left, spectrum on the right
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=("Time Domain Signal", "Frequency Domain (Spectral)"),
        horizontal_spacing=0.15
    )

    # Time domain
    fig.add_trace(go.Scatter(
        x=time,
        y=signal,
        mode="lines",
        name=label,
        showlegend=False,
        line=dict(color=color)
    ), row=1, col=1)

    # Frequency domain (Spectral)
    fig.add_trace(go.Scatter(
        x=freq,
        y=S_F,
        mode="lines",
        name=f"{label} PSD",
        showlegend=False,
        line=dict(color=color)
    ), row=1, col=2)

    # Layout
    fig.update_layout(
        title={
            "text": f"{label} – Training Sea State<br><sup>Tp = 6.8 s, Hs = 2.0 m</sup>",
            "x": 0.5,
            "xanchor": "center"
        },
        template="plotly_white",
        width=1200,
        height=350,
        showlegend=True,
        xaxis1=dict(range=[0, 200]),
        xaxis2=dict(range=[0, 1])
    )

    fig.update_xaxes(title_text="Time [s]", row=1, col=1)
    fig.update_yaxes(title_text=f"Amplitude [{y_units_time}]", row=1, col=1)

    fig.update_xaxes(title_text="Frequency [Hz]", row=1, col=2)
    fig.update_yaxes(title_text=f"PSD [{y_units_freq}]", row=1, col=2)

    # Add the reference frequencies as vertical dashed lines with legend
    # entries; each line extends slightly above the spectrum's maximum.
    line_top = S_F.max() * 1.05
    for ref_label, (x_val, line_color) in nat_freqs.items():
        fig.add_trace(go.Scatter(
            x=[x_val, x_val],
            y=[0, line_top],
            mode="lines",
            name=ref_label,
            line=dict(color=line_color, dash="dash", width=1),
            showlegend=True
        ), row=1, col=2)

    filename = f"{output_dir}/{dof}_PSD_2.png"
    fig.write_image(filename, width=1200, height=350)

    fig.show()


# Data Pre-processing

## Creating $\dot{\eta}$ and $\ddot{\eta}$ Columns

In [None]:
# Print the column summary of the assembled table before deriving new columns.
df.info()
    

In [None]:

combined_df = []

# Iterate over the tests actually present in `df` rather than over the global
# `test_names`: other cells rebind that name (one of them to a list without
# the free-decay tests), so relying on it makes the result depend on which
# cell happened to run last.
for name in df['test_name'].unique():
    df_subset = df[df['test_name'] == name].copy()

    # Cast explicitly so the finite differences are computed on floats even
    # if the column is stored as object dtype.
    x0 = df_subset['eta'].astype(float)

    # The one-sided acceleration stencils below reach five samples deep.
    if len(x0) < 5:
        raise ValueError(f"Test '{name}' has {len(x0)} samples; at least 5 are required")

    # Sampling time, taken from two consecutive samples. It is not named `dt`
    # because that would overwrite the `datetime as dt` alias imported at the
    # top of the notebook.
    time_step = df_subset['time'].iloc[2] - df_subset['time'].iloc[1]

    # Central difference: 5-point stencil for velocity (dx/dt).
    # shift(-k) is the sample k steps ahead, shift(k) the sample k steps back;
    # the two samples at each end come out as NaN and are filled in below.
    velocity = (-1*x0.shift(-2) + 8 * x0.shift(-1)
                - 8 * x0.shift(1) + x0.shift(2)) / (12 * time_step)

    # Forward difference for velocity (first two points)
    velocity.iloc[0] = (-3 * x0.iloc[0] + 4 * x0.iloc[1] - x0.iloc[2]) / (2 * time_step)
    velocity.iloc[1] = (-3 * x0.iloc[1] + 4 * x0.iloc[2] - x0.iloc[3]) / (2 * time_step)

    # Backward difference for velocity (last two points)
    velocity.iloc[-1] = (3 * x0.iloc[-1] - 4 * x0.iloc[-2] + x0.iloc[-3]) / (2 * time_step)
    velocity.iloc[-2] = (3 * x0.iloc[-2] - 4 * x0.iloc[-3] + x0.iloc[-4]) / (2 * time_step)

    # Central difference: 5-point stencil for acceleration (d²x/dt²)
    acceleration = (-1*x0.shift(-2) + 16 * x0.shift(-1)
                    - 30 * x0 + 16 * x0.shift(1)
                    - x0.shift(2)) / (12 * time_step**2)

    # Forward difference for acceleration (first two points)
    acceleration.iloc[0] = (2 * x0.iloc[0] - 5 * x0.iloc[1] + 4 * x0.iloc[2] - x0.iloc[3]) / time_step**2
    acceleration.iloc[1] = (2 * x0.iloc[1] - 5 * x0.iloc[2] + 4 * x0.iloc[3] - x0.iloc[4]) / time_step**2

    # Backward difference for acceleration (last two points)
    acceleration.iloc[-1] = (2 * x0.iloc[-1] - 5 * x0.iloc[-2] + 4 * x0.iloc[-3] - x0.iloc[-4]) / time_step**2
    acceleration.iloc[-2] = (2 * x0.iloc[-2] - 5 * x0.iloc[-3] + 4 * x0.iloc[-4] - x0.iloc[-5]) / time_step**2

    # Add to the subset as new velocity and acceleration columns
    df_subset['eta_velocity'] = velocity
    df_subset['eta_acceleration'] = acceleration

    combined_df.append(df_subset)

# Concatenate the processed subsets into one DataFrame
df_final = pd.concat(combined_df, ignore_index=True)

# Check test_name column
print(df_final['test_name'].unique())  # Should list all your test names

# EDA

Goal: inspect the distribution of each signal to decide which kind of normalization to use.

In [None]:
# describe() summary of every column, computed separately for each test.
df_final.groupby(['test_name']).describe()

## Box plots

In [None]:
# Display order of the sea states in the box plots: grouped by peak period,
# and by increasing wave height within each group.
custom_order = [
    # Tp = 4.5 s
    'T4p5s_Hs1m', 'T4p5s_Hs2m',
    # Tp = 6.8 s
    'Tp6p8s_Hs1m', 'Tp6p8s_Hs2m', 'Tp6p8s_Hs4m', 'Tp6p8s_Hs6m', 'Tp6p8s_Hs8m',
    # Tp = 10.2 s
    'T10p2s_Hs1m', 'Tp10p2_Hs2m', 'T10p2s_Hs4m',
]


In [None]:
# Store the free-surface elevation and its two time derivatives as floats.
for eta_column in ('eta', 'eta_velocity', 'eta_acceleration'):
    df_final[eta_column] = df_final[eta_column].astype(float)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# --- Filter and map pretty names ---
# Free-decay tests are excluded; only the sea states are compared.
df_plot = df_final[~df_final['test_name'].str.startswith('FreeDecay')].copy()
df_plot['pretty_name'] = df_plot['test_name'].map(name_prettify_map)

# --- Corresponding pretty name order ---
pretty_order = [name_prettify_map[k] for k in custom_order if k in name_prettify_map]

# --- Compute group means ---
# reindex() aligns the means with the plotting order and yields NaN (instead
# of None) for any sea state that has no data.
group_means = df_plot.groupby('pretty_name')['eta'].mean().reindex(pretty_order)

# --- Plot ---
sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))

sns.boxplot(
    x='pretty_name',
    y='eta',
    data=df_plot,
    order=pretty_order,     # apply the custom order
    width=0.5,
    palette="pastel"
)

# Add means as red diamonds
plt.scatter(
    x=range(len(pretty_order)),
    y=group_means.values,
    color='red',
    marker='D',
    label='Mean',
    zorder=5
)

# --- Labels and formatting ---
plt.title("Box Plot of Free Surface Elevation (η) Across Sea States", fontsize=14)
plt.xlabel("Sea State", fontsize=12)
plt.ylabel("Amplitude η [m]", fontsize=12)
plt.xticks(rotation=45, ha="right")
plt.legend()
plt.tight_layout()

# savefig() does not create missing folders, so make sure the target exists.
box_plot_path = "Results/EDA-Plots/box/box_eta.png"
os.makedirs(os.path.dirname(box_plot_path), exist_ok=True)
plt.savefig(box_plot_path, dpi=300, bbox_inches='tight')

plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# --- Filter and map pretty names ---
# Free-decay tests are excluded; only the sea states are compared.
df_plot = df_final[~df_final['test_name'].str.startswith('FreeDecay')].copy()
df_plot['pretty_name'] = df_plot['test_name'].map(name_prettify_map)

# --- Corresponding pretty name order ---
pretty_order = [name_prettify_map[k] for k in custom_order if k in name_prettify_map]

# --- Compute group means ---
# reindex() aligns the means with the plotting order and yields NaN (instead
# of None) for any sea state that has no data.
group_means = df_plot.groupby('pretty_name')['eta_velocity'].mean().reindex(pretty_order)

# --- Plot ---
sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))

sns.boxplot(
    x='pretty_name',
    y='eta_velocity',
    data=df_plot,
    order=pretty_order,     # apply the custom order
    width=0.5,
    palette="pastel"
)

# Add means as red diamonds
plt.scatter(
    x=range(len(pretty_order)),
    y=group_means.values,
    color='red',
    marker='D',
    label='Mean',
    zorder=5
)

# --- Labels and formatting ---
# Raw strings keep the LaTeX backslashes from being read as (invalid) Python
# escape sequences.
plt.title(r"Box Plot of Free Surface Velocity ($\dot{\eta}$) Across Sea States", fontsize=14)
plt.xlabel("Sea State", fontsize=12)
plt.ylabel(r"Amplitude $\dot{\eta}$ [m/s]", fontsize=12)
plt.xticks(rotation=45, ha="right")
plt.legend()
plt.tight_layout()

# savefig() does not create missing folders, so make sure the target exists.
box_plot_path = "Results/EDA-Plots/box/box_eta_velocity.png"
os.makedirs(os.path.dirname(box_plot_path), exist_ok=True)
plt.savefig(box_plot_path, dpi=300, bbox_inches='tight')

plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# --- Filter and map pretty names ---
# Free-decay tests are excluded; only the sea states are compared.
df_plot = df_final[~df_final['test_name'].str.startswith('FreeDecay')].copy()
df_plot['pretty_name'] = df_plot['test_name'].map(name_prettify_map)

# --- Corresponding pretty name order ---
pretty_order = [name_prettify_map[k] for k in custom_order if k in name_prettify_map]

# --- Compute group means ---
# reindex() aligns the means with the plotting order and yields NaN (instead
# of None) for any sea state that has no data.
group_means = df_plot.groupby('pretty_name')['eta_acceleration'].mean().reindex(pretty_order)

# --- Plot ---
sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))

sns.boxplot(
    x='pretty_name',
    y='eta_acceleration',
    data=df_plot,
    order=pretty_order,     # apply the custom order
    width=0.5,
    palette="pastel"
)

# Add means as red diamonds
plt.scatter(
    x=range(len(pretty_order)),
    y=group_means.values,
    color='red',
    marker='D',
    label='Mean',
    zorder=5
)

# --- Labels and formatting ---
# This figure shows the second derivative of η, so it is titled as
# acceleration with units of m/s². Raw strings keep the LaTeX backslashes
# from being read as (invalid) Python escape sequences.
plt.title(r"Box Plot of Free Surface Acceleration ($\ddot{\eta}$) Across Sea States", fontsize=14)
plt.xlabel("Sea State", fontsize=12)
plt.ylabel(r"Amplitude $\ddot{\eta}$ [m/s$^2$]", fontsize=12)
plt.xticks(rotation=45, ha="right")
plt.legend()
plt.tight_layout()

# savefig() does not create missing folders, so make sure the target exists.
box_plot_path = "Results/EDA-Plots/box/box_eta_acceleration.png"
os.makedirs(os.path.dirname(box_plot_path), exist_ok=True)
plt.savefig(box_plot_path, dpi=300, bbox_inches='tight')
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# --- Filter and map pretty names ---
# Free-decay tests are excluded; only the sea states are compared.
df_plot = df_final[~df_final['test_name'].str.startswith('FreeDecay')].copy()
df_plot['pretty_name'] = df_plot['test_name'].map(name_prettify_map)
# Make sure the plotted column is numeric (it may be stored as object dtype).
df_plot['heave'] = df_plot['heave'].astype(float)

# --- Corresponding pretty name order ---
pretty_order = [name_prettify_map[k] for k in custom_order if k in name_prettify_map]

# --- Compute group means ---
# reindex() aligns the means with the plotting order and yields NaN (instead
# of None) for any sea state that has no data.
group_means = df_plot.groupby('pretty_name')['heave'].mean().reindex(pretty_order)

# --- Plot ---
sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))

sns.boxplot(
    x='pretty_name',
    y='heave',
    data=df_plot,
    order=pretty_order,     # apply the custom order
    width=0.5,
    palette="pastel"
)

# Add means as red diamonds
plt.scatter(
    x=range(len(pretty_order)),
    y=group_means.values,
    color='red',
    marker='D',
    label='Mean',
    zorder=5
)

# --- Labels and formatting ---
plt.title(r"Box Plot of Heave Across Sea States", fontsize=14)
plt.xlabel("Sea State", fontsize=12)
plt.ylabel("Amplitude Heave [m]", fontsize=12)
plt.xticks(rotation=45, ha="right")
plt.legend()
plt.tight_layout()

# savefig() does not create missing folders, so make sure the target exists.
box_plot_path = "Results/EDA-Plots/box/box_eta_heave.png"
os.makedirs(os.path.dirname(box_plot_path), exist_ok=True)
plt.savefig(box_plot_path, dpi=300, bbox_inches='tight')
plt.show()



In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# --- Filter and map pretty names ---
# Free-decay tests are excluded; only the sea states are compared.
df_plot = df_final[~df_final['test_name'].str.startswith('FreeDecay')].copy()
df_plot['pretty_name'] = df_plot['test_name'].map(name_prettify_map)
# Make sure the plotted column is numeric (it may be stored as object dtype).
df_plot['pitch'] = df_plot['pitch'].astype(float)

# --- Corresponding pretty name order ---
pretty_order = [name_prettify_map[k] for k in custom_order if k in name_prettify_map]

# --- Compute group means ---
# reindex() aligns the means with the plotting order and yields NaN (instead
# of None) for any sea state that has no data.
group_means = df_plot.groupby('pretty_name')['pitch'].mean().reindex(pretty_order)

# --- Plot ---
sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))

sns.boxplot(
    x='pretty_name',
    y='pitch',
    data=df_plot,
    order=pretty_order,     # apply the custom order
    width=0.5,
    palette="pastel"
)

# Add means as red diamonds
plt.scatter(
    x=range(len(pretty_order)),
    y=group_means.values,
    color='red',
    marker='D',
    label='Mean',
    zorder=5
)

# --- Labels and formatting ---
plt.title(r"Box Plot of Pitch Across Sea States", fontsize=14)
plt.xlabel("Sea State", fontsize=12)
plt.ylabel("Amplitude Pitch [°]", fontsize=12)
plt.xticks(rotation=45, ha="right")
plt.legend()
plt.tight_layout()

# savefig() does not create missing folders, so make sure the target exists.
box_plot_path = "Results/EDA-Plots/box/box_eta_pitch.png"
os.makedirs(os.path.dirname(box_plot_path), exist_ok=True)
plt.savefig(box_plot_path, dpi=300, bbox_inches='tight')

plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# --- Filter and map pretty names ---
# Free-decay tests are excluded; only the sea states are compared.
df_plot = df_final[~df_final['test_name'].str.startswith('FreeDecay')].copy()
df_plot['pretty_name'] = df_plot['test_name'].map(name_prettify_map)
# Make sure the plotted column is numeric (it may be stored as object dtype).
df_plot['pendulum'] = df_plot['pendulum'].astype(float)

# --- Corresponding pretty name order ---
pretty_order = [name_prettify_map[k] for k in custom_order if k in name_prettify_map]

# --- Compute group means ---
# reindex() aligns the means with the plotting order and yields NaN (instead
# of None) for any sea state that has no data.
group_means = df_plot.groupby('pretty_name')['pendulum'].mean().reindex(pretty_order)

# --- Plot ---
sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))

sns.boxplot(
    x='pretty_name',
    y='pendulum',
    data=df_plot,
    order=pretty_order,     # apply the custom order
    width=0.5,
    palette="pastel"
)

# Add means as red diamonds
plt.scatter(
    x=range(len(pretty_order)),
    y=group_means.values,
    color='red',
    marker='D',
    label='Mean',
    zorder=5
)

# --- Labels and formatting ---
plt.title(r"Box Plot of Pendulum Across Sea States", fontsize=14)
plt.xlabel("Sea State", fontsize=12)
plt.ylabel("Amplitude Pendulum [°]", fontsize=12)
plt.xticks(rotation=45, ha="right")
plt.legend()
plt.tight_layout()

# savefig() does not create missing folders, so make sure the target exists.
box_plot_path = "Results/EDA-Plots/box/box_eta_pendulum.png"
os.makedirs(os.path.dirname(box_plot_path), exist_ok=True)
plt.savefig(box_plot_path, dpi=300, bbox_inches='tight')

plt.show()

## Line Pair Plots

In [None]:

# Define columns: every DOF (rows) is plotted against the free-surface
# features and the DOFs themselves (columns).
dofs = ['heave', 'pitch', 'pendulum']
etas = ['eta', 'eta_velocity', 'eta_acceleration']
x_vars = etas + dofs
y_vars = dofs

# Axis label map (with symbols + units)
axis_label_map = {
    'eta': r'$\eta$ [m]',
    'eta_velocity': r'$\dot{\eta}$ [m/s]',
    'eta_acceleration': r'$\ddot{\eta}$ [m/s$^2$]',
    'heave': 'Heave [m]',
    'pitch': 'Pitch [°]',
    'pendulum': 'Pendulum [°]'
}

# Sea states to plot (free-decay tests are left out). Stored under its own
# name so the global `test_names`, which other cells loop over, is not
# silently replaced by this shorter list.
pair_plot_tests = [ 'T10p2s_Hs1m','T10p2s_Hs4m', 'T4p5s_Hs1m',
       'T4p5s_Hs2m', 'Tp10p2_Hs2m', 'Tp6p8s_Hs1m', 'Tp6p8s_Hs2m',
       'Tp6p8s_Hs4m', 'Tp6p8s_Hs6m', 'Tp6p8s_Hs8m']

# Loop-invariant setup: unique column list and output folder
cols_to_use = list(dict.fromkeys(x_vars + y_vars))  # remove duplicates, keep order
output_dir = "Annex/figures/EDA/pairgrids"
os.makedirs(output_dir, exist_ok=True)  # make sure directory exists

for test_name in pair_plot_tests:
    # Subset and clean
    subset = df_final[df_final['test_name'] == test_name][cols_to_use].copy()

    # Flatten and convert any array-like entries to plain numbers
    for col in cols_to_use:
        if subset[col].apply(lambda x: isinstance(x, (list, np.ndarray))).any():
            subset[col] = subset[col].apply(lambda x: x[0] if isinstance(x, (list, np.ndarray)) else x)
        subset[col] = pd.to_numeric(subset[col], errors='coerce')

    # Drop NaNs
    subset = subset.dropna()

    # Create PairGrid with muted color
    g = sns.PairGrid(
        subset,
        x_vars=x_vars,
        y_vars=y_vars,
        palette=[color_map[test_name]]
    )
    # Scatter in off-diagonal cells only
    g.map_offdiag(sns.scatterplot, s=10, alpha=0.6, color=color_map[test_name])

    # Histogram on diagonal
    g.map_diag(sns.histplot, kde=False, bins=30, color=color_map[test_name])

    # Apply custom axis labels
    for ax in g.axes.flatten():
        if ax is not None:
            xlabel = ax.get_xlabel()
            ylabel = ax.get_ylabel()
            if xlabel in axis_label_map:
                ax.set_xlabel(axis_label_map[xlabel], fontsize=10)
            if ylabel in axis_label_map:
                ax.set_ylabel(axis_label_map[ylabel], fontsize=10)

    # Add formatted title
    formal_name = name_prettify_map[test_name]
    g.fig.suptitle(f"Pair Plots : DOFs and Free Surface Features – {formal_name}", fontsize=14)
    plt.subplots_adjust(top=0.9)

    # Generate safe filename
    safe_name = formal_name.replace(" ", "_").replace("=", "").replace(",", "").replace(".", "p")
    filename = f"{output_dir}/pairgrid_{safe_name}.png"

    # Save the figure
    g.fig.savefig(filename, dpi=300, bbox_inches='tight')

    # Show plot, then release the figure so the loop does not accumulate
    # open figures in memory
    plt.show()
    plt.close(g.fig)


# Saving Data in CSV Format

In [None]:
# Output location of the prepared dataset
new_folder = 'prepared_data'
csv_file_path = os.path.join(new_folder, 'data_damping_2e6.csv')

# Create the folder if it is missing, then write the table without its row index
os.makedirs(new_folder, exist_ok=True)
df_final.to_csv(csv_file_path, index=False)

print(f"CSV file saved to {csv_file_path}")

# Splitting Data into Training, Validation, and Testing

In [None]:
# Reload the prepared dataset from disk. From this point on `df` refers to the
# reloaded table, not to the frame assembled from the .mat files.
df=pd.read_csv('prepared_data/data_damping_2e6.csv')
df.info()

In [None]:
# Sanity check: list the distinct sampling intervals (rounded to 4 decimals)
# found in each test.
dt_summary = [
    {
        'test_name': test,
        'dt_values': np.round(
            df[df['test_name'] == test]['time'].diff().dropna(), 4
        ).unique(),
    }
    for test in df['test_name'].unique()
]

# One row per test, for a readable display
dt_df = pd.DataFrame(dt_summary)
dt_df


In [None]:
# Separate the free-decay tests from all other tests: a test counts as free
# decay when its name contains "free" (case-insensitive).
is_free_decay = df['test_name'].str.contains('Free', case=False)
df_freedecay = df[is_free_decay].copy()
df_forced = df[~is_free_decay].copy()

# Confirm the separation
print(f"Free Decay tests: {df_freedecay['test_name'].unique()}")
print(f"Other tests: {df_forced['test_name'].unique()}")


In [None]:
# Split every test, in row order, into 60 % training, 30 % validation and
# 10 % testing.
train_list, val_list, test_list = [], [], []

for name in df_forced['test_name'].unique():
    df_subset = df_forced[df_forced['test_name'] == name].copy()

    # Row positions where training ends and validation ends
    n = len(df_subset)
    train_end, val_end = int(0.6 * n), int(0.9 * n)

    train_list.append(df_subset.iloc[:train_end])
    val_list.append(df_subset.iloc[train_end:val_end])
    test_list.append(df_subset.iloc[val_end:])

# Stack the per-test pieces into one table per split
df_train_full = pd.concat(train_list, ignore_index=True)
df_val_full = pd.concat(val_list, ignore_index=True)
df_test_full = pd.concat(test_list, ignore_index=True)

# Confirm shapes and columns
print(df_train_full.columns)
print(df_train_full.shape, df_val_full.shape, df_test_full.shape)

# Preview to confirm
df_train_full.head()


In [None]:
# Save the splits to CSV files.
# to_csv is a DataFrame method; the previous `df.pd.to_csv(...)` spelling
# raised AttributeError because DataFrames have no `.pd` attribute.
# index=False matches how the prepared dataset itself was written, so the
# files do not gain an extra unnamed index column when read back.
os.makedirs('prepared_data', exist_ok=True)
df_train_full.to_csv('prepared_data/train_data.csv', index=False)
df_val_full.to_csv('prepared_data/val_data.csv', index=False)
df_test_full.to_csv('prepared_data/test_data.csv', index=False)
