### Import necessary packages

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import plotly.express as px

### Load data to DataFrame called master

In [None]:
# Read the cleaned master table and discard the 't_delta' column in one chained
# expression, so re-running the cell always starts from the file on disk.
master = (
    pd.read_parquet("../results/processed/master_clean.parquet")
    .drop(columns=['t_delta'])
)
master.head()

### Check that every combination of `load_value` and `speedSet` exists for each fault type

In [None]:
# Distinct values of the fault description column.
fault_column = master['gear_fault_desc']
fault_types = fault_column.unique()
display(fault_types)

In [None]:
# One row per unique (experiment_id, speedSet, load_value, gear_fault_desc) combination.
experiment_cols = ['experiment_id', 'speedSet', 'load_value', 'gear_fault_desc']
experiments = master.loc[:, experiment_cols].drop_duplicates()
display(experiments)

In [None]:
# Count experiments per fault type (rows) and per speedSet/load_value pair (columns).
pd.crosstab(
    index=experiments['gear_fault_desc'],
    columns=[experiments['speedSet'], experiments['load_value']],
)

Now I know that there is one experiment for each combination of `load_value` and `speedSet` per type of failure

### Check if order of magnitude is generally similar for data from both sensors

In [None]:
# Summary statistics of both sensor columns over the whole data set.
summary_stats = ["min", "max", "mean", "std"]
basic_sensor_stats = master[['sensor1', 'sensor2']].agg(summary_stats)
display(basic_sensor_stats)

### Check the correlation between `sensor1` and `sensor2` for each experiment separately

In [None]:
# Correlation between the two sensor signals, computed separately for every
# experiment. The result has one row per experiment_id and a 'corr' column.
corr_sensors = (
    master.groupby('experiment_id')[['sensor1', 'sensor2']]
    .apply(lambda g: g['sensor1'].corr(g['sensor2']))
    .rename("corr")
    .reset_index()
)

display(corr_sensors)
# List experiments whose sensors are strongly correlated. The magnitude is
# compared so that a strong negative correlation is reported as well.
display(corr_sensors[corr_sensors['corr'].abs() > 0.5])

In [None]:
# Distribution of the per-experiment sensor correlation.
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(corr_sensors["corr"], bins=36, kde=True, ax=ax)
ax.set_title("histogram of correlation between sensor1 and sensor2")
ax.set_xlabel("correlation")
ax.set_ylabel("number of experiments")
plt.show()

Correlation between sensor 1 and sensor 2 is low (<0.4) for all experiments. 

### Plot sensor data for each category (type of failure)

In [None]:
# Label every row with its operating point, formatted as "<speedSet> / <load_value>".
speed_label = master["speedSet"].astype(str)
load_label = master["load_value"].astype(str)
master["combo"] = speed_label + " / " + load_label

sensors = ["sensor1","sensor2"]
id_cols = ["gear_fault_desc","combo","t_rel_s"]

# Long format: one row per (fault, combo, time, sensor) with the reading in "value".
df_long = pd.melt(
    master[id_cols + sensors],
    id_vars=id_cols,
    value_vars=sensors,
    var_name="sensor",
    value_name="value",
)

# One figure per fault type, in sorted order of the fault description.
for fault in sorted(df_long["gear_fault_desc"].unique()):
    sub = df_long[df_long["gear_fault_desc"] == fault]
    fig = px.line(
        sub,
        x="t_rel_s",
        y="value",
        color="combo",       # one color per speedSet / load_value combination
        facet_col="sensor",  # one facet column per sensor
        title=fault,
    )
    fig.update_layout(width=1100, height=300)  # fixed figure size in pixels
    fig.show()

###### Calculate typical coefficients used in time series analysis:

1. **RMS:** root mean square, $\sqrt{\tfrac{1}{N}\sum_i x_i^2}$ (DE: Effektivwert)

2. **P2P:** peak-to-peak, $\max(x) - \min(x)$

In [None]:
def rms(x):
    """Return the root mean square of ``x``, i.e. ``sqrt(mean(x**2))``."""
    mean_square = np.mean(x**2)
    return np.sqrt(mean_square)

def p2p(x):
    """Return the peak-to-peak range of ``x``: its maximum minus its minimum."""
    highest = np.max(x)
    lowest = np.min(x)
    return highest - lowest

# Columns that define one group, and the sensor columns to summarize.
sensor_cols = ['sensor1', 'sensor2']
keys = ['gear_fault_desc', 'experiment_id', 'speedSet', 'load_value']

# RMS and peak-to-peak of each sensor for every unique key combination.
# The keys end up in the row index; the columns are (sensor, statistic) pairs
# such as ("sensor1", "rms").
features = (
    master
    .groupby(keys)[sensor_cols]
    .agg([rms, p2p])
)

In [None]:
display(features)

In [None]:
# RMS of sensor1 per fault type: a gray box for the distribution, overlaid with
# the individual experiments as points colored by experiment_id.
rms_by_fault = dict(data=features, x="gear_fault_desc", y=("sensor1","rms"))

plt.figure(figsize=(10,5))
sns.boxplot(color="lightgray", **rms_by_fault)
sns.stripplot(hue="experiment_id", dodge=True, size=5, alpha=0.7, **rms_by_fault)
plt.xticks(rotation=90)
# Place the legend outside the axes, to the right of the plot.
plt.legend(ncols=3, loc='upper left', bbox_to_anchor=(1, 1))
plt.show()

 


In [None]:
# RMS of sensor2 per fault type: one facet column per speedSet,
# boxes colored by load_value.
g = sns.catplot(
    kind="box",
    data=features,
    x="gear_fault_desc",
    y=("sensor2","rms"),
    col="speedSet",
    hue="load_value",
    height=5,
    aspect=1,
)
g.set_xticklabels(rotation=90)

In [None]:
# RMS of sensor2 per fault type: one facet column per load_value,
# boxes colored by speedSet.
g = sns.catplot(
    kind="box",
    data=features,
    x="gear_fault_desc",
    y=("sensor2","rms"),
    col="load_value",
    hue="speedSet",
    height=5,
    aspect=1,
)
g.set_xticklabels(rotation=90)

In [None]:
# RMS of sensor1 per fault type on a grid: one column per speedSet,
# one row per load_value.
g = sns.catplot(
    kind="box",
    data=features,
    x="gear_fault_desc",
    y=("sensor1","rms"),
    row="load_value",
    col="speedSet",
    height=4,
    aspect=1.2,
)
g.set_xticklabels(rotation=90)