# Setup
This section contains all necessary setup such as libraries and model creation.

In [None]:
import numpy as np

from src.data.data_helper import get_raw_data_as_dataframe

In [None]:
# Load the raw data via the project helper and preview the first rows.
raw_data = get_raw_data_as_dataframe()
raw_data.head()

In [None]:
# Dimensions of the raw data table.
raw_data.shape

## Pre-processor Calibration
Because all data must be pre-processed with the same bandpass filter and normalization, the pre-processor needs to be calibrated first. Calibration computes the coefficients of the bandpass filter and the normalization parameters.

These parameters will then be passed to the selected model class.

In [None]:
from src.models.preprocessing.preprocessor import SignalPreprocessor

# Configure the shared pre-processor (bandpass filter + normalization).
# NOTE(review): low_freq/high_freq/fs are presumably in Hz and `order` is
# presumably the filter order — confirm against SignalPreprocessor.
pre_processor = SignalPreprocessor(
    low_freq=20.0, # Maybe try down to 17.
    high_freq=500.0, # Around 100-150 looks good for our data.
    fs=5000.0,
    order=7
)

In [None]:
# Calibrate the pre-processor on the full raw data set.
# NOTE(review): this runs before the train/validation split, so whatever is
# derived here also covers data that later ends up in the validation set.
pre_processor.calibrate(raw_data)

## Data Parsing
This section deals with segmenting the data into usable segments with correct labeling. After that, we pre-process the data using the pre-processor that was calibrated above. Once the data is pre-processed, we apply one-hot encoding to the labels to allow for the use of the F1 score. Finally, we split the data into training and validation sets.

In [None]:
from src.data.data_helper import segement_data

In [None]:
# Cut the raw data into labelled windows.
# 200*5 = 1000 and 50*5 = 250; presumably sample counts (i.e. 200 ms / 50 ms
# at the 5 kHz `fs` given to the pre-processor) — TODO confirm the units
# expected by segement_data.
segmented_data = segement_data(raw_data, window_length=200*5, overlap=50*5)

In [None]:
# Preview the first segmented windows.
segmented_data.head()

In [None]:
# Spot-check one arbitrary window (positional row 1000); this raises an
# IndexError if the table has fewer than 1001 rows.
segmented_data.iloc[1000]

In [None]:
# Number of distinct labels; passed to the one-hot encoding and to the model
# constructors further down.
# NOTE(review): this only matches the required one-hot width if the labels
# are the integers 0..K-1 without gaps — confirm.
num_classes = segmented_data['label'].nunique()
num_classes

In [None]:
import tensorflow as tf

# One-hot encode the integer labels: one row per window, num_classes columns.
y_data = tf.keras.utils.to_categorical(
    segmented_data['label'].values, num_classes=num_classes
)

# Every cell of the 'window_data' column holds one window; stack them into a
# single array with one row per window. The column is selected directly, so no
# intermediate DataFrame copy (from dropping 'label'/'source') is needed.
X_data = np.stack(segmented_data['window_data'].values)

In [None]:
# Shape of the stacked windows before pre-processing.
X_data.shape

In [None]:
# Run every window through the calibrated pre-processor.
# NOTE: X_data is rebound to the processed result, so re-running this cell
# alone would process already-processed data a second time; re-run the cell
# that builds X_data first.
X_data = pre_processor.batch_pre_process(X_data)
X_data.shape

In [None]:
# Shape of the one-hot encoded labels.
y_data.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the windows for validation.
# - stratify keeps the class proportions the same in both subsets, which
#   matters because this notebook treats the labels as imbalanced. It is given
#   the integer class ids recovered from the one-hot labels. scikit-learn
#   raises ValueError if a class has fewer than two windows.
# - random_state makes the split (and therefore all results below)
#   reproducible across notebook runs.
# NOTE(review): the windows are created with an overlap, so a purely random
# split can place neighbouring, partially identical windows on both sides of
# the split. Consider a group-wise split (e.g. by recording, if the 'source'
# column identifies one) should validation scores look too optimistic.
X_train, X_val, y_train, y_val = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    stratify=np.argmax(y_data, axis=1),
    random_state=42,
)

In [None]:
# Shape of the training inputs after the split.
X_train.shape

In [None]:
# Shape of the validation inputs after the split.
X_val.shape

## Analysis and Fixing of Imbalances in Labels

In [None]:
# Share of each label among all segmented windows, in percent, ordered by
# label value.
label_fractions = segmented_data['label'].value_counts(normalize=True)
label_percentages = label_fractions.sort_index() * 100
print(label_percentages)

In [None]:
import matplotlib.pyplot as plt

# Bar chart: number of segmented windows per label, ordered by label value.
plt.figure(figsize=(8, 4))
(
    segmented_data['label']
    .value_counts()
    .sort_index()
    .plot(kind='bar')
)
plt.title('Distribution of Labels')
plt.xlabel('Label')
plt.ylabel('Count')
plt.show()

In [None]:
# Recover integer class ids from the one-hot training labels, then count how
# many training windows fall into each class (`unique` and `counts` are
# aligned index by index).
labels = np.argmax(y_train, axis=1)
unique, counts = np.unique(labels, return_counts=True)

In [None]:
# Total number of training windows.
labels.shape[0]

In [None]:
# Class ids present in the training split.
unique

In [None]:
# Number of training windows per class, in the same order as `unique`.
counts

In [None]:
# Weight every class inversely to its frequency in the training split:
#     weight_c = n_samples / (n_classes * count_c)
# Dividing by the number of classes that are actually present (rather than a
# fixed constant) means the sum of the weights of all examples stays the same,
# i.e. it equals n_samples. Building the dict from `unique`/`counts` also keeps
# the cell working when the number of classes changes or a class is absent
# from the training split.
n_samples = labels.shape[0]
n_present_classes = len(unique)

class_weight = {
    # Plain Python ints as keys, matching the integer class ids.
    int(class_id): n_samples / (n_present_classes * class_count)
    for class_id, class_count in zip(unique, counts)
}
class_weight

In [None]:
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE

# ros = RandomOverSampler(random_state=0)
# ros = RandomUnderSampler(random_state=0)
# smote = SMOTE(random_state=0)

# X_train, y_train = ros.fit_resample(X_train, y_train)
# X_train, y_train = smote.fit_resample(X_train, y_train)

In [None]:
# Shape of the training inputs after the (optional) resampling step.
X_train.shape

In [None]:
# Class histogram of the training split as it will be fed to the models.
# NOTE(review): the resampling calls earlier in this section are commented
# out, so despite the title this currently shows the un-resampled labels.
labels_resampled = np.argmax(y_train, axis=1)
unique, counts = np.unique(labels_resampled, return_counts=True)

plt.figure(figsize=(8, 4))
plt.bar(unique, counts)
plt.title('Distribution of Labels (Resampled Training Data)')
plt.xlabel('Label')
plt.ylabel('Count')
plt.show()

In [None]:
# Final shape of the training inputs.
X_train.shape

In [None]:
# Final shape of the validation inputs.
X_val.shape

# Model Setup
Import model classes and create instances of the models.

## Hyperparameters

In [None]:
# Training configuration shared by all three models below.
# batch_size and epochs are passed to every fit() call.
batch_size = 512
epochs = 20

# Passed to every model constructor.
learning_rate = 1e-3

## Model Creation

In [None]:
from src.models.LSTM.LSTM import LSTM
from src.models.LSTM_STFT.LSTM_STFT import LSTM_STFT
from src.models.LSTM_STFT_Dense.LSTM_STFT_Dense import LSTM_STFT_Dense

In [None]:
# Instantiate the project's LSTM model wrapper. input_shape is the size of the
# second dimension of the pre-processed windows (X_data.shape[1]).
LSTM_model = LSTM(
    input_shape=X_data.shape[1],
    num_classes=num_classes,
    learning_rate=learning_rate
)

In [None]:
# Instantiate the project's LSTM_STFT model wrapper with the same input size,
# class count and learning rate as the other models.
LSTM_STFT_model = LSTM_STFT(
    input_shape=X_data.shape[1],
    num_classes=num_classes,
    learning_rate=learning_rate
)

In [None]:
# Instantiate the project's LSTM_STFT_Dense model wrapper with the same input
# size, class count and learning rate as the other models.
LSTM_STFT_Dense_model = LSTM_STFT_Dense(
    input_shape=X_data.shape[1],
    num_classes=num_classes,
    learning_rate=learning_rate
)

# Train Model
Train the models

In [None]:
# Train the LSTM model on the training split and evaluate on the validation
# split after each epoch; class_weight passes the per-class weights computed
# in the imbalance section.
LSTM_model.get_model().fit(X_train, y_train,
                       validation_data=(X_val, y_val),
                       epochs=epochs,
                       batch_size=batch_size,
                       verbose=2,
                       class_weight=class_weight
                    )

In [None]:
# Train the LSTM_STFT model with the same data, hyperparameters and class
# weights as the other models.
LSTM_STFT_model.get_model().fit(X_train, y_train,
                       validation_data=(X_val, y_val),
                       epochs=epochs,
                       batch_size=batch_size,
                       verbose=2,
                       class_weight=class_weight
                    )

In [None]:
# Train the LSTM_STFT_Dense model with the same data, hyperparameters and
# class weights as the other models.
LSTM_STFT_Dense_model.get_model().fit(X_train, y_train,
                       validation_data=(X_val, y_val),
                       epochs=epochs,
                       batch_size=batch_size,
                       verbose=2,
                       class_weight=class_weight
                    )

# Plotting

In [None]:
from src.visualizations.plot_learning_curves import plot_learning_curves, plot_confusion_and_f1

In [None]:
# Learning curves for the trained LSTM model, followed by its confusion
# matrix / F1 plot on the validation split.
plot_learning_curves(LSTM_model.get_model(), plot_title=LSTM_model.get_model_name())
plot_confusion_and_f1(LSTM_model.get_model(), X_val, y_val, plot_title=LSTM_model.get_model_name())

In [None]:
# Learning curves for the trained LSTM_STFT model, followed by its confusion
# matrix / F1 plot on the validation split.
plot_learning_curves(LSTM_STFT_model.get_model(), plot_title=LSTM_STFT_model.get_model_name())
plot_confusion_and_f1(LSTM_STFT_model.get_model(), X_val, y_val, plot_title=LSTM_STFT_model.get_model_name())

In [None]:
# Learning curves for the trained LSTM_STFT_Dense model, followed by its
# confusion matrix / F1 plot on the validation split.
plot_learning_curves(LSTM_STFT_Dense_model.get_model(), plot_title=LSTM_STFT_Dense_model.get_model_name())
plot_confusion_and_f1(LSTM_STFT_Dense_model.get_model(), X_val, y_val, plot_title=LSTM_STFT_Dense_model.get_model_name())