# Lab 3

## Reading CSV files

In [None]:
import pandas as pd

# Both recordings are read from disk and stacked row-wise into one frame.
# ignore_index=True renumbers the rows 0..n-1 so labels from the two files
# do not collide.
csv_paths = [
    './pw3_data/EEG_mouse_data_1.csv',
    './pw3_data/EEG_mouse_data_2.csv',
]
training_mice, training_mice2 = (pd.read_csv(csv_path) for csv_path in csv_paths)

training_data = pd.concat([training_mice, training_mice2], ignore_index=True)


## Choosing features

In [None]:

# Columns kept from the raw data: the "state" label followed by one amplitude
# column per integer frequency from 1 Hz to 25 Hz. The names are generated
# rather than typed out so the band count lives in a single place.
N_FREQUENCY_BANDS = 25

feature_list = ["state"] + [
    f"amplitude_around_{hertz}_Hertz"
    for hertz in range(1, N_FREQUENCY_BANDS + 1)
]

# Restrict the combined frame to the columns named in feature_list
# (all rows, label column included) and preview the first rows.
input_training_mice = training_data.loc[:, feature_list]
print(input_training_mice.head())


## Normalize and encode data

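Balance the data, one-hot encode the "state" column, and fit and transform every other column with StandardScaler. (Note: the cell below performs the encoding and the scaling only; it contains no balancing step.)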

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

# --- Targets -----------------------------------------------------------------
# One-hot encode the "state" label. The encoder is configured to return a dense
# pandas DataFrame. The label is read from `training_data` (not from
# `input_training_mice`, which this cell rebinds below) so the cell can be
# re-run without a KeyError on the already-dropped "state" column.
encoder = OneHotEncoder(sparse_output=False).set_output(transform="pandas")
state = training_data[["state"]]
output_training_mice = encoder.fit_transform(state)

# --- Inputs ------------------------------------------------------------------
# Every selected column except the label is standardised. StandardScaler works
# column by column, so a single fit over the whole frame gives the same scaling
# as fitting each column separately, and leaves `scaler` fitted on *all*
# features so it can later be applied to new data with `scaler.transform`.
# NOTE(review): the scaler is fitted on the full dataset before the
# cross-validation split performed later in the notebook, so validation folds
# contribute to the scaling statistics.
scaler = StandardScaler()
raw_features = training_data[feature_list].drop(columns=["state"])
input_training_mice = pd.DataFrame(
    scaler.fit_transform(raw_features),
    columns=raw_features.columns,
    index=raw_features.index,
)
    


## Create model and fold

In [None]:
import keras
from keras import layers
from sklearn.model_selection import KFold

# Single seed shared by Keras and by the fold generator.
SEED = 123
keras.utils.set_random_seed(SEED)

# `kf.split(...)` is called once for training and again for evaluation. With
# shuffle=True and no random_state, every call draws a new shuffle, so the
# evaluation folds would differ from the training folds and each model would be
# scored on samples it had been trained on. An integer random_state makes every
# call to `split` return the same partition.
kf = KFold(n_splits=3, shuffle=True, random_state=SEED)

def create_model(input_dim=25, hidden_units=8, n_classes=3,
                 learning_rate=0.001, momentum=0.99):
  """Build and compile a one-hidden-layer MLP classifier.

  Called with no arguments it produces the network used throughout this
  notebook: 25 inputs, 8 ReLU hidden units, 3 softmax outputs, trained with
  SGD (learning rate 0.001, momentum 0.99) on categorical cross-entropy.

  Args:
    input_dim: Number of input features per sample.
    hidden_units: Number of units in the hidden Dense layer.
    n_classes: Number of output classes (width of the softmax layer).
    learning_rate: Learning rate passed to the SGD optimizer.
    momentum: Momentum passed to the SGD optimizer.

  Returns:
    A freshly initialised, compiled `keras.Sequential` model.
  """
  mlp = keras.Sequential([
      layers.Input(shape=(input_dim,)),
      layers.Dense(hidden_units, activation="relu"),
      layers.Dense(n_classes, activation="softmax"),
  ])

  # Softmax outputs paired with one-hot targets -> categorical cross-entropy.
  mlp.compile(
      optimizer=keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum),
      loss="categorical_crossentropy",
  )

  return mlp

# Build one instance only to print its architecture and parameter counts.
# The training loop creates a new model for every fold and rebinds `mlp`.
mlp = create_model()
mlp.summary()

## Training

In [None]:
import numpy as np
# Per-fold results; position k in both lists belongs to fold k.
history_list = []
trained_mlp = []

# The one-hot targets are converted to a NumPy array once, outside the loop,
# instead of twice per fold.
labels_array = np.array(output_training_mice)

for train_index, test_index in kf.split(input_training_mice):
  # Start every fold from a freshly initialised network so that no weights
  # carry over from a previous fold.
  mlp = create_model()

  # KFold yields positional indices, hence .iloc on the DataFrame.
  x_train, x_test = input_training_mice.iloc[train_index], input_training_mice.iloc[test_index]
  y_train, y_test = labels_array[train_index], labels_array[test_index]

  # The held-out fold is passed as validation data so Keras records
  # 'val_loss' alongside 'loss' for every epoch.
  history = mlp.fit(
      x=x_train, y=y_train,
      validation_data=(x_test, y_test),
      epochs=50
  )

  history_list.append(history)
  trained_mlp.append(mlp)

## Plot training history

In [None]:
import matplotlib.pyplot as pl
%matplotlib inline

# Stack the per-epoch losses of every fold: one row per fold, one column per epoch.
train_losses = np.array([fold_history.history['loss'] for fold_history in history_list])
val_losses = np.array([fold_history.history['val_loss'] for fold_history in history_list])

# Across-fold mean and standard deviation for each epoch.
mean_train_loss, std_train_loss = train_losses.mean(axis=0), train_losses.std(axis=0)
mean_val_loss, std_val_loss = val_losses.mean(axis=0), val_losses.std(axis=0)

# Draw each curve followed by its +/- one standard deviation band
# (training first, then validation).
curves = (
    (mean_train_loss, std_train_loss, 'Training Loss (Mean)', 'Training Loss (Std)'),
    (mean_val_loss, std_val_loss, 'Validation Loss (Mean)', 'Validation Loss (Std)'),
)
for mean_curve, std_curve, mean_label, std_label in curves:
    epoch_axis = range(len(mean_curve))
    pl.plot(mean_curve, label=mean_label)
    pl.fill_between(epoch_axis, mean_curve - std_curve, mean_curve + std_curve,
                    alpha=0.3, label=std_label)

# Axis labels and legend, then render the figure.
pl.xlabel('Epochs')
pl.ylabel('Loss')
pl.legend()
pl.show()

## Performance

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, f1_score
import seaborn as sns

def plot_confusion_matrix(confusion_matrix, title, class_names=("rem", "n-rem", "awake")):
    """Draw a confusion matrix as an annotated heatmap and show it.

    Args:
        confusion_matrix: Square array-like of counts (rows = true class,
            columns = predicted class). Values are cast to int for display.
            Note that inside this function the parameter name shadows
            scikit-learn's `confusion_matrix` function.
        title: Title placed above the heatmap.
        class_names: Tick labels, in class-index order, used on both axes.

    NOTE(review): the default names assume class index 0 is "rem", 1 is
    "n-rem" and 2 is "awake". Verify this against the column order produced
    by the label encoder, otherwise the axes are mislabelled.
    """
    counts = np.asarray(confusion_matrix).astype(int)
    tick_labels = list(class_names)

    # Explicit figure/axes instead of relying on pyplot's implicit current axes.
    fig, ax = pl.subplots(figsize=(8, 6))
    sns.heatmap(counts, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    pl.show()

f1_scores = []
# Running sum of the per-fold confusion matrices. The name is kept for
# compatibility, but it holds totals over all folds rather than an average.
mean_confusion_matrix = np.zeros((3, 3))

# Class ids produced by argmax over the one-hot columns. Passing them to
# confusion_matrix forces a 3x3 result even when a fold happens to contain no
# sample (true or predicted) of some class, so the accumulation cannot fail on
# a shape mismatch.
class_ids = [0, 1, 2]

# Convert the one-hot targets once instead of once per fold.
all_true_labels = np.array(output_training_mice)

# NOTE(review): this iterates kf.split a second time. The folds match the ones
# used for training only if `kf` returns the same partition on every call
# (e.g. KFold built with an integer random_state); otherwise each model is
# scored on samples it was trained on.
for i, (train_index, test_index) in enumerate(kf.split(input_training_mice)):
    # KFold yields positional indices, so select rows with .iloc (as the
    # training cell does) rather than label-based .loc.
    predictions = trained_mlp[i].predict(input_training_mice.iloc[test_index])
    true_labels = all_true_labels[test_index]

    # Collapse softmax outputs / one-hot rows to integer class ids.
    max_predictions = np.argmax(predictions, axis=1)
    max_true_labels = np.argmax(true_labels, axis=1)

    # Compute the fold's confusion matrix once, accumulate it and plot it.
    cm = confusion_matrix(max_true_labels, max_predictions, labels=class_ids)
    mean_confusion_matrix += cm
    plot_confusion_matrix(cm, f'Confusion Matrix - Fold {i + 1}')

    # Support-weighted F1 score for this fold.
    f1 = f1_score(max_true_labels, max_predictions, average='weighted')
    f1_scores.append(f1)
    print(f"F1 Score - Fold {i + 1}: {f1}")

# Plot the confusion matrix accumulated over all folds
plot_confusion_matrix(mean_confusion_matrix, 'Global confusion matrix')

# Calculate and display the mean F1 score across all folds
mean_f1_score = np.mean(f1_scores)
print(f"Mean F1 Score across all folds: {mean_f1_score}")