# Importing libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import GRU, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split

In [None]:
# `importlib.util` is a submodule: it must be imported explicitly, a bare
# `import importlib` does not guarantee that it is loaded.
import importlib.util
from pathlib import Path


def _load_util_module(module_name, file_name):
    """Load ``../utils/<file_name>`` and return it as a module object.

    Args:
        module_name: Name given to the module spec.
        file_name: File name of the Python source inside ``../utils``.

    Returns:
        The executed module object.
    """
    # Build the path from its parts so it resolves on every OS (a hard-coded
    # backslash separator only works on Windows).
    module_path = Path("..") / "utils" / file_name
    spec = importlib.util.spec_from_file_location(module_name, str(module_path))
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


# Project helper modules, loaded by path relative to the working directory.
preprocessing = _load_util_module("preprocessing", "preprocessing.py")
fspliter = _load_util_module("fspliter", "files_spliter.py")
results = _load_util_module("results", "results.py")

# Shared encoder: fitted when the data is prepared and reused afterwards to map
# predicted class indices back to the original state labels.
label_encoder = LabelEncoder()

# Preprocessing

In [None]:
def train_test_validation_split_on_day_3(num_mice):
    """Build chronological train/validation/test sets from day 3 of one mouse.

    The rows are split in their original order (no shuffling) into 70 % train,
    15 % validation and 15 % test. The scaler and the PCA projection are fitted
    on the training rows only and then applied to the other two parts, so no
    statistics of the validation/test rows leak into the training features.

    Side effect: the module-level ``label_encoder`` is re-fitted on the
    ``'state'`` column of the loaded data on every call.

    NOTE: the scaler and the PCA are fitted anew on every call, so the feature
    spaces returned for two different mice are not guaranteed to be aligned.

    Args:
        num_mice: Mouse identifier forwarded to ``fspliter.get_mice``.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``. The ``X``
        parts are arrays with 50 PCA components per row, the ``y`` parts are
        the matching slices of the encoded state column.
    """
    # Day 3 of the recording, skipping its first 5400 rows (the "first 6
    # hours" — presumably 4-second epochs; confirm against the data format).
    data = fspliter.get_mice(num_mice)
    day3 = fspliter.retrieve_day(data, 3)
    day3_without_first_6_hours = day3.iloc[5400:]

    # Preprocessing and label encoding
    data_processed = preprocessing.do_preprocessing(day3_without_first_6_hours, 'WS')
    data_processed['state_encoded'] = label_encoder.fit_transform(data_processed['state'])

    # Feature selection: everything except the first column and the last two
    # (the last one is 'state_encoded', added just above; the one before it is
    # presumably 'state' — verify against do_preprocessing).
    feature_columns = data_processed.columns[1:-2]
    features = data_processed[feature_columns]
    labels = data_processed['state_encoded']

    # Split first, in chronological order, so that the transforms below can be
    # fitted on the training rows alone.
    X_train, X_temp, y_train, y_temp = train_test_split(features, labels, test_size=0.3, shuffle=False)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False)

    # Scaling + PCA, learned from the training rows only
    scaler = StandardScaler().fit(X_train)
    pca = PCA(n_components=50).fit(scaler.transform(X_train))

    X_train, X_val, X_test = (
        pca.transform(scaler.transform(part)) for part in (X_train, X_val, X_test)
    )

    return X_train, X_val, X_test, y_train, y_val, y_test


# Load data

In [None]:
# Prepare the three chronological splits for mouse 9 ...
(X_train, X_val, X_test,
 y_train, y_val, y_test) = train_test_validation_split_on_day_3(9)

# ... and show the shape of each of them as a quick sanity check.
print(*(split.shape for split in (X_train, X_val, X_test, y_train, y_val, y_test)))

# Model

In [None]:
# Half-width of the context window: number of rows taken before and after the
# centre row of every sequence.
sequence_size = 10

# create_bidirectional_sequences(data, n) returns windows of 2 * n + 1 rows,
# so that is the number of timesteps the network actually receives (not n).
window_length = 2 * sequence_size + 1

# Two stacked bidirectional LSTM layers with dropout, followed by a small dense
# head that outputs a softmax over the two classes.
model = Sequential([
    Bidirectional(LSTM(100, return_sequences=True, input_shape=(window_length, X_train.shape[1]))),
    Dropout(0.3),
    Bidirectional(LSTM(100)),
    Dropout(0.3),
    Dense(50, activation='relu'),
    Dense(2, activation='softmax')
])

In [None]:
def create_bidirectional_sequences(data, n):
    """Cut ``data`` into overlapping windows centred on each eligible row.

    Every row that has at least ``n`` rows before it and ``n`` rows after it
    becomes the centre of one window made of rows ``i - n`` to ``i + n``
    (inclusive), i.e. ``2 * n + 1`` rows per window.

    Args:
        data: Array-like whose first axis is the one being windowed.
        n: Number of context rows taken on each side of the centre row.

    Returns:
        Array of shape ``(len(data) - 2 * n, 2 * n + 1, *data.shape[1:])``.
        When ``data`` is too short to hold a single window, the result is an
        empty array that still has the window and feature dimensions.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")

    samples = np.asarray(data)
    window_length = 2 * n + 1
    centres = range(n, len(samples) - n)

    # Not enough rows for one full window: keep the trailing dimensions so the
    # result can still be concatenated with / fed like a regular batch.
    if not centres:
        return np.empty((0, window_length) + samples.shape[1:], dtype=samples.dtype)

    return np.stack([samples[centre - n: centre + n + 1] for centre in centres])

In [None]:
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Overlapping windows of 2 * sequence_size + 1 rows each
X_train_sequences = create_bidirectional_sequences(X_train, sequence_size)
X_val_sequences = create_bidirectional_sequences(X_val, sequence_size)

# One label per window: dropping the first 2 * sequence_size labels makes the
# counts match and pairs every window with the label of its LAST row.
# NOTE(review): the windows are built around a centre row; if the label of that
# centre row is what should be predicted, the slice would have to be
# [sequence_size:-sequence_size] here and in every evaluation cell — confirm.
y_train_adjusted = y_train[sequence_size*2:]
y_val_adjusted = y_val[sequence_size*2:]

# Balanced class weights, keyed by the actual class label rather than by the
# position in the weight array, so a weight can never be attached to the wrong
# class when the training labels are not exactly 0..k-1.
train_classes = np.unique(y_train_adjusted)
class_weights = class_weight.compute_class_weight(class_weight="balanced", classes=train_classes, y=y_train_adjusted)
class_weights_dict = {int(label): float(weight) for label, weight in zip(train_classes, class_weights)}

history = model.fit(X_train_sequences, y_train_adjusted, epochs=2, batch_size=64, validation_data=(X_val_sequences, y_val_adjusted), verbose=1, class_weight=class_weights_dict)

model.summary()

# Saving model

In [None]:
#model.save('Saved_model/Generalize_Wake_Sleep_classification.h5')

# Training and validation results

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)


def _plot_curves(position, train_curve, val_curve, train_label, val_label, title, ylabel):
    """Draw one training/validation curve pair in panel ``position`` of a 1x2 grid."""
    plt.subplot(1, 2, position)
    plt.plot(epochs, train_curve, 'bo-', label=train_label)
    plt.plot(epochs, val_curve, 'ro-', label=val_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend()


plt.figure(figsize=(12, 5))

# Left panel: accuracy
_plot_curves(
    1, acc, val_acc,
    train_label='Training accuracy',
    val_label='Validation accuracy',
    title='Training and Validation Accuracy',
    ylabel='Accuracy',
)

# Right panel: loss
_plot_curves(
    2, loss, val_loss,
    train_label='Training loss',
    val_label='Validation loss',
    title='Training and Validation Loss',
    ylabel='Loss',
)

plt.tight_layout()
plt.show()


# Load model if needed

In [None]:
#model = load_model('Saved_model/Mouse_3_state_classification.h5')

# Testing model

In [None]:
# Labels paired with the test windows: the first 2 * sequence_size labels are
# dropped so that there is exactly one label per window.
y_test_adjusted = y_test[2 * sequence_size:]

# Same windowing as for the training data
X_test_sequences = create_bidirectional_sequences(X_test, sequence_size)

# One row of per-class scores per window; keep the index of the highest score
X_test_pred = model.predict(X_test_sequences)
predicted_labels = np.argmax(X_test_pred, axis=1)

In [None]:
# Map the encoded classes (true and predicted) back to their original labels
y_test_original, y_pred_original = (
    label_encoder.inverse_transform(encoded)
    for encoded in (y_test_adjusted, predicted_labels)
)

# Report the scores for the two states
results.scores(y_test_original, y_pred_original, ('W', 'S'))

## Testing model on other mice

In [None]:
def test_model_on_other_mice(model, num_mice, sequence_size=10):
    """Score ``model`` on the day-3 data of another mouse.

    The data of mouse ``num_mice`` is prepared with
    ``train_test_validation_split_on_day_3`` and only its first (training)
    part is used for the evaluation. The scores are reported through
    ``results.scores``; nothing is returned.

    NOTE(review): preparing the data re-fits the shared ``label_encoder`` and
    fits a new scaler/PCA for this mouse, so the features are not guaranteed
    to live in the space the model was trained on — confirm this is intended.

    Args:
        model: Trained model exposing ``predict``.
        num_mice: Identifier of the mouse to evaluate on.
        sequence_size: Number of context rows on each side of a window; must
            be the value the model was trained with. Defaults to 10.
    """
    X_testmouse, _, _, y_testmouse, _, _ = train_test_validation_split_on_day_3(num_mice)

    X_test_sequences = create_bidirectional_sequences(X_testmouse, sequence_size)

    # One label per window: drop the first 2 * sequence_size labels so that
    # the label count matches the number of windows.
    y_test_adjusted = y_testmouse[2 * sequence_size:]

    X_test_pred = model.predict(X_test_sequences)
    predicted_labels = X_test_pred.argmax(axis=1)

    # Back to the original state labels before scoring
    y_test_original = label_encoder.inverse_transform(y_test_adjusted)
    y_pred_original = label_encoder.inverse_transform(predicted_labels)
    results.scores(y_test_original, y_pred_original, ('W', 'S'))

### Testing model on same strain

In [None]:
# Evaluate the trained model on mouse 10
test_model_on_other_mice(model, num_mice=10)

### Testing model on other strain

In [None]:
# Evaluate the trained model on mouse 1
test_model_on_other_mice(model, num_mice=1)