In [16]:
%load_ext autoreload 
%autoreload 2 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tensorflow as tf

from src.data_processing.pipelines.ClassifierPipe import ClassifierPipe
from sklearn.metrics import f1_score
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Restore the saved, optimized classifier from disk.
model = tf.keras.models.load_model(
    '/projects/p31961/ENIGMA/results/optimized_models/BaseClassifier_optimized'
)

# Re-compile the loaded model with the evaluation metrics used in this notebook.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.BinaryAccuracy, 'accuracy'),
        (tf.keras.metrics.Precision, 'precision'),
        (tf.keras.metrics.Recall, 'recall'),
        (tf.keras.metrics.AUC, 'auc-roc'),
    )
]
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=metrics)

In [None]:
# Show the architecture summary of the classifier loaded in the previous cell.
model.summary()

In [None]:
# Inputs: the aggregated raw dataset and the saved subject-id split.
DATA_PATH = '/projects/p31961/gaby_data/aggregated_data/raw_data/datasets/raw_data_raw_data.parquet.gzip'
SUBJECT_IDS_PATH = "/projects/p31961/ENIGMA/results/optimized_models/subjects.json"

# Metadata columns removed before the features are transformed.
cols_to_drop = ["mouse_id", "event", "sex", "day", "trial_count", "trial"]

# Run the preprocessing pipeline stage by stage; each stage's return value
# feeds the next one.
processor = ClassifierPipe(DATA_PATH)
processor = processor.read_raw_data()
processor = processor.calculate_max_min_signal()
processor = processor.split_data(
    load_subject_ids=True,
    subject_ids_path=SUBJECT_IDS_PATH,
    target='action',
)
processor = processor.drop_features(cols_to_drop)
processor = processor.transform_data(numeric_target_dict={'avoid': 1, 'escape': 0})

processor.processed_data

In [None]:
# The pipeline above already ran drop_features(cols_to_drop), so these columns
# may no longer be present in processed_data. errors="ignore" keeps this cell
# re-runnable instead of raising KeyError on the missing labels.
cols_to_drop = ["mouse_id", "event", "sex", "day", "trial_count", "trial"]
processor.processed_data.drop(columns=cols_to_drop, errors="ignore")

In [None]:
# Score the held-out test set and binarize the model output at 0.5
# (1 where the output exceeds the threshold, 0 otherwise).
y_pred = np.where(model.predict(processor.X_test) > 0.5, 1, 0)
y_pred

In [None]:
# F1 score of the thresholded predictions against the test-set labels.
f1_score_results = f1_score(y_true=processor.y_test, y_pred=y_pred)

In [None]:
# Bound to `cm_display` rather than `display`: rebinding `display` would shadow
# IPython's display() helper for every later cell in this kernel session.
cm_display = ConfusionMatrixDisplay.from_predictions(processor.y_test, y_pred)

# Annotate through the display's own axes instead of pyplot's implicit
# "current axes", so the text and title land on the matrix itself.
cm_display.ax_.text(-.5, -.8, f"F1-score: {np.round(f1_score_results, 2)}", fontsize=14)
cm_display.ax_.set_title('Confusion Matrix');

In [None]:
def reconstruct_df(x_data, y_data, prediction, processor, long_format=False):
    """Rebuild a labelled DataFrame from transformed features, targets and predictions.

    Column names are taken from the fitted transformers stored on
    ``processor.processor.named_transformers_``: the ``"num"`` transformer's
    output names first, followed by the ``"cat"`` transformer's.
    NOTE(review): this assumes ``x_data``'s columns are in that same
    num-then-cat order -- confirm against the column transformer.

    Args:
        x_data: 2-D feature matrix with one column per transformed feature.
        y_data: True labels. A pandas object has its index reset so it aligns
            positionally with ``x_data``; any other array-like is flattened.
        prediction: Model predictions; flattened to one dimension.
        processor: Pipeline object exposing ``processor.named_transformers_``
            with ``"num"`` and ``"cat"`` entries.
        long_format: If False (default), return one row per sample with
            ``target`` and ``prediction`` columns. If True, melt those two
            columns into a ``truth`` column (holding ``"target"`` or
            ``"prediction"``) and an ``avoid`` column (holding the value),
            keeping every feature column as an identifier.

    Returns:
        pandas.DataFrame in wide or long format, as selected by ``long_format``.
    """
    transformers = processor.processor.named_transformers_
    cols = (
        transformers["num"].get_feature_names_out().tolist()
        + transformers["cat"].get_feature_names_out().tolist()
    )

    # Series/DataFrame targets keep the original reset_index behaviour;
    # plain arrays and lists are accepted too.
    if hasattr(y_data, "reset_index"):
        target = y_data.reset_index(drop=True)
    else:
        target = np.asarray(y_data).reshape(-1)

    df = pd.DataFrame(x_data, columns=cols).assign(
        target=target,
        prediction=np.asarray(prediction).reshape(-1),
    )
    if not long_format:
        return df

    return df.melt(
        id_vars=cols,
        value_vars=["target", "prediction"],
        var_name="truth",
        value_name="avoid",
    )
    
    
    
    


In [None]:
# Rebuild a labelled frame for the test split: transformed features plus the
# true target and the thresholded prediction.
test_df = reconstruct_df(processor.X_test, processor.y_test, y_pred, processor)
    

In [None]:
# Display the reconstructed test-set frame built by reconstruct_df.
test_df

In [None]:

# 2-D KDE of "avoid" against "day", coloured by "truth".
# NOTE(review): "truth" and "avoid" are only created by the melt that is
# commented out inside reconstruct_df, and "day" is listed in cols_to_drop --
# confirm test_df actually carries these columns before running this cell.
sns.kdeplot(data=test_df, x="day", y="avoid", hue = 'truth', palette = ['grey','red'])
# sns.kdeplot(data=test_df, x="day", y="prediction", color = 'grey')

In [None]:
# Strip plot of "day" per "avoid" value, with the "truth" levels dodged side by side.
# NOTE(review): relies on the long-format columns "truth"/"avoid" (produced only
# by the commented-out melt in reconstruct_df) and on "day", which is listed in
# cols_to_drop -- confirm test_df has these columns.
sns.stripplot(data=test_df, x="avoid",
              y="day",
              hue = 'truth', 
              palette = ['grey','red'],
              dodge = True,
              jitter = True
              )

In [None]:
# Split violin plot of "trial_count" per "avoid" value, one half per "truth" level.
# NOTE(review): relies on the long-format columns "truth"/"avoid" (produced only
# by the commented-out melt in reconstruct_df) and on "trial_count", which is
# listed in cols_to_drop -- confirm test_df has these columns.
sns.violinplot(data=test_df,
               x="avoid",
               y="trial_count",
               hue = 'truth', 
               inner = "stick",
               split = True,
               palette = ['grey','red'])

In [None]:
# Display the pipeline's raw_data attribute (presumably the frame loaded by
# read_raw_data() -- verify against ClassifierPipe).
processor.raw_data

# Same model trained on FP data only

In [18]:
def process_data(file_path, subject_ids_path,features_to_drop):
    """
    Run the ClassifierPipe preprocessing stages on a raw data file.

    The stages are applied in this order: read the raw data, compute the
    max/min signal, split using previously saved subject ids with 'action'
    as the target, drop the unwanted features, and transform the data with
    the target mapped as 'avoid' -> 1 and 'escape' -> 0.

    Args:
        file_path (str): Path handed to the ClassifierPipe constructor.
        subject_ids_path (str): Path to the saved subject ids used for the split.
        features_to_drop (list): Column names passed to drop_features.

    Returns:
        The object returned by the final transform_data stage (the notebook
        uses it as the processed ClassifierPipe).
    """
    pipe = ClassifierPipe(file_path)
    pipe = pipe.read_raw_data()
    pipe = pipe.calculate_max_min_signal()
    pipe = pipe.split_data(
        load_subject_ids=True,
        subject_ids_path=subject_ids_path,
        target='action',
    )
    pipe = pipe.drop_features(features_to_drop)
    return pipe.transform_data(numeric_target_dict={'avoid': 1, 'escape': 0})

In [19]:
# Input locations: the aggregated raw dataset and the saved subject-id split.
DATA_PATH = '/projects/p31961/gaby_data/aggregated_data/raw_data/datasets/raw_data_raw_data.parquet.gzip'
SUBJECT_IDS_PATH = "/projects/p31961/ENIGMA/results/optimized_models/subjects.json"

# Metadata columns removed from the data before it is transformed.
FEATURES_TO_DROP = [
    "mouse_id",
    "event",
    "sex",
    "day",
    "trial_count",
    "trial",
]

In [24]:
# Step-by-step run of the same stages that process_data() chains together.
# Every return value is discarded here, so this cell relies on each method
# updating `processor` in place.
processor = ClassifierPipe(DATA_PATH)
processor.read_raw_data()
processor.calculate_max_min_signal()
processor.split_data(load_subject_ids = True, 
                     subject_ids_path = SUBJECT_IDS_PATH,
                     target = 'action')
processor.drop_features(FEATURES_TO_DROP)
processor.transform_data(numeric_target_dict={'avoid': 1, 'escape': 0})
# List the output feature names of the fitted "cat" transformer.
processor.processor.named_transformers_['cat'].get_feature_names_out().tolist()

['sensor_D1', 'sensor_D2', 'sensor_DA']

In [12]:
# Build the processed pipeline through the process_data() helper.
processor = process_data(
    file_path=DATA_PATH,
    subject_ids_path=SUBJECT_IDS_PATH,
    features_to_drop=FEATURES_TO_DROP,
)

In [9]:
# Display the pipeline's processed_data attribute.
processor.processed_data

Unnamed: 0,action,sensor,signal_max,signal_min
0,avoid,D1,3.127942,0.191952
1,avoid,D1,0.919383,-0.615567
2,avoid,D1,-0.394839,-2.368783
3,avoid,D1,0.367330,-1.989866
4,avoid,D1,3.867368,1.411265
...,...,...,...,...
12789,escape,DA,2.978387,-1.480360
12790,escape,DA,1.694918,-2.158833
12791,escape,DA,3.849639,-1.624187
12792,escape,DA,4.559294,-1.507916


In [None]:
# Restore the saved FP-only variant of the optimized classifier.
fp_only_model = tf.keras.models.load_model(
    '/projects/p31961/ENIGMA/results/optimized_models/BaseClassifier_optimized_fp_only'
)

# Re-compile with the same evaluation metrics as the full model, then show
# the architecture summary.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.BinaryAccuracy, 'accuracy'),
        (tf.keras.metrics.Precision, 'precision'),
        (tf.keras.metrics.Recall, 'recall'),
        (tf.keras.metrics.AUC, 'auc-roc'),
    )
]
fp_only_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=metrics)
fp_only_model.summary()

In [None]:
# Keep the raw model outputs under their own name, then binarize at 0.5 so
# that y_pred holds 0/1 labels, consistent with how the first model's
# predictions are thresholded earlier in this notebook.
y_prob = fp_only_model.predict(processor.X_test)
y_pred = np.where(y_prob > 0.5, 1, 0)

In [None]:
# Rebuild a labelled frame for the FP-only model: transformed test features
# plus the true target and this model's prediction.
fp_only_df = reconstruct_df(processor.X_test, processor.y_test, y_pred, processor)

In [None]:
# Display the pipeline's processed_data attribute.
processor.processed_data

In [None]:
# Display the reconstructed frame for the FP-only model's test predictions.
fp_only_df