In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. the local `src` package) are re-imported before each cell executes.
# Keep comments on their own lines: text after a line magic is passed to it.
%load_ext autoreload
%autoreload 2 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data_processing.pipelines.ClassifierPipe import ClassifierPipe

import tensorflow as tf

In [3]:
# Sanity check: list the physical GPU devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [5]:
# Location of the aggregated raw recordings (gzip-compressed parquet file).
# NOTE(review): absolute, cluster-specific path — consider a configurable DATA_DIR.
DATA_PATH = '/projects/p31961/gaby_data/aggregated_data/raw_data/datasets/raw_data_raw_data.parquet.gzip'

processor = ClassifierPipe(DATA_PATH)

# Load the raw data, then summarise the signal extrema. The chain is left as
# the cell's final expression, so the object it returns is displayed.
# Presumably this populates `processor.raw_data` / `processor.processed_data`,
# which later cells read — confirm in ClassifierPipe.
(
    processor
    .read_raw_data()
    .calculate_max_min_signal()
)

<src.data_processing.pipelines.ClassifierPipe.ClassifierPipe at 0x2b312eaf2510>

In [6]:
# Inspect the raw table held by the pipeline object (pandas truncates the
# display to the first and last rows).
processor.raw_data

Unnamed: 0,mouse_id,day,event,sensor,time,trial,signal,action,latency,sex,learning_phase,trial_count
0,12,5,cue,D2,-25.000000,0,-0.155359,escape,0.00,F,0,115
1,12,5,cue,D2,-24.901531,0,-0.420553,escape,0.00,F,0,115
2,12,5,cue,D2,-24.803064,0,-1.592294,escape,0.00,F,0,115
3,12,5,cue,D2,-24.704596,0,-1.268734,escape,0.00,F,0,115
4,12,5,cue,D2,-24.606127,0,-0.210176,escape,0.00,F,0,115
...,...,...,...,...,...,...,...,...,...,...,...,...
5860105,1,4,escape,D1,19.606127,1,0.299603,avoid,7.54,F,0,87
5860106,1,4,escape,D1,19.704596,1,-0.249408,avoid,7.54,F,0,87
5860107,1,4,escape,D1,19.803064,1,-0.486369,avoid,7.54,F,0,87
5860108,1,4,escape,D1,19.901531,1,-0.146374,avoid,7.54,F,0,87


In [7]:
# Keep only cue-aligned rows recorded with the DA sensor, then derive two
# per-row summaries from the signal extrema:
#   * max_min_ration - signal_max divided by signal_min
#   * signal_range   - signal_max minus signal_min
# NOTE(review): "max_min_ration" looks like a typo for "max_min_ratio"; the
# name is kept unchanged here so any existing consumers keep working.
da_query = "event == 'cue' & sensor=='DA'"
da_data = (
    processor.processed_data
    .query(da_query)
    .assign(
        max_min_ration=lambda frame: frame.signal_max / frame.signal_min,
        signal_range=lambda frame: frame.signal_max - frame.signal_min,
    )
)
da_data

Unnamed: 0,mouse_id,event,action,sensor,sex,day,trial_count,signal_max,signal_min,max_min_ration,signal_range
307,0,cue,avoid,DA,F,1,1,2.287195,-2.712690,-0.843147,4.999885
308,0,cue,avoid,DA,F,1,18,3.183504,-2.207469,-1.442151,5.390972
309,0,cue,avoid,DA,F,1,19,2.618372,-3.175299,-0.824606,5.793671
310,0,cue,avoid,DA,F,1,25,2.474569,-2.778508,-0.890611,5.253077
311,0,cue,avoid,DA,F,1,27,3.101258,-2.721624,-1.139488,5.822883
...,...,...,...,...,...,...,...,...,...,...,...
12513,13,cue,escape,DA,M,6,146,3.589701,-2.390371,-1.501734,5.980072
12514,13,cue,escape,DA,M,6,151,3.327390,-2.113279,-1.574515,5.440669
12515,13,cue,escape,DA,M,6,154,3.171232,-2.922195,-1.085222,6.093427
12516,13,cue,escape,DA,M,6,158,3.521800,-1.935932,-1.819175,5.457732


In [10]:
# Build the modelling dataset in a single fluent chain on a fresh ClassifierPipe
# and expose the resulting splits (X_train / X_dev / X_test and matching y_*)
# that the modelling cells below read from `processor_pipe`.
#
# NOTE(review): the recorded run of this cell failed with
#   AttributeError: 'ClassifierPipe' object has no attribute 'calculate_percent_avoid'
# so `processor_pipe` is never created and every later cell that uses it cannot
# run on a fresh kernel. Confirm the current method name on ClassifierPipe.
# NOTE(review): `transorm_data` looks like a misspelling of `transform_data`;
# verify against the class before renaming.
processor_pipe = (ClassifierPipe(DATA_PATH)
             .read_raw_data()
             .calculate_max_min_signal()
             .calculate_percent_avoid()
             # Columns removed before splitting; presumably identifiers or
             # intermediates not meant to be model features — TODO confirm.
             .drop_columns(["event", "action", "trial", "trial_count", "num_avoids", "max_trial"])
             # Judging by the keyword names: 30% held out, half of that for the
             # dev set, grouped by mouse and stratified by sex — TODO confirm
             # the exact semantics in ClassifierPipe.split_data.
             .split_data(test_size=0.3,
                test_dev_size=0.5, 
                split_group = "mouse_id", 
                stratify_group = "sex", 
                target='ratio_avoid',
                save_subject_ids=False)
                # path_to_save =os.path.dirname(raw_path)
            .transorm_data()
)
# Display the training feature matrix as a quick check of the result.
processor_pipe.X_train

AttributeError: 'ClassifierPipe' object has no attribute 'calculate_percent_avoid'

In [None]:
# Fully connected regressor: five 10-unit ReLU layers, 20% dropout, five more
# 10-unit ReLU layers, then a single linear output unit.
# Layers are created in the same order as they appear in the network.
# A wider alternative (256 -> 128 -> 64 -> 32 -> 16 units) was previously left
# here as commented-out code; it is not part of the model.
model = tf.keras.models.Sequential(
    [tf.keras.layers.Dense(10, activation='relu') for _ in range(5)]
    + [tf.keras.layers.Dropout(0.2)]
    + [tf.keras.layers.Dense(10, activation='relu') for _ in range(5)]
    + [tf.keras.layers.Dense(1)]
)

# Adam optimiser with mean-squared-error loss (regression target).
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=optimizer)

# Train for 10 epochs, monitoring loss on the dev split after each epoch.
# The call is the cell's last expression, so its return value is displayed.
model.fit(
    processor_pipe.X_train,
    processor_pipe.y_train,
    validation_data=(processor_pipe.X_dev, processor_pipe.y_dev),
    epochs=10,
)


In [None]:
# Learning curves from the most recent `model.fit` call: training loss versus
# dev-set loss per epoch. Each line is labelled and the axes are named so the
# figure can be read on its own (the loss is MSE, as set in `model.compile`).
fig, ax = plt.subplots()
ax.plot(model.history.history['loss'], label='train')
ax.plot(model.history.history['val_loss'], label='validation')
ax.set(xlabel='epoch', ylabel='MSE loss', title='Training vs. validation loss')
ax.legend();  # trailing semicolon suppresses the Legend repr in the output

In [None]:
# Run the trained network on the held-out test features.
# NOTE(review): despite the name, these are predictions of the model target
# ('ratio_avoid' per the split_data call), not of a signal — confirm.
predicted_signal = model.predict(processor_pipe.X_test)

In [None]:
# Recover column names for the transformed test matrix from the fitted
# "num" and "cat" transformers (numeric names first, then categorical).
# Assumes X_test columns are laid out in that same order — TODO confirm.
num_cols = processor_pipe.processor.named_transformers_["num"].get_feature_names_out().tolist()
cat_cols = processor_pipe.processor.named_transformers_["cat"].get_feature_names_out().tolist()
cols = num_cols + cat_cols

# Test features alongside the network's predictions and the true targets.
testing_df = (pd.DataFrame(processor_pipe.X_test, columns=cols)
              .assign(predicted_avoid_ratio = predicted_signal,
                      true_avoid_ratio = processor_pipe.y_test.values.reshape(-1,1)
                      )
)

# Keep DA-sensor rows only, one row per (mouse_id, day) pair.
dopamine = testing_df.query("sensor_DA ==1").drop(columns = ["sex_M"]).drop_duplicates(subset = ["mouse_id", "day"])

# Overlay true and predicted avoid ratios on one labelled axes so the two
# point clouds can be told apart.
fig, ax = plt.subplots()
sns.scatterplot(data=dopamine, x="day", y="true_avoid_ratio", label="true", ax=ax)
sns.scatterplot(data=dopamine, x="day", y="predicted_avoid_ratio", alpha=0.3,
                label="predicted (neural network)", ax=ax)
ax.set(ylabel="avoid ratio", title="Avoid ratio by day: true vs. neural-network prediction")
ax.legend();  # trailing semicolon suppresses the Legend repr in the output

In [None]:
# Show the de-duplicated DA-sensor test rows ordered by day (returns a sorted
# copy; `dopamine` itself is not modified).
dopamine.sort_values("day")

In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error

# Gradient-boosted tree baseline for comparison with the neural network:
# 100 shallow trees (depth 3), squared-error objective, learning rate 0.1.
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
)

# Fit on the training split, then predict on the held-out test split.
xgb_model.fit(processor_pipe.X_train, processor_pipe.y_train)
xg_pred = xgb_model.predict(processor_pipe.X_test)


In [None]:
# Attach the XGBoost predictions to the test table, then rebuild the
# DA-sensor subset (one row per mouse_id/day pair) so it includes them.
testing_df = testing_df.assign(xg_pred_avoid_ratio = xg_pred)
dopamine = testing_df.query("sensor_DA ==1").drop(columns = ["sex_M"]).drop_duplicates(subset = ["mouse_id", "day"])

# Overlay true and XGBoost-predicted avoid ratios on one labelled axes so the
# two point clouds can be told apart.
fig, ax = plt.subplots()
sns.scatterplot(data=dopamine, x="day", y="true_avoid_ratio", label="true", ax=ax)
# Uncomment to also overlay the neural-network predictions:
# sns.scatterplot(data=dopamine, x="day", y="predicted_avoid_ratio", alpha=0.3,
#                 label="predicted (neural network)", ax=ax)
sns.scatterplot(data=dopamine, x="day", y="xg_pred_avoid_ratio", alpha=0.5,
                label="predicted (XGBoost)", ax=ax)
ax.set(ylabel="avoid ratio", title="Avoid ratio by day: true vs. XGBoost prediction")
ax.legend();  # trailing semicolon suppresses the Legend repr in the output

In [None]:
# Display the test table, which by this point holds the transformed features,
# the true avoid ratio, and the predictions added in the cells above.
testing_df