# Miniproject - Neuroscience: cellular and circuit mechanisms (BIO-482)

- This notebook produces the figures for **part 4** of the miniproject.

#### Importing libraries

In [89]:
import os
import sys
import numpy as np
import pandas as pd
import scipy.stats
#%matplotlib widget
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    confusion_matrix,
    classification_report
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#### Importing helper functions
 Feel free to check out what these functions do in the corresponding files, `helpers.py` or `utils.py`. 
 - `helpers.py` contains functions to analyze membrane potential recording data
 - `utils.py` contains functions useful for other things e.g. plotting

In [2]:
# Make the project's helper modules importable: derive the 'scripts' directory
# from the current working directory and put it on the module search path.
base_path = os.getcwd()
# Swap only the LAST occurrence of 'notebooks' for 'scripts'. A plain
# str.replace would rewrite every occurrence and produce a wrong path when
# 'notebooks' also appears earlier in the working directory. If 'notebooks'
# does not occur at all, the path is left unchanged.
base_path = 'scripts'.join(base_path.rsplit('notebooks', 1))
sys.path.insert(1, base_path)

# NOTE(review): the star import hides which helper names this notebook relies
# on; consider importing them explicitly once the set of used names is known.
from helpers import *
from utils import remove_top_right_frame, jitter_scatterplot

# Test

# Load data
Load the data from the `.pkl` file (saved from MATLAB):

In [37]:
# Make paths
# Root folder of the project; 'Figures', 'Data' and 'Results' are resolved
# relative to it. Set the BIO482_MAIN_DIR environment variable to use another
# location without editing the notebook; otherwise the local path below is
# used (EDIT it for your own machine).
main_dir = os.environ.get(
    'BIO482_MAIN_DIR',
    r"C:\Users\Gauthier Huguelet\OneDrive - epfl.ch\EPFL\MA1\Cell and circuit mech of brain fct\miniproject\BIO482_MiniProject_2025_Python",
)
print('Main working directory:', main_dir)
figure_path = os.path.join(main_dir, 'Figures')
os.makedirs(figure_path, exist_ok=True)  # create the figure folder if it does not exist yet
data_path = os.path.join(main_dir, 'Data')  # if your folder is organized differently, just specify the full path to the .pkl file
results_path = os.path.join(main_dir, 'Results')
fname = 'data_bio482.pkl'
# NOTE: unpickling can execute arbitrary code - only load a file you trust.
data_df = pd.read_pickle(os.path.join(data_path, fname))

Main working directory: C:\Users\Gauthier Huguelet\OneDrive - epfl.ch\EPFL\MA1\Cell and circuit mech of brain fct\miniproject\BIO482_MiniProject_2025_Python


In [10]:
# Preview the first five rows of the recordings table.
data_df.head(n=5)

Unnamed: 0,Cell_APThreshold_Slope,Cell_Anatomy,Cell_Counter,Cell_Depth,Cell_ID,Cell_Layer,Cell_TargetedBrainArea,Cell_Type,Cell_tdTomatoExpressing,Mouse_DateOfBirth,...,Sweep_Counter,Sweep_MembranePotential,Sweep_MembranePotential_SamplingRate,Sweep_PassiveContactTimes,Sweep_QuietTimes,Sweep_StartTime,Sweep_Type,Sweep_WhiskerAngle,Sweep_WhiskerAngle_SamplingRate,Sweep_WhiskingTimes
0,10.0,L2/3;C2,1.0,229.0,SC901_1,L2/3,C2,EXC,False,,...,1.0,"[-0.044009375, -0.044028125, -0.0439875, -0.04...",20000.0,"[[4.567, 4.713], [7.327, 7.519], [14.481, 14.6...","[[0.002, 4.564], [4.606, 6.09], [14.634, 15.52...",2005.0,active touch,"[2.740000000000009, 2.740000000000009, 2.74000...",100.0,"[[6.118, 6.326], [7.942, 13.856]]"
1,10.0,L2/3;C2,1.0,229.0,SC901_1,L2/3,C2,EXC,False,,...,2.0,"[-0.046275, -0.04629375, -0.046259375, -0.0462...",20000.0,"[[15.274999999999999, 15.375], [16.247, 16.643...","[[0.002, 4.496], [5.558, 6.626], [13.406, 15.2...",2005.0,active touch,"[2.680000000000007, 2.680000000000007, 2.68000...",100.0,"[[4.7, 5.196], [6.696, 11.15], [12.032, 13.006..."
2,10.0,L2/3;C2,1.0,229.0,SC901_1,L2/3,C2,EXC,False,,...,3.0,"[-0.041896875, -0.0419125, -0.041909375, -0.04...",20000.0,"[[4.347000000000001, 7.0390000000000015], [8.2...","[[0.002, 2.128], [6.252, 7.01], [7.15, 20.0]]",2005.0,passive contact,"[1.4958937492820894, 1.5019819900587095, 1.509...",100.0,"[[2.15, 3.652], [4.362, 5.848]]"
3,10.0,L2/3;C2,1.0,229.0,SC901_1,L2/3,C2,EXC,False,,...,4.0,"[-0.04655625, -0.0465875, -0.046575, -0.046596...",20000.0,,"[[0.002, 2.764], [12.03, 14.464], [14.814, 15....",2005.0,active touch,"[2.467493802679826, 2.459711310964792, 2.46562...",100.0,"[[2.766, 12.026], [15.626, 18.406], [18.814, 1..."
4,10.0,L2/3;C2,1.0,229.0,SC901_1,L2/3,C2,EXC,False,,...,5.0,"[-0.047296875, -0.047284375, -0.047265625, -0....",20000.0,"[[4.411000000000001, 4.4809999999999945], [5.2...","[[0.002, 3.36], [3.57, 4.424], [4.48, 5.224], ...",2005.0,active touch,"[2.4399999999999977, 2.4399999999999977, 2.440...",100.0,"[[5.228, 5.624], [7.462, 8.022], [11.002, 15.2..."


In [32]:

# Distinct cell identifiers, listed in descending order, and how many there are.
cell_list = sorted(np.unique(data_df['Cell_ID']), reverse=True)
print(len(cell_list))

240


In [51]:
# File names of the result tables saved by parts 1-3.
r_p1name = 'Part1_Results.csv'
r_p2name = 'Part2_Results.csv'
r_p3name = 'Part3_Results.csv'

# All three tables are ';'-separated and use their first column as the index.
results_p1, results_p2, results_p3 = (
    pd.read_csv(os.path.join(results_path, csv_name), sep=';', index_col=0)
    for csv_name in (r_p1name, r_p2name, r_p3name)
)

In [50]:
# Preview the first five rows of the part 1 results table.
results_p1.head(n=5)

Unnamed: 0,cell_id,cell_type,firing_rate,ap_threshold,ap_duration,mean_vm,std_vm,fft_low,fft_high
0,AP032_1,PV,29.116667,-44.05542,0.390398,-55.179827,4.857357,0.64083,0.094975
1,AP032_2,PV,18.75,-40.9051,0.532933,-49.469576,5.034024,0.654376,0.104321
2,AP035_1,PV,20.5,-44.699736,0.513374,-55.808342,6.227139,0.842114,0.106274
3,AP035_2,PV,33.46,-42.586909,0.423192,-51.302675,6.347127,0.867482,0.089732
4,AP039_1,SST,0.4,-42.144056,0.6375,-58.333904,2.841876,0.33828,0.028239


# Preprocessing

In [90]:
# Remove rows containing NaN values. The name is rebound to the cleaned frame
# instead of using `inplace=True`, so the object returned by read_csv is not
# mutated while `results_p1` still refers to the cleaned table afterwards.
results_p1 = results_p1.dropna(axis=0)
# Feature matrix: every column except the identifier and the label.
# Label vector: the cell type of each row.
feature_matrix = results_p1.drop(columns=['cell_type', 'cell_id'])
label_matrix = results_p1['cell_type']
# Train/test split: 30% held out, stratified on the label so both splits keep
# the same class proportions; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    feature_matrix, label_matrix, test_size=0.3, stratify=label_matrix, random_state=42
)
# Standardize using statistics of the training data only (no leakage from the
# test split). With scikit-learn's default output setting, X_train / X_test
# are NumPy arrays from here on, not DataFrames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [101]:
# The classifier classes and the metric functions used below are imported in
# the notebook's import cell, so every dependency is declared in one place.

# Create the models (fixed seed so repeated runs give the same result):
Rfc = RandomForestClassifier(random_state=42)
Gbc = GradientBoostingClassifier(random_state=42)

# Fit the models on the standardized training split:
Rfc.fit(X_train, y_train)
Gbc.fit(X_train, y_train)

# Print the score of each model on the held-out test split
# (for scikit-learn classifiers, `score` is the mean accuracy):
print("RandomForestClassifier score : ", Rfc.score(X_test, y_test))
print("GradientBoostingClassifier score : ", Gbc.score(X_test, y_test))



RandomForestClassifier score :  0.8695652173913043
GradientBoostingClassifier score :  0.855072463768116


# Evaluation Metrics
- Overall accuracy
- Per-class precision, recall, F1
- Confusion Matrix
- Balanced accuracy
- 5-fold cross-validation (CV)

In [102]:
# Detailed evaluation of both classifiers on the held-out test split.
# Class predictions, one array per model (random forest first, then boosting).
y_pred_rfc, y_pred_gbc = (clf.predict(X_test) for clf in (Rfc, Gbc))

# Random forest: accuracy, balanced accuracy and the per-class report.
print("RandomForest - accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_rfc))
print("RandomForest - balanced accuracy:", balanced_accuracy_score(y_true=y_test, y_pred=y_pred_rfc))
print("\nRandomForest - classification report:\n", classification_report(y_true=y_test, y_pred=y_pred_rfc))

# Gradient boosting: the same three summaries.
print("\nGradientBoosting - accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_gbc))
print("GradientBoosting - balanced accuracy:", balanced_accuracy_score(y_true=y_test, y_pred=y_pred_gbc))
print("\nGradientBoosting - classification report:\n", classification_report(y_true=y_test, y_pred=y_pred_gbc))

# Confusion matrices, printed last for both models.
print("\nRandomForest - confusion matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred_rfc))
print("\nGradientBoosting - confusion matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred_gbc))

RandomForest - accuracy: 0.8695652173913043
RandomForest - balanced accuracy: 0.8048268398268399

RandomForest - classification report:
               precision    recall  f1-score   support

         EXC       0.96      0.96      0.96        25
          PV       0.92      0.73      0.81        15
         SST       0.78      0.95      0.86        22
         VIP       0.80      0.57      0.67         7

    accuracy                           0.87        69
   macro avg       0.86      0.80      0.82        69
weighted avg       0.88      0.87      0.87        69


GradientBoosting - accuracy: 0.855072463768116
GradientBoosting - balanced accuracy: 0.8001298701298702

GradientBoosting - classification report:
               precision    recall  f1-score   support

         EXC       0.96      0.92      0.94        25
          PV       0.86      0.80      0.83        15
         SST       0.83      0.91      0.87        22
         VIP       0.57      0.57      0.57         7

    acc