In [4]:
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix, classification_report, f1_score, roc_curve, auc
from sklearn.model_selection import KFold, cross_val_predict, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, label_binarize

#file_path = r".\4tone_cell\4T1006.mat"
file_path = r"Z:\projects\trial_classification\4tone_cell\4T1005.mat"

file = h5py.File(file_path, 'r')
filename = os.path.basename(file.filename)
print("Keys in file:", list(file.keys()))

ffr_nodss = file["ffr_nodss"][:]  
labels_ref = file["labels"][:]
t = file["time"][:].flatten()

labels = np.array([
    int(file[ref][()].tobytes().decode('ascii').strip('\x00'))
    for ref in labels_ref[0]
])


Keys in file: ['#refs#', 'ffr_dss', 'ffr_nodss', 'labels', 'time']


In [5]:
xmin, xmax = 50, 250

xmin_ind = np.argmin(np.abs(t - xmin))   
xmax_ind = np.argmin(np.abs(t - xmax))   


ffr_trimmed = ffr_nodss[:, xmin_ind:xmax_ind]
t_trimmed = t[xmin_ind:xmax_ind]

print(f"Trimmed shape: {ffr_trimmed.shape}, Time range: {t_trimmed[0]} – {t_trimmed[-1]}")


df = pd.DataFrame(ffr_trimmed)
df['label'] = labels
df.columns = [f't{t_i}' for t_i in range(ffr_trimmed.shape[1])] + ['label']

group_size = 50
num_groups = len(df) // group_size

X_avg = np.array([
    df.iloc[i*group_size:(i+1)*group_size, :-1].mean(axis=0).values
    for i in range(num_groups)
])
y_avg = np.array([
    df.iloc[i*group_size:(i+1)*group_size]['label'].mode()[0]
    for i in range(num_groups)
])

print("After averaging:", X_avg.shape, y_avg.shape)



Trimmed shape: (3093, 3277), Time range: 49.98779296875 – 249.93896484375
After averaging: (61, 3277) (61,)


In [6]:
from sklearn.pipeline import make_pipeline

lda_pipeline = make_pipeline(StandardScaler(), LinearDiscriminantAnalysis())

kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(lda_pipeline, X_avg, y_avg, cv=kf)
print(f"\nCross-validation accuracies: {scores}")
print(f"Mean CV Accuracy: {np.mean(scores):.3f}")

lda_pipeline.fit(X_avg, y_avg)
y_pred = lda_pipeline.predict(X_avg)

print("\nClassification Report:\n", classification_report(y_avg, y_pred))
print("Macro F1-score:", f1_score(y_avg, y_pred, average='macro'))





Cross-validation accuracies: [0.84615385 0.66666667 0.83333333 0.83333333 0.91666667]
Mean CV Accuracy: 0.819

Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        15
           3       1.00      1.00      1.00        11
           4       1.00      1.00      1.00        21

    accuracy                           1.00        61
   macro avg       1.00      1.00      1.00        61
weighted avg       1.00      1.00      1.00        61

Macro F1-score: 1.0


In [None]:
cm = confusion_matrix(y_avg, y_pred)
print("\nConfusion Matrix (50Trial):\n", cm)

plt.figure(figsize=(5,5))
plt.imshow(cm, cmap='Blues')
plt.title("Confusion Matrix 50 trial")
plt.xlabel("Predicted")
plt.ylabel("True")
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, str(cm[i,j]), ha='center', va='center', color='black')
plt.show()

In [None]:
classes = np.unique(y_avg)
y_bin = label_binarize(y_avg, classes=classes)
y_score = lda_pipeline.predict_proba(X_avg)  
plt.figure(figsize=(6,5))
for i, cls in enumerate(classes):
    fpr, tpr, _ = roc_curve(y_bin[:, i], y_score[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'Class {cls} (AUC = {roc_auc:.2f})')

plt.plot([0,1],[0,1],'k--', lw=1)  # diagonal line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve (50 trials avg)')
plt.legend()
plt.grid(True)
plt.show()