### BIOMETRIC IDENTIFICATION
    Multi-class classification: each subject is a unique class.
    The output layer has 20 neurons (one per subject).

    Training data comes from the first 2 sessions of each subject.
    Test data comes from the 3rd session of each subject.
    Labels run from 0 to 19 (20 subjects).
    This notebook handles one task at a time.

In [1]:
# Importing necessary libraries
import torch
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
from sklearn.preprocessing import normalize
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.utils import shuffle


In [2]:

# Pin every random number generator used in this notebook to one seed
seed = 42
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)
if torch.cuda.is_available():
    # Seed the current CUDA device, then every visible CUDA device.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and switch off its autotuner.
    torch.backends.cudnn.deterministic, torch.backends.cudnn.benchmark = True, False


In [None]:
# Subject folder names S1..S20. A subject's position in this list is used as
# its class label when the data is loaded.
all_subjects = [f'S{i}' for i in range(1, 21)]

# Session file stems, read as <path>/<subject>/<task>/<session>.csv.
# NOTE: these look like subject names but they identify recording sessions.
training_sessions = ['S1', 'S2']
test_session = ['S3']
task = 'MI2'

# Number of consecutive rows that make up one window (one model input).
lookback = 100
path = 'give the path here'

batch_size = 32

# One class per subject; derived so it cannot drift from `all_subjects`.
num_classes = len(all_subjects)

# Number of signal columns per row (the Conv1d input channels).
input_channels = 20

# The model sees exactly one window, so this must equal `lookback`.
sequence_length = lookback

# Alternative value kept from the original notebook: hidden_size = 128
hidden_size = 200

# Alternative value kept from the original notebook: num_epochs = 40
num_epochs = 60

num_folds = 10



In [4]:

# Function to create dataset
def create_dataset(df, lookback, label):
    """Cut a recording into non-overlapping windows that all carry one label.

    Args:
        df: pandas DataFrame whose rows are consecutive samples and whose
            columns are features. A NumPy array is accepted as well.
        lookback: number of rows per window; must be a positive integer.
        label: class label assigned to every window.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape ``(n_windows, lookback, n_features)``
        and ``y`` has shape ``(n_windows,)``, where
        ``n_windows = len(df) // lookback``. Trailing rows that do not fill a
        complete window are dropped. When the input is shorter than one window
        the result is empty but keeps the same rank, so it can still be
        concatenated with the windows of other recordings.

    Raises:
        ValueError: if ``lookback`` is not positive.
    """
    if lookback <= 0:
        raise ValueError(f"lookback must be a positive integer, got {lookback}")

    data = df.to_numpy() if hasattr(df, "to_numpy") else np.asarray(df)
    n_windows = len(data) // lookback

    if n_windows == 0:
        # np.array([]) would give shape (0,) here and break a later
        # np.concatenate with 3-D window arrays, so build the shape explicitly.
        empty_X = np.empty((0, lookback) + data.shape[1:], dtype=data.dtype)
        return empty_X, np.full(0, label)

    X = np.stack([data[start:start + lookback]
                  for start in range(0, n_windows * lookback, lookback)])
    y = np.full(n_windows, label)
    return X, y


In [None]:

def _load_subject_windows(subject, sessions, label):
    """Read the given sessions of one subject and return labelled windows.

    The session CSVs are concatenated in the order given, an optional 'time'
    column is removed, every remaining column is scaled with sklearn's
    ``normalize(norm='max', axis=0)``, and the result is cut into windows of
    ``lookback`` rows by ``create_dataset``.

    Args:
        subject: subject folder name under ``path``.
        sessions: list of session file stems (without '.csv').
        label: integer class label given to every window.

    Returns:
        Tuple ``(X, y)`` as returned by ``create_dataset``.
    """
    frames = [
        pd.read_csv(os.path.join(path, subject, task, f'{session}.csv'))
        for session in sessions
    ]
    # One concat over the full list instead of growing a frame per session.
    recording = pd.concat(frames, ignore_index=True)
    recording = recording.drop(columns=['time'], errors='ignore')

    # NOTE(review): scaling statistics are computed separately for each call,
    # so the test session is scaled with its own column maxima rather than the
    # training ones - confirm this is intended.
    scaled = normalize(recording, norm='max', axis=0)

    # NOTE(review): windows are cut from the concatenated sessions; if a
    # session's row count is not a multiple of `lookback`, a window will span
    # two sessions.
    return create_dataset(pd.DataFrame(scaled), lookback, label=label)


X_train_all, y_train_all = [], []
X_test_all, y_test_all = [], []
print("Subjects to process:", all_subjects)

# The position of a subject in `all_subjects` is its class label.
for idx, subject in enumerate(all_subjects):
    X_tr, y_tr = _load_subject_windows(subject, training_sessions, idx)
    X_train_all.append(X_tr)
    y_train_all.append(y_tr)

    X_te, y_te = _load_subject_windows(subject, test_session, idx)
    X_test_all.append(X_te)
    y_test_all.append(y_te)



Subjects to process: ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11', 'S12', 'S13', 'S14', 'S15', 'S16', 'S17', 'S18', 'S19', 'S20']


In [None]:
# Combine the per-subject pieces of each split into one array.
def _concat_parts(parts):
    """Join a list of per-subject arrays along axis 0.

    An ndarray is returned unchanged. This cell rebinds the list names to
    arrays, so without this guard a second run would concatenate the rows of
    the already-combined array and change its shape (or raise for the 1-D
    label arrays).
    """
    if isinstance(parts, np.ndarray):
        return parts
    return np.concatenate(parts, axis=0)


X_train_all = _concat_parts(X_train_all)
X_test_all = _concat_parts(X_test_all)
y_train_all = _concat_parts(y_train_all)
y_test_all = _concat_parts(y_test_all)


In [7]:
# Number of windows per class label in each split (index = label).
for caption, split_labels in (
    ("Train label distribution:", y_train_all),
    ("Test label distribution:", y_test_all),
):
    print(caption, np.bincount(split_labels))


Train label distribution: [256 256 256 256 256 256 256 256 256 256 256 256 256 256 256 256 256 256
 256 256]
Test label distribution: [128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128
 128 128]


In [8]:
# Report the shape of every combined array.
print("Final shapes:")
for caption, array_shape in (
    ("X_train_all:", X_train_all.shape),
    ("y_train_all:", y_train_all.shape),
    ("X_test_all:", X_test_all.shape),
    ("y_test_all:", y_test_all.shape),
):
    print(caption, array_shape)


Final shapes:
X_train_all: (5120, 100, 20)
y_train_all: (5120,)
X_test_all: (2560, 100, 20)
y_test_all: (2560,)


In [None]:


# Build torch tensors: float32 inputs for the network, int64 class labels
# for the loss.
X_train = torch.tensor(X_train_all, dtype=torch.float32)
X_test = torch.tensor(X_test_all, dtype=torch.float32)
y_train = torch.tensor(y_train_all, dtype=torch.long)
y_test = torch.tensor(y_test_all, dtype=torch.long)


In [10]:
### Show the shape of each tensor
print("\n".join([
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
]))


X_train shape: torch.Size([5120, 100, 20])
y_train shape: torch.Size([5120])
X_test shape: torch.Size([2560, 100, 20])
y_test shape: torch.Size([2560])


In [11]:
# Training loop
def train(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass (one epoch) over ``train_loader``.

    Args:
        model: network to optimise; it is switched to training mode.
        train_loader: iterable of ``(inputs, labels)`` batches that supports
            ``len()``.
        optimizer: optimiser that updates ``model``'s parameters.
        criterion: loss called as ``criterion(outputs, labels)``.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(train_loss, train_accuracy)``: the mean of the per-batch
        loss values and the percentage (0-100) of correctly classified
        samples. Both are measured while the weights are being updated, i.e.
        each batch is scored with the weights in effect before its own step.

    Raises:
        ZeroDivisionError: if ``train_loader`` yields no batches.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()

        # detach() replaces the legacy `.data` attribute: same values, but
        # without bypassing autograd's in-place modification checks.
        predicted = outputs.detach().argmax(dim=1)
        n_seen += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    train_loss = loss_sum / len(train_loader)
    train_accuracy = 100 * n_correct / n_seen

    return train_loss, train_accuracy


In [12]:
# Validation loop
def validate(model, val_loader, criterion, device):
    """Score ``model`` on ``val_loader`` without updating any weights.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        val_loader: iterable of ``(inputs, labels)`` batches that supports
            ``len()``.
        criterion: loss called as ``criterion(outputs, labels)``.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(val_loss, val_accuracy)``: the mean of the per-batch loss
        values and the percentage (0-100) of correctly classified samples.
    """
    model.eval()
    batch_losses = []
    hits = 0
    seen = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_inputs, batch_labels in val_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            scores = model(batch_inputs)
            batch_losses.append(criterion(scores, batch_labels).item())

            guesses = scores.max(dim=1).indices
            seen += batch_labels.size(0)
            hits += (guesses == batch_labels).sum().item()

    val_loss = sum(batch_losses) / len(val_loader)
    val_accuracy = 100 * hits / seen

    return val_loss, val_accuracy


In [13]:

class EEGCNN_GRU(nn.Module):
    """1-D convolution followed by a bidirectional GRU and a two-layer classifier.

    Pipeline: Conv1d(kernel 3) -> BatchNorm -> ELU -> Dropout -> MaxPool(2)
    -> bidirectional GRU -> Dropout -> Linear -> ELU -> Linear -> log-softmax.

    Args:
        input_channels: number of feature columns per time step (Conv1d input
            channels).
        sequence_length: accepted for interface compatibility; it is not used
            by any layer.
        hidden_size: GRU hidden size per direction; also the width of the
            first linear layer's output.
        num_classes: number of output classes.
        readout: how the GRU output is reduced to one vector per sample.
            ``'last_step'`` (default, the original behaviour) takes the GRU
            output at the last time step. For the backward direction that is
            the state after reading only a single time step.
            ``'final_hidden'`` concatenates the final hidden states of both
            directions, so each direction has read the whole sequence.
            The choice adds no parameters, so saved weights load either way.

    Raises:
        ValueError: if ``readout`` is not one of the two supported values.
    """

    def __init__(self, input_channels, sequence_length, hidden_size, num_classes,
                 readout='last_step'):
        super().__init__()
        if readout not in ('last_step', 'final_hidden'):
            raise ValueError(
                f"readout must be 'last_step' or 'final_hidden', got {readout!r}"
            )
        self.readout = readout
        self.conv1 = nn.Conv1d(input_channels, 64, 3)
        self.bn1 = nn.BatchNorm1d(64)
        self.dropout1 = nn.Dropout(0.5)
        self.maxpool = nn.MaxPool1d(2)
        self.gru = nn.GRU(64, hidden_size, batch_first=True, bidirectional=True)
        self.dropout2 = nn.Dropout(0.5)
        # The GRU is bidirectional, hence 2 * hidden_size input features.
        self.fc1 = nn.Linear(2*hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of windows.

        Args:
            x: tensor of shape (batch, time, input_channels).

        Returns:
            Tensor of shape (batch, num_classes) holding log-softmax scores,
            intended to be paired with ``NLLLoss``.
        """
        # Conv1d expects (batch, channels, time).
        x = x.permute(0, 2, 1)
        x = F.elu(self.bn1(self.conv1(x)))
        x = self.dropout1(x)
        x = self.maxpool(x)
        # Back to (batch, time, features) for the batch_first GRU.
        x = x.permute(0, 2, 1)
        x, h_n = self.gru(x)
        if self.readout == 'final_hidden':
            # h_n[-2] / h_n[-1] are the forward / backward final states.
            x = self.dropout2(torch.cat((h_n[-2], h_n[-1]), dim=1))
        else:
            # Original order kept: dropout over the sequence, then last step.
            x = self.dropout2(x)
            x = x[:, -1, :]
        x = F.elu(self.fc1(x))
        x = F.log_softmax(self.fc2(x), dim=1)
        return x


In [14]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = EEGCNN_GRU(input_channels, sequence_length, hidden_size, num_classes).to(device)
# The model emits log-probabilities (log_softmax), hence NLLLoss.
criterion = torch.nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training metrics and variables
best_val_loss = float('inf')
best_model_state_dict = None
train_accuracies, val_accuracies = [], []
train_losses, val_losses = [], []

# K-fold cross-validation within the training set.
# NOTE(review): the same model and optimizer are carried from fold to fold,
# so from the second fold on the validation windows have already been used
# for training in earlier folds. Validation metrics are therefore optimistic;
# re-creating model/optimizer per fold would give independent estimates but
# changes the experiment - left as is, please confirm the intent.
kf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
for fold, (train_index, val_index) in enumerate(kf.split(X_train, y_train)):
    X_train_fold, y_train_fold = X_train[train_index], y_train[train_index]
    X_val_fold, y_val_fold = X_train[val_index], y_train[val_index]

    train_loader_fold = DataLoader(TensorDataset(X_train_fold, y_train_fold), batch_size=batch_size, shuffle=True)
    val_loader_fold = DataLoader(TensorDataset(X_val_fold, y_val_fold), batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        train_loss, train_accuracy = train(model, train_loader_fold, optimizer, criterion, device)
        val_loss, val_accuracy = validate(model, val_loader_fold, criterion, device)

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_accuracy)
        val_accuracies.append(val_accuracy)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() returns references to the live parameter tensors,
            # which the optimizer keeps updating in place. Clone them so the
            # snapshot really holds the weights of this epoch.
            best_model_state_dict = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

# Averages over every epoch of every fold.
avg_train_accuracy = np.mean(train_accuracies)
avg_val_accuracy = np.mean(val_accuracies)
avg_train_loss = np.mean(train_losses)
avg_val_loss = np.mean(val_losses)

# Plot and save training/validation accuracies and losses
results_dir = f"results_identification_{task}"
os.makedirs(results_dir, exist_ok=True)


def _save_curve(train_values, val_values, quantity, filename):
    """Plot a training and a validation series and save the figure.

    The series hold one value per epoch with the folds appended one after
    another, so the x axis is the running epoch index across all folds.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(train_values, label=f'Training {quantity}')
    ax.plot(val_values, label=f'Validation {quantity}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(quantity)
    ax.legend()
    ax.set_title(f'Training and Validation {quantity}')
    fig.savefig(f"{results_dir}/{filename}")
    plt.close(fig)


_save_curve(train_accuracies, val_accuracies, 'Accuracy', 'accuracy_curve.png')
_save_curve(train_losses, val_losses, 'Loss', 'loss_curve_subject.png')


In [15]:
# Put the selected weights back into the model, write them to disk and
# build the loader for the held-out test session.
model.load_state_dict(best_model_state_dict)
model_file = f'results_identification_{task}/{task}_model.pth'
torch.save(model.state_dict(), model_file)
model.eval()
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:

# --- Predict a class for every test window ---
all_preds, all_labels = [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch.to(device))
        # Index of the highest log-probability is the predicted class.
        preds = outputs.max(dim=1).indices
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(y_batch.numpy())

# --- Summary metrics over the whole test set ---
accuracy = (np.array(all_preds) == np.array(all_labels)).mean()
print(f"\nIdentification Accuracy: {accuracy * 100:.2f}%\n")

# Support-weighted averages; classes without predictions score 0 instead of
# raising a warning.
weighted_options = dict(average='weighted', zero_division=0)
precision = precision_score(all_labels, all_preds, **weighted_options)
recall = recall_score(all_labels, all_preds, **weighted_options)
f1 = f1_score(all_labels, all_preds, **weighted_options)



In [None]:
# Collect the cross-validation averages and the test-set scores in one row
# and write them to the results folder.
metrics = dict(
    avg_train_accuracy=avg_train_accuracy,
    avg_val_accuracy=avg_val_accuracy,
    avg_train_loss=avg_train_loss,
    avg_val_loss=avg_val_loss,
    test_accuracy=accuracy,
    precision=precision,
    recall=recall,
    f1_score=f1,
)

metrics_df = pd.DataFrame([metrics])
metrics_csv = f'results_identification_{task}/{task}_metrics.csv'
metrics_df.to_csv(metrics_csv, index=False)



In [None]:

# Class ids 0..num_classes-1 and their display names S1..S<num_classes>.
class_ids = list(range(num_classes))
target_names = [f"S{i+1}" for i in class_ids]

# --- Confusion Matrix ---
# Passing `labels` fixes the matrix at num_classes x num_classes even when a
# class is absent from both the true and the predicted labels, so the rows
# and columns always line up with `target_names`.
conf_mat = confusion_matrix(all_labels, all_preds, labels=class_ids)

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues',
            xticklabels=target_names, yticklabels=target_names, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix - Subject Identification")
fig.tight_layout()
fig.savefig(f"results_identification_{task}/{task}_cf.png")


In [19]:
# Per-class precision / recall / F1 / support as a dict.
# `labels` is tied to `target_names` so both always have the same length.
report = classification_report(
    all_labels,
    all_preds,
    labels=list(range(len(target_names))),
    target_names=target_names,
    zero_division=0,
    output_dict=True,
)

# --- Save Per-Class Metrics to CSV ---
per_class_csv = f"results_identification_{task}/{task}_per_class_metrics.csv"
df_class_report = pd.DataFrame(report).transpose()
df_class_report.to_csv(per_class_csv)

# Report the paths that are actually written (the overall metrics file is
# produced by the earlier metrics cell).
print(f"Overall metrics saved to 'results_identification_{task}/{task}_metrics.csv'")
print(f"Per-class metrics saved to '{per_class_csv}'")

Overall metrics saved to 'overall_metrics.csv'
Per-class metrics saved to 'per_class_metrics.csv'
