    Training data (first 2 sessions):
    - Genuine class (1):
      VEP samples of the target user (written as "user 1" here; the code selects the user via `target_subject`).
    - Forged class (0):
      non-VEP samples of every user other than the target user,
      i.e. the remaining 19 users.

    Test data (third session):
    - Genuine class (1):
      VEP samples of the target user.
    - Forged class (0):
      non-VEP samples of all other users, i.e. the remaining 19 users.

In [149]:
# Import necessary libraries
import torch
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
from sklearn.preprocessing import normalize
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.utils import shuffle


In [150]:

# Seed every random number generator used in this notebook (NumPy, PyTorch,
# Python's `random`) with the same value so repeated runs are comparable.
seed = 42
for seed_rng in (np.random.seed, torch.manual_seed, random.seed):
    seed_rng(seed)

if torch.cuda.is_available():
    # Seed the current GPU and every other visible GPU.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Ask cuDNN for deterministic kernels and switch its autotuner off.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [None]:
# --- Experiment configuration -------------------------------------------

# Subject whose VEP recordings form the genuine class.
target_subject = 'S20'

# Subject folder names 'S1' ... 'S20'.
all_subjects = [f'S{subject_number}' for subject_number in range(1, 21)]

# Task folders used as the forged (non-VEP) class.
tasks_non_vep = ['REO', 'MI1', 'MM1', 'MI2', 'MM2', 'MI3', 'MM3', 'MI4', 'REC']

# Session file stems. NOTE: these reuse the 'S<n>' spelling of the subject
# folders but name per-session CSV files, not subjects.
training_sessions = ['S1', 'S2']
test_session = ['S3']

# Root directory; files are read from <path>/<subject>/<task>/<session>.csv
path = 'PATH_TO_DATA'

# Rows per window passed to create_dataset.
lookback = 100

# Mini-batch size for every DataLoader.
batch_size = 32

num_classes = 2
input_channels = 20
sequence_length = 100

# Width of the GRU state and of the first fully connected layer.
hidden_size = 200

# Epochs per fold and number of stratified folds.
num_epochs = 60
num_folds = 10



In [152]:

# Function to create dataset
def create_dataset(df, lookback, label):
    """Cut a recording into non-overlapping windows that all share one label.

    Args:
        df: pandas DataFrame (or Series) with one row per sample; converted
            with ``to_numpy()``.
        lookback: Number of consecutive rows per window; must be positive.
        label: Class label attached to every window.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape ``(n_windows, lookback, *row_shape)``
        and ``y`` has shape ``(n_windows,)``, where
        ``n_windows = len(df) // lookback``. Trailing rows that do not fill a
        whole window are dropped. When the input is shorter than ``lookback``
        the result is an empty array that still has the full trailing shape,
        so it can be concatenated with non-empty results.

    Raises:
        ValueError: If ``lookback`` is not positive.
    """
    if lookback <= 0:
        raise ValueError(f"lookback must be a positive integer, got {lookback}")

    data = df.to_numpy()
    n_windows = len(data) // lookback
    # Keep only the rows that fill complete windows, then fold them into
    # (window, step, ...) in one reshape instead of a Python-level loop.
    windows = data[:n_windows * lookback].reshape(
        (n_windows, lookback) + data.shape[1:]
    )
    # np.array() copies, so the result never aliases the DataFrame's buffer.
    return np.array(windows), np.full(n_windows, label)


# Making Training data

## For genuine user

In [153]:
# Read the target subject's 'VEP3' recording for every training session and
# stack the sessions row-wise in a single concat.
vep_session_frames = [
    pd.read_csv(os.path.join(path, target_subject, 'VEP3', f'{session}.csv'))
    for session in training_sessions
]
Data_VEP = pd.concat(vep_session_frames, ignore_index=True)

# Remove the 'time' column so only the remaining columns are scaled.
Data_VEP = Data_VEP.drop(columns=['time'])

# Scale each column by its largest absolute value (norm='max' along axis 0).
# NOTE(review): the scale is computed from this frame alone, so every
# class/split in the notebook ends up with its own scaling — confirm intended.
normalize_vep = normalize(Data_VEP, norm='max', axis=0)
Data_VEP = pd.DataFrame(normalize_vep, columns=Data_VEP.columns)

In [154]:
Data_VEP.shape

(56320, 20)

In [155]:
# Row count of the genuine training frame; reused further down as the number
# of forged rows to sample so both classes contribute the same amount of data.
num_vep_samples_train = len(Data_VEP)
num_vep_samples_train

56320

## For Forged user

In [156]:
# Forged class (training): every non-VEP task of every subject except the
# target, over the training sessions.
# The frames are collected in a list and concatenated once; concatenating
# inside the loop re-copies the growing frame for every file read.
non_vep_frames = []
for sub in all_subjects:
    if sub == target_subject:
        continue
    for task in tasks_non_vep:
        for session in training_sessions:
            non_vep_path = os.path.join(path, sub, task, f'{session}.csv')
            non_vep_frames.append(pd.read_csv(non_vep_path))
Data_non_VEP = pd.concat(non_vep_frames, ignore_index=True)

# Remove the 'time' column, then scale each remaining column by its largest
# absolute value (norm='max' along axis 0).
# NOTE(review): the scale is computed from the forged frame alone, i.e.
# independently of the genuine data — confirm this is intended.
Data_non_VEP = Data_non_VEP.drop(columns=['time'])
normalize_non_vep = normalize(Data_non_VEP, norm='max', axis=0)
Data_non_VEP = pd.DataFrame(normalize_non_vep, columns=Data_non_VEP.columns)
                 

In [159]:
# Down-sample the forged class to the size of the genuine class by drawing
# whole `lookback`-row windows rather than individual rows. Sampling single
# rows (DataFrame.sample) returns them in random order, so every 100-row
# window built afterwards would be a bag of unrelated samples with no temporal
# structure, unlike the genuine windows and the test windows.
num_windows_needed = num_vep_samples_train // lookback
num_windows_available = len(Data_non_VEP) // lookback
window_rng = np.random.RandomState(987)  # same fixed seed as before
window_starts = lookback * window_rng.choice(
    num_windows_available, size=num_windows_needed, replace=False
)
# Each slice is one contiguous window; the windowing step downstream cuts
# the concatenated frame back into exactly these windows.
# NOTE(review): a window can still straddle the boundary between two
# concatenated recordings, as in the test-set windowing.
Data_non_VEP_sampled = pd.concat(
    [Data_non_VEP.iloc[start:start + lookback] for start in window_starts],
    ignore_index=True,
)

In [161]:
# Cut both training frames into `lookback`-row windows:
# genuine (VEP) windows get label 1, forged (non-VEP) windows get label 0.
X_vep_train, y_vep_train = create_dataset(Data_VEP, lookback, label=1)
X_non_vep_train, y_non_vep_train = create_dataset(Data_non_VEP_sampled, lookback, label=0)

In [162]:
X_vep_train.shape, y_vep_train.shape, X_non_vep_train.shape, y_non_vep_train.shape

((563, 100, 20), (563,), (563, 100, 20), (563,))

In [163]:
# Stack the two classes along the window axis: forged windows first, genuine
# windows second (the order is randomised later by shuffle_data).
X_train = np.concatenate((X_non_vep_train, X_vep_train), axis=0)
y_train = np.concatenate((y_non_vep_train, y_vep_train), axis=0)

In [164]:
X_train.shape, y_train.shape

((1126, 100, 20), (1126,))

# Making Test Data

## For Genuine User

In [165]:
# Read the target subject's 'VEP3' recording for every test session and stack
# the sessions row-wise in a single concat.
vep_test_frames = [
    pd.read_csv(os.path.join(path, target_subject, 'VEP3', f'{session}.csv'))
    for session in test_session
]
Data_VEP_test = pd.concat(vep_test_frames, ignore_index=True)

# Remove the 'time' column so only the remaining columns are scaled.
Data_VEP_test = Data_VEP_test.drop(columns=['time'])

# Scale each column by its largest absolute value (norm='max' along axis 0).
# NOTE(review): the scale comes from the test frame itself, not from the
# training data — confirm this is intended.
normalize_vep_test = normalize(Data_VEP_test, norm='max', axis=0)
Data_VEP_test = pd.DataFrame(normalize_vep_test, columns=Data_VEP_test.columns)

In [166]:
Data_VEP_test.shape

(28160, 20)

In [167]:
# Row count of the genuine test frame. In the visible part of the notebook it
# is only referenced by the commented-out down-sampling cell further down.
num_vep_samples_test = len(Data_VEP_test)
num_vep_samples_test

28160

## For Forged user

In [168]:
# Forged class (test): every non-VEP task of every subject except the target,
# over the test session(s).
# The frames are collected in a list and concatenated once; concatenating
# inside the loop re-copies the growing frame for every file read.
non_vep_test_frames = []
for sub in all_subjects:
    if sub == target_subject:
        continue
    for task in tasks_non_vep:
        for session in test_session:
            non_vep_path = os.path.join(path, sub, task, f'{session}.csv')
            non_vep_test_frames.append(pd.read_csv(non_vep_path))
Data_non_VEP_test = pd.concat(non_vep_test_frames, ignore_index=True)

# Remove the 'time' column, then scale each remaining column by its largest
# absolute value (norm='max' along axis 0).
# NOTE(review): the scale is computed from the forged test frame alone —
# confirm this is intended.
Data_non_VEP_test = Data_non_VEP_test.drop(columns=['time'])
normalize_non_vep_test = normalize(Data_non_VEP_test, norm='max', axis=0)
Data_non_VEP_test = pd.DataFrame(normalize_non_vep_test, columns=Data_non_VEP_test.columns)
                 

In [169]:
Data_non_VEP_test.shape

(2188800, 20)

In [170]:
# Data_non_VEP_sampled_test = Data_non_VEP_test.sample(n=num_vep_samples_test, random_state=987)

In [171]:
#Data_non_VEP_sampled_test.shape

In [172]:
# Window the test frames: genuine (VEP) -> label 1, forged (non-VEP) -> label 0.
# The forged frame is used in full (no down-sampling), so forged windows
# heavily outnumber genuine ones in the test set.
X_vep_test, y_vep_test = create_dataset(Data_VEP_test, lookback, label=1)
X_non_vep_test, y_non_vep_test = create_dataset(Data_non_VEP_test, lookback, label=0)

In [173]:
X_vep_test.shape, y_vep_test.shape, X_non_vep_test.shape, y_non_vep_test.shape

((281, 100, 20), (281,), (21888, 100, 20), (21888,))

In [174]:
# Stack the two test classes along the window axis: forged first, genuine second.
X_test = np.concatenate((X_non_vep_test, X_vep_test), axis=0)
y_test = np.concatenate((y_non_vep_test, y_vep_test), axis=0)

In [175]:
X_test.shape, y_test.shape

((22169, 100, 20), (22169,))

## Shape of train and test data

In [176]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1126, 100, 20), (1126,), (22169, 100, 20), (22169,))

In [177]:
def shuffle_data(X, y):
    """Reorder ``X`` and ``y`` with one shared random permutation.

    The permutation is drawn from NumPy's global random state, so the result
    depends on the current ``np.random`` seed. Both inputs are indexed with
    the same order, which keeps every sample paired with its label.

    Args:
        X: Array indexed along its first axis.
        y: Array of the same length as ``X``.

    Returns:
        Tuple ``(X_shuffled, y_shuffled)``.
    """
    order = np.random.permutation(len(X))
    shuffled_X = X[order]
    shuffled_y = y[order]
    return shuffled_X, shuffled_y

In [178]:
# Mix the two classes, which were concatenated class-by-class above.
# The test set is shuffled as well; it is later read with shuffle=False.
X_train, y_train = shuffle_data(X_train, y_train)
X_test, y_test = shuffle_data(X_test, y_test)

In [179]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1126, 100, 20), (1126,), (22169, 100, 20), (22169,))

In [180]:
# Convert the NumPy arrays to tensors: inputs as float32 (`.float()`),
# labels as int64 (`.long()`), the index type NLLLoss expects for targets.
X_train, y_train = torch.tensor(X_train).float(), torch.tensor(y_train).long()
X_test, y_test = torch.tensor(X_test).float(), torch.tensor(y_test).long()

In [181]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

(torch.Size([1126, 100, 20]),
 torch.Size([1126]),
 torch.Size([22169, 100, 20]),
 torch.Size([22169]))

### Defining Model

In [182]:

class EEGCNN_GRU(nn.Module):
    """Three Conv1d+BatchNorm+ELU stages, max-pooling, a GRU and a two-layer head.

    ``forward`` takes a tensor laid out as ``(batch, time, channels)`` with
    ``channels == input_channels`` and returns per-class log-probabilities of
    shape ``(batch, num_classes)``.

    Args:
        input_channels: Number of input channels fed to the first convolution.
        sequence_length: Accepted for interface compatibility; not used when
            building the layers.
        hidden_size: Width of the GRU state and of the first linear layer.
        num_classes: Number of output classes.
    """

    def __init__(self, input_channels, sequence_length, hidden_size, num_classes):
        super().__init__()
        conv_width = 64
        kernel_size = 3

        # Layers are created in a fixed order so parameter initialisation
        # draws from the RNG in the same sequence on every run.
        self.conv1 = nn.Conv1d(input_channels, conv_width, kernel_size)
        self.bn1 = nn.BatchNorm1d(conv_width)
        self.conv2 = nn.Conv1d(conv_width, conv_width, kernel_size)
        self.bn2 = nn.BatchNorm1d(conv_width)
        self.conv3 = nn.Conv1d(conv_width, conv_width, kernel_size)
        self.bn3 = nn.BatchNorm1d(conv_width)
        self.dropout1 = nn.Dropout(0.5)
        self.maxpool = nn.MaxPool1d(2)
        self.gru = nn.GRU(conv_width, hidden_size, batch_first=True)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return log-softmax class scores for a ``(batch, time, channels)`` input."""
        # Conv1d wants (batch, channels, time).
        features = x.transpose(1, 2)
        conv_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, batch_norm in conv_stages:
            features = F.elu(batch_norm(conv(features)))
        features = self.maxpool(self.dropout1(features))

        # Back to (batch, time, features) for the batch_first GRU.
        gru_out, _ = self.gru(features.transpose(1, 2))
        # Dropout is applied to the whole sequence before the last time step
        # is selected, matching the layer order of the network definition.
        last_step = self.dropout2(gru_out)[:, -1, :]

        hidden = F.elu(self.fc1(last_step))
        return F.log_softmax(self.fc2(hidden), dim=1)


In [183]:
# Training loop
def train(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimiser stepping ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(train_loss, train_accuracy)``: the mean of the per-batch
        losses (each batch weighted equally) and the accuracy in percent.
    """
    model.train()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    for batch_inputs, batch_labels in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        batch_outputs = model(batch_inputs)
        batch_loss = criterion(batch_outputs, batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()

        # Predicted class = index of the largest output along dim 1.
        batch_predictions = torch.max(batch_outputs.data, 1)[1]
        num_seen += batch_labels.size(0)
        num_correct += (batch_predictions == batch_labels).sum().item()

    return loss_sum / len(train_loader), 100 * num_correct / num_seen


In [184]:
# Validation loop
def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without updating its weights.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(val_loss, val_accuracy)``: the mean of the per-batch losses
        (each batch weighted equally) and the accuracy in percent.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_inputs, batch_labels in val_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            batch_outputs = model(batch_inputs)
            loss_sum += criterion(batch_outputs, batch_labels).item()

            # Predicted class = index of the largest output along dim 1.
            batch_predictions = torch.max(batch_outputs, 1)[1]
            num_seen += batch_labels.size(0)
            num_correct += (batch_predictions == batch_labels).sum().item()

    return loss_sum / len(val_loader), 100 * num_correct / num_seen


In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# The model emits log-probabilities (log_softmax), hence NLLLoss.
criterion = torch.nn.NLLLoss()

# Training metrics and variables
best_val_loss = float('inf')
best_model_state_dict = None
train_accuracies, val_accuracies = [], []
train_losses, val_losses = [], []

# K-fold cross-validation within the training set
kf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
for fold, (train_index, val_index) in enumerate(kf.split(X_train, y_train)):
    # Start every fold from a freshly initialised model and optimiser.
    # Reusing one model across folds means each validation split has already
    # been trained on in an earlier fold, which makes the validation
    # loss/accuracy (and the "best model" choice) optimistic.
    model = EEGCNN_GRU(input_channels, sequence_length, hidden_size, num_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    X_train_fold, y_train_fold = X_train[train_index], y_train[train_index]
    X_val_fold, y_val_fold = X_train[val_index], y_train[val_index]

    train_loader_fold = DataLoader(TensorDataset(X_train_fold, y_train_fold), batch_size=batch_size, shuffle=True)
    val_loader_fold = DataLoader(TensorDataset(X_val_fold, y_val_fold), batch_size=batch_size, shuffle=False)

    for epoch in range(num_epochs):
        train_loss, train_accuracy = train(model, train_loader_fold, optimizer, criterion, device)
        val_loss, val_accuracy = validate(model, val_loader_fold, criterion, device)

        # One entry per epoch per fold, in running order.
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_accuracy)
        val_accuracies.append(val_accuracy)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() hands back references to the live parameter
            # tensors; clone them so further training cannot overwrite the
            # snapshot of the best epoch.
            best_model_state_dict = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

# Averages over every recorded epoch of every fold.
avg_train_accuracy = np.mean(train_accuracies)
avg_val_accuracy = np.mean(val_accuracies)
avg_train_loss = np.mean(train_losses)
avg_val_loss = np.mean(val_losses)

    # Plot and save training/validation accuracies and losses
os.makedirs(f"results_{target_subject}", exist_ok=True)

# One figure per metric. Each series holds one value per recorded epoch in the
# order the values were appended during training.
curve_specs = (
    (
        train_accuracies, val_accuracies,
        'Training Accuracy', 'Validation Accuracy',
        'Accuracy', 'Training and Validation Accuracy',
        f"results_{target_subject}/accuracy_curve_subject_{target_subject}.png",
    ),
    (
        train_losses, val_losses,
        'Training Loss', 'Validation Loss',
        'Loss', 'Training and Validation Loss',
        f"results_{target_subject}/loss_curve_subject_{target_subject}.png",
    ),
)
for train_series, val_series, train_label, val_label, y_label, curve_title, curve_path in curve_specs:
    plt.figure(figsize=(10, 5))
    plt.plot(train_series, label=train_label)
    plt.plot(val_series, label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.title(curve_title)
    plt.savefig(curve_path)
    plt.close()

    # Load best model for testing
# Restore the selected weights, persist them, and switch to inference mode.
model.load_state_dict(best_model_state_dict)
torch.save(model.state_dict(), f'results_{target_subject}/{target_subject}_model.pth')
model.eval()

test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size, shuffle=False)

correct = 0
total = 0
predicted_labels = []
true_labels = []
predicted_probs_list = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)

        # Hard decision: index of the largest output per sample.
        predicted = torch.max(outputs.data, 1)[1]
        predicted_labels.extend(predicted.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

        # The model outputs log-probabilities; a softmax over them yields the
        # class probabilities used for ROC/EER later on.
        predicted_probs = F.softmax(outputs, dim=1)
        predicted_probs_list.append(predicted_probs.cpu().numpy())

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Accuracy in percent, and per-sample class probabilities as one array.
test_accuracy = 100 * correct / total
predicted_probs_np = np.concatenate(predicted_probs_list)

# Metrics calculations (class 1 = genuine is the positive class)
acc = accuracy_score(true_labels, predicted_labels)
prec = precision_score(true_labels, predicted_labels)
rec = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

# labels=[0, 1] pins the matrix to 2x2 even if one class never occurs in the
# labels/predictions, so the four-way unpack below cannot fail.
cm = confusion_matrix(true_labels, predicted_labels, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

# All four rates fall back to 0 on an empty denominator, like far/frr do.
tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
fpr = fp / (fp + tn) if (fp + tn) > 0 else 0

far = fp / (fp + tn) if (fp + tn) > 0 else 0  # False Acceptance Rate
frr = fn / (fn + tp) if (fn + tp) > 0 else 0  # False Rejection Rate

# Calculating EER: the ROC operating point where the false-positive rate is
# closest to the false-negative rate (1 - TPR). The index is computed once and
# reused for both the threshold and the rate.
fpr_curve, tpr_curve, thresholds = roc_curve(true_labels, predicted_probs_np[:, 1])
eer_index = np.nanargmin(np.abs(fpr_curve - (1 - tpr_curve)))
eer_threshold = thresholds[eer_index]
eer = fpr_curve[eer_index]

auc = roc_auc_score(true_labels, predicted_probs_np[:, 1])

# Saving metrics as CSV (a single-row table, one column per metric).
# NOTE: "test_accuracy" is stored in percent, while precision/recall/F1 and
# the rates below are fractions.
metrics = {}
metrics["avg_train_accuracy"] = avg_train_accuracy
metrics["avg_val_accuracy"] = avg_val_accuracy
metrics["avg_train_loss"] = avg_train_loss
metrics["avg_val_loss"] = avg_val_loss
metrics["test_accuracy"] = test_accuracy
metrics["precision"] = prec
metrics["recall"] = rec
metrics["f1_score"] = f1
metrics["TPR"] = tpr
metrics["FPR"] = fpr
metrics["AUC"] = auc
metrics["EER"] = eer
metrics["FAR"] = far
metrics["FRR"] = frr

metrics_csv_path = f"results_{target_subject}/metrics_subject_{target_subject}.csv"
metrics_df = pd.DataFrame([metrics])
metrics_df.to_csv(metrics_csv_path, index=False)

# Saving Confusion Matrix (rows: true labels, columns: predicted labels)
cm_fig, cm_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, annot_kws={"size": 35}, ax=cm_ax)
cm_ax.set_xlabel('Predicted labels')
cm_ax.set_ylabel('True labels')
cm_ax.set_title('Confusion Matrix')
cm_fig.savefig(f"results_{target_subject}/confusion_matrix_subject_{target_subject}.png")
plt.close(cm_fig)

# Saving ROC Curve
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr_curve, tpr_curve, label=f"ROC Curve (AUC = {auc:.2f})")
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
roc_ax.legend()
roc_fig.savefig(f"results_{target_subject}/roc_curve_subject_{target_subject}.png")
plt.close(roc_fig)

print(f"Completed processing for subject {target_subject}.")


Completed processing for subject S20.


In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, 
    confusion_matrix, roc_auc_score, roc_curve
)

# Recompute the test metrics from the stored labels and predictions
# (class 1 is the positive class).
label_pair = (true_labels, predicted_labels)
acc = accuracy_score(*label_pair)
prec = precision_score(*label_pair)
rec = recall_score(*label_pair)
f1 = f1_score(*label_pair)

# Confusion-matrix counts and the rates derived from them; every rate falls
# back to 0 when its denominator is empty.
cm = confusion_matrix(*label_pair)
tn, fp, fn, tp = cm.ravel()
actual_positives = tp + fn
actual_negatives = fp + tn
tpr = tp / actual_positives if actual_positives > 0 else 0  # True Positive Rate
fpr = fp / actual_negatives if actual_negatives > 0 else 0  # False Positive Rate

# False Acceptance Rate and False Rejection Rate
far = fp / actual_negatives if actual_negatives > 0 else 0
frr = fn / actual_positives if actual_positives > 0 else 0

# ROC curve and AUC from the second probability column (positive class).
predicted_probs = predicted_probs_np[:, 1]
fpr_curve, tpr_curve, thresholds = roc_curve(true_labels, predicted_probs)
auc = roc_auc_score(true_labels, predicted_probs)

# Equal Error Rate: the ROC point where FPR is closest to FNR (1 - TPR).
fnr_curve = 1 - tpr_curve
eer_index = np.nanargmin(np.abs(fpr_curve - fnr_curve))
eer = fpr_curve[eer_index]
eer_threshold = thresholds[eer_index]

# Collect the key metrics into a single-row table. Here "test_accuracy" is the
# sklearn accuracy (a fraction).
metrics = {}
metrics["avg_train_accuracy"] = avg_train_accuracy
metrics["avg_val_accuracy"] = avg_val_accuracy
metrics["avg_train_loss"] = avg_train_loss
metrics["avg_val_loss"] = avg_val_loss
metrics["test_accuracy"] = acc
metrics["precision"] = prec
metrics["recall"] = rec
metrics["f1_score"] = f1
metrics["TPR"] = tpr
metrics["FPR"] = fpr
metrics["AUC"] = auc
metrics["EER"] = eer
metrics["Threshold at EER"] = eer_threshold
metrics["FAR"] = far
metrics["FRR"] = frr

# The output folder is keyed by subject and batch size.
result_folder = f"results_for_{target_subject}_{batch_size}"
os.makedirs(result_folder, exist_ok=True)

metrics_df = pd.DataFrame([metrics])
metrics_df.to_csv(f"{result_folder}/metrics_subject_{target_subject}.csv", index=False)

# Dump the ROC operating points as CSV text: a header line, one
# "fpr,tpr,threshold" line per point, a blank line, then a summary line with
# the EER and its threshold.
roc_lines = ["fpr,tpr,thresholds\n"]
roc_lines.extend(
    f"{fpr_val},{tpr_val},{threshold}\n"
    for fpr_val, tpr_val, threshold in zip(fpr_curve, tpr_curve, thresholds)
)
roc_lines.append(f"\nEER:,{eer},Threshold at EER:,{eer_threshold}\n")
fpr_tpr_thresh_content = "".join(roc_lines)

# Write everything in one call.
with open(f"{result_folder}/fpr_tpr_thresholds_subject_{target_subject}.csv", "w") as roc_file:
    roc_file.write(fpr_tpr_thresh_content)

print("Metrics and ROC-related information saved successfully.")
