# 1. Environment Setup & Dependencies


In [1]:
# Install dependencies (only needs to be run once)
%pip install numpy pandas mne tensorflow scikit-learn

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.utils import shuffle
from sklearn.metrics import roc_curve
import mne

# Silence TF Warnings
tf.get_logger().setLevel('ERROR')

[0mCollecting scikit-learn
[0m  Downloading scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m821.0 kB/s[0m  [33m0:00:11[0m eta [36m0:00:01[0m
[?25hDownloading joblib-1.5.2-py3-none-any.whl (308 kB)
Downloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [scikit-learn][0m [scikit-learn]
[1A[2KSuccessfully installed joblib-1.5.2 scikit-learn-1.7.2 threadpoolctl-3.6.0
Note: you may need to restart 

2025-11-20 17:09:14.068066: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-20 17:09:14.560521: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-20 17:09:16.587015: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


# 2. System Configuration

In [2]:
class Config:
    """Central place for every tunable constant used by the notebook.

    The attributes are plain class-level constants and are read as
    ``Config.NAME`` by the preprocessing, model and training cells.
    """

    # Dataset
    DATA_PATH = "eeg_data/"        # Local path or download location
    CHANNELS = ['Oz', 'T7', 'Cz']  # The 3 optimal channels
    SFREQ = 160.0                  # Target sampling frequency (Hz)

    # Segmentation Parameters (Table 1)
    T = 160           # Window length (samples) -> 1.0 sec at SFREQ = 160 Hz
    ETA = 20          # Number of overlapping segments per input image
    DELTA_STRIDE = 4  # Stride between segments (delta), in samples

    # The "Sampling Window" F is the total duration required to build one input.
    # It is derived from the values above instead of being hard-coded so that it
    # can never drift out of sync with them:
    # F = (eta - 1) * delta + T = (19 * 4) + 160 = 236 samples
    F_SAMPLING_WINDOW = (ETA - 1) * DELTA_STRIDE + T

    # Data Augmentation Stride (Big Delta)
    AUGMENTATION_STRIDE = 8  # Stride (samples) between consecutive inputs

    # Training
    BATCH_SIZE = 64   # Mini-batch size passed to model.fit
    EPOCHS = 30       # Number of training epochs
    LR = 0.0001       # Optimizer learning rate
    DROPOUT = 0.25    # Dropout rate applied after the fingerprint layer

# 3. Signal Preprocessing & Feature Engineering

![Sliding-window segmentation of the EEG signal](<Images/sliding window.webp>)


In [3]:
def gram_schmidt_orthogonalization(data):
    """Orthogonalize the rows (channels) of ``data`` with classical Gram-Schmidt.

    Row 0 is kept as-is. Every later row ``k`` has its projections onto all
    previously orthogonalized rows removed. The projection coefficients are
    computed from the *original* row ``k`` (classical, not modified,
    Gram-Schmidt). The rows are not normalized to unit length.

    Parameters
    ----------
    data : array-like, shape (n_channels, n_samples)
        One signal per row. Any number of channels is accepted.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``data``. Floating-point inputs keep their
        dtype; integer/boolean inputs are promoted to float so the fractional
        projection terms are not truncated.
    """
    data = np.asarray(data)
    if not np.issubdtype(data.dtype, np.inexact):
        # zeros_like() on an integer array would silently truncate the result.
        data = data.astype(float)

    orthogonalized = np.zeros_like(data)
    energies = []  # energies[j] == dot(v_j, v_j) for each finished row v_j

    for k in range(data.shape[0]):
        residual = data[k].copy()
        for j in range(k):
            # A zero-energy row spans nothing, so its projection is skipped
            # (this also avoids a division by zero).
            if energies[j] == 0:
                continue
            coeff = np.dot(data[k], orthogonalized[j]) / energies[j]
            residual = residual - coeff * orthogonalized[j]
        orthogonalized[k] = residual
        energies.append(np.dot(residual, residual))

    return orthogonalized

def preprocess_signal(raw_data):
    """Min-max scale each channel, then orthogonalize the channels.

    Each row of ``raw_data`` is rescaled independently using its own minimum
    and maximum (taken along axis 1). A constant row, whose range is zero, is
    divided by 1 instead so it maps to all zeros rather than NaN. The scaled
    array is then passed to ``gram_schmidt_orthogonalization`` and that result
    is returned.
    """
    channel_floor = np.min(raw_data, axis=1, keepdims=True)
    channel_ceiling = np.max(raw_data, axis=1, keepdims=True)

    # Per-channel range; flat channels get a unit range to avoid 0/0.
    span = channel_ceiling - channel_floor
    span[span == 0] = 1.0

    scaled = (raw_data - channel_floor) / span
    return gram_schmidt_orthogonalization(scaled)

def create_inputs(raw_data):
    """Cut a multi-channel recording into overlapping network inputs.

    A sampling window of ``Config.F_SAMPLING_WINDOW`` samples slides over the
    recording in steps of ``Config.AUGMENTATION_STRIDE``. Inside every window,
    ``Config.ETA`` segments of ``Config.T`` samples are taken, each starting
    ``Config.DELTA_STRIDE`` samples after the previous one, and transposed to
    (samples, channels).

    ``raw_data`` must be 2-D, (n_channels, n_total_samples). The return value is
    ``np.array`` of the collected inputs: shape
    (n_inputs, ETA, T, n_channels), or an empty 1-D array when the recording is
    shorter than one sampling window.
    """
    _, total_samples = raw_data.shape
    window = Config.F_SAMPLING_WINDOW

    collected = []
    # Every window start for which the full window still fits in the recording.
    for offset in range(0, total_samples - window + 1, Config.AUGMENTATION_STRIDE):
        frame = raw_data[:, offset:offset + window]
        stacked_segments = [
            frame[:, k * Config.DELTA_STRIDE : k * Config.DELTA_STRIDE + Config.T].T
            for k in range(Config.ETA)
        ]
        collected.append(np.array(stacked_segments))

    return np.array(collected)

# 4. Dataset Integration (MNE & PhysioNet)

In [4]:
def load_dataset(num_subjects=10):
    """Fetch, preprocess and window EEGBCI recordings for several subjects.

    For subject ids ``1 .. num_subjects`` the function requests run ``[1]``
    through ``eegbci.load_data``, reads the first returned EDF file, strips
    '.' characters from both ends of every channel name, keeps only
    ``Config.CHANNELS``, resamples to ``Config.SFREQ`` when the file's rate
    differs, and then runs ``preprocess_signal`` followed by ``create_inputs``.

    A subject is skipped (with a printed message) when nothing was downloaded,
    when a required channel is missing, or when any exception is raised while
    handling it.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X`` concatenates the inputs of all loaded subjects; ``y`` holds the
        matching labels, which are ``subject_id - 1``.
        ``(None, None)`` is returned when no subject produced any input.
    """
    import mne.datasets.eegbci as eegbci

    feature_chunks, label_chunks = [], []

    print(f"Loading {num_subjects} subjects...")

    for subject_id in range(1, num_subjects + 1):
        try:
            # Download (or reuse the cached copy of) the requested run.
            edf_files = eegbci.load_data(subject_id, [1], path=Config.DATA_PATH, update_path=False)
            if not edf_files:
                print(f"  Skipping Subject {subject_id}: Download failed.")
                continue

            raw = mne.io.read_raw_edf(edf_files[0], preload=True, verbose='ERROR')

            # Normalise channel names, then make sure every required one exists.
            raw.rename_channels(lambda label: label.strip('.'))
            present = set(raw.ch_names)
            missing = [ch for ch in Config.CHANNELS if ch not in present]
            if missing:
                print(f"  Subject {subject_id} missing channels: {missing}. Skipping.")
                continue

            raw.pick(Config.CHANNELS)

            # Bring the recording to the configured sampling rate if needed.
            if raw.info['sfreq'] != Config.SFREQ:
                raw.resample(Config.SFREQ, verbose='ERROR')

            inputs = create_inputs(preprocess_signal(raw.get_data()))

            if len(inputs) > 0:
                feature_chunks.append(inputs)
                label_chunks.append(np.full(len(inputs), subject_id - 1))
                print(f"  Subject {subject_id}: {len(inputs)} samples loaded.")

        except Exception as e:
            # Best effort: one bad subject must not abort the whole load.
            print(f"  Failed to load Subject {subject_id}: {e}")

    if feature_chunks:
        return np.concatenate(feature_chunks), np.concatenate(label_chunks)

    return None, None

# 5. Data Loading & Partitioning

In [5]:
# REQUESTED SIZES
REQ_TRAIN = 10
REQ_TEST = 4

print("=== Phase 1: Loading Data ===")
X_all, y_all = load_dataset(num_subjects=REQ_TRAIN + REQ_TEST)

if X_all is None:
    raise ValueError("No data loaded. Please check internet connection or data path.")

# 1. Shuffle immediately (fixed seed keeps the sample order reproducible)
X_all, y_all = shuffle(X_all, y_all, random_state=42)

# 2. Check actual number of loaded subjects
# NOTE: y_all holds the labels produced by load_dataset (subject_id - 1), so the
# values printed below are those 0-based labels, not the 1-based subject ids.
# np.unique already returns its result sorted in ascending order.
unique_subjects = np.unique(y_all)
num_loaded = len(unique_subjects)
print("\n--- DATA STATUS ---")
print(f"Requested: {REQ_TRAIN} Train + {REQ_TEST} Test = {REQ_TRAIN + REQ_TEST}")
print(f"Actually Loaded: {num_loaded} Subjects")

if num_loaded < 2:
    raise ValueError("Need at least 2 subjects to run.")

# 3. DYNAMIC SPLIT LOGIC
if num_loaded < (REQ_TRAIN + REQ_TEST):
    # Some subjects failed to load: hold out 2 for authentication and train on
    # whatever is left.
    print("Warning: Fewer subjects loaded than requested.")
    N_TEST = 2
    N_TRAIN = num_loaded - N_TEST
    print(f"Adjusting split to: {N_TRAIN} Train, {N_TEST} Test")
else:
    N_TRAIN = REQ_TRAIN
    N_TEST = REQ_TEST

# With exactly 2 loaded subjects the fallback above leaves nothing to train on;
# fail here with a clear message instead of building a zero-class model later.
if N_TRAIN < 1:
    raise ValueError(
        f"Only {num_loaded} subjects loaded: {N_TEST} are held out for testing, "
        "leaving no subjects for training."
    )

# 4. Apply Split
train_ids = unique_subjects[:N_TRAIN]
test_ids = unique_subjects[N_TRAIN : N_TRAIN + N_TEST]

print(f"Training on Subjects: {train_ids}")
print(f"Testing on Subjects: {test_ids}")

train_mask = np.isin(y_all, train_ids)
test_mask = np.isin(y_all, test_ids)

X_train, y_train = X_all[train_mask], y_all[train_mask]
X_test, y_test = X_all[test_mask], y_all[test_mask]

# Remap training labels to 0..N-1
map_lbl = {old: new for new, old in enumerate(train_ids)}
# train_ids is sorted and every y_train value is one of its entries, so the
# insertion index returned by searchsorted is exactly map_lbl[label].
y_train_map = np.searchsorted(train_ids, y_train)

=== Phase 1: Loading Data ===
Loading 14 subjects...
Downloading EEGBCI data


Downloading file 'S001/S001R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S001/S001R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.
Failed to download 'S001R01.edf'. Will attempt the download again 2 more times.
Failed to download 'S001R01.edf'. Will attempt the download again 1 more time.


  Failed to load Subject 1: HTTPSConnectionPool(host='physionet.org', port=443): Max retries exceeded with url: /files/eegmmidb/1.0.0/S001/S001R01.edf (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7fa707b01590>: Failed to resolve 'physionet.org' ([Errno -3] Temporary failure in name resolution)"))
Downloading EEGBCI data


Downloading file 'S002/S002R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S002/S002R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.
Failed to download 'S002R01.edf'. Will attempt the download again 2 more times.


Download complete in 19s (1.2 MB)
  Subject 2: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S003/S003R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S003/S003R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 12s (1.2 MB)
  Subject 3: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S004/S004R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S004/S004R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 23s (1.2 MB)
  Subject 4: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S005/S005R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S005/S005R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 05s (1.2 MB)
  Subject 5: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S006/S006R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S006/S006R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 13s (1.2 MB)
  Subject 6: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S007/S007R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S007/S007R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 05s (1.2 MB)
  Subject 7: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S008/S008R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S008/S008R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 05s (1.2 MB)
  Subject 8: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S009/S009R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S009/S009R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 04s (1.2 MB)
  Subject 9: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S010/S010R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S010/S010R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 05s (1.2 MB)
  Subject 10: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S011/S011R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S011/S011R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 10s (1.2 MB)
  Subject 11: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S012/S012R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S012/S012R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 11s (1.2 MB)
  Subject 12: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S013/S013R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S013/S013R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 05s (1.2 MB)
  Subject 13: 1191 samples loaded.
Downloading EEGBCI data


Downloading file 'S014/S014R01.edf' from 'https://physionet.org/files/eegmmidb/1.0.0/S014/S014R01.edf' to '/home/gamal/Projects/EEG-Person-Identification/eeg_data/MNE-eegbci-data/files/eegmmidb/1.0.0'.


Download complete in 12s (1.2 MB)
  Subject 14: 1171 samples loaded.

--- DATA STATUS ---
Requested: 10 Train + 4 Test = 14
Actually Loaded: 13 Subjects
Adjusting split to: 11 Train, 2 Test
Training on Subjects: [ 1  2  3  4  5  6  7  8  9 10 11]
Testing on Subjects: [12 13]


# 6. CNN Model Architecture

![CNN model architecture](<Images/CNN Model Architecture.webp>)

In [6]:
def build_paper_model(num_classes):
    """Build the CNN used as the proxy subject classifier.

    The input has shape ``(Config.ETA, Config.T, len(Config.CHANNELS))``.
    Three 3x3 'same'-padded ReLU convolutions with 128, 256 and 512 filters are
    applied; the first two are each followed by 2x2 max-pooling. The result is
    flattened into a 1024-unit ReLU dense layer named ``"fingerprint_layer"``,
    followed by dropout (``Config.DROPOUT``) and a softmax layer with
    ``num_classes`` units.

    Returns an uncompiled functional Keras model.
    """
    def conv3x3(tensor, filters):
        # One 3x3 convolution that keeps the spatial size and applies ReLU.
        return layers.Conv2D(filters, (3, 3), padding='same', activation='relu')(tensor)

    image = layers.Input(shape=(Config.ETA, Config.T, len(Config.CHANNELS)))

    features = image
    for filters in (128, 256):
        features = conv3x3(features, filters)
        features = layers.MaxPooling2D((2, 2))(features)

    # The last convolution is not pooled before flattening.
    features = conv3x3(features, 512)
    flat = layers.Flatten()(features)

    # The Fingerprint Layer
    fingerprint = layers.Dense(1024, activation='relu', name="fingerprint_layer")(flat)
    regularized = layers.Dropout(Config.DROPOUT)(fingerprint)

    class_probs = layers.Dense(num_classes, activation='softmax')(regularized)
    return models.Model(inputs=image, outputs=class_probs)

# 7. Model Training (Proxy Task)

In [None]:
print(f"\n=== Phase 2: Training Proxy Classifier ===")

# Seed Python, NumPy and TensorFlow RNGs before the model is created so weight
# initialisation, dropout and batch shuffling are repeatable across
# "Restart Kernel -> Run All" executions.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

model = build_paper_model(num_classes=len(train_ids))

# Labels are integer class indices (y_train_map), hence the sparse loss.
model.compile(optimizer=optimizers.RMSprop(learning_rate=Config.LR),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# validation_split holds out the last 10% of the provided samples for validation.
history = model.fit(
    X_train, y_train_map,
    epochs=Config.EPOCHS,
    batch_size=Config.BATCH_SIZE,
    validation_split=0.1,
    verbose=1
)

# 8. Authentication & Evaluation

![EEG authentication system](<Images/EEG authentication system.webp>)

In [None]:
# Helper Functions for Auth
def cosine_distance(v1, v2):
    """Return ``1 - cosine similarity`` of two vectors.

    Each vector is divided by its Euclidean norm plus a tiny constant (1e-10),
    so an all-zero vector yields a similarity of 0 (distance 1.0) instead of a
    division by zero.
    """
    tiny = 1e-10

    def _unit(vec):
        # Scale to (almost exactly) unit length.
        return vec / (np.linalg.norm(vec) + tiny)

    similarity = np.dot(_unit(v1), _unit(v2))
    return 1.0 - similarity

def find_optimal_threshold(gen_scores, imp_scores):
    """Pick the distance threshold closest to the equal-error-rate point.

    Parameters
    ----------
    gen_scores, imp_scores : sequence of float
        Distances (lower = more similar) of genuine and impostor probes.

    Returns
    -------
    float
        The distance threshold at which |FNR - FPR| is smallest on the ROC
        curve. ``0.5`` is returned as a fallback when either score list is
        empty, when the ROC computation fails, or when the selected ROC point
        carries a non-finite threshold.

    Notes
    -----
    ``roc_curve`` expects higher scores for the positive (genuine) class, so
    the distances are negated on the way in and the threshold is negated back
    on the way out.
    """
    default_threshold = 0.5
    if len(gen_scores) == 0 or len(imp_scores) == 0:
        return default_threshold

    y_true = [1] * len(gen_scores) + [0] * len(imp_scores)
    y_scores = [-s for s in gen_scores] + [-s for s in imp_scores]

    try:
        fpr, tpr, thresholds = roc_curve(y_true, y_scores)
        fnr = 1 - tpr
        eer_index = np.nanargmin(np.abs(fnr - fpr))
        threshold = -thresholds[eer_index]
    except Exception:
        # Best-effort fallback. Unlike a bare `except:`, this lets
        # KeyboardInterrupt / SystemExit propagate.
        return default_threshold

    # The first ROC point is an artificial "nothing accepted" point whose
    # threshold can be infinite; such a value is useless as a distance cut-off.
    if not np.isfinite(threshold):
        return default_threshold

    return threshold

# Execution
print("\n=== Phase 3: Extracting Fingerprinter ===")
# Re-use the trained network up to the named dense layer as a feature extractor.
fingerprint_model = models.Model(inputs=model.input, outputs=model.get_layer("fingerprint_layer").output)

print("\n=== Phase 4: Universal Authentication & Threshold Tuning ===")
if len(test_ids) >= 2:
    user_a = test_ids[0]
    user_b = test_ids[1]

    print(f"Scenario: Genuine User {user_a} vs Impostor User {user_b}")

    data_a = X_test[y_test == user_a]
    data_b = X_test[y_test == user_b]

    # Split A into Enrollment (50%) and Probe (50%)
    # NOTE(review): the samples were shuffled before the split and neighbouring
    # windows overlap, so enrollment and probe windows of user A may share raw
    # samples -- the reported rates are likely optimistic; confirm whether a
    # time-ordered split is wanted.
    split = len(data_a) // 2
    enroll_a = data_a[:split]
    probe_a = data_a[split:]
    probe_b = data_b

    if len(enroll_a) > 0:
        # 1. Create Template (mean fingerprint of the enrollment samples)
        enroll_fps = fingerprint_model.predict(enroll_a, verbose=0)
        template_a = np.mean(enroll_fps, axis=0)

        # 2. Collect Scores (cosine distance to the template; lower = closer)
        gen_fps = fingerprint_model.predict(probe_a, verbose=0)
        imp_fps = fingerprint_model.predict(probe_b, verbose=0)

        gen_scores = [cosine_distance(template_a, fp) for fp in gen_fps]
        imp_scores = [cosine_distance(template_a, fp) for fp in imp_fps]

        # 3. Find Best Threshold
        # NOTE: the threshold is tuned on the same probes it is evaluated on.
        best_threshold = find_optimal_threshold(gen_scores, imp_scores)

        # 4. Apply
        # The threshold comes from roc_curve, whose thresholds are inclusive
        # (a sample is accepted when its score reaches the threshold). A
        # distance exactly equal to the threshold therefore counts as accepted,
        # so every probe is either accepted or rejected.
        accepted_gen = sum(1 for s in gen_scores if s <= best_threshold)
        rejected_imp = sum(1 for s in imp_scores if s > best_threshold)

        print(f"\n--- Final Results (Threshold {best_threshold:.4f}) ---")
        print(f"Genuine Acceptance Rate (GAR): {accepted_gen/len(gen_scores)*100:.1f}%")
        print(f"Impostor Rejection Rate (GRR): {rejected_imp/len(imp_scores)*100:.1f}%")
    else:
        print("Insufficient data for User A.")
else:
    print("Not enough test subjects found for authentication.")