In [1]:
# Cell 1 — imports & reproducibility
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, optimizers

# reproducibility (best-effort)
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)
random.seed(seed)




In [3]:
# Cell 2 — adjust these paths to your dataset folders
normal_path = r"C:\Users\HP\Downloads\norm"        # files ending with .eea
schizo_path = r"C:\Users\HP\Downloads\sch"

# quick print
print("Normal files:", len([f for f in os.listdir(normal_path) if f.endswith('.eea')]))
print("Schizo files:", len([f for f in os.listdir(schizo_path) if f.endswith('.eea')]))


Normal files: 39
Schizo files: 45


In [4]:
# Cell 3 — load file lengths to decide windowing/fixed length
def read_signal(filepath):
    return np.loadtxt(filepath)   # your files are plain numeric text

def get_lengths(folder):
    lengths = []
    files = [f for f in os.listdir(folder) if f.endswith('.eea')]
    for f in files:
        s = read_signal(os.path.join(folder, f))
        lengths.append(len(s))
    return np.array(lengths), files

lens_norm, files_norm = get_lengths(normal_path)
lens_scz, files_scz = get_lengths(schizo_path)

print("Normal lengths summary:", np.min(lens_norm), np.median(lens_norm), np.max(lens_norm))
print("Schizophrenic lengths summary:", np.min(lens_scz), np.median(lens_scz), np.max(lens_scz))


Normal lengths summary: 122880 122880.0 122880
Schizophrenic lengths summary: 122880 122880.0 122880


In [9]:
# Cell 4A — parameters
FIXED_LEN = 500  # choose (e.g., 500). You can also set FIXED_LEN = min(all_lengths) after inspecting.

def load_eeg_fixed(folder_path, label, fixed_len=FIXED_LEN):
    X, y, ids = [], [], []
    for fname in os.listdir(folder_path):
        if not fname.endswith('.eea'):
            continue
        path = os.path.join(folder_path, fname)
        sig = read_signal(path).astype(np.float32)
        # normalize per-signal (z-score)
        sig = (sig - sig.mean()) / (sig.std() + 1e-8)
        # pad/trim
        if len(sig) > fixed_len:
            sig = sig[:fixed_len]
        elif len(sig) < fixed_len:
            sig = np.pad(sig, (0, fixed_len - len(sig)), 'constant')
        X.append(sig)
        y.append(label)
        ids.append(fname)  # file/subject id
    return np.array(X), np.array(y), np.array(ids)

Xn, yn, idn = load_eeg_fixed(normal_path, 0)
Xs, ys, ids = load_eeg_fixed(schizo_path, 1)
X = np.vstack([Xn, Xs])
y = np.hstack([yn, ys])
ids_all = np.hstack([idn, ids])

# reshape for keras: (samples, timesteps, channels)
X = X.reshape((X.shape[0], X.shape[1], 1))
print("X shape:", X.shape, "y shape:", y.shape)


X shape: (84, 500, 1) y shape: (84,)


In [5]:
# Cell 4B — windowing parameters
WINDOW = 256        # window length in samples
STRIDE = 64         # step between windows (overlap = WINDOW - STRIDE)

def windows_from_file(sig, window=WINDOW, stride=STRIDE):
    segs = []
    for start in range(0, len(sig) - window + 1, stride):
        seg = sig[start:start + window]
        segs.append(seg)
    return np.array(segs)  # shape = (num_windows, window)

def load_eeg_windows(folder_path, label, window=WINDOW, stride=STRIDE):
    X_list, y_list, subj_list = [], [], []
    for fname in os.listdir(folder_path):
        if not fname.endswith('.eea'):
            continue
        path = os.path.join(folder_path, fname)
        sig = read_signal(path).astype(np.float32)
        sig = (sig - sig.mean()) / (sig.std() + 1e-8)
        segs = windows_from_file(sig, window, stride)
        if len(segs) == 0:
            # If signal shorter than window, pad to window once
            padded = np.pad(sig, (0, max(0, window - len(sig))), 'constant')[:window]
            segs = np.expand_dims(padded, axis=0)
        X_list.append(segs)
        y_list.append(np.ones(segs.shape[0], dtype=int) * label)
        subj_list.extend([fname]*segs.shape[0])
    X = np.vstack(X_list)
    y = np.hstack(y_list)
    subj_ids = np.array(subj_list)
    return X, y, subj_ids

# load windows for both classes
Xn_w, yn_w, idn_w = load_eeg_windows(normal_path, 0)
Xs_w, ys_w, ids_w = load_eeg_windows(schizo_path, 1)
X_w = np.vstack([Xn_w, Xs_w])         # (N_samples, WINDOW)
y_w = np.hstack([yn_w, ys_w])
ids_w = np.hstack([idn_w, ids_w])

# reshape for keras
X_w = X_w.reshape((X_w.shape[0], X_w.shape[1], 1))
print("Windowed X shape:", X_w.shape, "y shape:", y_w.shape)


Windowed X shape: (161028, 256, 1) y shape: (161028,)


In [6]:
# Cell 5 — model builders
def build_cnn(input_shape):
    model = models.Sequential([
        layers.Conv1D(64, kernel_size=5, activation='relu', input_shape=input_shape),
        layers.BatchNormalization(),
        layers.MaxPooling1D(2),
        layers.Conv1D(128, kernel_size=3, activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling1D(2),
        layers.Conv1D(256, kernel_size=3, activation='relu'),
        layers.GlobalAveragePooling1D(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=optimizers.Adam(1e-3), loss='binary_crossentropy', metrics=['accuracy'])
    return model

def build_lstm(input_shape):
    model = models.Sequential([
        layers.Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=input_shape),
        layers.Bidirectional(layers.LSTM(32)),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=optimizers.Adam(1e-3), loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [10]:
# Cell 6 — callbacks
es = callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)
# model checkpoint will be added inside training loop per-model


In [11]:
# Cell 7 — choose data source
USE_WINDOWS = True

if USE_WINDOWS:
    X_data, y_data = X_w, y_w
else:
    X_data, y_data = X, y

# stratified split (note: if using windows, windows from same subject may leak -> do group-split instead)
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, stratify=y_data, random_state=seed
)

print("Train:", X_train.shape, "Test:", X_test.shape)

# build model and train
input_shape = X_train.shape[1:]
model = build_cnn(input_shape)   # or build_lstm(input_shape)

checkpoint_path = "best_model.h5"
mc = callbacks.ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, verbose=1)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=8,
    callbacks=[es, reduce_lr, mc],
    verbose=2
),


Train: (128822, 256, 1) Test: (32206, 256, 1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100

Epoch 1: val_loss improved from None to 0.35301, saving model to best_model.h5




12883/12883 - 156s - 12ms/step - accuracy: 0.7445 - loss: 0.4986 - val_accuracy: 0.8501 - val_loss: 0.3530 - learning_rate: 1.0000e-03
Epoch 2/100

Epoch 2: val_loss improved from 0.35301 to 0.28868, saving model to best_model.h5




12883/12883 - 147s - 11ms/step - accuracy: 0.8533 - loss: 0.3482 - val_accuracy: 0.8819 - val_loss: 0.2887 - learning_rate: 1.0000e-03
Epoch 3/100

Epoch 3: val_loss improved from 0.28868 to 0.25796, saving model to best_model.h5




12883/12883 - 446s - 35ms/step - accuracy: 0.8757 - loss: 0.3010 - val_accuracy: 0.8999 - val_loss: 0.2580 - learning_rate: 1.0000e-03
Epoch 4/100

Epoch 4: val_loss improved from 0.25796 to 0.24496, saving model to best_model.h5




12883/12883 - 147s - 11ms/step - accuracy: 0.8924 - loss: 0.2636 - val_accuracy: 0.9043 - val_loss: 0.2450 - learning_rate: 1.0000e-03
Epoch 5/100

Epoch 5: val_loss did not improve from 0.24496
12883/12883 - 258s - 20ms/step - accuracy: 0.9056 - loss: 0.2295 - val_accuracy: 0.9052 - val_loss: 0.2584 - learning_rate: 1.0000e-03
Epoch 6/100

Epoch 6: val_loss improved from 0.24496 to 0.20827, saving model to best_model.h5




12883/12883 - 146s - 11ms/step - accuracy: 0.9195 - loss: 0.2013 - val_accuracy: 0.9223 - val_loss: 0.2083 - learning_rate: 1.0000e-03
Epoch 7/100

Epoch 7: val_loss did not improve from 0.20827
12883/12883 - 146s - 11ms/step - accuracy: 0.9305 - loss: 0.1781 - val_accuracy: 0.9138 - val_loss: 0.2319 - learning_rate: 1.0000e-03
Epoch 8/100

Epoch 8: val_loss improved from 0.20827 to 0.18647, saving model to best_model.h5




12883/12883 - 713s - 55ms/step - accuracy: 0.9406 - loss: 0.1544 - val_accuracy: 0.9300 - val_loss: 0.1865 - learning_rate: 1.0000e-03
Epoch 9/100

Epoch 9: val_loss did not improve from 0.18647
12883/12883 - 147s - 11ms/step - accuracy: 0.9479 - loss: 0.1365 - val_accuracy: 0.9224 - val_loss: 0.2132 - learning_rate: 1.0000e-03
Epoch 10/100

Epoch 10: val_loss did not improve from 0.18647
12883/12883 - 147s - 11ms/step - accuracy: 0.9531 - loss: 0.1243 - val_accuracy: 0.9346 - val_loss: 0.1989 - learning_rate: 1.0000e-03
Epoch 11/100

Epoch 11: val_loss improved from 0.18647 to 0.15400, saving model to best_model.h5




12883/12883 - 520s - 40ms/step - accuracy: 0.9578 - loss: 0.1123 - val_accuracy: 0.9448 - val_loss: 0.1540 - learning_rate: 1.0000e-03
Epoch 12/100

Epoch 12: val_loss did not improve from 0.15400
12883/12883 - 147s - 11ms/step - accuracy: 0.9621 - loss: 0.1038 - val_accuracy: 0.9346 - val_loss: 0.1947 - learning_rate: 1.0000e-03
Epoch 13/100

Epoch 13: val_loss did not improve from 0.15400
12883/12883 - 153s - 12ms/step - accuracy: 0.9659 - loss: 0.0962 - val_accuracy: 0.9356 - val_loss: 0.1980 - learning_rate: 1.0000e-03
Epoch 14/100

Epoch 14: val_loss did not improve from 0.15400
12883/12883 - 147s - 11ms/step - accuracy: 0.9678 - loss: 0.0889 - val_accuracy: 0.9308 - val_loss: 0.2189 - learning_rate: 1.0000e-03
Epoch 15/100

Epoch 15: val_loss improved from 0.15400 to 0.15349, saving model to best_model.h5




12883/12883 - 331s - 26ms/step - accuracy: 0.9711 - loss: 0.0810 - val_accuracy: 0.9520 - val_loss: 0.1535 - learning_rate: 1.0000e-03
Epoch 16/100

Epoch 16: val_loss did not improve from 0.15349
12883/12883 - 149s - 12ms/step - accuracy: 0.9732 - loss: 0.0777 - val_accuracy: 0.9298 - val_loss: 0.2395 - learning_rate: 1.0000e-03
Epoch 17/100

Epoch 17: val_loss improved from 0.15349 to 0.14256, saving model to best_model.h5




12883/12883 - 533s - 41ms/step - accuracy: 0.9747 - loss: 0.0718 - val_accuracy: 0.9532 - val_loss: 0.1426 - learning_rate: 1.0000e-03
Epoch 18/100

Epoch 18: val_loss did not improve from 0.14256
12883/12883 - 149s - 12ms/step - accuracy: 0.9763 - loss: 0.0695 - val_accuracy: 0.9551 - val_loss: 0.1671 - learning_rate: 1.0000e-03
Epoch 19/100

Epoch 19: val_loss did not improve from 0.14256
12883/12883 - 147s - 11ms/step - accuracy: 0.9780 - loss: 0.0646 - val_accuracy: 0.9550 - val_loss: 0.1611 - learning_rate: 1.0000e-03
Epoch 20/100

Epoch 20: val_loss did not improve from 0.14256
12883/12883 - 151s - 12ms/step - accuracy: 0.9793 - loss: 0.0627 - val_accuracy: 0.9577 - val_loss: 0.1733 - learning_rate: 1.0000e-03
Epoch 21/100

Epoch 21: val_loss did not improve from 0.14256
12883/12883 - 150s - 12ms/step - accuracy: 0.9808 - loss: 0.0586 - val_accuracy: 0.9555 - val_loss: 0.1632 - learning_rate: 1.0000e-03
Epoch 22/100

Epoch 22: val_loss did not improve from 0.14256
12883/12883 - 1



12883/12883 - 77s - 6ms/step - accuracy: 0.9934 - loss: 0.0222 - val_accuracy: 0.9698 - val_loss: 0.1387 - learning_rate: 5.0000e-04
Epoch 27/100

Epoch 27: val_loss did not improve from 0.13873
12883/12883 - 78s - 6ms/step - accuracy: 0.9940 - loss: 0.0208 - val_accuracy: 0.9686 - val_loss: 0.1477 - learning_rate: 5.0000e-04
Epoch 28/100

Epoch 28: val_loss did not improve from 0.13873
12883/12883 - 81s - 6ms/step - accuracy: 0.9941 - loss: 0.0196 - val_accuracy: 0.9672 - val_loss: 0.1861 - learning_rate: 5.0000e-04
Epoch 29/100

Epoch 29: val_loss did not improve from 0.13873
12883/12883 - 76s - 6ms/step - accuracy: 0.9942 - loss: 0.0196 - val_accuracy: 0.9689 - val_loss: 0.1501 - learning_rate: 5.0000e-04
Epoch 30/100

Epoch 30: val_loss did not improve from 0.13873
12883/12883 - 76s - 6ms/step - accuracy: 0.9945 - loss: 0.0178 - val_accuracy: 0.9599 - val_loss: 0.2421 - learning_rate: 5.0000e-04
Epoch 31/100

Epoch 31: val_loss did not improve from 0.13873
12883/12883 - 76s - 6ms/s

In [12]:
# Continue training for more epochs
history2 = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,          # more epochs
    initial_epoch=len(history.epoch),  # resume from previous epoch
    batch_size=8,
    callbacks=[es, reduce_lr, mc],
    verbose=2
)


Epoch 37/100

Epoch 37: val_loss did not improve from 0.13873
12883/12883 - 150s - 12ms/step - accuracy: 0.9973 - loss: 0.0094 - val_accuracy: 0.9719 - val_loss: 0.1570 - learning_rate: 1.2500e-04
Epoch 38/100

Epoch 38: val_loss did not improve from 0.13873
12883/12883 - 148s - 11ms/step - accuracy: 0.9984 - loss: 0.0060 - val_accuracy: 0.9734 - val_loss: 0.1622 - learning_rate: 1.2500e-04
Epoch 39/100

Epoch 39: val_loss did not improve from 0.13873
12883/12883 - 205s - 16ms/step - accuracy: 0.9986 - loss: 0.0054 - val_accuracy: 0.9726 - val_loss: 0.1578 - learning_rate: 1.2500e-04
Epoch 40/100

Epoch 40: val_loss did not improve from 0.13873
12883/12883 - 150s - 12ms/step - accuracy: 0.9988 - loss: 0.0049 - val_accuracy: 0.9753 - val_loss: 0.1537 - learning_rate: 1.2500e-04
Epoch 41/100

Epoch 41: val_loss did not improve from 0.13873
12883/12883 - 157s - 12ms/step - accuracy: 0.9991 - loss: 0.0039 - val_accuracy: 0.9734 - val_loss: 0.1721 - learning_rate: 1.2500e-04
Epoch 42/100

E

In [14]:
# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")


[1m1007/1007[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9747 - loss: 0.1383
Test Accuracy: 0.9747, Test Loss: 0.1383


In [None]:
# Save the trained model
model.save("schizophrenia_detection_model_final.h5")

# Load it anytime later
from tensorflow.keras.models import load_model
model = load_model("schizophrenia_detection_model_final.h5")


In [18]:

# Generate random EEG-like data (you can adjust size to match your training, e.g., 256 samples)
sample_eeg = np.random.normal(0, 100, 256)  # mean=0, std=100 to mimic EEG voltage variations

# Save as .eea file
np.savetxt("C:/Users/HP/Downloads/new_sample.eea", sample_eeg, fmt="%.2f")

print("✅ Sample EEG file saved at: C:/Users/HP/Downloads/new_sample.eea")


✅ Sample EEG file saved at: C:/Users/HP/Downloads/new_sample.eea


In [19]:

# Example: load one new EEG sample
new_data = np.loadtxt("C:/Users/HP/Downloads/new_sample.eea")
new_data = new_data.reshape(1, 256, 1)  # reshape like your training data

# Predict
prediction = model.predict(new_data)

if prediction[0] > 0.5:
    print("🧠 Schizophrenia Detected")
else:
    print("🙂 Normal (Healthy)")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 271ms/step
🙂 Normal (Healthy)


In [2]:
import os
os.getcwd()

'C:\\Users\\HP\\jupyter notebook'