In [2]:
!pip install numpy matplotlib seaborn scikit-learn tensorflow opencv-python


Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.5 MB 660.6 kB/s eta 0:01:00
   ---------------------------------------- 0.1/39.5 MB 975.2 kB/s eta 0:00:41
   ---------------------------------------- 0.2/39.5 MB 1.5 MB/s eta 0:00:27
   ---------------------------------------- 0.4/39.5 MB 2.4 MB/s eta 0:00:17
    --------------------------------------- 0.8/39.5 MB 3.8 MB/s eta 0:00:11
   - -------------------------------------- 1.4/39.5 MB 5.0 MB/s eta 0:00:08
   - -------------------------------------- 1.4/39.5 MB 5.3 MB/s eta 0:00:08
   - -------------------------------------- 1.7/39.5 MB 5.0 MB/s eta 0:00:08
   -- ------------------------------------- 2.0/39.5 MB 5.


[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
import cv2
from collections import Counter
import glob

In [2]:


# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print("All libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")
print("Setup complete!")

All libraries imported successfully!
TensorFlow version: 2.19.0
Setup complete!


In [3]:
# UPDATE THESE PATHS TO YOUR ACTUAL DATA FOLDERS
SCHIZOPHRENIA_PATH = "D:/Milon2/HHT/S"  # Replace with your actual path
HEALTHY_PATH = "D:/Milon2/HHT/H"              # Replace with your actual path

# Image settings
IMG_SIZE = (224, 224)  # Adjust based on your image size

def load_and_preprocess_data(schizophrenia_path, healthy_path, img_size):
    """Load and preprocess HHT plot images"""
    print("Loading and preprocessing data...")
    
    X = []
    y = []
    
    # Load schizophrenia images (label: 1)
    print("Loading schizophrenia images...")
    schizo_files = glob.glob(os.path.join(schizophrenia_path, "*.png"))
    print(f"Found {len(schizo_files)} schizophrenia images")
    
    for i, file_path in enumerate(schizo_files):
        if i % 1000 == 0:  # Progress indicator
            print(f"Processing schizophrenia image {i+1}/{len(schizo_files)}")
        try:
            img = load_img(file_path, target_size=img_size)
            img_array = img_to_array(img) / 255.0  # Normalize to [0,1]
            X.append(img_array)
            y.append(1)  # Schizophrenia
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
    
    print(f"Loaded {len([i for i in y if i == 1])} schizophrenia images")
    
    # Load healthy images (label: 0)
    print("Loading healthy images...")
    healthy_files = glob.glob(os.path.join(healthy_path, "*.png"))
    print(f"Found {len(healthy_files)} healthy images")
    
    for i, file_path in enumerate(healthy_files):
        if i % 1000 == 0:  # Progress indicator
            print(f"Processing healthy image {i+1}/{len(healthy_files)}")
        try:
            img = load_img(file_path, target_size=img_size)
            img_array = img_to_array(img) / 255.0  # Normalize to [0,1]
            X.append(img_array)
            y.append(0)  # Healthy
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
    
    print(f"Loaded {len([i for i in y if i == 0])} healthy images")
    
    # Convert to numpy arrays
    X = np.array(X)
    y = np.array(y)
    
    print(f"\nData loading complete!")
    print(f"Total images loaded: {len(X)}")
    print(f"Schizophrenia samples: {np.sum(y == 1)}")
    print(f"Healthy samples: {np.sum(y == 0)}")
    print(f"Image shape: {X.shape}")
    
    return X, y

# Load the data (MAKE SURE TO UPDATE PATHS ABOVE!)
X, y = load_and_preprocess_data(SCHIZOPHRENIA_PATH, HEALTHY_PATH, IMG_SIZE)

# Create results directory
os.makedirs("results", exist_ok=True)
print("Results directory created!")

Loading and preprocessing data...
Loading schizophrenia images...
Found 5146 schizophrenia images
Processing schizophrenia image 1/5146
Processing schizophrenia image 1001/5146
Processing schizophrenia image 2001/5146
Processing schizophrenia image 3001/5146
Processing schizophrenia image 4001/5146
Processing schizophrenia image 5001/5146
Loaded 5146 schizophrenia images
Loading healthy images...
Found 4235 healthy images
Processing healthy image 1/4235
Processing healthy image 1001/4235
Processing healthy image 2001/4235
Processing healthy image 3001/4235
Processing healthy image 4001/4235
Loaded 4235 healthy images

Data loading complete!
Total images loaded: 9381
Schizophrenia samples: 5146
Healthy samples: 4235
Image shape: (9381, 224, 224, 3)
Results directory created!


In [4]:
from tensorflow.keras.layers import Input

def create_cnn_model(img_size):
    """Create CNN architecture for EEG classification"""
    model = Sequential([
        # Input layer (fixes the warning)
        Input(shape=(*img_size, 3)),
        
        # First Conv Block
        Conv2D(32, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        
        # Second Conv Block
        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        
        # Third Conv Block
        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        
        # Fourth Conv Block
        Conv2D(256, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        
        # Flatten and Dense layers
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')  # Binary classification
    ])
    
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    
    return model

def create_data_augmentation():
    """Create data augmentation generator"""
    return ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        zoom_range=0.1,
        fill_mode='nearest'
    )

# Test model creation
print("Testing model creation...")
test_model = create_cnn_model(IMG_SIZE)
print("Model created successfully!")
print("\nModel Summary:")
test_model.summary()

# Test data augmentation
print("\nTesting data augmentation...")
datagen = create_data_augmentation()
print("Data augmentation generator created successfully!")

Testing model creation...
Model created successfully!

Model Summary:



Testing data augmentation...
Data augmentation generator created successfully!


In [5]:
# Cross-validation parameters
N_SPLITS = 3
EPOCHS = 50
BATCH_SIZE = 32

# Initialize variables for storing results
fold_results = []
fold_accuracies = []
best_accuracy = 0
best_model_path = "best_eeg_cnn_model.h5"

# Initialize StratifiedKFold
print(f"Setting up {N_SPLITS}-fold cross validation...")
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Show the data distribution for each fold
print("\nFold distribution preview:")
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    train_schizo = np.sum(y[train_idx])
    train_healthy = len(train_idx) - train_schizo
    val_schizo = np.sum(y[val_idx])
    val_healthy = len(val_idx) - val_schizo
    
    print(f"Fold {fold+1}:")
    print(f"  Training: {len(train_idx)} samples (Schizo: {train_schizo}, Healthy: {train_healthy})")
    print(f"  Validation: {len(val_idx)} samples (Schizo: {val_schizo}, Healthy: {val_healthy})")

print(f"\nCross-validation setup complete!")
print(f"Ready to train {N_SPLITS} models with {EPOCHS} epochs each.")

Setting up 3-fold cross validation...

Fold distribution preview:
Fold 1:
  Training: 6254 samples (Schizo: 3430, Healthy: 2824)
  Validation: 3127 samples (Schizo: 1716, Healthy: 1411)
Fold 2:
  Training: 6254 samples (Schizo: 3431, Healthy: 2823)
  Validation: 3127 samples (Schizo: 1715, Healthy: 1412)
Fold 3:
  Training: 6254 samples (Schizo: 3431, Healthy: 2823)
  Validation: 3127 samples (Schizo: 1715, Healthy: 1412)

Cross-validation setup complete!
Ready to train 3 models with 50 epochs each.


In [None]:
# CHANGE THIS VALUE TO TRAIN DIFFERENT FOLDS (0, 1, 2, 3, 4)
CURRENT_FOLD = 0  # Change this to 0, 1, 2, 3, or 4 for each fold

def train_single_fold(fold_number, X, y, skf, epochs=50):
    """Train a single fold"""
    print(f"\n{'='*50}")
    print(f"TRAINING FOLD {fold_number + 1}/{N_SPLITS}")
    print(f"{'='*50}")
    
    # Get the specific fold split
    folds = list(skf.split(X, y))
    train_idx, val_idx = folds[fold_number]
    
    # Split data for current fold
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]
    
    print(f"Training samples: {len(X_train)} (Schizo: {np.sum(y_train)}, Healthy: {len(y_train) - np.sum(y_train)})")
    print(f"Validation samples: {len(X_val)} (Schizo: {np.sum(y_val)}, Healthy: {len(y_val) - np.sum(y_val)})")
    
    # Create model for this fold
    model = create_cnn_model(IMG_SIZE)
    
    # Callbacks
    checkpoint = ModelCheckpoint(
        f'results/fold_{fold_number+1}_model.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )
    
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1
    )
    
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=5,
        min_lr=1e-7,
        verbose=1
    )
    
    callbacks = [checkpoint, early_stopping, reduce_lr]
    
    # Data augmentation for training
    datagen = create_data_augmentation()
    
    # Train model
    print(f"Starting training for fold {fold_number + 1}...")
    history = model.fit(
        datagen.flow(X_train, y_train, batch_size=BATCH_SIZE),
        steps_per_epoch=len(X_train) // BATCH_SIZE,
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=callbacks,
        verbose=1
    )
    
    # Load best model for this fold
    best_fold_model = load_model(f'results/fold_{fold_number+1}_model.h5')
    
    # Evaluate on validation set
    val_predictions = best_fold_model.predict(X_val)
    val_predictions_binary = (val_predictions > 0.5).astype(int).flatten()
    
    # Calculate accuracy
    fold_accuracy = accuracy_score(y_val, val_predictions_binary)
    
    print(f"\nFold {fold_number + 1} Validation Accuracy: {fold_accuracy:.4f}")
    
    # Generate confusion matrix for this fold
    cm = confusion_matrix(y_val, val_predictions_binary)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
               xticklabels=['Healthy', 'Schizophrenia'],
               yticklabels=['Healthy', 'Schizophrenia'])
    plt.title(f'Confusion Matrix - Fold {fold_number + 1}\nAccuracy: {fold_accuracy:.4f}')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig(f'results/confusion_matrix_fold_{fold_number+1}.png', dpi=300, bbox_inches='tight')
    plt.show()
    
    # Classification report
    print(f"\nClassification Report - Fold {fold_number + 1}:")
    print(classification_report(y_val, val_predictions_binary, 
                              target_names=['Healthy', 'Schizophrenia']))
    
    return fold_accuracy, cm, history.history, best_fold_model

# Train the current fold
fold_accuracy, confusion_mat, history, trained_model = train_single_fold(
    CURRENT_FOLD, X, y, skf, EPOCHS
)

# Store results
fold_results.append({
    'fold': CURRENT_FOLD + 1,
    'accuracy': fold_accuracy,
    'confusion_matrix': confusion_mat,
    'history': history
})

fold_accuracies.append(fold_accuracy)

# Check if this is the best model so far
if fold_accuracy > best_accuracy:
    best_accuracy = fold_accuracy
    trained_model.save(best_model_path)
    print(f"New best model saved with accuracy: {best_accuracy:.4f}")

print(f"Fold {CURRENT_FOLD + 1} training complete!")
print(f"To train the next fold, change CURRENT_FOLD to {CURRENT_FOLD + 1} and run this section again.")


TRAINING FOLD 1/3
Training samples: 6254 (Schizo: 3430, Healthy: 2824)
Validation samples: 3127 (Schizo: 1716, Healthy: 1411)
Starting training for fold 1...


  self._warn_if_super_not_called()


Epoch 1/50
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5111 - loss: 4.7669
Epoch 1: val_accuracy improved from -inf to 0.55037, saving model to results/fold_1_model.h5




[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 1s/step - accuracy: 0.5112 - loss: 4.7567 - val_accuracy: 0.5504 - val_loss: 0.7251 - learning_rate: 0.0010
Epoch 2/50
[1m  1/195[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:26[0m 1s/step - accuracy: 0.5938 - loss: 0.6906




Epoch 2: val_accuracy did not improve from 0.55037
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 97ms/step - accuracy: 0.5938 - loss: 0.6906 - val_accuracy: 0.5497 - val_loss: 0.7284 - learning_rate: 0.0010
Epoch 3/50
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5495 - loss: 0.6923
Epoch 3: val_accuracy did not improve from 0.55037
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 1s/step - accuracy: 0.5494 - loss: 0.6923 - val_accuracy: 0.4579 - val_loss: 0.7986 - learning_rate: 0.0010
Epoch 4/50
[1m  1/195[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:35[0m 1s/step - accuracy: 0.5000 - loss: 0.6955
Epoch 4: val_accuracy did not improve from 0.55037
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 111ms/step - accuracy: 0.5000 - loss: 0.6955 - val_accuracy: 0.4583 - val_loss: 0.7962 - learning_rate: 0.0010
Epoch 5/50
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step