In [2]:
!pip install numpy pandas matplotlib seaborn scikit-learn tensorflow opencv-python pillow


Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-win_amd64.whl.metadata (4.1 kB)
Collecting opencv-python
  Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Using cached absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py

  You can safely remove it manually.
  You can safely remove it manually.


In [3]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.utils import to_categorical
import cv2
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

class EEGClassifier:
    def __init__(self, schizo_path, healthy_path, img_size=(224, 224)):
        self.schizo_path = schizo_path
        self.healthy_path = healthy_path
        self.img_size = img_size
        self.X = None
        self.y = None
        self.fold_results = []
        
    def load_and_preprocess_data(self):
        """Load images and create labels"""
        print("Loading and preprocessing data...")
        
        images = []
        labels = []
        
        # Load schizophrenia images (label = 1)
        schizo_files = [f for f in os.listdir(self.schizo_path) if f.endswith('.png')]
        print(f"Loading {len(schizo_files)} schizophrenia images...")
        
        for img_file in schizo_files:
            img_path = os.path.join(self.schizo_path, img_file)
            try:
                img = load_img(img_path, target_size=self.img_size)
                img_array = img_to_array(img) / 255.0  # Normalize to [0,1]
                images.append(img_array)
                labels.append(1)  # Schizophrenia
            except Exception as e:
                print(f"Error loading {img_file}: {e}")
        
        # Load healthy images (label = 0)
        healthy_files = [f for f in os.listdir(self.healthy_path) if f.endswith('.png')]
        print(f"Loading {len(healthy_files)} healthy images...")
        
        for img_file in healthy_files:
            img_path = os.path.join(self.healthy_path, img_file)
            try:
                img = load_img(img_path, target_size=self.img_size)
                img_array = img_to_array(img) / 255.0  # Normalize to [0,1]
                images.append(img_array)
                labels.append(0)  # Healthy
            except Exception as e:
                print(f"Error loading {img_file}: {e}")
        
        self.X = np.array(images)
        self.y = np.array(labels)
        
        print(f"Data loaded: {self.X.shape[0]} images")
        print(f"Schizophrenia samples: {np.sum(self.y == 1)}")
        print(f"Healthy samples: {np.sum(self.y == 0)}")
        
        return self.X, self.y
    
    def create_cnn_model(self, input_shape):
        """Create CNN model architecture"""
        model = models.Sequential([
            # First Convolutional Block
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
            
            # Second Convolutional Block
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
            
            # Third Convolutional Block
            layers.Conv2D(128, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
            
            # Fourth Convolutional Block
            layers.Conv2D(256, (3, 3), activation='relu'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
            
            # Flatten and Dense layers
            layers.GlobalAveragePooling2D(),
            layers.Dense(512, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Dense(256, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Dense(1, activation='sigmoid')  # Binary classification
        ])
        
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss='binary_crossentropy',
            metrics=['accuracy', 'precision', 'recall']
        )
        
        return model
    
    def create_data_augmentation(self):
        """Create data augmentation generator"""
        datagen = ImageDataGenerator(
            rotation_range=10,
            width_shift_range=0.1,
            height_shift_range=0.1,
            horizontal_flip=True,
            zoom_range=0.1,
            fill_mode='nearest'
        )
        return datagen
    
    def train_with_kfold(self, n_folds=5, epochs=50):
        """Train model with k-fold cross validation"""
        if self.X is None or self.y is None:
            raise ValueError("Data not loaded. Call load_and_preprocess_data() first.")
        
        # Initialize k-fold cross validation
        skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
        
        fold_accuracies = []
        fold_precisions = []
        fold_recalls = []
        fold_f1_scores = []
        all_y_true = []
        all_y_pred = []
        
        # Create data augmentation
        datagen = self.create_data_augmentation()
        
        print(f"\nStarting {n_folds}-fold cross validation...")
        
        for fold, (train_idx, val_idx) in enumerate(skf.split(self.X, self.y)):
            print(f"\n{'='*50}")
            print(f"FOLD {fold + 1}/{n_folds}")
            print(f"{'='*50}")
            
            # Split data
            X_train, X_val = self.X[train_idx], self.X[val_idx]
            y_train, y_val = self.y[train_idx], self.y[val_idx]
            
            print(f"Training samples: {len(X_train)}")
            print(f"Validation samples: {len(X_val)}")
            
            # Create model for this fold
            model = self.create_cnn_model(input_shape=self.X.shape[1:])
            
            # Callbacks
            early_stopping = callbacks.EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True,
                verbose=1
            )
            
            reduce_lr = callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=5,
                min_lr=1e-7,
                verbose=1
            )
            
            model_checkpoint = callbacks.ModelCheckpoint(
                f'best_model_fold_{fold+1}.h5',
                monitor='val_accuracy',
                save_best_only=True,
                verbose=1
            )
            
            # Train model with data augmentation
            history = model.fit(
                datagen.flow(X_train, y_train, batch_size=32),
                steps_per_epoch=len(X_train) // 32,
                epochs=epochs,
                validation_data=(X_val, y_val),
                callbacks=[early_stopping, reduce_lr, model_checkpoint],
                verbose=1
            )
            
            # Evaluate on validation set
            val_loss, val_accuracy, val_precision, val_recall = model.evaluate(X_val, y_val, verbose=0)
            val_f1 = 2 * (val_precision * val_recall) / (val_precision + val_recall + 1e-7)
            
            # Predictions
            y_pred_proba = model.predict(X_val, verbose=0)
            y_pred = (y_pred_proba > 0.5).astype(int).flatten()
            
            # Store results
            fold_accuracies.append(val_accuracy)
            fold_precisions.append(val_precision)
            fold_recalls.append(val_recall)
            fold_f1_scores.append(val_f1)
            
            all_y_true.extend(y_val)
            all_y_pred.extend(y_pred)
            
            # Store fold results
            fold_result = {
                'fold': fold + 1,
                'accuracy': val_accuracy,
                'precision': val_precision,
                'recall': val_recall,
                'f1_score': val_f1,
                'history': history.history
            }
            self.fold_results.append(fold_result)
            
            print(f"\nFold {fold + 1} Results:")
            print(f"Accuracy: {val_accuracy:.4f}")
            print(f"Precision: {val_precision:.4f}")
            print(f"Recall: {val_recall:.4f}")
            print(f"F1-Score: {val_f1:.4f}")
        
        # Calculate overall metrics
        overall_results = {
            'mean_accuracy': np.mean(fold_accuracies),
            'std_accuracy': np.std(fold_accuracies),
            'mean_precision': np.mean(fold_precisions),
            'std_precision': np.std(fold_precisions),
            'mean_recall': np.mean(fold_recalls),
            'std_recall': np.std(fold_recalls),
            'mean_f1': np.mean(fold_f1_scores),
            'std_f1': np.std(fold_f1_scores),
            'all_y_true': all_y_true,
            'all_y_pred': all_y_pred
        }
        
        return overall_results
    
    def plot_results(self, results):
        """Plot training results and confusion matrix"""
        # Create subplots
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        
        # Plot 1: Accuracy across folds
        fold_nums = [r['fold'] for r in self.fold_results]
        accuracies = [r['accuracy'] for r in self.fold_results]
        
        axes[0, 0].bar(fold_nums, accuracies, color='skyblue', alpha=0.7)
        axes[0, 0].axhline(y=results['mean_accuracy'], color='red', linestyle='--', 
                          label=f'Mean: {results["mean_accuracy"]:.4f}')
        axes[0, 0].set_xlabel('Fold')
        axes[0, 0].set_ylabel('Accuracy')
        axes[0, 0].set_title('Accuracy Across Folds')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)
        
        # Plot 2: Training history for last fold
        last_history = self.fold_results[-1]['history']
        axes[0, 1].plot(last_history['accuracy'], label='Training Accuracy', color='blue')
        axes[0, 1].plot(last_history['val_accuracy'], label='Validation Accuracy', color='red')
        axes[0, 1].set_xlabel('Epoch')
        axes[0, 1].set_ylabel('Accuracy')
        axes[0, 1].set_title('Training History (Last Fold)')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)
        
        # Plot 3: Confusion Matrix
        cm = confusion_matrix(results['all_y_true'], results['all_y_pred'])
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[1, 0])
        axes[1, 0].set_xlabel('Predicted')
        axes[1, 0].set_ylabel('Actual')
        axes[1, 0].set_title('Confusion Matrix (All Folds)')
        axes[1, 0].set_xticklabels(['Healthy', 'Schizophrenia'])
        axes[1, 0].set_yticklabels(['Healthy', 'Schizophrenia'])
        
        # Plot 4: Metrics comparison
        metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
        means = [results['mean_accuracy'], results['mean_precision'], 
                results['mean_recall'], results['mean_f1']]
        stds = [results['std_accuracy'], results['std_precision'],
               results['std_recall'], results['std_f1']]
        
        axes[1, 1].bar(metrics, means, yerr=stds, capsize=5, color='lightgreen', alpha=0.7)
        axes[1, 1].set_ylabel('Score')
        axes[1, 1].set_title('Overall Performance Metrics')
        axes[1, 1].grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.savefig('eeg_classification_results.png', dpi=300, bbox_inches='tight')
        plt.show()
    
    def print_summary(self, results):
        """Print detailed results summary"""
        print("\n" + "="*70)
        print("FINAL RESULTS SUMMARY")
        print("="*70)
        
        print(f"\n5-Fold Cross Validation Results:")
        print(f"Mean Accuracy: {results['mean_accuracy']:.4f} ± {results['std_accuracy']:.4f}")
        print(f"Mean Precision: {results['mean_precision']:.4f} ± {results['std_precision']:.4f}")
        print(f"Mean Recall: {results['mean_recall']:.4f} ± {results['std_recall']:.4f}")
        print(f"Mean F1-Score: {results['mean_f1']:.4f} ± {results['std_f1']:.4f}")
        
        print(f"\nDetailed Fold Results:")
        for i, fold_result in enumerate(self.fold_results):
            print(f"Fold {i+1}: Acc={fold_result['accuracy']:.4f}, "
                  f"Prec={fold_result['precision']:.4f}, "
                  f"Rec={fold_result['recall']:.4f}, "
                  f"F1={fold_result['f1_score']:.4f}")
        
        # Confusion Matrix Analysis
        cm = confusion_matrix(results['all_y_true'], results['all_y_pred'])
        print(f"\nConfusion Matrix:")
        print(f"True Negatives (Healthy correctly classified): {cm[0,0]}")
        print(f"False Positives (Healthy misclassified as Schizo): {cm[0,1]}")
        print(f"False Negatives (Schizo misclassified as Healthy): {cm[1,0]}")
        print(f"True Positives (Schizo correctly classified): {cm[1,1]}")
        
        # Classification Report
        print(f"\nClassification Report:")
        print(classification_report(results['all_y_true'], results['all_y_pred'], 
                                  target_names=['Healthy', 'Schizophrenia']))

# Main execution
if __name__ == "__main__":
    # Define paths to your image folders
    SCHIZO_PATH = "D:/HHT/S"  # Update this path
    HEALTHY_PATH = "D:/HHT/H"       # Update this path
    
    # Initialize classifier
    classifier = EEGClassifier(
        schizo_path=SCHIZO_PATH,
        healthy_path=HEALTHY_PATH,
        img_size=(224, 224)  # Adjust image size as needed
    )
    
    try:
        # Load and preprocess data
        X, y = classifier.load_and_preprocess_data()
        
        # Train with 5-fold cross validation
        results = classifier.train_with_kfold(n_folds=5, epochs=50)
        
        # Print summary
        classifier.print_summary(results)
        
        # Plot results
        classifier.plot_results(results)
        
        print("\nTraining completed! Best models saved as 'best_model_fold_X.h5'")
        print("Results plot saved as 'eeg_classification_results.png'")
        
    except Exception as e:
        print(f"Error during execution: {e}")
        print("Please check your data paths and ensure images are accessible.")

Loading and preprocessing data...
Loading 5146 schizophrenia images...
Loading 4235 healthy images...
Data loaded: 9381 images
Schizophrenia samples: 5146
Healthy samples: 4235

Starting 5-fold cross validation...

FOLD 1/5
Training samples: 7504
Validation samples: 1877
Epoch 1/50
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5375 - loss: 0.9321 - precision: 0.5754 - recall: 0.5853
Epoch 1: val_accuracy improved from -inf to 0.45125, saving model to best_model_fold_1.h5




[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m639s[0m 3s/step - accuracy: 0.5375 - loss: 0.9319 - precision: 0.5754 - recall: 0.5854 - val_accuracy: 0.4513 - val_loss: 1.5564 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/50
[1m  1/234[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10:26[0m 3s/step - accuracy: 0.5625 - loss: 0.7785 - precision: 0.4783 - recall: 0.8462
Epoch 2: val_accuracy did not improve from 0.45125
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 100ms/step - accuracy: 0.5625 - loss: 0.7785 - precision: 0.4783 - recall: 0.8462 - val_accuracy: 0.4513 - val_loss: 1.6618 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 0.0010
Epoch 3/50
[1m151/234[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m3:32[0m 3s/step - accuracy: 0.5479 - loss: 0.7801 - precision: 0.5674 - recall: 0.6238Error during execution: Graph execution error:

Detected at node StatefulPartitionedCall/sequential_1/conv2d