In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.feature_selection import SelectKBest, f_classif
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import time

# Seed every random number generator this script uses (PyTorch CPU, NumPy and,
# when available, PyTorch CUDA) so that repeated runs start from the same state.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    # Ask cuDNN to pick deterministic kernels.
    torch.backends.cudnn.deterministic = True

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Training hyperparameters read by train_and_evaluate().
BATCH_SIZE = 8          # mini-batch size for the train/validation DataLoaders
EPOCHS = 100            # maximum epochs per fold (early stopping may end sooner)
LEARNING_RATE = 5e-4    # initial AdamW learning rate
# Layout constants: 64 channels x 5 bands matches the 320 names generated below.
NUM_CHANNELS = 64
NUM_BANDS = 5
# Band-major names: "alpha1".."alpha64", then "beta1".."beta64", and so on.
# NOTE(review): presumably mirrors the CSV feature-column order -- confirm against the data file.
CHANNEL_NAMES = [f"{band}{i}" for band in ["alpha", "beta", "delta", "theta", "gamma"] for i in range(1, 65)]

# Lookup table from channel number (1..64) to a pair of coordinates, all of
# which lie in the unit square.
# NOTE(review): presumably normalized 2-D (x, y) electrode positions used for
# plotting -- the axis convention is not shown in this cell; confirm before use.
CHANNEL_COORDS = {
    1: (0.75, 0.15), 2: (0.65, 0.2), 3: (0.55, 0.25), 4: (0.45, 0.3), 5: (0.35, 0.2),
    6: (0.25, 0.2), 7: (0.4, 0.35), 8: (0.5, 0.15), 9: (0.45, 0.25), 10: (0.35, 0.15),
    11: (0.3, 0.25), 12: (0.4, 0.3), 13: (0.25, 0.25), 14: (0.35, 0.3), 15: (0.3, 0.35),
    16: (0.35, 0.4), 17: (0.2, 0.2), 18: (0.25, 0.3), 19: (0.2, 0.3), 20: (0.3, 0.4),
    21: (0.4, 0.45), 22: (0.25, 0.4), 23: (0.15, 0.35), 24: (0.2, 0.4), 25: (0.25, 0.45),
    26: (0.3, 0.5), 27: (0.2, 0.5), 28: (0.25, 0.55), 29: (0.15, 0.55), 30: (0.2, 0.6),
    31: (0.35, 0.55), 32: (0.15, 0.65), 33: (0.3, 0.65), 34: (0.45, 0.35), 35: (0.4, 0.7),
    36: (0.35, 0.6), 37: (0.4, 0.65), 38: (0.35, 0.65), 39: (0.45, 0.7), 40: (0.45, 0.6),
    41: (0.55, 0.45), 42: (0.55, 0.6), 43: (0.6, 0.65), 44: (0.65, 0.65), 45: (0.6, 0.55),
    46: (0.55, 0.5), 47: (0.65, 0.6), 48: (0.55, 0.55), 49: (0.6, 0.4), 50: (0.6, 0.35),
    51: (0.55, 0.4), 52: (0.65, 0.45), 53: (0.65, 0.35), 54: (0.55, 0.35), 55: (0.7, 0.4),
    56: (0.65, 0.3), 57: (0.6, 0.3), 58: (0.7, 0.25), 59: (0.65, 0.25), 60: (0.55, 0.3),
    61: (0.75, 0.2), 62: (0.5, 0.1), 63: (0.45, 0.05), 64: (0.15, 0.2)
}

class EEGDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    ``features`` and ``targets`` are stored exactly as passed in; both only
    need to support integer indexing, and ``targets`` must support ``len``.
    """

    def __init__(self, features, targets):
        self.features, self.targets = features, targets

    def __len__(self):
        # The number of samples is defined by the targets container.
        n_samples = len(self.targets)
        return n_samples

    def __getitem__(self, idx):
        # Return the (features, target) pair stored at position ``idx``.
        sample = self.features[idx]
        label = self.targets[idx]
        return sample, label

class SimpleEEGModel(nn.Module):
    """Small fully connected binary classifier.

    Three hidden stages (64, 32 and 16 units), each followed by
    LeakyReLU(0.1) and dropout, feed a single-unit output layer with a
    sigmoid. ``forward`` therefore returns values in (0, 1), one per input row.
    """

    def __init__(self, input_features):
        super().__init__()

        # (output width, dropout probability) of each hidden stage, in order.
        hidden_stages = ((64, 0.5), (32, 0.5), (16, 0.4))

        stack = []
        width_in = input_features
        for width_out, drop_p in hidden_stages:
            stack.extend((
                nn.Linear(width_in, width_out),
                nn.LeakyReLU(0.1),
                nn.Dropout(drop_p),
            ))
            width_in = width_out
        stack.extend((nn.Linear(width_in, 1), nn.Sigmoid()))

        self.model = nn.Sequential(*stack)

        # Re-initialise every linear layer: Xavier-normal weights, zero biases.
        self.apply(self._reset_linear)

    @staticmethod
    def _reset_linear(module):
        """Apply the custom initialisation to ``module`` if it is an nn.Linear."""
        if not isinstance(module, nn.Linear):
            return
        nn.init.xavier_normal_(module.weight)
        if module.bias is not None:
            nn.init.constant_(module.bias, 0.0)

    def forward(self, x):
        # The network emits shape (batch, 1); drop the trailing unit dimension.
        probabilities = self.model(x)
        return probabilities.squeeze(1)

def augment_data(X, y, noise_levels=(0.01, 0.02, 0.05), num_per_level=1):
    """Return ``X``/``y`` extended with synthetic samples.

    The output starts with an unmodified copy of the input, followed by:

    * for every noise level, ``num_per_level`` times: one copy of ``X`` with
      zero-mean Gaussian noise of that standard deviation added, and (only
      when ``X`` has more than 10 features) one copy of ``X`` in which a
      random 10% of the feature columns have their sign flipped;
    * for every class with at least two samples: up to 10 convex
      combinations of two distinct samples of that class, with the mixing
      weight drawn from Beta(0.4, 0.4).

    Randomness comes from NumPy's global RNG (``np.random``), so seed it for
    reproducible output. The inputs are not modified.

    Args:
        X: 2-D NumPy array of shape (n_samples, n_features).
        y: 1-D NumPy array of labels aligned with the rows of ``X``.
        noise_levels: iterable of Gaussian noise standard deviations.
        num_per_level: number of noisy (and sign-flipped) copies per level.

    Returns:
        ``(X_aug, y_aug)``: the stacked feature matrix and the matching
        flat label array.
    """
    n_features = X.shape[1]
    n_flip = int(n_features * 0.1)

    # Collect the pieces and stack once at the end; growing the arrays with
    # vstack/append inside the loops would re-copy everything on each step.
    X_parts = [X.copy()]
    y_parts = [y.copy()]

    for noise_level in noise_levels:
        for _ in range(num_per_level):
            noise = np.random.normal(0, noise_level, X.shape)
            X_parts.append(X + noise)
            y_parts.append(y)

            # Sign-flip a random 10% of the feature columns, but only when
            # there are enough features for that to select at least one.
            if n_features > 10:
                flip_indices = np.random.choice(n_features, size=n_flip, replace=False)
                X_flipped = X.copy()
                X_flipped[:, flip_indices] = -X_flipped[:, flip_indices]
                X_parts.append(X_flipped)
                y_parts.append(y)

    # Mix samples within the same class (mixup-like augmentation).
    for c in np.unique(y):
        class_indices = np.where(y == c)[0]
        if len(class_indices) >= 2:
            for _ in range(min(10, len(class_indices))):
                idx1, idx2 = np.random.choice(class_indices, 2, replace=False)
                alpha = np.random.beta(0.4, 0.4)
                mixed_sample = alpha * X[idx1] + (1 - alpha) * X[idx2]
                X_parts.append(mixed_sample.reshape(1, -1))
                y_parts.append(np.array([c]))

    X_aug = np.vstack(X_parts)
    y_aug = np.concatenate([np.ravel(part) for part in y_parts])
    return X_aug, y_aug

def _predict_proba(model, loader, criterion=None):
    """Run ``model`` over ``loader`` in eval mode with gradients disabled.

    Returns ``(probabilities, summed_loss)``: the model outputs for every
    sample in loader order as a 1-D NumPy array, and the sum of the per-batch
    ``criterion`` values (``0.0`` when no criterion is given).
    """
    model.eval()
    batches = []
    summed_loss = 0.0
    with torch.no_grad():
        for inputs, targets in loader:
            outputs = model(inputs.to(device))
            if criterion is not None:
                summed_loss += criterion(outputs, targets.to(device)).item()
            batches.append(outputs.cpu().numpy())
    probabilities = np.concatenate(batches) if batches else np.zeros(0, dtype=np.float32)
    return probabilities, summed_loss


def train_and_evaluate(data_path='/eeg data.csv'):
    """Train one SimpleEEGModel per stratified fold and report the metrics.

    Pipeline:

    1. Read the CSV at ``data_path``. The first column is dropped, the last
       column is used as the label and everything in between as features.
       NOTE(review): assumes labels are 0/1 -- they are cast to int for
       class counting and thresholded at 0.5; confirm against the data.
    2. Run 5-fold stratified cross-validation. Inside every fold the feature
       selector (top 40 by ANOVA F-score, capped at the number of available
       features) and the scaler are fit on the training rows only, and only
       the training rows are augmented, so nothing derived from the
       validation rows reaches training.
    3. Train with a class-balanced sampler, AdamW, cosine LR annealing, an L1
       penalty, gradient clipping and early stopping on validation loss. The
       best weights per fold are saved to ``best_model_fold{k}.pt`` in the
       current working directory.
    4. Report per-fold, out-of-fold (from each fold's best checkpoint) and
       ensemble metrics.

    Args:
        data_path: path of the CSV file to load.

    Returns:
        ``(ensemble_accuracy, ensemble_f1)`` of the averaged fold models
        evaluated on the full dataset. Every fold model was trained on most of
        those rows, so these figures are optimistic; the printed OOF metrics
        are the unbiased estimate.
    """
    print("Loading data...")
    df = pd.read_csv(data_path)

    X = df.iloc[:, 1:-1].values
    y = df.iloc[:, -1].values.astype(np.float32)

    print("Performing feature selection...")
    # SelectKBest raises when k exceeds the number of columns.
    n_selected = min(40, X.shape[1])

    n_splits = 5
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    fold_results = []
    fold_preprocessors = []  # fitted (selector, scaler) per fold, reused by the ensemble
    oof_preds = np.zeros(len(y))

    l1_lambda = 1e-5
    early_stop_patience = 30

    for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)):
        print(f"\nTraining fold {fold+1}/{n_splits}")
        checkpoint_path = f'best_model_fold{fold+1}.pt'

        # Fit the preprocessing on this fold's training rows only; fitting on
        # the whole dataset would leak label and scale information from the
        # validation rows into training.
        selector = SelectKBest(f_classif, k=n_selected)
        scaler = StandardScaler()
        X_train_base = scaler.fit_transform(selector.fit_transform(X[train_idx], y[train_idx]))
        X_val = scaler.transform(selector.transform(X[val_idx]))
        y_val = y[val_idx]
        fold_preprocessors.append((selector, scaler))

        # Augment only the training rows of this fold. Augmenting the full
        # dataset first and then indexing with train_idx would select just the
        # original rows and discard every synthetic sample.
        X_train, y_train = augment_data(
            X_train_base, y[train_idx],
            noise_levels=[0.01, 0.03, 0.05, 0.08, 0.1], num_per_level=2)

        train_dataset = EEGDataset(torch.as_tensor(X_train, dtype=torch.float32),
                                   torch.as_tensor(y_train, dtype=torch.float32))
        val_dataset = EEGDataset(torch.as_tensor(X_val, dtype=torch.float32),
                                 torch.as_tensor(y_val, dtype=torch.float32))

        # Handle class imbalance: sample each row with weight 1 / (size of its class).
        train_labels = y_train.astype(int)
        class_counts = np.bincount(train_labels)
        sample_weights = 1.0 / class_counts[train_labels]
        sampler = WeightedRandomSampler(weights=sample_weights,
                                        num_samples=len(sample_weights),
                                        replacement=True)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

        model = SimpleEEGModel(input_features=X_train.shape[1]).to(device)

        # The model's last layer is a sigmoid, so its outputs are already
        # probabilities. BCELoss is the matching criterion; BCEWithLogitsLoss
        # would apply a second sigmoid on top of them.
        criterion = nn.BCELoss()
        optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-2)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

        best_val_loss = float('inf')
        early_stop_counter = 0

        for epoch in range(EPOCHS):
            # Training phase
            model.train()
            train_loss = 0.0

            for inputs, targets in train_loader:
                inputs, targets = inputs.to(device), targets.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)

                # L1 regularization over all parameters
                l1_norm = sum(p.abs().sum() for p in model.parameters())
                loss = loss + l1_lambda * l1_norm

                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

                train_loss += loss.item()

            scheduler.step()

            # Validation phase (the loader is not shuffled, so the
            # predictions line up with y_val).
            val_probs, val_loss = _predict_proba(model, val_loader, criterion)
            val_preds = (val_probs > 0.5).astype(np.float32)

            accuracy = accuracy_score(y_val, val_preds)
            f1 = f1_score(y_val, val_preds, zero_division=1)

            if (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{EPOCHS}, Train Loss: {train_loss/len(train_loader):.4f}, "
                      f"Val Loss: {val_loss/len(val_loader):.4f}, Accuracy: {accuracy:.4f}, F1: {f1:.4f}")

            # Early stopping on the summed validation loss
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                early_stop_counter = 0
                torch.save(model.state_dict(), checkpoint_path)
            else:
                early_stop_counter += 1
                if early_stop_counter >= early_stop_patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break

        # Score the fold with its best checkpoint, and take the out-of-fold
        # predictions from that same checkpoint rather than from whichever
        # epoch happened to run last.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        val_probs, _ = _predict_proba(model, val_loader)
        oof_preds[val_idx] = val_probs
        val_preds = (val_probs > 0.5).astype(np.float32)

        accuracy = accuracy_score(y_val, val_preds)
        f1 = f1_score(y_val, val_preds, zero_division=1)
        conf_matrix = confusion_matrix(y_val, val_preds)

        print(f"\nFold {fold+1} Results:")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print(f"Confusion Matrix:\n{conf_matrix}")

        fold_results.append({
            'fold': fold + 1,
            'accuracy': accuracy,
            'f1': f1
        })

    # Out-of-fold metrics: every sample is scored by the one model that never saw it.
    oof_preds_binary = (oof_preds > 0.5).astype(float)
    oof_accuracy = accuracy_score(y, oof_preds_binary)
    oof_f1 = f1_score(y, oof_preds_binary, zero_division=1)

    accuracies = [result['accuracy'] for result in fold_results]
    f1_scores = [result['f1'] for result in fold_results]

    print("\nOverall Results:")
    print(f"Mean CV Accuracy: {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")
    print(f"Mean CV F1 Score: {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")
    print(f"OOF Accuracy: {oof_accuracy:.4f}")
    print(f"OOF F1 Score: {oof_f1:.4f}")

    # Ensemble: average the fold models' probabilities over the entire dataset.
    print("\nCreating ensemble of models...")
    ensemble_preds = np.zeros(len(y))
    y_tensor = torch.as_tensor(y, dtype=torch.float32)

    for fold, (selector, scaler) in enumerate(fold_preprocessors):
        # Each model must see the data through its own fold's preprocessing.
        X_full = scaler.transform(selector.transform(X))
        full_dataset = EEGDataset(torch.as_tensor(X_full, dtype=torch.float32), y_tensor)
        full_loader = DataLoader(full_dataset, batch_size=BATCH_SIZE, shuffle=False)

        model = SimpleEEGModel(input_features=X_full.shape[1]).to(device)
        model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pt', map_location=device))

        fold_probs, _ = _predict_proba(model, full_loader)
        ensemble_preds += fold_probs

    # Average predictions and convert to binary
    ensemble_preds /= n_splits
    ensemble_preds_binary = (ensemble_preds > 0.5).astype(float)

    ensemble_accuracy = accuracy_score(y, ensemble_preds_binary)
    ensemble_f1 = f1_score(y, ensemble_preds_binary, zero_division=1)
    ensemble_conf_matrix = confusion_matrix(y, ensemble_preds_binary)

    print("\nEnsemble Model Results:")
    print(f"Accuracy: {ensemble_accuracy:.4f}")
    print(f"F1 Score: {ensemble_f1:.4f}")
    print(f"Confusion Matrix:\n{ensemble_conf_matrix}")

    return ensemble_accuracy, ensemble_f1

if __name__ == "__main__":
    # Time the complete train/evaluate run and report the returned metrics.
    start_time = time.time()
    acc, f1 = train_and_evaluate()
    end_time = time.time()
    elapsed_minutes = (end_time - start_time) / 60

    for report_line in (
        f"\nTotal execution time: {elapsed_minutes:.2f} minutes",
        f"Final Accuracy: {acc:.4f}",
        f"Final F1 Score: {f1:.4f}",
    ):
        print(report_line)

Using device: cuda
Loading data...
Performing feature selection...

Training fold 1/5
Epoch 10/100, Train Loss: 0.6991, Val Loss: 0.7322, Accuracy: 0.3750, F1: 0.0000
Epoch 20/100, Train Loss: 0.6761, Val Loss: 0.7308, Accuracy: 0.3750, F1: 0.0000
Epoch 30/100, Train Loss: 0.7162, Val Loss: 0.7280, Accuracy: 0.3750, F1: 0.0000
Epoch 40/100, Train Loss: 0.7908, Val Loss: 0.7285, Accuracy: 0.3750, F1: 0.0000
Epoch 50/100, Train Loss: 0.7082, Val Loss: 0.7274, Accuracy: 0.3750, F1: 0.0000
Epoch 60/100, Train Loss: 0.7047, Val Loss: 0.7273, Accuracy: 0.3750, F1: 0.0000
Epoch 70/100, Train Loss: 0.7275, Val Loss: 0.7272, Accuracy: 0.3750, F1: 0.0000
Epoch 80/100, Train Loss: 0.7037, Val Loss: 0.7273, Accuracy: 0.3750, F1: 0.0000
Epoch 90/100, Train Loss: 0.6925, Val Loss: 0.7274, Accuracy: 0.3750, F1: 0.0000
Early stopping at epoch 95

Fold 1 Results:
Accuracy: 0.3750
F1 Score: 0.0000
Confusion Matrix:
[[3 1]
 [4 0]]

Training fold 2/5
Epoch 10/100, Train Loss: 0.7697, Val Loss: 0.7279, Acc