In [None]:
# Download the Kaggle data source for this notebook through kagglehub.
# Run this cell first so the data is available; per the original export note
# it may be deleted once the data sources have been imported.
# NOTE: this notebook environment differs from Kaggle's Python environment,
# so libraries used by the notebook may be missing here.
# The return value of dataset_download (presumably the local dataset
# directory - confirm against the kagglehub docs) is kept in
# ranjittanneru_impulse_path.
import kagglehub
ranjittanneru_impulse_path = kagglehub.dataset_download('ranjittanneru/impulse')

print('Data source import complete.')


In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [None]:
# --- Data location ---
# Path of the training data when the dataset is mounted on Kaggle.
train_path = "/kaggle/input/impulse/Impulse/EEG_Data/train_data"
if not os.path.isdir(train_path):
    # Outside Kaggle the mount above does not exist. If the kagglehub download
    # cell was run, use the directory it returned instead of failing later in
    # os.listdir. Assumes the download keeps the same
    # Impulse/EEG_Data/train_data layout as the Kaggle mount - TODO confirm.
    _download_root = globals().get("ranjittanneru_impulse_path")
    if _download_root:
        train_path = os.path.join(_download_root, "Impulse", "EEG_Data", "train_data")

# --- Model dimensions ---
latent_dim = 100  # Size of the latent space
input_dim = 19 * 500  # Flattened size of each EEG sample (arrays are loaded as (19, 500))
num_classes = 4  # Number of classes

# --- Optimisation ---
batch_size = 128  # Batch size for training
epochs = 1000  # Number of epochs
learning_rate = 0.0001  # Base learning rate (the generator optimizer scales it by 5)
beta = 0.1   # Weight for KL divergence in the loss; not referenced by the training loop in this notebook

In [None]:
def normalize_data(data):
    """Min-max scale an array into [-1, 1] using its global extrema.

    The minimum and maximum are taken over the whole array (no axis is
    given), so every element is scaled by the same factor. A 1e-8 term in the
    denominator avoids division by zero; a constant input therefore maps to
    -1 everywhere.

    Args:
        data: numeric array-like accepted by ``np.min`` / ``np.max``.

    Returns:
        Array of the same shape as ``data`` with values in [-1, 1].
    """
    lowest = np.min(data)
    span = np.max(data) - lowest + 1e-8
    shifted = data - lowest
    return 2 * shifted / span - 1


In [None]:
# Folder name of each class -> integer label; the label is the position of
# the name in the tuple below.
class_map = {
    folder_name: label
    for label, folder_name in enumerate((
        "Normal",
        "Complex_Partial_Seizures",
        "Electrographic_Seizures",
        "Video_detected_Seizures_with_no_visual_change_over_EEG",
    ))
}

In [None]:
# Load every training file, one sub-folder of train_path per class in class_map.
train_data = []
train_labels = []
skipped_files = []  # paths dropped because the array is not shaped (19, 500)

for class_name, class_label in class_map.items():
    class_folder = os.path.join(train_path, class_name)
    # os.listdir returns entries in arbitrary order; sort them so the dataset
    # is assembled in the same order on every run.
    for file_name in sorted(os.listdir(class_folder)):
        file_path = os.path.join(class_folder, file_name)
        signal = np.load(file_path)  # Assuming .npy files
        if signal.shape != (19, 500):
            # Keep track of rejected files instead of dropping them silently.
            skipped_files.append(file_path)
            continue
        # Scale to [-1, 1] and flatten to (19 * 500,)
        train_data.append(normalize_data(signal).flatten())
        train_labels.append(class_label)

print(f"Loaded {len(train_data)} samples; skipped {len(skipped_files)} file(s) with an unexpected shape.")

if not train_data:
    # Fail here with a clear message rather than later inside the DataLoader
    # or the training loop.
    raise ValueError(f"No (19, 500) samples were found under {train_path!r}")

In [None]:
# Turn the Python lists into tensors: the flattened samples are first stacked
# into a single NumPy array and stored as float32, the labels as int64.
train_data = torch.as_tensor(np.array(train_data), dtype=torch.float32)
train_labels = torch.as_tensor(train_labels, dtype=torch.long)


In [None]:
class EEGDataset(Dataset):
    """Map-style dataset that pairs each sample with its label.

    Args:
        data: indexable collection of samples (the notebook passes a 2-D
            float tensor with one flattened EEG sample per row).
        labels: indexable collection of labels, one per sample.

    Raises:
        ValueError: if ``data`` and ``labels`` do not have the same length.
    """

    def __init__(self, data, labels):
        # A length mismatch would otherwise only surface as an IndexError (or
        # as silently unused labels) somewhere in the middle of training.
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]


In [None]:
# Wrap the training tensors and serve them as shuffled mini-batches.
dataset = EEGDataset(data=train_data, labels=train_labels)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)


In [None]:
class Generator(nn.Module):
    """Conditional generator: (noise, label vector) -> flattened sample in [-1, 1].

    The label vector is projected to a 16-dimensional embedding, concatenated
    with the noise vector and passed through an MLP that ends in ``Tanh``.

    Args:
        z_dim: size of the latent noise vector.
        num_classes: width of the label vector (the notebook passes one-hot
            float vectors).
        input_dim: size of the flattened sample to generate. It was previously
            ignored in favour of a hard-coded ``19 * 500``; the notebook
            passes exactly that value, so its behaviour is unchanged.
    """

    def __init__(self, z_dim, num_classes, input_dim):
        super(Generator, self).__init__()

        label_embed_dim = 16  # width of the class embedding

        self.class_emb = nn.Sequential(
            nn.Linear(num_classes, label_embed_dim),
            nn.LeakyReLU(0.2, inplace=True)
        )

        # Layer order and attribute names are unchanged, so parameter names in
        # the state dict stay the same.
        self.net = nn.Sequential(
            nn.Linear(z_dim + label_embed_dim, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, input_dim),
            nn.Tanh()
        )

    def forward(self, z, labels):
        """Generate one sample per row.

        Args:
            z: noise of shape ``(batch, z_dim)``.
            labels: float label vectors of shape ``(batch, num_classes)``.

        Returns:
            Tensor of shape ``(batch, input_dim)`` with values in [-1, 1].
        """
        class_emb = self.class_emb(labels)
        x = torch.cat([z, class_emb], dim=1)
        out = self.net(x)
        return out


In [None]:
class Discriminator(nn.Module):
    """Conditional discriminator: (flattened sample, label vector) -> probability.

    The label vector is projected to a 16-dimensional embedding, concatenated
    with the sample and passed through an MLP that ends in ``Sigmoid``.

    Args:
        num_classes: width of the label vector (the notebook passes one-hot
            float vectors).
        input_dim: size of the flattened sample. It was previously overwritten
            with a hard-coded ``19 * 500 + 16``; the notebook passes
            ``19 * 500``, so its behaviour is unchanged.
    """

    def __init__(self, num_classes, input_dim):
        super(Discriminator, self).__init__()

        label_embed_dim = 16  # width of the class embedding

        self.class_emb = nn.Sequential(
            nn.Linear(num_classes, label_embed_dim),
            nn.LeakyReLU(0.2, inplace=True)
        )

        # The first layer sees the flattened sample plus the class embedding.
        self.net = nn.Sequential(
            nn.Linear(input_dim + label_embed_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x, labels):
        """Score each sample.

        Args:
            x: samples of shape ``(batch, input_dim)``.
            labels: float label vectors of shape ``(batch, num_classes)``.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in [0, 1].
        """
        class_emb = self.class_emb(labels)  # (batch, 16)
        combined = torch.cat([x, class_emb], dim=1)  # concatenate along the feature dimension
        validity = self.net(combined)
        return validity


In [None]:
# Binary cross-entropy on probabilities, averaged over the batch (the default
# reduction, spelled out here).
criterion = nn.BCELoss(reduction="mean")


In [None]:
# Build both networks and move them to the selected device.
generator = Generator(latent_dim, num_classes, input_dim).to(device)
discriminator = Discriminator(num_classes, input_dim).to(device)

# One Adam optimizer per network; the generator steps with five times the
# discriminator's learning rate.
optimizer_G = optim.Adam(
    generator.parameters(),
    lr=learning_rate*5,
    betas=(0.5, 0.999),
)
optimizer_D = optim.Adam(
    discriminator.parameters(),
    lr=learning_rate,
    betas=(0.5, 0.999),
)




In [None]:

# ----------------------------
#  Training Loop
# ----------------------------
# Conditional GAN training. For every batch the discriminator is updated on
# real samples (target 1) and on detached generated samples (target 0); the
# generator is then updated so that the discriminator scores its samples as
# real.

# Put both networks in training mode explicitly: the generation cell calls
# generator.eval(), so re-running this cell afterwards would otherwise train
# with BatchNorm left in inference mode.
generator.train()
discriminator.train()

num_batches = len(dataloader)  # does not change between epochs

for epoch in range(epochs):
    epoch_d_loss = 0.0
    epoch_g_loss = 0.0

    for real_eeg, real_labels in dataloader:
        real_eeg = real_eeg.to(device)
        real_labels = torch.nn.functional.one_hot(real_labels, num_classes=num_classes).float().to(device)
        # Size of *this* batch (the last one can be smaller). It gets its own
        # name so the global `batch_size` hyperparameter is not overwritten.
        current_batch_size = real_eeg.size(0)

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Real EEG
        real_targets = torch.ones(current_batch_size, 1, device=device)
        pred_real = discriminator(real_eeg, real_labels)
        loss_real = criterion(pred_real, real_targets)

        # Fake EEG, conditioned on the same labels as the real batch
        z = torch.randn(current_batch_size, latent_dim, device=device)
        fake_labels = real_labels
        fake_eeg = generator(z, fake_labels)
        fake_targets = torch.zeros(current_batch_size, 1, device=device)
        # detach(): the discriminator step must not backpropagate into the generator
        pred_fake = discriminator(fake_eeg.detach(), fake_labels)
        loss_fake = criterion(pred_fake, fake_targets)

        # Combine & update
        d_loss = loss_real + loss_fake
        d_loss.backward()
        optimizer_D.step()

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # Score the same fakes again with the updated discriminator, this time
        # keeping the graph so gradients reach the generator.
        pred_fake_for_g = discriminator(fake_eeg, fake_labels)
        g_loss = criterion(pred_fake_for_g, real_targets)
        g_loss.backward()
        optimizer_G.step()

        epoch_d_loss += d_loss.item()
        epoch_g_loss += g_loss.item()

    # Report 1-based epochs so the final line reads [epochs/epochs].
    print(f"Epoch [{epoch + 1}/{epochs}] | Avg D_loss: {epoch_d_loss/num_batches:.4f} | Avg G_loss: {epoch_g_loss/num_batches:.4f}")

In [None]:
import os
import numpy as np
import torch

# Assuming generator, latent_dim, num_classes, and device are already defined
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)

# Calculate class distribution in training data. minlength keeps one entry
# per class even when the highest-numbered classes have no samples.
class_counts = np.bincount(train_labels.numpy(), minlength=num_classes)
total_samples = len(train_labels)
if total_samples == 0:
    raise ValueError("train_labels is empty; cannot derive class ratios")
class_ratios = class_counts / total_samples

# Set the total number of synthetic samples to generate
total_synthetic_samples = 5608  # Adjust as needed

# Split the total across classes in proportion to the training counts using
# exact integer arithmetic (largest-remainder method). Truncating the float
# product ratio * total could drop samples (e.g. 1401.9999 -> 1401), so the
# per-class counts would not add up to total_synthetic_samples.
scaled_counts = class_counts * total_synthetic_samples
synthetic_samples_per_class = scaled_counts // total_samples
remainders = scaled_counts % total_samples
shortfall = total_synthetic_samples - int(synthetic_samples_per_class.sum())
if shortfall > 0:
    # Hand the leftover samples to the classes with the largest remainders.
    top_up = np.argsort(-remainders, kind="stable")[:shortfall]
    synthetic_samples_per_class[top_up] += 1

# Generate synthetic EEG data while maintaining class ratio
generator.eval()  # inference mode: BatchNorm uses its running statistics
for class_idx, num_samples in enumerate(synthetic_samples_per_class):
    num_samples = int(num_samples)
    if num_samples == 0:  # Skip classes with no samples
        continue

    z = torch.randn(num_samples, latent_dim, device=device)
    class_label = torch.zeros(num_samples, num_classes, device=device)
    class_label[:, class_idx] = 1  # One-hot encode the class label

    with torch.no_grad():
        synthetic_eeg = generator(z, class_label)

    # Save synthetic EEG data class-wise (one flattened sample per row)
    output_file = os.path.join(output_dir, f"synthetic_eeg_{class_idx}.npy")
    np.save(output_file, synthetic_eeg.cpu().numpy())
    print(f"Saved synthetic EEG data for class {class_idx} to {output_file}")


In [None]:
# Display the number of training labels used to compute the class ratios.
total_samples

In [None]:
# Display how many synthetic samples were allotted to each class.
synthetic_samples_per_class

In [None]:
# import numpy as np
# from scipy.linalg import sqrtm

# def calculate_fid(real_features, generated_features):
#     # Calculate mean and covariance of real features
#     mu_r = np.mean(real_features, axis=0)
#     sigma_r = np.cov(real_features, rowvar=False)

#     # Calculate mean and covariance of generated features
#     mu_g = np.mean(generated_features, axis=0)
#     sigma_g = np.cov(generated_features, rowvar=False)

#     # Calculate squared difference of means
#     diff = mu_r - mu_g
#     mean_diff = np.sum(diff**2)

#     # Compute square root of product of covariance matrices
#     covmean, _ = sqrtm(sigma_r @ sigma_g, disp=False)

#     # Handle numerical errors (non-positive semi-definite results)
#     if np.iscomplexobj(covmean):
#         covmean = covmean.real

#     # Calculate FID score
#     fid = mean_diff + np.trace(sigma_r + sigma_g - 2 * covmean)
#     return fid

# # Example usage (disabled): both arrays below must be defined before uncommenting.
# # real_features: features extracted from real EEG samples (numpy array)
# # generated_features: features extracted from generated EEG samples (numpy array)

# fid_score = calculate_fid(real_features, generated_features)
# print(f"FID Score: {fid_score}")
