In [2]:
import mne
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader,Subset
from torch import Tensor
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, auc, roc_curve
import matplotlib.pyplot as plt
from sklearn.preprocessing import label_binarize
import json
import time
from einops.layers.torch import Rearrange, Reduce
from einops import rearrange

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

mne.set_log_level('ERROR')
mne.cuda.init_cuda(verbose=True)

CUDA not enabled in config, skipping initialization


In [3]:
# Used to read our json file of each dictionary mapped to it's file name
def read_dict_from_json_file(filepath):
    with open(filepath, 'r') as file:
        return json.load(file)

In [18]:
class CHBData(Dataset):
    def __init__(self, alpha=0.1):
        
        self.segment_dict = read_dict_from_json_file('\CHB-MIT\Segment_dicts\segment_dict_laptop.json')
        # Mapping label names to integers
        self.label_to_int = {'interictal': 0, 'preictal': 1, 'ictal': 2}
        self.alpha = alpha  # Smoothing factor for EMA. Smaller alpha places greater importance on more recent elements
        # Initialize EMA and variance (for standard deviation calculation) for each channel
        self.EMA = {}
        self.variance = {}
        # Initial values could be adjusted based on dataset characteristics
        self.initialized_channels = False

    def __len__(self):
        return len(self.segment_dict)

    def __getitem__(self, index):
        segment_filepath = f'CHB-MIT/Segments/chb01/{index}-chb01.pt'
        segment = torch.load(segment_filepath)
        segment = segment.to(device)
        segment = self.pad_sequence(segment)
        
        # Ensure EMA and variance are initialized
        if not self.initialized_channels:
            self.initialize_ema_variance(segment.shape[1])
            self.initialized_channels = True

        # Computes z-normalization based on Exponential moving average
        self.ema_normalize(segment)

        # Retrieve the label and map it to an integer
        label_name = self.segment_dict[segment_filepath]
        label = self.label_to_int[label_name]
        
        segment = segment.float()  # Assuming you're using a device in the outer scope

        return segment, label

    def pad_sequence(self, segment):
        total_padding = 28 - segment.shape[1]
        pad_top = total_padding // 2
        pad_bottom = total_padding - pad_top
        padded_tensor = F.pad(segment, (0, 0, pad_top, pad_bottom), mode='constant', value=0)
        return padded_tensor

    def initialize_ema_variance(self, num_channels):
        for i in range(num_channels):
            self.EMA[i] = None
            self.variance[i] = None

    def ema_normalize(self, segment):
        # Update EMA and standard deviation for each channel and normalize
        for i in range(segment.shape[1]):  
            channel_data = segment[0, i, :] # select each channel
            if self.EMA[i] is None:  # First update
                self.EMA[i] = channel_data.mean()
                self.variance[i] = channel_data.var()
            else:
                #update EMA and variance based
                self.EMA[i] = self.alpha * channel_data.mean() + (1 - self.alpha) * self.EMA[i]
                self.variance[i] = self.alpha * ((channel_data - self.EMA[i])**2).mean() + (1 - self.alpha) * self.variance[i]

            std = torch.sqrt(self.variance[i])
            # Normalize this channel
            segment[0,i, :] = (channel_data - self.EMA[i]) / std

In [19]:
num_workers = 0
full_dataset = CHBData()

# Split the dataset into train and test sets
train_indices, test_indices = train_test_split(range(len(full_dataset)), test_size=0.3, random_state=42)

#  split the test set into validation and actual test sets
val_indices, test_indices = train_test_split(test_indices, test_size=0.5, random_state=42)

# dataloaders for training, validation, and test
train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)
test_dataset = Subset(full_dataset, test_indices)

# Instantiate dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=num_workers, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=num_workers, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=num_workers, drop_last=True)
#training_dataset = CHBData(segment_dict)
#mean, std = calculate_mean_std(training_dataset)

FileNotFoundError: [Errno 2] No such file or directory: '\\CHB-MIT\\Segment_dicts\\segment_dict_laptop.json'

In [29]:
num_workers = 0
full_dataset = CHBData(segment_dict)

# Split the dataset into train and test sets
train_indices, val_indices = train_test_split(range(len(full_dataset)), test_size=0.3, random_state=42)

#  split the test set into validation and actual test sets
train_indices, val_indices = train_test_split(range(len(full_dataset)), test_size=0.3, random_state=42)


# dataloaders for training, validation, and test
train_dataset = Subset(full_dataset, train_indices)
val_dataset = Subset(full_dataset, val_indices)

# Instantiate dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=num_workers, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=num_workers, drop_last=True)


In [21]:
class SeizureSense(nn.Module):
    
    def __init__(self):
        super(SeizureSense,self).__init__()
        
        #Block 1
        #should be taking in an input of 23x512
        #first layer temporal filters
        self.conv1=nn.Conv2d(1,8,(1,128),stride=1,padding=0)
        self.batchnorm1 = nn.BatchNorm2d(8, False)
        
        #spatial layer(depthwise layer)
        self.conv2_23=nn.Conv2d(8,32,(28,1))
        self.batchnorm2 = nn.BatchNorm2d(32, False)
        self.avgpool1 = nn.AvgPool2d((1,2))
        #apply dropout here in forward
        
        
        #Block 2
        #sepereable convolutional 2d
        self.conv3=nn.Conv2d(32,32,(1,16),stride=1)
        self.batchnorm3 = nn.BatchNorm2d(32,False) #CHECK THIS!
        self.avgpool2 = nn.AvgPool2d((1,16))
        
     
        #Block 3
        
        self.fc1= nn.Linear(96, 30)
        self.fc2=nn.Linear(30,3)

        #apply dropout here in forward
        self.dropout = nn.Dropout(0.25)
        
    def forward(self, x):
        x = self.conv1(x)
        x = F.elu(x)
        x = self.batchnorm1(x)
        x = self.dropout(x)    
        x = self.conv2_23(x)
        x = F.elu(x)
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.avgpool1(x)
        
        x = F.elu(self.conv3(x))
        x = self.batchnorm3(x)
        x = self.dropout(x)
        x = self.avgpool2(x)
        x = x.view(x.size(0), -1)  
        x = self.fc1(x)
        x = F.elu(x)
        x = torch.sigmoid(self.fc2(x))  # Use torch.sigmoid instead of F.sigmoid (deprecated)
        return x

In [31]:
model = SeizureSense()
model.to(device)

<All keys matched successfully>

In [1]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 5
num_classes = 3
num_patients = 16
losses = []

for epoch in range(num_epochs):
    for patient_number in range(1,num_patients):
        #ensures correct formatting for dataset
        if patient_number < 10:
            full_dataset = CHBData(f"0{patient_number}")
        else:
            full_dataset = CHBData(patient_number)

        # Split the dataset into train and test sets
        total_samples = len(full_dataset)
        split_point = int(total_samples * 0.7)  # For a 70-30 split

        train_indices = list(range(0, split_point))
        val_indices = list(range(split_point, total_samples))

        # dataloaders for training and validation
        train_dataset = Subset(full_dataset, train_indices)
        val_dataset = Subset(full_dataset, val_indices)

        # Instantiate dataloaders
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=0, drop_last=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0, drop_last=True)
        start_time = time.time()  # Start timing
        
        model.train()
        for inputs, labels in train_loader:
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            aggregated_outputs = outputs.mean(dim=1)
            labels = labels.float()
            loss = criterion(aggregated_outputs, labels)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()

        # Validation Phase
        model.eval()
        val_labels = []
        val_probabilities = []  # Store probabilities for AUROC calculation
        val_predictions = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                labels = labels.to(device)
                logits = model(inputs)
                aggregated_logits = logits.mean(dim=1)
                outputs = F.softmax(aggregated_logits, dim=1)

                positive_class_probabilities = outputs[:, 1]
                predictions = torch.max(outputs, 1)[1].cpu().numpy()  # Fixed to use argmax for multi-class

                val_labels.extend(labels.cpu().numpy())  # Convert to numpy array
                val_predictions.extend(predictions)
                val_probabilities.extend(outputs.cpu().numpy())  # Convert to numpy array

        # Calculate metrics
        accuracy = accuracy_score(val_labels, val_predictions)
        recall = recall_score(val_labels, val_predictions, average=None, zero_division=0)  # Adjusted for multi-class
        # Calculate AUROC
        # For multi-class AUROC, adjust according to your class strategy
        val_labels_binarized = label_binarize(val_labels, classes=np.unique(val_labels))
        auroc = roc_auc_score(val_labels_binarized, np.array(val_probabilities), multi_class='ovr')  # Adjusted for multi-class

        # Timing end and calculate duration
        end_time = time.time()
        epoch_duration = end_time - start_time

        print(f'Epoch {epoch+1}/{num_epochs} - Loss: {np.mean(losses):.4f}, Accuracy: {accuracy:.4f}, Recall: {recall.mean():.4f}, AUROC: {auroc:.4f}, Time: {epoch_duration:.2f}s')

        # Reset losses for next epoch
        losses = []

NameError: name 'nn' is not defined

In [35]:
accuracy = accuracy_score(val_labels, val_predictions)
recall = recall_score(val_labels,val_predictions, average=None)[1]
print(accuracy)
print(recall)

0.027023940058479533
1.0


In [44]:
model.eval()
val_labels = []
val_probabilities = []  # Store probabilities for AUROC calculation
val_predictions = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.float().to(device), labels.to(device)
        logits = model(inputs)
        outputs = F.softmax(logits, dim=1)            
        
        positive_class_probabilities = outputs[:, 1]
        predictions = (positive_class_probabilities.cpu().numpy() > optimal_threshold).astype(int)
        
        val_labels.extend(labels.cpu().numpy())  # Convert to numpy array
        val_predictions.extend(predictions)
        val_probabilities.extend(outputs.cpu().numpy())  # Convert to numpy array


In [45]:
accuracy = accuracy_score(val_labels, val_predictions)
recall = recall_score(val_labels,val_predictions, average=None)[1]

print(accuracy)
print(recall)

0.906889619883041
0.9812925170068028


In [50]:
print(len(val_predictions))
print(len(val_labels))

21888
21888


In [53]:
df = pd.DataFrame({"predictions":val_predictions, "labels":val_labels})
df.to_csv("sdfsdoodfio.csv",index=False)