# CNN classification

# Imports

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
import numpy as np
import h5py

# Define file path and read h5 data provided in project 3

In [None]:
# Load every LFP segment from the HDF5 file, label it by state, and zero-pad
# all segments to a common length so they can be stacked into one 2-D array.
h5_path = 'Part1SubjectHB10.h5' # File path to the h5 file that contains the data to process

# Open the file in a context manager so the handle is always closed, even if
# reading fails part-way. All data needed later is copied into memory here.
with h5py.File(h5_path, 'r') as f: # using the library allows h5 to be read in as a dictionary
    # Read the sampling frequency
    fs = f.attrs['fs'][0]
    print("Sampling rate: %.1f Hz" % (fs))

    # Read states (one top-level group per state)
    states = []
    for name, grp in f.items():
        states.append(name)
        print("State: %s" % (name))
        print("Segment IDs:", list(grp.keys()))

    # Extract LFP segments. Segment datasets are addressed as '1'..'n', which
    # yields them in numeric order (group.keys() would sort them as strings).
    lfp = {key: [] for key in states}
    for key in states:
        group = f[key]
        for segment_id in range(1, len(group) + 1):
            # [()] reads the whole dataset into a NumPy array (a copy in memory).
            lfp[key].append(group[str(segment_id)][()].astype(float))

# Combine signals and labels: NREM -> class 0, WAKE -> class 1
all_signals = lfp['NREM'] + lfp['WAKE']
all_labels = [0] * len(lfp['NREM']) + [1] * len(lfp['WAKE'])

# Find the maximum length of the signals
max_length = max(signal.shape[0] for signal in all_signals)

# Zero-pad every signal at the end up to max_length. Because max_length is the
# longest segment, no signal ever needs truncating; the longest one gets a
# zero-width pad and is left unchanged.
padded_signals = [
    np.pad(signal, (0, max_length - signal.shape[0]), mode='constant')
    for signal in all_signals
]

# Stack the signals and convert labels to a NumPy array
signals = np.stack(padded_signals)
labels = np.array(all_labels)

print("Signals shape:", signals.shape)
print("Labels shape:", labels.shape)


Sampling rate: 1000.0 Hz
State: NREM
Segment IDs: ['1', '10', '11', '12', '13', '14', '15', '16', '17', '2', '3', '4', '5', '6', '7', '8', '9']
State: WAKE
Segment IDs: ['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '4', '5', '6', '7', '8', '9']
Signals shape: (55, 285000)
Labels shape: (55,)


In [None]:
class H5SignalDataset(Dataset):
    """In-memory dataset pairing each signal with its integer class label.

    Signals are stored as a float32 tensor in ``self.data`` and labels as a
    long (int64) tensor in ``self.labels``. A 2-D ``(N, Length)`` signal array
    gets a singleton dimension inserted at position 1, giving
    ``(N, 1, Length)``; input of any other rank is stored unchanged.
    """

    def __init__(self, signals, labels):
        signal_tensor = torch.tensor(signals, dtype=torch.float32)
        # Only 2-D input needs the extra dimension added.
        needs_channel_dim = signal_tensor.dim() == 2
        self.data = signal_tensor.unsqueeze(1) if needs_channel_dim else signal_tensor
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        """Return the number of samples (one label per sample)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(signal, label)`` pair stored at ``idx``."""
        sample = self.data[idx]
        target = self.labels[idx]
        return sample, target

# Wrap the arrays in a Dataset, then split 80/20 into training and validation
# subsets and build a DataLoader for each.
full_dataset = H5SignalDataset(signals, labels) # create the ds obj

train_size = int(0.8 * len(full_dataset)) # 80% of the data for training
val_size = len(full_dataset) - train_size # remaining ~20% of the data for validation

# Seed the split so the same samples land in the validation set on every run;
# without it, results from different runs are not comparable.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
) # split the dataset into training and validation sets

batch_size = 32
# Training batches are reshuffled every epoch; validation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # create the data loader for the training set
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) # create the data loader for the validation set


In [None]:
class SignalCNN(nn.Module):
    """1-D CNN: two conv/ReLU/max-pool stages followed by two linear layers.

    Expects input of shape ``(batch, 1, input_length)`` and returns
    ``(batch, 2)`` raw scores (no softmax is applied here).

    Args:
        input_length: Number of samples per input signal. It fixes the size
            of the first fully connected layer.
    """

    def __init__(self, input_length):
        super().__init__()
        # kernel_size=5 with padding=2 and stride=1 keeps the length unchanged,
        # so only the pooling layers shrink the signal.
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.pool2 = nn.MaxPool1d(kernel_size=2)

        # Each pool halves the length (rounding down), hence two floor-divisions.
        pooled_length = (input_length // 2) // 2
        self.fc1 = nn.Linear(in_features=32 * pooled_length, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=2)

    def forward(self, x):
        """Compute the class scores for a batch of signals."""
        features = x
        for conv, pool in ((self.conv1, self.pool1), (self.conv2, self.pool2)):
            features = pool(F.relu(conv(features)))
        # Flatten (batch, channels, length) to (batch, channels * length).
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
    
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is put in training mode and its
    weights are updated after every batch; otherwise the model is put in
    evaluation mode and gradients are disabled.

    The loss is averaged per sample rather than per batch, so a smaller final
    batch does not get extra weight. This assumes ``criterion`` returns the
    mean loss over the batch, which is the ``nn.CrossEntropyLoss`` default.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(training):
        # Batch names are deliberately distinct from the module-level
        # `signals` / `labels` arrays so those are not overwritten.
        for batch_signals, batch_labels in loader:
            if training:
                optimizer.zero_grad()
            outputs = model(batch_signals)
            loss = criterion(outputs, batch_labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_count = batch_labels.size(0)
            loss_sum += loss.item() * batch_count  # undo the per-batch mean
            predicted = outputs.argmax(dim=1)
            n_correct += (predicted == batch_labels).sum().item()
            n_seen += batch_count
    return loss_sum / n_seen, n_correct / n_seen


# The signal length is the last axis for both (N, Length) and (N, 1, Length).
input_length = signals.shape[-1]

model = SignalCNN(input_length) # instantiate the model

criterion = nn.CrossEntropyLoss() # loss function
optimizer = optim.Adam(model.parameters(), lr=0.001) # optimizer

epochs = 5 # number of iterations for training the model
# Per-epoch history, kept at module level so later cells can use it.
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for epoch in range(epochs): # train for one epoch, then evaluate on the validation set
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, optimizer)
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)

    val_loss, val_acc = _run_epoch(model, val_loader, criterion)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

Epoch 1/5, Train Loss: 73126.2297, Val Loss: 38434.8008, Train Acc: 0.5227, Val Acc: 0.1818
Epoch 2/5, Train Loss: 23603.7729, Val Loss: 12546.4160, Train Acc: 0.4091, Val Acc: 0.8182
Epoch 3/5, Train Loss: 9898.1824, Val Loss: 10854.3281, Train Acc: 0.6591, Val Acc: 0.8182
Epoch 4/5, Train Loss: 3588.3537, Val Loss: 8240.7061, Train Acc: 0.7500, Val Acc: 0.8182
Epoch 5/5, Train Loss: 2037.0518, Val Loss: 6004.2559, Train Acc: 0.8409, Val Acc: 0.8182


# Conclusion:



The model appears to perform well: over the 5 epochs the training loss goes down and the training accuracy generally goes up, and the validation loss goes down as well, which suggests that it is generalizing rather than overfitting to the dataset. Note, however, that the validation accuracy stays at 0.8182 from epoch 2 onward, so this conclusion should be confirmed with more data. The input to the model is the raw data that was extracted in Project 3, which is used to train the model for 5 epochs. The CNN model, implemented in PyTorch, learns features from this dataset and makes predictions whose accuracy is measured using the loss and the validation set. During training it uses convolutional filters to find patterns in the data and predict the outcome for new inputs. The forward pass produces the model's prediction, and the backward pass lets it adjust its weights according to how large the error is.