In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import os, os.path

In [None]:
"""
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
"""

'\nif torch.cuda.is_available():\n  device = torch.device("cuda")\nelse:\n  device = torch.device("cpu")\n'

In [None]:
# All tensors and the model are placed on the CPU; the CUDA auto-detection
# snippet in the previous cell is disabled (kept only as a string literal).
device = torch.device("cpu")

In [None]:
# Mount Google Drive (Google Colab only); every data path used below lives
# under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Candidate patient ids; each one is interpolated into an MSEL_<id> folder name.
patients = [
    '01575', '01828', '01709', '01870', '01842',
    '01844', '01097', '00182', '01676', '01849',
    '01808', '00501', '00172', '01763', '00502',
]

In [None]:
class Patient():
  """Segments and labels of a single patient, read from the Drive data folder.

  Attributes:
    fields: names of the recorded signals; each has its own sub-folder on disk.
    id: patient identifier used to build the ``MSEL_<id>`` paths.
    targets: 1-D array holding the ``label`` column of the patient's labels CSV.
    max_segments: upper bound on the number of segments kept (-1 = no bound).
    length: number of segments held for this patient.
    segments: float array of shape (length, len(fields), SEGMENT_LENGTH);
      stays zero-filled for segments/fields that could not be loaded.
  """

  # Root folder that contains one MSEL_<id> directory per patient.
  DATA_ROOT = '/content/drive/My Drive/epi_data_folders'
  # Number of samples kept from every segment file (longer files are truncated).
  SEGMENT_LENGTH = 7679

  def __init__(self, id, mode= "Train", max_segments = -1):
    """Read the labels and, when ``mode == "Train"``, the segment data.

    Args:
      id: patient identifier (the ``<id>`` part of ``MSEL_<id>``).
      mode: segment files are only read when this equals ``"Train"``;
        otherwise ``segments`` is left zero-filled.
      max_segments: cap on the number of segments; -1 loads all of them.
    """
    self.fields = ["BVP", "EDA", "HR", "TEMP","Mag", "x", "y", "z"]
    self.id = id
    self.targets = self.getPatientTarget()

    #if max segments is set to -1 load all segments
    self.max_segments = max_segments
    self.length = self.getNumberOfSegments(id)
    if self.max_segments != -1:
      # Clamp at 0 so any other negative cap cannot produce a negative array dimension.
      self.length = max(0, min(self.max_segments, self.length))

    self.segments = np.zeros((self.length, len(self.fields), self.SEGMENT_LENGTH))

    if mode == "Train":
      self.loadSegments()

  def __len__(self):
    """Return the number of segments held for this patient."""
    return self.length

  def getNumberOfSegments(self, patient_id):
    """Count the entries in the patient's ``segments/x`` folder.

    Returns 0 (after printing a notice) when the folder cannot be listed.
    """
    path = '{}/MSEL_{}/segments/x/'.format(self.DATA_ROOT, patient_id)
    try:
      return len(os.listdir(path))
    except OSError:
      # Only filesystem errors mean "no data"; anything else should surface.
      print("missing data for patient: {}".format(patient_id))
      return 0

  def getPatientData(self, field, number):
    """Read column ``field`` of segment ``number`` as a NumPy array."""
    path = '{}/MSEL_{}/segments/{}/segment{}.parquet'.format(self.DATA_ROOT, self.id, field, number)

    return pd.read_parquet(path, engine="auto")[field].to_numpy()

  def getPatientTarget(self):
    """Return the ``label`` column of the patient's labels CSV as a NumPy array."""
    path = '{}/MSEL_{}/MSEL_{}_labels_split.csv'.format(self.DATA_ROOT, self.id, self.id)

    # Only the label column is used, so the other columns need not be dropped first.
    return pd.read_csv(path)["label"].to_numpy()

  def loadSegments(self):
    """Fill ``segments`` from disk, one (segment, field) pair at a time.

    Loading is best effort: a pair that cannot be read, or that holds fewer
    than SEGMENT_LENGTH samples, is reported and left as zeros.
    """
    print("loading data for patient: {}".format(self.id))
    for i in range(self.length):
      for j, field in enumerate(self.fields):
        try:
          values = self.getPatientData(field, i)
        except Exception as err:
          # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working
          # during the long read from Drive.
          print("could not load field {} segment {} for patient {}: {}".format(field, i, self.id, err))
          continue

        if len(values) < self.SEGMENT_LENGTH:
          print("whoops! Patient data for field {} and segment {} is too small.".format(field, i))
          continue

        self.segments[i, j, :] = values[:self.SEGMENT_LENGTH]

  def getTarget(self, number):
    """Return the label of segment ``number``."""
    return self.targets[number]

  def getTargetRange(self, i, j):
    """Return the labels of segments ``i`` (inclusive) to ``j`` (exclusive)."""
    return self.targets[i:j]

In [None]:
class Patients():
  """A set of Patient objects plus the offsets needed to index windows across them.

  Attributes:
    patients: dict mapping patient id -> Patient.
    patient_ids: sorted ids that passed ``filterPatients``.
    seq_len: number of consecutive segments in one window.
    seq_step: multiplier applied to a dataset index in ``getPatientIndex``.
    length: total number of segments over all patients.
    patient_lengths: per patient, the number of window start positions.
    patient_accumulated_lengths: running sum of ``patient_lengths`` with a
      leading 0; entry ``k`` is the global offset of patient ``k``.
  """

  def __init__(self, patient_ids, seq_len=10, seq_step=10, mode="Train", max_segments=-1):
    """Filter the ids, build one Patient per remaining id and the offset table.

    Args:
      patient_ids: list of patients by ids.
      seq_len: window length in segments.
      seq_step: stride factor used when mapping dataset indices to windows.
      mode: forwarded to ``Patient``.
      max_segments: forwarded to ``Patient``.
    """
    self.patients = {}
    self.seq_len = seq_len
    self.seq_step = seq_step

    print("List of patients received: {}".format(patient_ids))
    self.patient_ids = self.filterPatients(patient_ids)
    self.patient_ids.sort()
    print("List of patients filtered: {}".format(self.patient_ids))

    for patient_id in self.patient_ids:
      self.patients[patient_id] = Patient(patient_id, mode=mode, max_segments=max_segments)

    self.length = self.getNumberOfSegmentsForAllPatients(self.patients)

    self.patient_lengths = [self._windowCount(self.patients[patient_id].length) for patient_id in self.patient_ids]

    self.patient_accumulated_lengths = [0]
    for patient_length in self.patient_lengths:
      self.patient_accumulated_lengths.append(patient_length + self.patient_accumulated_lengths[-1])

    print(self.patient_accumulated_lengths)

  def _windowCount(self, num_segments):
    """Number of window start positions in a run of ``num_segments`` segments.

    Clamped at 0: a patient with fewer than ``seq_len`` segments would
    otherwise contribute a negative count and make the accumulated offsets
    decrease, which breaks the lookup in ``getPatientIndex``.
    """
    return max(0, num_segments - (self.seq_len - 1))

  def __len__(self):
    """Return the total number of segments over all patients."""
    return self.length

  def getNumberOfSegmentsForAllPatients(self, patients):
    """Sum the ``length`` of every patient whose id is yielded by ``patients``.

    The ids are looked up in ``self.patients``.
    """
    return sum(self.getPatient(patient_id).length for patient_id in patients)

  def getPatient(self, id):
    """Return the Patient stored under ``id``."""
    return self.patients[id]

  def getNumberOfPatients(self):
    """Return how many patients are held."""
    return len(self.patients)

  def filterPatients(self, patients):
    """Return the ids for which ``segments/x/segment59.parquet`` exists on disk."""
    # NOTE(review): segment59 presumably stands for "has at least 60 segments" — confirm.
    filtered_list = []

    for patient_id in patients:
      path = '/content/drive/My Drive/epi_data_folders/MSEL_{}/segments/x/segment59.parquet'.format(patient_id)
      if os.path.isfile(path):
        filtered_list.append(patient_id)

    return filtered_list

  def getPatientIndex(self, value):
    """Map a dataset index to the position of its patient in ``patient_ids``.

    The index is scaled by ``seq_step`` and located in the accumulated
    offsets. Returns -1 (after printing a notice) when it lies past the end.
    """
    value = value * self.seq_step

    for i in range(1, len(self.patient_accumulated_lengths)):
      if value < self.patient_accumulated_lengths[i]:
        return i - 1

    print("Something weird might be happening. Value: {}".format(value))
    return -1



In [None]:
class TimeseriesDataset(torch.utils.data.Dataset):
  """Sliding windows of ``seq_len`` consecutive segments taken per patient.

  Item ``k`` is the window whose global start position is ``k * seq_step``;
  the position is translated to a patient and a local start via the
  accumulated offsets kept by ``Patients``.
  """

  def __init__(self, patients, seq_len=10, seq_step=10, mode="Train", max_segments_per_patient=-1):
    """Build the underlying ``Patients`` collection.

    Args:
      patients: list of patient ids.
      seq_len: number of segments per window.
      seq_step: distance, in window start positions, between two items.
      mode: forwarded to ``Patients``.
      max_segments_per_patient: forwarded to ``Patients`` as ``max_segments``.
    """
    self.patients = Patients(patients, seq_len, seq_step, mode, max_segments_per_patient)
    self.seq_len = seq_len
    self.seq_step = seq_step
    self.fields = ["BVP", "EDA", "HR", "TEMP","Mag", "x", "y", "z"]

  def __len__(self):
    """Return the number of items.

    Computed as (total window start positions) // seq_step.
    NOTE(review): floor division drops the final start position when the
    total is not a multiple of seq_step — confirm that this is intended.
    """
    return (self.patients.length - (self.seq_len-1) * self.patients.getNumberOfPatients()) // self.seq_step

  def __getitem__(self, relative_index):
    """Return ``(sequence, targets)`` for item ``relative_index``.

    ``sequence`` is the slice ``segments[start:end]`` of the owning patient and
    ``targets`` the labels of the same range. Negative indices count from the
    end.

    Raises:
      IndexError: if the index is outside ``[-len(self), len(self))`` or does
        not map to any patient.
    """
    size = len(self)
    if relative_index < 0:
      relative_index += size
    if not 0 <= relative_index < size:
      raise IndexError("dataset index out of range")

    patient_index = self.patients.getPatientIndex(relative_index)
    if patient_index < 0:
      # -1 would otherwise be used as a list index and silently select the last patient.
      raise IndexError("dataset index {} does not map to any patient".format(relative_index))
    patient_accumulated = self.patients.patient_accumulated_lengths[patient_index]

    start = relative_index * self.seq_step - patient_accumulated
    end = start + self.seq_len

    patient_id = self.patients.patient_ids[patient_index]
    patient = self.patients.getPatient(patient_id)

    sequence = patient.segments[start:end, :, :]

    return (sequence, patient.getTargetRange(start, end))

In [None]:
# Training data: windows of 10 consecutive segments; mode "Train" makes each
# Patient read its segment files.
train_dataset = TimeseriesDataset(
    patients,
    seq_len=10,
    seq_step=5,
    mode="Train",
)
# Batches of 64 windows, served in dataset order (shuffling is off).
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=False,
)

List of patients received: ['01575', '01828', '01709', '01870', '01842', '01844', '01097', '00182', '01676', '01849', '01808', '00501', '00172', '01763', '00502']
List of patients filtered: ['00172', '00182', '01097', '01575', '01709', '01808', '01842', '01844', '01870']
loading data for patient: 00172
loading data for patient: 00182
loading data for patient: 01097
loading data for patient: 01575
loading data for patient: 01709
loading data for patient: 01808


In [None]:
# Print, for every batch, how many labels fall into each class.
# sequences: (batch, seq_len, 8, 7679) = (batch, segments per window, fields, samples per segment)
# labels:    (batch, seq_len), one label per segment in the window
count_formats = (
    ("Interictal count: {}", 2),
    ("Preictal count: {}", 1),
    ("Ictal count: {}", 0),
)
for batch_number, batch in enumerate(train_loader):
    sequences, labels = batch
    for message, class_id in count_formats:
        print(message.format(np.count_nonzero(labels == class_id)))

Interictal count: 629
Preictal count: 11
Ictal count: 0
Interictal count: 525
Preictal count: 2
Ictal count: 113
Interictal count: 100
Preictal count: 0
Ictal count: 0


In [None]:
class RNN(nn.Module):
    """Stacked LSTM followed by a linear head that classifies the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: LSTM hidden state size.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores for the last time step of every sequence.

        Args:
            x: tensor of shape (batch_size, seq_length, input_size).

        Returns:
            Tensor of shape (batch_size, num_classes, 1). Dimension 1 is the
            class axis, which is what ``nn.CrossEntropyLoss`` expects when the
            targets have shape (batch_size, 1) and what ``torch.max(out, 1)``
            reduces over. Previously the head was applied to every time step,
            so dimension 1 was the time axis and the loss treated time steps
            as classes.
        """
        # No explicit (h0, c0): nn.LSTM starts from zero states on the input's own
        # device, so forward no longer depends on the notebook-global `device`.
        out, _ = self.lstm(x)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        scores = self.fc(out[:, -1, :])  # (batch_size, num_classes)
        return scores.unsqueeze(-1)

# input_size = 8 fields * 7679 samples per segment (each segment is flattened);
# num_classes = 3 because the labels take the values 0, 1 and 2.
model = RNN(input_size=61432, hidden_size=128, num_layers=4, num_classes=3).to(device)

In [None]:
learning_rate = 0.001
num_epochs = 20
# NOTE(review): the next two values are not used anywhere in this cell and do not
# match the dataset built above (seq_len=10) — confirm whether they can be dropped.
sequence_length = 60
input_size = 8

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
# Make sure the model is in training mode even if an evaluation cell ran before.
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    steps_done = 0
    for i, (sequence, labels) in enumerate(train_loader):
        # The target of a window is the label of its last segment, shaped (batch, 1).
        labels = labels[:, -1]
        labels = labels.long().to(device)
        labels = labels[:, None]

        # Replace NaNs, then flatten (fields, samples) into one feature vector per segment.
        sequence = torch.nan_to_num(sequence, nan=-1.0)
        sequence = torch.flatten(sequence, start_dim=2).float().to(device)
        # Forward pass
        outputs = model(sequence)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        steps_done += 1

    if steps_done == 0:
        # Without this guard the log line below would read undefined (or stale) loop variables.
        print('train_loader produced no batches; nothing to train on.')
        break

    # Loss is the mean over the epoch's batches rather than the last batch only.
    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
            .format(epoch+1, num_epochs, steps_done, total_step, epoch_loss / steps_done))


Epoch [1/20], Step [3/3], Loss: 2.2643
Epoch [2/20], Step [3/3], Loss: 2.1364
Epoch [3/20], Step [3/3], Loss: 1.8506
Epoch [4/20], Step [3/3], Loss: 1.8269
Epoch [5/20], Step [3/3], Loss: 1.8384
Epoch [6/20], Step [3/3], Loss: 1.6226
Epoch [7/20], Step [3/3], Loss: 1.5162
Epoch [8/20], Step [3/3], Loss: 1.4920
Epoch [9/20], Step [3/3], Loss: 1.4110
Epoch [10/20], Step [3/3], Loss: 1.3723
Epoch [11/20], Step [3/3], Loss: 1.2744
Epoch [12/20], Step [3/3], Loss: 1.1487
Epoch [13/20], Step [3/3], Loss: 0.9377
Epoch [14/20], Step [3/3], Loss: 0.7127
Epoch [15/20], Step [3/3], Loss: 0.5365
Epoch [16/20], Step [3/3], Loss: 0.4190
Epoch [17/20], Step [3/3], Loss: 0.3417
Epoch [18/20], Step [3/3], Loss: 0.2456
Epoch [19/20], Step [3/3], Loss: 0.1937
Epoch [20/20], Step [3/3], Loss: 0.1271


In [None]:
# Evaluation data: one patient, capped at 240 segments. mode stays "Train"
# because that is the value that makes Patient read the segment files.
# NOTE(review): '00182' is also in the training patient list — confirm that
# evaluating on a training patient is intended.
test_patients = ['00182']
test_dataset = TimeseriesDataset(
    test_patients,
    seq_len=10,
    seq_step=5,
    mode="Train",
    max_segments_per_patient=240,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

List of patients received: ['00182']
List of patients filtered: ['00182']
loading data for patient: 00182
[0, 231]


In [None]:
# Test the model
model.eval()
with torch.no_grad():
  correct = 0
  total = 0
  for sequence, labels in test_loader:
    # Same preprocessing as in training: NaNs -> -1, flatten each segment.
    sequence = torch.nan_to_num(sequence, nan=-1.0)
    sequence = torch.flatten(sequence, start_dim=2).float().to(device)

    # Target = label of the last segment in the window, shaped (batch, 1).
    labels = labels[:, -1]
    labels = labels.long().to(device)
    labels = labels[:, None]

    outputs = model(sequence)
    _, predicted = torch.max(outputs, 1)
    # Guard against silent broadcasting in the comparison below.
    assert predicted.shape == labels.shape, (predicted.shape, labels.shape)
    print(predicted)
    print(labels)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  if total > 0:
    print('Test Accuracy of the model on {} test sequences: {} %'.format(total, 100 * correct / total))
  else:
    # Avoid a ZeroDivisionError when the loader is empty.
    print('Test loader produced no sequences; accuracy is undefined.')

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')


tensor([[2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [1],
        [2],
        [2],
        [2],
        [1],
        [2],
        [2],
        [2],
        [2]])
tensor([[2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2]