In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import os, os.path

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Show which device was selected (rendered as the cell output).
device

device(type='cuda')

In [None]:
# Mount Google Drive under /content/drive; every data path used by the classes
# in this notebook starts with '/content/drive/My Drive/'.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime -- the recorded output of this cell is a ModuleNotFoundError.
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: ignored

In [None]:
# Identifiers of the patients to consider; each one names an ``MSEL_<id>`` folder.
patients = [
    '01575',
    '01828',
    '01709',
    '01870',
    '01842',
    '01844',
    '01097',
    '00182',
    '01676',
    '01849',
    '01808',
    '00501',
    '00172',
    '01763',
    '00502',
]


In [None]:
class Patient():
  """Segments and labels of one patient folder on the mounted drive.

  Attributes:
    fields: names of the eight recorded signals, in the channel order used by
      ``segments``.
    id: patient identifier used to build the ``MSEL_<id>`` folder name.
    length: number of entries found in the patient's ``segments/x`` folder.
    targets: the ``label`` column of the patient's label CSV.
    segments: array of shape ``(length, len(fields), SEGMENT_SAMPLES)``. It
      stays zero-filled until ``loadSegments`` is called.
  """

  # Folder that contains one ``MSEL_<id>`` directory per patient.
  DATA_ROOT = '/content/drive/My Drive/epi_data_folders'
  # Size of the last axis of ``segments``; ``loadSegments`` copies one parquet
  # column into it, so every stored column must have this many values.
  SEGMENT_SAMPLES = 7679

  def __init__(self, id):
    """Read the segment count and labels for patient ``id``.

    The segment buffer is only allocated here. Call ``loadSegments`` to fill
    it from disk (the eager load is intentionally left disabled below).
    """
    self.fields = ["BVP", "EDA", "HR", "TEMP","Mag", "x", "y", "z"]
    self.id = id
    self.length = self.getNumberOfSegments(id)
    self.targets = self.getPatientTarget()
    self.segments = np.zeros((self.length, len(self.fields), self.SEGMENT_SAMPLES))
    #self.loadSegments()

  def __len__(self):
    """Return the number of segments found for this patient."""
    return self.length

  def getNumberOfSegments(self, patient_id):
    """Count the entries in ``segments/x`` of ``patient_id``.

    Returns 0 (after printing a notice) when the folder cannot be listed.
    Only filesystem errors are treated as "missing data"; anything else,
    including ``KeyboardInterrupt``, propagates to the caller.
    """
    path = '{}/MSEL_{}/segments/x/'.format(self.DATA_ROOT, patient_id)
    try:
      return len(os.listdir(path))
    except OSError:
      print("missing data for patient: {}".format(patient_id))
      return 0

  def getPatientData(self, field, number):
    """Return column ``field`` of segment ``number`` as a NumPy array."""
    path = '{}/MSEL_{}/segments/{}/segment{}.parquet'.format(
        self.DATA_ROOT, self.id, field, number)

    return pd.read_parquet(path, engine="auto")[field].to_numpy()

  def getPatientTarget(self):
    """Return the ``label`` column of the patient's label CSV as an array.

    Only ``label`` is read, so the file does not need to contain any of the
    other bookkeeping columns.
    """
    path = '{}/MSEL_{}/MSEL_{}_labels_split.csv'.format(self.DATA_ROOT, self.id, self.id)

    return pd.read_csv(path)["label"].to_numpy()

  def loadSegments(self):
    """Fill ``segments`` from disk, one parquet file per (segment, field)."""
    print("loading data for patient: {}".format(self.id))
    for i in range(self.length):
      for j, field in enumerate(self.fields):
        self.segments[i, j, :] = self.getPatientData(field, i)

  def getTarget(self, i):
    """Return the label at position ``i``."""
    return self.targets[i]

  def getTargetRange(self, i, j):
    """Return the labels at positions ``i`` (inclusive) to ``j`` (exclusive)."""
    return self.targets[i:j]

In [None]:
class Patients():
  """Collection of ``Patient`` objects plus a flat index over their windows.

  A "window" is a run of ``seq_len`` consecutive segments of one patient.

  Attributes:
    seq_len: number of consecutive segments per window.
    patient_ids: sorted ids of the patients that passed ``filterPatients``.
    patients: mapping from patient id to its ``Patient`` object.
    patient_lengths: number of windows per patient, aligned with
      ``patient_ids``.
    patient_indexes: flat index of the first window of each patient, aligned
      with ``patient_ids``.
    length: total number of segments over all kept patients.
  """

  def __init__(self, patient_ids, seq_len=60):
    """Load every usable patient in ``patient_ids`` and build the window index."""
    self.patients = {}
    self.seq_len = seq_len

    # Keep only the patients that have enough segments, in a stable order.
    self.patient_ids = sorted(self.filterPatients(patient_ids))

    for patient_id in self.patient_ids:
      self.patients[patient_id] = Patient(patient_id)

    # A patient with L segments has L - seq_len + 1 windows of seq_len segments.
    self.patient_lengths = [
        self.patients[patient_id].length - self.seq_len + 1
        for patient_id in self.patient_ids
    ]

    # Running sum of the window counts: where each patient starts in the flat index.
    self.patient_indexes = []
    offset = 0
    for patient_length in self.patient_lengths:
      self.patient_indexes.append(offset)
      offset += patient_length

    print("patient indexes")
    print(self.patient_indexes)

    self.length = self.getNumberOfSegmentsForAllPatients(self.patients)
    print(self.length)

  def __len__(self):
    """Return the total number of segments over all kept patients."""
    return self.length

  def getNumberOfSegmentsForAllPatients(self, patients):
    """Sum the segment counts of the patients whose ids are in ``patients``."""
    return sum(self.getPatient(patient_id).length for patient_id in patients)

  def getPatient(self, id):
    """Return the ``Patient`` stored under ``id``."""
    return self.patients[id]

  def getNumberOfPatients(self):
    """Return how many patients were kept."""
    return len(self.patients)

  def filterPatients(self, patients):
    """Return the ids that have at least ``seq_len`` segments on disk.

    Segment files are numbered from 0, so the presence of
    ``segment<seq_len - 1>.parquet`` is used as the check.
    """
    last_needed = self.seq_len - 1
    filtered_list = []

    for patient_id in patients:
      path = '/content/drive/My Drive/epi_data_folders/MSEL_{}/segments/x/segment{}.parquet'.format(
          patient_id, last_needed)
      if os.path.isfile(path):
        filtered_list.append(patient_id)

    return filtered_list

  def getIndex(self, target):
    """Return the position in ``patient_ids`` of the patient owning ``target``.

    ``target`` is a flat window index. The owner is the last patient whose
    start offset in ``patient_indexes`` is less than or equal to ``target``,
    so an index that equals a start offset belongs to the patient starting
    there. Targets past the last start offset map to the last patient.
    Returns -1 when no patient starts at or before ``target``.
    """
    from bisect import bisect_right

    return bisect_right(self.patient_indexes, target) - 1

In [None]:
class TimeseriesDataset(torch.utils.data.Dataset):
  """Sliding windows of ``seq_len`` consecutive segments over several patients.

  Item ``k`` is the ``k``-th window when the windows of all patients are laid
  end to end in ``patient_ids`` order; windows never span two patients.
  """

  def __init__(self, patients, seq_len=60):
    """Load the given patient ids with the same window length as the dataset."""
    self.patients = Patients(patients, seq_len)
    self.seq_len = seq_len
    self.fields = ["BVP", "EDA", "HR", "TEMP","Mag", "x", "y", "z"]

  def _windowIndex(self):
    """Return ``(starts, total)``, computed once and cached.

    ``starts[p]`` is the dataset index of the first window of the ``p``-th
    patient and ``total`` is the number of windows overall.
    """
    cached = getattr(self, "_window_index", None)
    if cached is None:
      starts = []
      total = 0
      for patient_id in self.patients.patient_ids:
        starts.append(total)
        n_segments = self.patients.getPatient(patient_id).length
        # L segments give L - seq_len + 1 windows; too-short patients give none.
        total += max(0, n_segments - self.seq_len + 1)
      cached = (starts, total)
      self._window_index = cached
    return cached

  def __len__(self):
    """Return the total number of windows over all patients."""
    return self._windowIndex()[1]

  def __getitem__(self, relative_index):
    """Return ``(sequence, targets)`` for window ``relative_index``.

    ``sequence`` is the slice ``patient.segments[start:start + seq_len]`` and
    ``targets`` the labels of the same segments, where ``start`` is the
    window's position inside its own patient.

    Raises:
      IndexError: if ``relative_index`` is outside ``[0, len(self))``.
    """
    from bisect import bisect_right

    starts, total = self._windowIndex()
    if not 0 <= relative_index < total:
      raise IndexError("window index {} out of range for {} windows".format(relative_index, total))

    # Last patient whose first window is at or before the requested index.
    patient_position = bisect_right(starts, relative_index) - 1
    patient_id = self.patients.patient_ids[patient_position]
    patient = self.patients.getPatient(patient_id)

    # Convert the flat index into an offset inside this patient's segments.
    start = relative_index - starts[patient_position]
    end = start + self.seq_len

    # The data comes from the in-memory buffer, which holds zeros until
    # patient.loadSegments() has been run.
    sequence = patient.segments[start:end, :, :]

    return (sequence, patient.getTargetRange(start, end))

In [None]:
# Build the dataset over every listed patient and batch it in order (no shuffling).
train_dataset = TimeseriesDataset(patients)
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=False, batch_size=64)

# Walk the loader once, printing each batch number with the shapes of its two tensors.
for batch_number, batch in enumerate(train_loader):
    sequences, targets = batch[0], batch[1]
    print(batch_number, sequences.shape, targets.shape)

loading data for patient: 00182


In [None]:
class RNN(nn.Module):
    """Stacked LSTM followed by a linear classifier on the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each LSTM layer.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output scores per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``.

        Args:
            x: tensor of shape ``(batch, seq_length, input_size)``.
        """
        # Zero initial hidden and cell states, created with the input's own
        # device and dtype so the module does not depend on a global `device`
        # and also works after `.double()` / `.half()` conversions.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

# Four stacked LSTM layers of width 128 over 8 input features, scoring 3 classes;
# the module is then moved to the selected device.
model = RNN(
    input_size=8,
    hidden_size=128,
    num_layers=4,
    num_classes=3,
)
model = model.to(device)

In [None]:
learning_rate = 0.001
num_epochs = 10
sequence_length = 60
input_size = 8


def prepare_batch(segments, labels):
    """Convert one DataLoader batch into LSTM inputs and class targets.

    Args:
        segments: tensor of shape (batch, sequence_length, input_size, samples),
            as produced by batching the dataset's segment windows.
        labels: tensor of shape (batch, sequence_length) with one label per
            segment of the window.

    Returns:
        ``(inputs, targets)`` on ``device``: float32 inputs of shape
        (batch, sequence_length, input_size) and int64 targets of shape (batch,).
    """
    # NOTE(review): each segment is summarised by its per-channel mean so that
    # every time step carries `input_size` features. Flattening the raw
    # 4-D batch with reshape(-1, sequence_length, input_size) would mix the
    # channel and sample axes. Confirm this is the intended feature extraction.
    # The reduction is done before the device transfer to keep the copy small.
    features = segments.mean(dim=-1).reshape(-1, sequence_length, input_size)

    # NOTE(review): the model emits one prediction per sequence (taken from the
    # last time step), so the label of the last segment is used as the target.
    # Confirm this is the label the model should predict.
    targets = labels[:, -1]

    # The model weights are float32 and CrossEntropyLoss expects int64 class indices.
    return features.float().to(device), targets.long().to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (segments, labels) in enumerate(train_loader):
        inputs, targets = prepare_batch(segments, labels)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Evaluation sketch, kept disabled because no `test_loader` is defined in this
# notebook. It is written as comments rather than a string literal so that the
# cell does not echo it as output.
#
# model.eval()
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for segments, labels in test_loader:
#         inputs, targets = prepare_batch(segments, labels)
#         outputs = model(inputs)
#         _, predicted = torch.max(outputs.data, 1)
#         total += targets.size(0)
#         correct += (predicted == targets).sum().item()
#
#     print('Test Accuracy of the model: {} %'.format(100 * correct / total))
#
# # Save the model checkpoint
# torch.save(model.state_dict(), 'model.ckpt')