In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import os, os.path

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
#device = torch.device("cpu")

In [None]:
# Mount Google Drive at /content/drive. The data paths used further down in
# this notebook all start with '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Patient ids used for this run.
# Additional ids, currently disabled:
#   '01828', '01870', '01842', '01844', '01097', '01676', '01849', '01808',
#   '00501', '00172', '01763', '00502'
patients = [
    '01709',
    '00182',
    '01575',
]


In [None]:
class Patient():
  """Segment data and per-segment labels for a single patient.

  Attributes:
    fields:   names of the recorded signals; each one has its own folder of
              ``segment<N>.parquet`` files.
    id:       patient id as used in the ``MSEL_<id>`` folder name.
    length:   number of entries found in the patient's ``segments/x`` folder.
    targets:  numpy array read from the ``label`` column of the labels CSV.
    segments: array of shape (length, len(fields), SEGMENT_LENGTH). It is
              allocated zero-filled; only ``loadSegments()`` fills it from disk.
  """

  # Root folder that contains one ``MSEL_<id>`` directory per patient.
  DATA_ROOT = '/content/drive/My Drive/epi_data_folders'
  # Signals stored per segment, in the channel order used by ``segments``.
  FIELDS = ["BVP", "EDA", "HR", "TEMP", "Mag", "x", "y", "z"]
  # Number of samples reserved per segment and field.
  SEGMENT_LENGTH = 200

  def __init__(self, id):
    self.fields = list(self.FIELDS)
    self.id = id
    self.length = self.getNumberOfSegments(id)
    self.targets = self.getPatientTarget()
    # Channel count follows the field list instead of a hard-coded 8.
    self.segments = np.zeros((self.length, len(self.fields), self.SEGMENT_LENGTH))
    # Loading from disk is deliberately left to the caller:
    #self.loadSegments()

  def __len__(self):
    return self.length

  def getNumberOfSegments(self, patient_id):
    """Count the entries in the patient's ``segments/x`` folder.

    Returns 0 (after printing a notice) when the folder cannot be listed.
    """
    path = '{}/MSEL_{}/segments/x/'.format(self.DATA_ROOT, patient_id)
    try:
      return len(os.listdir(path))
    except OSError:
      # Only filesystem problems are treated as "no data"; any other error
      # still propagates instead of being swallowed.
      print("missing data for patient: {}".format(patient_id))
      return 0

  def getPatientData(self, field, number):
    """Read column ``field`` of segment ``number`` and return it as a numpy array."""
    path = '{}/MSEL_{}/segments/{}/segment{}.parquet'.format(self.DATA_ROOT, self.id, field, number)

    return pd.read_parquet(path, engine="auto")[field].to_numpy()

  def getPatientTarget(self):
    """Return the ``label`` column of the patient's labels CSV as a numpy array."""
    path = '{0}/MSEL_{1}/MSEL_{1}_labels_split.csv'.format(self.DATA_ROOT, self.id)

    # Only "label" is used, so the other columns are simply ignored rather
    # than dropped by name (which fails when one of them is absent).
    return pd.read_csv(path)["label"].to_numpy()

  def loadSegments(self):
    """Fill ``self.segments`` from the parquet files, one field per channel."""
    print("loading data for patient: {}".format(self.id))
    for i in range(self.length):
      for j, field in enumerate(self.fields):
        self.segments[i, j, :] = self.getPatientData(field, i)

  def getTarget(self, number):
    """Return the label of segment ``number``."""
    return self.targets[number]

  def getTargetRange(self, i, j):
    """Return the labels of segments ``i`` (inclusive) to ``j`` (exclusive)."""
    return self.targets[i:j]

In [None]:
class Patients():
  """A set of ``Patient`` objects plus the offsets needed to map a flat
  window index onto an individual patient.

  Attributes:
    patients:  dict mapping patient id -> ``Patient``.
    patient_ids: sorted ids that passed ``filterPatients``.
    length:    total number of segments over all kept patients.
    patient_lengths: per patient, the number of window start positions
               (``length - (seq_len - 1)``, never below 0).
    patient_accumulated_lengths: running sum of ``patient_lengths`` with a
               leading 0, so entry ``k`` is the first global start position
               belonging to patient ``k``.
  """

  def __init__(self, patient_ids, seq_len=10, seq_step=10):
    self.patients = {}
    self.seq_len = seq_len
    self.seq_step = seq_step

    # patient_ids: list of patients by ids
    self.patient_ids = self.filterPatients(patient_ids)
    self.patient_ids.sort()

    for patient_id in self.patient_ids:
      self.patients[patient_id] = Patient(patient_id)

    self.length = self.getNumberOfSegmentsForAllPatients(self.patients)

    # A patient with fewer than seq_len segments contributes no windows.
    # Without the clamp its count would be negative and the running sums
    # below would stop being monotonic, corrupting every later lookup.
    self.patient_lengths = [
        max(0, self.patients[patient_id].length - (self.seq_len - 1))
        for patient_id in self.patient_ids
    ]

    self.patient_accumulated_lengths = [0]
    for patient_length in self.patient_lengths:
      self.patient_accumulated_lengths.append(patient_length + self.patient_accumulated_lengths[-1])

    print(self.patient_accumulated_lengths)

  def __len__(self):
    return self.length

  def getNumberOfSegmentsForAllPatients(self, patients):
    """Sum ``length`` over the patients whose ids are the keys of ``patients``."""
    return sum(self.getPatient(patient_id).length for patient_id in patients)

  def getPatient(self, id):
    """Return the ``Patient`` stored under ``id``."""
    return self.patients[id]

  def getNumberOfPatients(self):
    """Return how many patients are held."""
    return len(self.patients)

  def filterPatients(self, patients):
    """Keep only the ids for which ``segments/x/segment59.parquet`` exists."""
    filtered_list = []

    for patient_id in patients:
      path = '/content/drive/My Drive/epi_data_folders/MSEL_{}/segments/x/segment59.parquet'.format(patient_id)
      if os.path.isfile(path):
        filtered_list.append(patient_id)

    return filtered_list

  def getPatientIndex(self, value):
    """Return the position (in ``patient_ids``) of the patient owning window ``value``.

    ``value`` is multiplied by ``seq_step`` and compared against the
    accumulated window counts. Returns -1, after printing a notice, when the
    scaled value lies outside ``[0, patient_accumulated_lengths[-1])``.
    """
    value = value * self.seq_step

    # Negative positions are out of range too; previously they fell through
    # to the first patient.
    if value >= 0:
      for i in range(1, len(self.patient_accumulated_lengths)):
        if value < self.patient_accumulated_lengths[i]:
          return i - 1

    print("Something weird might be happening. Value: {}".format(value))
    return -1



In [None]:
class TimeseriesDataset(torch.utils.data.Dataset):
  """Map-style dataset of fixed-length windows of consecutive segments.

  Item ``k`` is a tuple ``(sequence, targets)``: ``sequence`` is the slice
  ``segments[start:start + seq_len]`` of one patient, ``targets`` the labels
  of the same segments. Windows never span two patients.

  Args:
    patients: list of patient ids.
    seq_len:  number of consecutive segments per window.
    seq_step: distance between the start positions of consecutive items.
  """

  def __init__(self, patients, seq_len=10, seq_step=10):
    self.patients = Patients(patients, seq_len, seq_step)
    self.seq_len = seq_len
    self.seq_step = seq_step
    self.fields = ["BVP", "EDA", "HR", "TEMP","Mag", "x", "y", "z"]

  def __len__(self):
    window_starts = self.patients.length - (self.seq_len - 1) * self.patients.getNumberOfPatients()
    # __len__ must not be negative, which could happen for very short patients.
    return max(0, window_starts // self.seq_step)

  def __getitem__(self, relative_index):
    size = len(self)
    # Support negative indices like a regular sequence.
    if relative_index < 0:
      relative_index += size
    # Raising IndexError lets plain iteration over the dataset terminate and
    # stops out-of-range indices from silently returning the wrong slice.
    if not 0 <= relative_index < size:
      raise IndexError("index {} is out of range for a dataset of size {}".format(relative_index, size))

    patient_index = self.patients.getPatientIndex(relative_index)
    patient_accumulated = self.patients.patient_accumulated_lengths[patient_index]

    # Convert the global start position into one local to the owning patient.
    start = relative_index * self.seq_step - patient_accumulated
    end = start + self.seq_len

    patient_id = self.patients.patient_ids[patient_index]
    patient = self.patients.getPatient(patient_id)

    sequence = patient.segments[start:end, :, :]

    return (sequence, patient.getTargetRange(start, end))

Worked example with 4 patients

Segments per patient: [500, 100, 700, 1000]

Cumulative segment counts: [500, 600, 1300, 2300]

total_length = 2300

In [None]:
# Build the windowed dataset (10 segments per window, window starts 10 apart)
# and wrap it in a DataLoader that yields batches of 64 in dataset order.
train_dataset = TimeseriesDataset(patients, seq_len=10, seq_step=10)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 64, shuffle = False)


"""
batch_size: 4
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

[[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6], [4, 5, 6, 7]]
"""

# Sanity check: print the shape of every batch the loader produces.
for i, d in enumerate(train_loader):
    # d[0] is x. The recorded run printed (64, 10, 8, 200) for full batches,
    #   presumably (batch_size, seq_len, fields, samples per segment).
    # d[1] is y. The recorded run printed (64, 10), presumably (batch_size, seq_len).
    # TODO: one-hot encode y
    print(i, d[0].shape, d[1].shape)

[0, 1297, 3469, 7218]
0 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
1 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
2 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
3 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
4 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
5 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
6 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
7 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
8 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
9 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
10 torch.Size([64, 10, 8, 200]) torch.Size([64, 10])
11 torch.Size([17, 10, 8, 200]) torch.Size([17, 10])


In [None]:
#CNN model
#creating model

class Net(nn.Module):
    """Small 1-D CNN with a single-value output head.

    Layer stack: Conv1d(8->32, k=2) -> ReLU -> Conv1d(32->64, k=2) -> ReLU
    -> MaxPool1d(2) -> Flatten -> Linear(->128) -> ReLU -> Linear(128->1).
    This follows the Keras sketch that used to be kept here as a comment
    (stacked Conv1D, MaxPooling1D, Flatten, Dense, Dense(1)).

    Args:
        input_length: number of samples along the last axis of the input.
            It only determines the width of the first linear layer; the
            default of 14 gives the 384 features that were hard-coded before.

    Input:  tensor of shape (B, 8, input_length).
    Output: tensor of shape (B, 1).
    """

    def __init__(self, input_length=14):
        super(Net, self).__init__()

        # Each kernel-2 convolution without padding removes one sample and
        # the kernel-2 max-pool halves (floors) what is left.
        pooled_length = (input_length - 2) // 2
        if pooled_length < 1:
            raise ValueError("input_length must be at least 4, got {}".format(input_length))
        flat_features = 64 * pooled_length

        self.net = nn.Sequential()
        # input: B x 8 x L
        self.net.add_module('cv1', nn.Conv1d(in_channels=8, out_channels=32, kernel_size=2, stride=1, padding=0, dilation=1))
        # B x 32 x (L-1)
        self.net.add_module('rl1', nn.ReLU())
        self.net.add_module('cv2', nn.Conv1d(in_channels=32, out_channels=64, kernel_size=2, stride=1, padding=0, dilation=1))
        # B x 64 x (L-2)
        self.net.add_module('rl2', nn.ReLU())
        self.net.add_module('mp2', nn.MaxPool1d(kernel_size=2, stride=None, padding=0, dilation=1))
        # B x 64 x ((L-2)//2)
        self.net.add_module('fl1', nn.Flatten())
        # B x flat_features
        self.net.add_module('fc1', nn.Linear(in_features=flat_features, out_features=128))
        self.net.add_module('rl3', nn.ReLU())
        # The head needs its own name: registering it as 'fc1' again replaced
        # the hidden layer above instead of appending a second linear layer.
        self.net.add_module('fc2', nn.Linear(in_features=128, out_features=1))

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        return self.net(x)
    
# Instantiate the CNN and move its parameters to the selected device.
model = Net().to(device)

# Let's define an optimizer
# (Adam over the parameters of the `model` created just above.)

optimizer = optim.Adam(model.parameters(), lr=0.001)

# Let's define a Loss function
# NOTE(review): Net ends in a layer with out_features=1, while
# CrossEntropyLoss expects one score per class — confirm the intended
# loss / number of outputs.

lossfun = nn.CrossEntropyLoss()  # Use nn.NLLLoss with Logsoftmax

In [None]:
def train(model, train_loader, epochs, save_path="epi_cnn.pt"):
    """Train ``model`` on ``train_loader`` for ``epochs`` epochs.

    The optimizer (``optimizer``), loss (``lossfun``) and target device
    (``device``) are read from the notebook's global namespace.

    Args:
        model: module to train; it is switched to training mode.
        train_loader: iterable of ``(data, target)`` batches. Its ``dataset``
            attribute is only used for the progress printout.
        epochs: number of passes over ``train_loader``.
        save_path: where the state dict is written once training finishes;
            pass ``None`` to skip saving.

    Returns:
        ``(epoch_loss, epoch_accu)``: two lists with one entry per processed
        batch, accumulated over all epochs.
    """
    model.train()

    epoch_loss = []
    epoch_accu = []
    for epoch in range(epochs):
        # Remember where this epoch starts so its summary covers only its
        # own batches; the lists themselves keep growing across epochs.
        epoch_start = len(epoch_loss)

        for batch_idx, (data, target) in enumerate(train_loader):

            data, target = data.to(device), target.to(device)

            data = data.float()

            # forward pass and loss
            output = model(data)
            loss = lossfun(output, target)

            # backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # bookkeeping
            accuracy = (output.argmax(-1) == target).float().mean()
            epoch_loss.append(loss.item())
            epoch_accu.append(accuracy.item())

            if batch_idx % 50 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.2f}'.format(
                    epoch+1, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item(), accuracy.item()))

        losses_this_epoch = epoch_loss[epoch_start:]
        accus_this_epoch = epoch_accu[epoch_start:]
        batches_seen = len(losses_this_epoch)
        # An empty loader yields no batches; report NaN instead of dividing by zero.
        average_loss = sum(losses_this_epoch) / batches_seen if batches_seen else float('nan')
        average_accu = sum(accus_this_epoch) / batches_seen if batches_seen else float('nan')

        print('Train Epoch: {}\tAverage Loss: {:.6f}\tAverage Accuracy: {:.2f}'.format(
            epoch+1, average_loss, average_accu))

    # save network
    if save_path is not None:
        torch.save(model.state_dict(), save_path)

    return epoch_loss, epoch_accu

# Train for 10 epochs and keep the per-batch loss and accuracy histories.
epoch_loss, epoch_accu = train(model, train_loader, epochs=10)

NameError: ignored

In [None]:
class RNN(nn.Module):
    """Stacked LSTM followed by a linear classifier on the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output scores.

    Input:  tensor of shape (batch_size, seq_length, input_size).
    Output: tensor of shape (batch_size, num_classes).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Zero initial hidden and cell states, created with the input's own
        # device and dtype so the module does not depend on a notebook-level
        # `device` variable.
        batch_size = x.size(0)
        h0 = x.new_zeros(self.num_layers, batch_size, self.hidden_size)
        c0 = x.new_zeros(self.num_layers, batch_size, self.hidden_size)

        # out: tensor of shape (batch_size, seq_length, hidden_size)
        out, _ = self.lstm(x, (h0, c0))

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

# Build the LSTM classifier (8 input features, 128 hidden units, 4 layers,
# 3 output classes) and move it to the selected device.
# Note: this rebinds the notebook-level name `model`.
model = RNN(input_size=8, hidden_size=128, num_layers=4, num_classes=3).to(device)

In [None]:
# Hyper-parameters for the LSTM training run.
learning_rate = 0.001
num_epochs = 10
sequence_length = 60
input_size = 8

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (sequence, labels) in enumerate(train_loader):
        # Use the batch unpacked by the loop header (it was previously read
        # through an undefined name) and cast it to float32 like train() does.
        # NOTE(review): the loader's recorded batch shapes are
        # [64, 10, 8, 200] and [64, 10]. sequence_length = 60 does not match
        # seq_len = 10, this reshape puts the 200-sample axis (not the 8
        # fields) last, and the model emits one prediction per window while
        # labels holds one value per segment. Confirm the intended layout
        # and target before relying on this loop.
        inputs = sequence.float().reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
"""
# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')

"""

RuntimeError: ignored