In [1]:
# True when the active IPython shell's string representation mentions
# 'google.colab', i.e. the notebook is being executed on Google Colab.
RunningInCOLAB = 'google.colab' in str(get_ipython())
if RunningInCOLAB:
    # Fetch the project repository into ./Neuron_Burst_Analysis (IPython shell escape).
    !git clone https://github.com/MJC598/Neuron_Burst_Analysis.git

In [2]:
%matplotlib notebook
import torch.nn as nn
import numpy as np
import torch
torch.manual_seed(0)

from torch.utils.data import TensorDataset, DataLoader
import scipy.io
import random
import pandas as pds
import time

from scipy import stats
from sklearn.metrics import r2_score
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

### Class Explanations

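These are regression models built on recurrent layers: three baselines (RNN, LSTM, GRU) and two
variants that put a Conv1d front end before an LSTM or a GRU. To turn one of them into a classifier,
change the second parameter of its nn.Linear layer to the number of expected outputs.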

* Expected Input Shape: (batch_size, time_sequence, features)

* Input_Size - number of features
* Hidden_Size - number of features in the hidden state of the recurrent layer (the conv models also use it as the Conv1d channel count)
* Batch_Size - How many samples you want to push through the network before executing backprop
    (this is a hyperparameter that can change how fast or slow a model converges)
* Batch_First - Should always be set to True to keep input shape the same
* Dropout - Only really does anything with more than 1 layer on the LSTM, RNN, GRU. Useful to help generalize training

In [3]:
class baselineRNN(nn.Module):
    """Plain RNN regressor: an ``nn.RNN`` stack followed by a linear read-out.

    The read-out is applied to the RNN output at the last time step, which
    assumes batch-first input of shape (batch, time, features).

    Args:
        input_size: number of input features per time step.
        hidden_size: number of features in the RNN hidden state.
        output_size: width of the linear read-out.
        batch_size: number of batch positions for which an initial hidden
            state is stored; batches up to this size are accepted.
        num_layers: number of stacked RNN layers.
        batch_first: forwarded to ``nn.RNN``.
        dropout: forwarded to ``nn.RNN``.
    """
    def __init__(self,input_size,hidden_size,output_size=1,
                 batch_size=1,num_layers=1,batch_first=True,dropout=0.0):
        super(baselineRNN, self).__init__()
        self.rnn1 = nn.RNN(input_size=input_size,hidden_size=hidden_size,
                           num_layers=num_layers,batch_first=batch_first,dropout=dropout)
        self.lin = nn.Linear(hidden_size,output_size)
        # Random initial hidden state, drawn once at construction. Registered as a
        # non-persistent buffer so it follows model.to(device) without adding a key
        # to state_dict().
        self.register_buffer('h0', torch.randn(num_layers, batch_size, hidden_size),
                             persistent=False)

    def forward(self, x):
        """Return the read-out of the last time step, shape (batch, output_size)."""
        # Slice the stored state down to the actual batch so that a smaller
        # (e.g. final, partial) batch no longer raises a hidden-size mismatch.
        batch = x.size(0 if self.rnn1.batch_first else 1)
        h0 = self.h0[:, :batch].contiguous()
        x, h_n  = self.rnn1(x,h0)

        # take last cell output
        out = self.lin(x[:, -1, :])

        return out

class baselineLSTM(nn.Module):
    """LSTM regressor: an ``nn.LSTM`` stack followed by a linear read-out.

    The read-out is applied to the LSTM output at the last time step, which
    assumes batch-first input of shape (batch, time, features).

    Args:
        input_size: number of input features per time step.
        hidden_size: number of features in the LSTM hidden/cell state.
        output_size: width of the linear read-out.
        batch_size: number of batch positions for which initial states are
            stored; batches up to this size are accepted.
        num_layers: number of stacked LSTM layers.
        batch_first: forwarded to ``nn.LSTM``.
        dropout: forwarded to ``nn.LSTM``.
    """
    def __init__(self,input_size,hidden_size,output_size=1,
                 batch_size=1,num_layers=1,batch_first=True,dropout=0.0):
        super(baselineLSTM, self).__init__()
        self.rnn = nn.LSTM(input_size=input_size,hidden_size=hidden_size,
                           num_layers=num_layers,batch_first=batch_first,dropout=dropout)
        self.lin = nn.Linear(hidden_size,output_size)
        # Random initial hidden and cell states, drawn once at construction.
        # Non-persistent buffers: they follow model.to(device) but stay out of
        # state_dict().
        self.register_buffer('h0', torch.randn(num_layers, batch_size, hidden_size),
                             persistent=False)
        self.register_buffer('c0', torch.randn(num_layers, batch_size, hidden_size),
                             persistent=False)

    def forward(self, x):
        """Return the read-out of the last time step, shape (batch, output_size)."""
        # Slice the stored states down to the actual batch so that a smaller
        # (e.g. final, partial) batch no longer raises a hidden-size mismatch.
        batch = x.size(0 if self.rnn.batch_first else 1)
        h0 = self.h0[:, :batch].contiguous()
        c0 = self.c0[:, :batch].contiguous()
        x, (h_n, c_n)  = self.rnn(x,(h0,c0))

        # take last cell output
        out = self.lin(x[:, -1, :])

        return out

class baselineGRU(nn.Module):
    """GRU regressor: an ``nn.GRU`` stack followed by a linear read-out.

    The read-out is applied to the GRU output at the last time step, which
    assumes batch-first input of shape (batch, time, features).

    Args:
        input_size: number of input features per time step.
        hidden_size: number of features in the GRU hidden state.
        output_size: width of the linear read-out.
        batch_size: number of batch positions for which an initial hidden
            state is stored; batches up to this size are accepted.
        num_layers: number of stacked GRU layers.
        batch_first: forwarded to ``nn.GRU``.
        dropout: forwarded to ``nn.GRU``.
    """
    def __init__(self,input_size,hidden_size,output_size=1,
                 batch_size=1,num_layers=1,batch_first=True,dropout=0.0):
        super(baselineGRU, self).__init__()
        self.rnn = nn.GRU(input_size=input_size,hidden_size=hidden_size,
                          num_layers=num_layers,batch_first=batch_first,dropout=dropout)
        self.lin = nn.Linear(hidden_size,output_size)
        # Random initial hidden state, drawn once at construction. Non-persistent
        # buffer: follows model.to(device) but stays out of state_dict().
        self.register_buffer('h0', torch.randn(num_layers, batch_size, hidden_size),
                             persistent=False)

    def forward(self, x):
        """Return the read-out of the last time step, shape (batch, output_size)."""
        # Slice the stored state down to the actual batch so that a smaller
        # (e.g. final, partial) batch no longer raises a hidden-size mismatch.
        batch = x.size(0 if self.rnn.batch_first else 1)
        h0 = self.h0[:, :batch].contiguous()
        x, h_n  = self.rnn(x,h0)

        # take last cell output
        out = self.lin(x[:, -1, :])

        return out
    
class conv1DLSTM(nn.Module):
    """Conv1d front end followed by an LSTM and a linear read-out.

    ``forward`` applies: input dropout (p=0.2) -> ``c1`` (kernel 3) ->
    ``p1`` (average pool 3) -> ``c3`` (kernel 1) -> sigmoid -> LSTM -> linear
    read-out of the last time step. ``c2``, ``p2`` and ``tanh`` are constructed
    but not used in ``forward``; they are kept so the parameter set and the
    random-initialisation order stay the same.

    Input is expected as (batch, time, features).

    Args:
        input_size: number of input features (Conv1d input channels).
        hidden_size: Conv1d output channels and LSTM hidden size.
        output_size: width of the linear read-out.
        batch_size: number of batch positions for which initial states are
            stored; batches up to this size are accepted.
        num_layers: number of stacked LSTM layers.
        batch_first: forwarded to ``nn.LSTM``.
        dropout: forwarded to ``nn.LSTM`` (separate from the fixed input dropout).
    """
    def __init__(self,input_size,hidden_size,output_size=1,
                 batch_size=1,num_layers=1,batch_first=True,dropout=0.0):
        super(conv1DLSTM, self).__init__()
        self.c1 = nn.Conv1d(input_size, hidden_size, 3)
        self.p1 = nn.AvgPool1d(3)
        self.c2 = nn.Conv1d(hidden_size, hidden_size, 2)
        self.p2 = nn.AvgPool1d(2)
        self.c3 = nn.Conv1d(hidden_size, hidden_size, 1)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(p=0.2)
        self.rnn = nn.LSTM(input_size=hidden_size,hidden_size=hidden_size,
                           num_layers=num_layers,batch_first=batch_first,dropout=dropout)
        self.lin = nn.Linear(hidden_size,output_size)
        # Random initial hidden and cell states, drawn once at construction.
        # Non-persistent buffers: they follow model.to(device) but stay out of
        # state_dict().
        self.register_buffer('h0', torch.randn(num_layers, batch_size, hidden_size),
                             persistent=False)
        self.register_buffer('c0', torch.randn(num_layers, batch_size, hidden_size),
                             persistent=False)

    def forward(self, x):
        """Return the read-out of the last LSTM step, shape (batch, output_size)."""
        #switch (batch, sequence, feature) to (batch, feature, sequence)
        x = x.transpose(1,2)
        x = self.dropout(x)
        x = self.c1(x)
        x = self.p1(x)
        x = self.c3(x)
        x = self.sigmoid(x)

        #switch backwards
        x = x.transpose(1,2)

        # Slice the stored states down to the actual batch so that a smaller
        # (e.g. final, partial) batch no longer raises a hidden-size mismatch.
        batch = x.size(0 if self.rnn.batch_first else 1)
        h0 = self.h0[:, :batch].contiguous()
        c0 = self.c0[:, :batch].contiguous()
        x, (h_n, c_n)  = self.rnn(x,(h0,c0))

        # take last cell output
        out = self.lin(x[:, -1, :])

        return out
    
class conv1DGRU(nn.Module):
    """Conv1d front end followed by a GRU and a linear read-out.

    ``forward`` applies: input dropout (p=0.2) -> ``c1`` (kernel 5) ->
    ``p1`` (average pool 5) -> ``c2`` (kernel 3) -> sigmoid -> GRU -> linear
    read-out of the last time step. ``p2``, ``c3`` and ``tanh`` are constructed
    but not used in ``forward``; they are kept so the parameter set and the
    random-initialisation order stay the same.

    Input is expected as (batch, time, features).

    Args:
        input_size: number of input features (Conv1d input channels).
        hidden_size: Conv1d output channels and GRU hidden size.
        output_size: width of the linear read-out.
        batch_size: number of batch positions for which an initial hidden
            state is stored; batches up to this size are accepted.
        num_layers: number of stacked GRU layers.
        batch_first: forwarded to ``nn.GRU``.
        dropout: forwarded to ``nn.GRU`` (separate from the fixed input dropout).
    """
    def __init__(self,input_size,hidden_size,output_size=1,
                 batch_size=1,num_layers=1,batch_first=True,dropout=0.0):
        super(conv1DGRU, self).__init__()
        self.c1 = nn.Conv1d(input_size, hidden_size, 5)
        self.p1 = nn.AvgPool1d(5)
        self.c2 = nn.Conv1d(hidden_size, hidden_size, 3)
        self.p2 = nn.AvgPool1d(3)
        self.c3 = nn.Conv1d(hidden_size, hidden_size, 1)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(p=0.2)
        self.rnn = nn.GRU(input_size=hidden_size,hidden_size=hidden_size,
                           num_layers=num_layers,batch_first=batch_first,dropout=dropout)
        self.lin = nn.Linear(hidden_size,output_size)
        # Random initial hidden state, drawn once at construction. Non-persistent
        # buffer: follows model.to(device) but stays out of state_dict().
        self.register_buffer('h0', torch.randn(num_layers, batch_size, hidden_size),
                             persistent=False)

    def forward(self, x):
        """Return the read-out of the last GRU step, shape (batch, output_size)."""
        #switch (batch, sequence, feature) to (batch, feature, sequence)
        x = x.transpose(1,2)
        x = self.dropout(x)
        x = self.c1(x)
        x = self.p1(x)
        x = self.c2(x)
        x = self.sigmoid(x)

        #switch backwards
        x = x.transpose(1,2)

        # Slice the stored state down to the actual batch so that a smaller
        # (e.g. final, partial) batch no longer raises a hidden-size mismatch.
        batch = x.size(0 if self.rnn.batch_first else 1)
        h0 = self.h0[:, :batch].contiguous()
        x, h_n  = self.rnn(x,h0)

        # take last cell output
        out = self.lin(x[:, -1, :])

        return out

In [4]:
# --- Experiment configuration -------------------------------------------------
TIMESTEPS = 80
# Window of the time axis fed to the model: samples are sliced as
# [T_START:T_END] in get_data_from_mat. With the values below that is [0:55].
FRONT_TIME = -50
BACK_TIME = 5
T_START = 50+FRONT_TIME
T_END = 50+BACK_TIME
MODEL = conv1DLSTM
OUTPUT = 'full'
# NOTE(review): str(MODEL) is the class repr (e.g. "<class '__main__.conv1DLSTM'>"),
# so these file names contain '<', '>' and quotes. MODEL.__name__ would give
# cleaner names, but switching would change the paths of already-saved artifacts.
LOSS_FILE = ('losses/cycles/losses_' + str(MODEL) + 
             '_' + OUTPUT + '_' + str(FRONT_TIME) + 
             '_' + str(T_END) + '_fullin.csv')
PATH = ('models/cycles/' + str(MODEL) + 
        '_' + OUTPUT + '_' + str(FRONT_TIME) + 
        '_' + str(T_END) + '_fullin.pth')
DATA_PATH = 'data/cycles/cycle_separatePNITNv1.mat'
OLD_DATA_PATH = 'data/cycles/cycle_old_data.mat'
# On Colab the repository is cloned into this sub-directory, so every
# repo-relative path needs the prefix (OLD_DATA_PATH was previously missed).
COLAB_PRE = 'Neuron_Burst_Analysis/'
if RunningInCOLAB:
    LOSS_FILE = COLAB_PRE + LOSS_FILE
    PATH = COLAB_PRE + PATH
    DATA_PATH = COLAB_PRE + DATA_PATH
    OLD_DATA_PATH = COLAB_PRE + OLD_DATA_PATH

# Specific Model Parameters
input_size = 8
hidden_size = 20
output_size = 1
batch_size = 32
num_layers = 1
batch_first = True
dropout = 0.0
epochs = 40

In [5]:
def _min_max_scale(values, axis=None):
    """Rescale ``values`` to [0, 1] with the min/max taken over ``axis``.

    A constant slice (max == min) is mapped to 0 instead of dividing by zero.
    """
    lowest = np.min(values, axis=axis, keepdims=True)
    span = np.max(values, axis=axis, keepdims=True) - lowest
    span = np.where(span == 0, 1, span)
    return (values - lowest) / span


def get_data_from_mat(file_path, output_index=None, type='pre_pn'):
    """Load the ``info_collect`` table from a .mat file and build train/validation datasets.

    Row 0 of ``info_collect`` is skipped (presumably a header row - TODO confirm).
    Columns 0-3 are used as labels (duration, amp1, amp2, burst length) and
    columns 4-11 as input channels. Labels are min-max scaled per label over the
    whole data set; inputs are min-max scaled per sample and per channel over
    the full time axis. Inputs are then cut to ``[T_START:T_END]`` along time.

    Relies on the notebook-level globals ``input_size``, ``T_START`` and ``T_END``.

    Args:
        file_path: path of the .mat file to load.
        output_index: index into the label axis (0 amp1, 1 amp2, 2 duration,
            3 burst length); ``None`` keeps all labels.
        type: unused; kept for backward compatibility.

    Returns:
        (training_dataset, validation_dataset) as ``TensorDataset`` objects.
    """
    data = scipy.io.loadmat(file_path)

    # Skip row 0, then gather each of the 12 columns into its own list.
    records = data['info_collect'][1:]
    (duration, amp1, amp2, bur_len,
     pn, itn, aff_pn, aff_itn,
     exc_pn, inh_pn, exc_itn, inh_itn) = ([row[col] for row in records] for col in range(12))

    full_labels = np.concatenate((amp1, amp2, duration, bur_len), axis=2)

    full_data = np.concatenate((pn, itn, aff_pn, aff_itn, exc_pn, inh_pn, exc_itn, inh_itn), axis=2)

    # Each of the 4 labels is scaled with its global min/max over all samples.
    full_labels[:, :, :4] = _min_max_scale(full_labels[:, :, :4], axis=(0, 1))

    # Each sample's channel is scaled with its own min/max over time (axis 1).
    full_data[:, :, :input_size] = _min_max_scale(full_data[:, :, :input_size], axis=1)

    # Kept so the global `random` state is the same as before for any later code.
    random.seed(10)
    # The split is drawn with NumPy, which random.seed() does not affect; a
    # dedicated seeded generator makes the split reproducible and identical
    # across calls without touching NumPy's global state.
    rng = np.random.RandomState(10)
    # Both counts are multiples of 32 (the notebook's batch size); the commented
    # 24848 suggests the data set is truncated to get there - TODO confirm.
    data_samples = 24832 #24848
    k = 19808
    full = np.arange(data_samples)
    training_indices = rng.choice(full, size=k, replace=False)
    # `full` is arange, so deleting by index removes exactly the training ids.
    validation_indices = np.delete(full,training_indices)

    training_data = full_data[training_indices,T_START:T_END,:]
    validation_data = full_data[validation_indices,T_START:T_END,:]

    if output_index is None:
        training_labels = full_labels[training_indices,:,:]
        validation_labels = full_labels[validation_indices,:,:]
    else:
        training_labels = full_labels[training_indices,:,output_index]
        validation_labels = full_labels[validation_indices,:,output_index]

    training_dataset = TensorDataset(torch.Tensor(training_data), torch.Tensor(training_labels))
    validation_dataset = TensorDataset(torch.Tensor(validation_data), torch.Tensor(validation_labels))

    return training_dataset, validation_dataset
# get_data_from_mat(DATA_PATH)

### Training Method
* Model - Model initialized based on classes above
* Save_Filepath - Where you want to save the model to. Should end with a .pt or .pth extension. This is how you are able to load the model later for testing, etc.
* training_loader - dataloader iterable with training dataset samples
* validation_loader - dataloader iterable with validation dataset samples

In [6]:
def train_model(model,save_filepath,training_loader,validation_loader,epochs,count):
    """Train ``model`` with Adam + MSE loss and checkpoint the best validation epoch.

    Each epoch runs one pass over ``training_loader`` (with weight updates) and
    one pass over ``validation_loader`` (no gradients). The learning rate starts
    at 0.01 and is multiplied by 0.93 after every training pass. The whole model
    object is written to ``save_filepath`` whenever the validation loss improves,
    and the per-epoch losses are written to a CSV under ``losses/``.

    Relies on the notebook-level globals ``MODEL``, ``OUTPUT``, ``FRONT_TIME``
    and ``T_END`` to name the loss CSV.

    Args:
        model: module to train; called as ``model(x)``.
        save_filepath: destination passed to ``torch.save`` for the best model.
        training_loader: iterable of ``(x, y)`` training batches.
        validation_loader: iterable of ``(x, y)`` validation batches.
        epochs: number of epochs to run.
        count: suffix appended to the loss-CSV file name.

    Returns:
        (train_loss_list, val_loss_list): per-epoch sums of the batch losses.
    """
    import os  # local import so this cell does not depend on the import cell

    epochs_list = []
    train_loss_list = []
    val_loss_list = []

    #splitting the dataloaders to generalize code
    data_loaders = {"train": training_loader, "val": validation_loader}

    # Adam with an initial learning rate of 0.01. The loss is Mean Squared Error;
    # for a classifier nn.CrossEntropyLoss() would be the usual replacement.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_func = nn.MSELoss()
    decay_rate = 0.93 #decay the lr each step to 93% of previous lr
    lr_sch = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=decay_rate)

    # Make sure the checkpoint directory exists before the first save.
    if isinstance(save_filepath, (str, os.PathLike)):
        save_dir = os.path.dirname(save_filepath)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    # Must live outside the epoch loop: resetting it every epoch made the
    # "best model" checkpoint simply the last epoch.
    best_val_loss = float('inf')

    for epoch in tqdm(range(epochs), position=0, leave=True):
        train_loss = 0.0
        val_loss = 0.0
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            model.train(is_training)

            running_loss = 0.0
            # No autograd graph is needed for the validation pass.
            with torch.set_grad_enabled(is_training):
                for i, (x, y) in enumerate(data_loaders[phase]):
                    output = model(x)
                    loss = loss_func(torch.squeeze(output), torch.squeeze(y))
                    #backprop
                    if is_training:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    #calculating total loss
                    running_loss += loss.item()

            if is_training:
                train_loss = running_loss
                lr_sch.step()
            else:
                val_loss = running_loss

        # shows total loss; the second number is the batch count of the validation pass
        if epoch%10 == 0:
            print('[%d, %5d] train loss: %.6f val loss: %.6f' % (epoch + 1, i + 1, train_loss, val_loss))

        #saving best model (epoch 0 always saves so a checkpoint is guaranteed to exist)
        if epoch == 0 or val_loss < best_val_loss:
            torch.save(model, save_filepath)
            best_val_loss = val_loss
        epochs_list.append(epoch)
        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)

    #Creating loss csv
    loss_df = pds.DataFrame(
        {
            'epoch': epochs_list,
            'training loss': train_loss_list,
            'validation loss': val_loss_list
        }
    )
    # Writing loss csv, change path to whatever you want to name it
    lf = ('losses/losses_' + str(MODEL) + '_' 
          + OUTPUT + '_' + str(FRONT_TIME) + '_' 
          + str(T_END) + str(count) + '_fullin.csv')
    os.makedirs(os.path.dirname(lf), exist_ok=True)
    loss_df.to_csv(lf, index=None)
    return train_loss_list, val_loss_list

### R2 Scoring
* Model - same model as sent to train_model
* testing_dataloader - whichever dataloader you want to R2 Score

In [7]:
def r2_score_eval(model, testing_dataloader, title):
    """Run ``model`` over a dataloader, print its R2 score and return the raw arrays.

    Relies on the notebook-level global ``output_size`` to shape the labels.
    The model's train/eval mode is not changed here.

    Args:
        model: module called as ``model(x)`` on each batch.
        testing_dataloader: iterable of ``(x, y)`` batches to score.
        title: prefix printed in front of the score.

    Returns:
        (output_list, labels_list): predictions and labels stacked along the
        sample axis, in the order the dataloader yielded them.
    """
    output_batches = []
    label_batches = []
    # Inference only: skip building autograd graphs.
    with torch.no_grad():
        for x, y in testing_dataloader:
            output_batches.append(model(x).detach().cpu().numpy())
            label_batches.append(y.detach().cpu().numpy())

    # Stack batches along the sample axis (same result as the previous
    # transpose / hstack / transpose round trip for 2-D batches).
    output_list = np.concatenate(output_batches, axis=0)
    labels_list = np.concatenate(label_batches, axis=0).reshape((-1,output_size))

    print(str(title) + ': ' + str(r2_score(np.squeeze(labels_list), output_list)))
    return output_list, labels_list

### Program Start

In [8]:
# One independent model per regression target. They are built before any data
# is loaded, so their random initialisation comes first in the torch RNG stream.
model1 = MODEL(input_size,hidden_size,output_size,batch_size,num_layers,batch_first,dropout)
model2 = MODEL(input_size,hidden_size,output_size,batch_size,num_layers,batch_first,dropout)
model3 = MODEL(input_size,hidden_size,output_size,batch_size,num_layers,batch_first,dropout)

# Each call re-reads the .mat file and draws its own train/validation split.
a1_training_dataset, a1_validation_dataset = get_data_from_mat(DATA_PATH, 0) #retrieve amps1
a2_training_dataset, a2_validation_dataset = get_data_from_mat(DATA_PATH, 1) #retrieve amps2
d_training_dataset, d_validation_dataset = get_data_from_mat(DATA_PATH, 2) #retrieve duration
# b_training_dataset, b_validation_dataset = get_data_from_mat(DATA_PATH, 3) #retrieve burst

# Turn datasets into iterable dataloaders
# (training loaders are shuffled, validation loaders keep dataset order)
a1_training_loader = DataLoader(dataset=a1_training_dataset,batch_size=batch_size,shuffle=True)
a1_validation_loader = DataLoader(dataset=a1_validation_dataset,batch_size=batch_size)

a2_training_loader = DataLoader(dataset=a2_training_dataset,batch_size=batch_size,shuffle=True)
a2_validation_loader = DataLoader(dataset=a2_validation_dataset,batch_size=batch_size)

d_training_loader = DataLoader(dataset=d_training_dataset,batch_size=batch_size,shuffle=True)
d_validation_loader = DataLoader(dataset=d_validation_dataset,batch_size=batch_size)

# b_training_loader = DataLoader(dataset=b_training_dataset,batch_size=batch_size,shuffle=True)
# b_validation_loader = DataLoader(dataset=b_validation_dataset,batch_size=batch_size)

# Train each model; p1..p3 are the checkpoint paths written by train_model, and
# the trailing 0/1/2 is the same `count` suffix used for the loss CSV.
p1 = 'models/' + str(MODEL) + '_' + OUTPUT + '_' + str(FRONT_TIME) + '_' + str(T_END) + str(0) + '_fullin.pth'
a1_training_loss, a1_validation_loss = train_model(model1,p1,a1_training_loader,a1_validation_loader,epochs,0)
p2 = 'models/' + str(MODEL) + '_' + OUTPUT + '_' + str(FRONT_TIME) + '_' + str(T_END) + str(1) + '_fullin.pth'
a2_training_loss, a2_validation_loss = train_model(model2,p2,a2_training_loader,a2_validation_loader,epochs,1)
p3 = 'models/' + str(MODEL) + '_' + OUTPUT + '_' + str(FRONT_TIME) + '_' + str(T_END) + str(2) + '_fullin.pth'
d_training_loss, d_validation_loss = train_model(model3,p3,d_training_loader,d_validation_loader,epochs,2)
# p4 = 'models/' + str(MODEL) + '_' + OUTPUT + '_' + str(TIMESTEPS) + str(3) + '_fullin.pth'
# b_training_loss, b_validation_loss = train_model(model,p4,b_training_loader,b_validation_loader,epochs,3)


# Reload each saved checkpoint, switch it to eval mode, and score it on both
# its training and validation loaders. The returned arrays feed the plots below.
model1 = torch.load(p1)
model1.eval()
a1_t_output_list, a1_t_labels_list = r2_score_eval(model1, a1_training_loader, 'Amp1 Train')
a1_v_output_list, a1_v_labels_list = r2_score_eval(model1, a1_validation_loader, 'Amp1 Val')

model2 = torch.load(p2)
model2.eval()
a2_t_output_list, a2_t_labels_list = r2_score_eval(model2, a2_training_loader, 'Amp2 Train')
a2_v_output_list, a2_v_labels_list = r2_score_eval(model2, a2_validation_loader, 'Amp2 Val')

model3 = torch.load(p3)
model3.eval()
d_t_output_list, d_t_labels_list = r2_score_eval(model3, d_training_loader, 'Dur Train')
d_v_output_list, d_v_labels_list = r2_score_eval(model3, d_validation_loader, 'Dur Val')

# model = torch.load(p4)
# model.eval()
# b_t_output_list, b_t_labels_list = r2_score_eval(model, b_training_loader)
# b_v_output_list, b_v_labels_list = r2_score_eval(model, b_validation_loader)

  0%|          | 0/40 [00:00<?, ?it/s]

[1,   157] train loss: 14.026637 val loss: 2.276126
[11,   157] train loss: 8.971486 val loss: 1.804526
[21,   157] train loss: 8.699338 val loss: 1.847774
[31,   157] train loss: 8.503581 val loss: 1.758526


  0%|          | 0/40 [00:00<?, ?it/s]

[1,   157] train loss: 11.539812 val loss: 2.401503
[11,   157] train loss: 9.287747 val loss: 2.268298
[21,   157] train loss: 8.598092 val loss: 2.094455
[31,   157] train loss: 8.449679 val loss: 2.060053


  0%|          | 0/40 [00:00<?, ?it/s]

[1,   157] train loss: 5.255433 val loss: 1.163101
[11,   157] train loss: 4.378474 val loss: 1.071830
[21,   157] train loss: 3.492990 val loss: 0.853504
[31,   157] train loss: 3.431271 val loss: 0.838907
Amp1 Train: 0.6353244289528284
Amp1 Val: 0.6222241921390461
Amp2 Train: 0.21816017085199968
Amp2 Val: 0.20765901060744252
Dur Train: 0.28455850005708194
Dur Val: 0.2637371110295105


In [9]:
# t_output_list, t_labels_list = r2_score_eval(model, training_loader)
# v_output_list, v_labels_list = r2_score_eval(model, validation_loader)
# 4x2 figure: column 0 shows validation results, column 1 training results.
fig, ax = plt.subplots(nrows=4, ncols=2)

# Row 0: Amp1 loss curves. The curves were previously drawn under each other's
# title (training loss under 'Validation Loss' and vice versa).
loss_panels = (
    (0, 'Validation Loss', a1_validation_loss),
    (1, 'Training Loss', a1_training_loss),
)
for col, panel_title, losses in loss_panels:
    ax[0, col].plot(range(len(losses)), losses)
    ax[0, col].set_title(panel_title)
    ax[0, col].set_ylabel('Loss')
    ax[0, col].set_xlabel('Epoch')

# Rows 1-3: per-sample labels (blue) against model outputs (red).
# Each entry: (row, name used in the title, y-axis label,
#              (validation labels, validation outputs),
#              (training labels, training outputs))
sample_panels = (
    (1, 'Amps1', 'Amp1',
     (a1_v_labels_list, a1_v_output_list), (a1_t_labels_list, a1_t_output_list)),
    (2, 'Amps2', 'Amp2',
     (a2_v_labels_list, a2_v_output_list), (a2_t_labels_list, a2_t_output_list)),
    (3, 'Duration', 'Duration',
     (d_v_labels_list, d_v_output_list), (d_t_labels_list, d_t_output_list)),
)
for row, target_name, y_label, validation_pair, training_pair in sample_panels:
    for col, split_name, (labels, outputs) in ((0, 'Validation', validation_pair),
                                                (1, 'Training', training_pair)):
        sample_ids = np.arange(labels.shape[0])
        ax[row, col].plot(sample_ids, labels[:, 0], color='blue')
        ax[row, col].plot(sample_ids, outputs[:, 0], color='red')
        ax[row, col].set_title(split_name + ' ' + target_name + ' per Sample')
        ax[row, col].set_ylabel(y_label)
        ax[row, col].set_xlabel('Sample')

# Lay out after everything is drawn so titles and axis labels are accounted for.
fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>