### Libraries

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]= "1"

from tools import*
from models import*
from tqdm import tqdm_notebook as tqdm
from torch.autograd import Variable, Function
from sklearn.metrics import mean_absolute_error
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence

import time
import torch
import math
import numpy as np
import torchvision
import pandas as pd
import seaborn as sns
import torch.nn as nn
import nibabel as nib
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.utils.data as data_utils
import torch.optim.lr_scheduler as schd  
import torchvision.transforms as transforms

torch.backends.cudnn.benchmark=True 
%matplotlib inline

### Input Variable

In [27]:
# Selects which target columns get_features() extracts from the target tables.
task = "classification"
# Mini-batch size handed to make_batch() and to the encoder/decoder constructors.
batch = 32
# Number of outer bagging rounds; also the divisor when predictions are averaged.
iteration = 50

### Training Data

In [3]:
path = "/home/emy24/tadpole/data/"
xTrain, xVal, xTest, _, _, _ = preprocess(path, elim_sparse_feature=True, cutoff=0.60)
xTrain, xVal, xTest = make_list(path, xTrain, xVal, xTest)

# Optional on-disk cache of the preprocessed frames (disabled):
# xTrain.to_pickle("/home/emy24/tadpole/data/xTrain_pd_60")
# xVal.to_pickle("/home/emy24/tadpole/data/xVal_pd_60")
# xTest.to_pickle("/home/emy24/tadpole/data/xTest_pd_60")

# xTrain = pd.read_pickle("/home/emy24/tadpole/data/xTrain_pd_60")
# xVal = pd.read_pickle("/home/emy24/tadpole/data/xVal_pd_60")
# xTest = pd.read_pickle("/home/emy24/tadpole/data/xTest_pd_60")
# xTrain, xVal, xTest = make_list(path, xTrain, xVal, xTest)

# Inputs: convert each split to a matrix, then pad; get_pad also returns a
# second value that is kept as the split's *_seq variable.
xTrain, xTrain_seq = get_pad(get_matrix(xTrain))
xVal, xVal_seq = get_pad(get_matrix(xVal))
xTest, xTest_seq = get_pad(get_matrix(xTest))


def _pad_targets_by_patient(standardized):
    """Extract the task's target columns, split by "PTID_Key", and pad.

    Returns whatever get_pad() returns for the per-patient matrices
    (unpacked by the callers below as the padded array and its *_seq companion).
    """
    features = get_features(standardized, task)
    per_patient = [visits for _, visits in features.groupby("PTID_Key")]
    return get_pad(get_matrix(per_patient))


# ---- Load Target ----

# yTrain: the averages computed here are reused to standardize every split.
datapath = "/home/emy24/tadpole/data/TADPOLE_TargetData_train.csv"
resampled_train, orig_dates_train = resample_data(datapath, interpolation=True)
adas, ventricle, mmse = get_average(resampled_train)
yTrain, yTrain_seq = _pad_targets_by_patient(
    get_standarize(resampled_train, adas, ventricle, mmse))

# yVal
# NOTE(review): the validation targets are read from the "*_test.csv" file and the
# test targets from the "*_valid.csv" file -- confirm this naming is intentional.
datapath = "/home/emy24/tadpole/data/TADPOLE_TargetData_test.csv"
resampled_val, orig_dates_val = resample_data(datapath, interpolation=True)
yVal, yVal_seq = _pad_targets_by_patient(
    get_standarize(resampled_val, adas, ventricle, mmse))

# yTest: missing values are replaced by 0 before standardization.
datapath = "/home/emy24/tadpole/data/TADPOLE_PredictTargetData_valid.csv"
resampled_test, orig_dates_test = resample_data(datapath, interpolation=True)
resampled_test = resampled_test.fillna(0)
yTest, yTest_seq = _pad_targets_by_patient(
    get_standarize(resampled_test, adas, ventricle, mmse))


### Model

In [4]:
def masked_loss(predicted, target, sequence, time_step):
    """Mean absolute error over the patients that still have a visit at `time_step`.

    Parameters
    ----------
    predicted : tensor whose last axis holds the features, e.g. (batch, features)
        or (batch, 1, features).
    target : tensor with the same number of elements and the same last axis as
        `predicted`.
    sequence : array-like of int, total number of visits of each patient in the batch.
    time_step : int, index of the current visit.

    Returns
    -------
    Scalar tensor ``(1/k) * sum(ind * M)`` where
      * ``ind[i]`` is 1 when ``time_step < sequence[i]`` (patient i has data at this
        step) and 0 otherwise,
      * ``M = |predicted - target|``,
      * ``k`` is the number of unmasked elements (active patients * features).
    When no patient is active (``k == 0``) a zero loss is returned instead of NaN.
    """
    # Collapse to an explicit (batch, features) matrix. Unlike a bare squeeze(),
    # this keeps the batch axis when the batch holds a single patient.
    num_features = predicted.size(-1)
    predicted = predicted.contiguous().view(-1, num_features)
    target = target.contiguous().view(-1, num_features)

    # Row vector mask, shaped (1, batch) so it can left-multiply M.
    ind = ((time_step < np.asarray(sequence)) * 1).reshape(1, -1)
    k = int(np.sum(ind)) * num_features

    M = torch.abs(predicted - target)

    # Put the mask on the same device as the data instead of forcing CUDA.
    ind = Variable(torch.from_numpy(ind), requires_grad=False).float()
    if M.is_cuda:
        ind = ind.cuda()

    masked_sum = torch.sum(torch.mm(ind, M))
    if k == 0:
        # Nothing to average over; keep the result attached to the graph.
        return masked_sum * 0.0
    return masked_sum * (1.0 / k)

def get_accuracy(output, y_batch, t):
    """Percentage of rows whose arg-max prediction equals the label at visit `t`.

    output: 2-D score tensor; the predicted class is the arg-max along axis 1.
    y_batch: 3-D numpy array; the label is read from the last column of visit `t`.
    """
    predicted_class = output.data.max(1)[1].cpu().numpy()
    true_class = y_batch[:, t, -1]
    n_correct = np.sum(predicted_class == true_class)
    return 100 * n_correct / true_class.shape[0]

def more_visit(time_step, sequence):
    """Return True when at least one patient still has a visit at `time_step`.

    time_step: int, index of the current visit.
    sequence: array-like of int, total number of visits per patient.

    The check uses the `sequence` argument; the previous version silently read
    the global `y_sequence` instead.
    """
    return bool(np.any(time_step < np.asarray(sequence)))

"""GRU"""
class GRU_Encoder(nn.Module):
    """Multi-layer GRU that encodes a padded batch of visit sequences.

    input_size: number of features per visit.
    hidden_size: width of the GRU hidden state.
    num_layers: number of stacked GRU layers.
    batch: stored on the instance for interface compatibility; forward() does
        not need it because the GRU infers the batch size from its input.
    """
    def __init__(self, input_size, hidden_size, num_layers, batch):
        super(GRU_Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch = batch
        self.gru = nn.GRU(input_size = input_size, hidden_size = hidden_size,
                            batch_first=True, num_layers = num_layers)

    def forward(self, x, seq):
        """Encode `x` (batch-first, padded) given the true lengths `seq`.

        Returns (out, hidden): the packed GRU output and the final hidden state.
        The GRU starts from its default initial state; the random `h0` that used
        to be allocated on the GPU here was never passed to the GRU and has been
        removed.
        """
        # Pack so the GRU skips the padded time steps.
        # NOTE(review): pack_padded_sequence is called with its defaults, so `seq`
        # presumably has to be sorted longest-first -- verify against make_batch.
        packed = pack_padded_sequence(x, seq, batch_first=True)
        out, hidden = self.gru(packed)
        return out, hidden
    

class GRU_Decoder(nn.Module):
    """GRU step decoder followed by a 3-layer MLP head.

    input_size: number of features fed to the GRU at each step.
    hidden_size: width of the GRU hidden state and of the MLP hidden layers.
    num_layers: number of stacked GRU layers.
    batch: stored on the instance; not used by forward().
    num_classes: width of the final linear layer (previously hard-coded to 3
        regardless of this argument).
    """
    def __init__(self, input_size, hidden_size, num_layers, batch, num_classes):
        super(GRU_Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch = batch
        self.num_classes = num_classes
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(in_features= self.hidden_size, out_features= self.hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features= self.hidden_size, out_features= self.hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features= self.hidden_size, out_features= num_classes))

    def forward(self, x, h):
        """Run the GRU on `x` starting from hidden state `h`.

        Returns (hidden, out): the updated hidden state of every layer and the
        MLP head applied to the top layer's hidden state.
        """
        _, hidden = self.gru(x, h)
        out = hidden[-1]  # hidden state of the last (top) GRU layer
        out = self.fc(out)
        return hidden, out

### Training

In [28]:
# Build both networks, wrap them in DataParallel and move them to the GPU.
# The optimizers are created afterwards so they hold the GPU parameters.
encoder = torch.nn.DataParallel(
    GRU_Encoder(input_size=431, hidden_size=100, num_layers=3, batch=batch)
)
encoder.cuda()

decoder = torch.nn.DataParallel(
    GRU_Decoder(input_size=3, hidden_size=100, num_layers=3, batch=batch, num_classes=3)
)
decoder.cuda()

# One Adam optimizer (default hyper-parameters) per network.
enc_opt, dec_opt = optim.Adam(encoder.parameters()), optim.Adam(decoder.parameters())

In [29]:
def _predict_all_subjects(encoder, decoder, x_input, x_seq_input, y_seq_input):
    """Decode every subject one at a time and return the predicted class per visit.

    For each subject the encoder consumes its input rows (last column dropped),
    then the decoder is unrolled for `y_seq_input[s]` steps, starting from an
    all-zero input and feeding each output back in as the next input. The
    arg-max of every step is collected.

    Returns a 1-D numpy array with the predictions of all subjects concatenated
    in subject order.
    """
    all_subject_prediction = []
    for s in range(x_input.shape[0]):
        # Encoder
        x = torch.from_numpy(x_input[s])[:,:-1]
        # NOTE(review): `volatile=True` is the pre-0.4 PyTorch way of disabling
        # autograd -- confirm the target PyTorch version before upgrading.
        x = Variable(x.unsqueeze(0).float(), volatile = True)
        hidden = encoder(x, [x_seq_input[s]])
        hidden = hidden[-1]

        # Decoder
        dec_input = Variable(torch.zeros(1,1,3).float().cuda(), volatile = True)
        individual_pred = []
        for t in range(y_seq_input[s]):
            hidden, output = decoder(dec_input, h=hidden)
            dec_input = output.unsqueeze(1)

            _, pred = torch.max(output.data, 1)
            individual_pred += [pred.cpu().numpy()]

        all_subject_prediction += [np.asarray(individual_pred).flatten()]
    return np.hstack(all_subject_prediction)


bag_test = []
bag_val = []
# NOTE(review): encoder, decoder and their optimizers are created outside this
# loop, so every round keeps training the same weights rather than fitting an
# independent model -- confirm this is what is intended by "bagging".
for b in tqdm(range(iteration)):
    teacher_forcing = True
    loss_store = []
    for epoch in tqdm(range(600)):
        # Zero the parameter gradients
        enc_opt.zero_grad()
        dec_opt.zero_grad()

        encoder.train()
        decoder.train()

        # Mini-batch
        x_batch, y_batch, x_sequence, y_sequence = make_batch(batch, xTrain, yTrain, xTrain_seq, yTrain_seq)
        x_batch = Variable(torch.from_numpy(x_batch[:,:,:-1]).float().cuda()) # last column is the ID, we dont need it for training

        # Encoder
        hidden = encoder(x_batch,x_sequence)
        hidden = hidden[-1]

        # Decoder: identical for both modes except for what is fed to the next step.
        loss = Variable(torch.zeros(1).cuda())
        max_seq_length = yTrain.shape[1] # Maximum visits sequence
        dec_input = Variable(torch.zeros(batch,1,3).float().cuda())
        for t in range(max_seq_length): # go through each visit sequentially
            if more_visit(t,y_sequence):
                y = y_batch[:,:,1:-1]
                y = y[:,t,:]
                y = np.ma.expand_dims(y,1)
                y = Variable(torch.from_numpy(y).cuda()).float()
                hidden, output = decoder(dec_input, h=hidden)
                loss += torch.mul(masked_loss(predicted=output, target=y, sequence=y_sequence, time_step=t),
                                 (1/np.max(y_sequence)))
                if teacher_forcing:
                    # input to the next step is the ground truth
                    dec_input = y
                else:
                    # output of this time step becomes the input of the next
                    dec_input = output.unsqueeze(1)

        loss.backward()
        loss_store += [loss.data.cpu().numpy()]
        enc_opt.step()
        dec_opt.step()

        # Alternate between teacher forcing and free running on every step.
        teacher_forcing = not teacher_forcing

    # Switch to inference mode once for both prediction passes.
    encoder.eval()
    decoder.eval()

    # Val Prediction
    bag_val += [_predict_all_subjects(encoder, decoder, xVal, xVal_seq, yVal_seq)]

    # Test Prediction
    bag_test += [_predict_all_subjects(encoder, decoder, xTest, xTest_seq, yTest_seq)]




### Bagging Statistic

In [30]:
# Combine the bagging rounds: mean predicted class index per visit, rounded.
# NOTE(review): this averages class indices rather than taking a majority vote --
# confirm that is the intended way to combine the rounds.
avg_out = np.round(np.vstack(bag_val).sum(axis=0) / iteration)

x_input, x_seq_input, y_input, y_seq_input = xVal, xVal_seq, yVal, yVal_seq

# Target: for every subject, the arg-max over columns 1:-1 at each of its visits.
all_subject_target = np.hstack([
    np.asarray(
        [np.argmax(y_input[s, t, 1:-1]) for t in range(y_seq_input[s])]
    ).flatten()
    for s in range(x_input.shape[0])
])

matches = (avg_out == all_subject_target)
accuracy = np.sum(matches) / all_subject_target.shape[0]
print(accuracy)

0.781938325991


In [40]:
# Combine the bagging rounds: mean predicted class index per visit, rounded.
avg_out_val = np.round(np.sum(np.vstack(bag_val), axis=0)/iteration)
avg_out_val = avg_out_val.reshape(-1,1)

# One-hot encode against the fixed label set {0, 1, 2} (the three decoder outputs),
# so the CSV always has three columns even if a class is never predicted.
# A Categorical is passed directly: assigning a category dtype through `.iloc`
# is not reliable across pandas versions.
out_val = pd.get_dummies(pd.Categorical(avg_out_val.ravel(), categories=[0.0, 1.0, 2.0]))
# `.values` replaces the removed DataFrame.as_matrix(); cast to float so the
# savetxt output is the same whether get_dummies returns uint8 or bool columns.
out_val = out_val.values.astype(float)
np.savetxt("/home/emy24/tadpole/data/val_class.csv", out_val ,delimiter=',')

In [42]:
# Combine the bagging rounds: mean predicted class index per visit, rounded.
avg_out_test = np.round(np.sum(np.vstack(bag_test), axis=0)/iteration)
avg_out_test = avg_out_test.reshape(-1,1)

# One-hot encode against the fixed label set {0, 1, 2} (the three decoder outputs),
# so the CSV always has three columns even if a class is never predicted.
# A Categorical is passed directly: assigning a category dtype through `.iloc`
# is not reliable across pandas versions.
out_test = pd.get_dummies(pd.Categorical(avg_out_test.ravel(), categories=[0.0, 1.0, 2.0]))
# `.values` replaces the removed DataFrame.as_matrix(); cast to float so the
# savetxt output is the same whether get_dummies returns uint8 or bool columns.
out_test = out_test.values.astype(float)
np.savetxt("/home/emy24/tadpole/data/test_class.csv", out_test ,delimiter=',')