In [1]:

# coding: utf-8

# naive LSTM model trained with smooth data

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import numpy as np
from torch.autograd import Variable

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
class DecoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.lstm_1 = nn.LSTM(self.input_size, self.hidden_size)
        self.lstm_2 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_3 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_4 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_5 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_6 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_7 = nn.LSTM(self.hidden_size, self.hidden_size)
        
        self.out = nn.Linear(self.hidden_size, self.output_size)
          
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, input, ch_1, ch_2, ch_3, ch_4, ch_5, ch_6, ch_7):
        (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5) = ch_1, ch_2, ch_3, ch_4, ch_5
        (hidden_6, cell_6), (hidden_7, cell_7) = ch_6, ch_7
        
        
        output, (hidden_1, cell_1) = self.lstm_1(input.view(1,1,-1).float(), (hidden_1, cell_1))
        output_1 = output
        
        output, (hidden_2, cell_2) = self.lstm_2(output, (hidden_2, cell_2))
        output_2 = output
        
        output, (hidden_3, cell_3) = self.lstm_3(output + output_1, (hidden_3, cell_3)) # skip_connection 1
        output_3 = output
        
        output, (hidden_4, cell_4) = self.lstm_4(output + output_2, (hidden_4, cell_4)) # skip_connection 2
        output_4 = output
        
        output, (hidden_5, cell_5) = self.lstm_5(output + output_3, (hidden_5, cell_5)) # skip_connection 3
        output_5 = output
        
        output, (hidden_6, cell_6) = self.lstm_6(output + output_4, (hidden_6, cell_6)) # skip_connection 4
        
        output, (hidden_7, cell_7) = self.lstm_7(output + output_5, (hidden_7, cell_7)) # skip_connection 5
        
        output = self.out(output[0])
        #output = self.softmax(output)
        return output, (hidden_1, cell_1),(hidden_2, cell_2),(hidden_3, cell_3),(hidden_4, cell_4),(hidden_5, cell_5),(hidden_6, cell_6),(hidden_7, cell_7)
    
    def init_hidden(self):
        return torch.rand((1, 1, self.hidden_size), device=device)/100
    
    def init_cell(self):
        return torch.rand((1, 1, self.hidden_size), device=device)/100
    
    
print(torch.cuda.is_available())

True


In [3]:
import pickle

# load data from file

with open("/home/yiqin/2018summer_project/data/smooth_data.pkl", "rb") as f:
    dic = pickle.load(f)
    train_X = dic["X"]
    train_Y = dic["Y"]
    
    
    
target_Tensor = train_Y
maximum_target = len(train_Y)


In [10]:
def focal_loss(gamma, rescale, criterion, output, target):
    if int(target) == 1:
        p_negative = (1 - output[0,1])**gamma
        loss = rescale * p_negative * criterion(output, target.unsqueeze(0).long())
    else:
        p_negative = (1 - output[0,0])**gamma
        loss = p_negative * criterion(output, target.unsqueeze(0).long())
    return loss


def penalty_loss(penalty, criterion, output, target):
    if int(target) == 1:
        loss = penalty[0] * criterion(output, target)
    else:
        loss = penalty[1] * criterion(output, target)
    return loss

In [15]:
def train(input_tensor, target_tensor, decoder, decoder_optimizer, criterion, verbose = False, penalty = (3, 1)):
    decoder_optimizer.zero_grad()
    
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    
    hidden_1 = decoder.init_hidden()
    hidden_2 = decoder.init_hidden()
    hidden_3 = decoder.init_hidden()
    hidden_4 = decoder.init_hidden()
    hidden_5 = decoder.init_hidden()
    hidden_6 = decoder.init_hidden()
    hidden_7 = decoder.init_hidden()
    cell_1 = decoder.init_cell()
    cell_2 = decoder.init_cell()
    cell_3 = decoder.init_cell()
    cell_4 = decoder.init_cell()
    cell_5 = decoder.init_cell()
    cell_6 = decoder.init_cell()
    cell_7 = decoder.init_cell()
    
    loss = 0
    
    
    temp = []
    temp_score = []
    
    decoder_input = input_tensor[0]
    
    for di in range(0, target_length):
        decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7) = decoder(decoder_input, 
                        (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7))
        if verbose:
            output = float(decoder_output.data.cpu().numpy())
            temp.append(str('%.4f'%output))
            #temp_score.append(decoder_output)

        loss += penalty_loss(penalty, criterion, decoder_output.squeeze(0), target_tensor[di].float())
        print(decoder_output.squeeze(0), target_tensor[di].float())
        print("loss:", penalty_loss(penalty, criterion, decoder_output.squeeze(0), target_tensor[di].float()))

        if di + 1 < target_length:
            decoder_input = input_tensor[di + 1]

    loss.backward()

    if verbose:
        print("Prediction :", temp) 
        print("Target:", target_tensor.squeeze())
        #print("Score :", temp_score)
        
    

    decoder_optimizer.step()

    return loss.item() / target_length

# In[63]:

import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))


# In[64]:

def trainIters(decoder, n_iters, print_every = 1000, plot_every = 100, learning_rate = 0.01, total_batch = maximum_target, gamma = 0.1):    
    start = time.time()
    
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0
    
    decoder_optimizer = optim.SGD(decoder.parameters(), lr = learning_rate)
    
    criterion = nn.MSELoss()
    
    scheduler = optim.lr_scheduler.StepLR(decoder_optimizer, step_size = total_batch, gamma = gamma)
    
    
    for iter in range(1, n_iters + 1):
        num = iter % total_batch
        verbose = (iter % print_every == 0)
        input_tensor = train_X[num].to(device)
        target_tensor = target_Tensor[num].to(device)
        input_tensor = Variable(input_tensor, requires_grad = True)
        #print(input_tensor.shape, target_tensor.shape)
        if input_tensor.shape[0]<2:
            continue
        if input_tensor.shape[0] != target_tensor.shape[0]:
            continue
        
        loss = train(input_tensor, target_tensor, decoder, 
                     decoder_optimizer, criterion, verbose = verbose)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * print_every, print_loss_avg))
            torch.save(decoder.state_dict(), 'naive_lstm_train.pt')
        
        scheduler.step()
        

In [16]:
input_size = 2
hidden_size = 256
output_size = 1

decoder = DecoderRNN(input_size, hidden_size, output_size).to(device)

trainIters(decoder, 100000, print_every=1000, learning_rate=1e-2, gamma=0.2)



tensor(1.00000e-02 *
       [ 4.7733], device='cuda:0') tensor(0.5000, device='cuda:0')
loss: tensor(0.2045, device='cuda:0')
tensor(1.00000e-02 *
       [ 4.4124], device='cuda:0') tensor(0.2500, device='cuda:0')
loss: tensor(1.00000e-02 *
       4.2385, device='cuda:0')
tensor(1.00000e-02 *
       [ 4.2688], device='cuda:0') tensor(0.2500, device='cuda:0')
loss: tensor(1.00000e-02 *
       4.2978, device='cuda:0')
tensor(1.00000e-02 *
       [ 4.2194], device='cuda:0') tensor(0.5000, device='cuda:0')
loss: tensor(0.2096, device='cuda:0')
tensor(1.00000e-02 *
       [ 4.2021], device='cuda:0') tensor(1., device='cuda:0')
loss: tensor(2.7532, device='cuda:0')
tensor(1.00000e-02 *
       [ 4.1905], device='cuda:0') tensor(0.5000, device='cuda:0')
loss: tensor(0.2099, device='cuda:0')
tensor(1.00000e-02 *
       [ 4.1768], device='cuda:0') tensor(0.2500, device='cuda:0')
loss: tensor(1.00000e-02 *
       4.3360, device='cuda:0')
tensor(1.00000e-02 *
       [ 4.1611], device='cuda:0') ten

tensor([ 0.3359], device='cuda:0') tensor(0.5000, device='cuda:0')
loss: tensor(1.00000e-02 *
       2.6931, device='cuda:0')
tensor([ 0.3657], device='cuda:0') tensor(0.2500, device='cuda:0')
loss: tensor(1.00000e-02 *
       1.3391, device='cuda:0')
tensor([ 0.3893], device='cuda:0') tensor(0.1250, device='cuda:0')
loss: tensor(1.00000e-02 *
       6.9858, device='cuda:0')
tensor([ 0.4079], device='cuda:0') tensor(1.00000e-02 *
       6.2500, device='cuda:0')
loss: tensor(0.1193, device='cuda:0')
tensor([ 0.4224], device='cuda:0') tensor(1.00000e-02 *
       3.1250, device='cuda:0')
loss: tensor(0.1530, device='cuda:0')
tensor([ 0.4335], device='cuda:0') tensor(1.00000e-02 *
       1.5625, device='cuda:0')
loss: tensor(0.1746, device='cuda:0')
tensor([ 0.4418], device='cuda:0') tensor(1.00000e-02 *
       1.5625, device='cuda:0')
loss: tensor(0.1816, device='cuda:0')
tensor([ 0.4479], device='cuda:0') tensor(1.00000e-02 *
       3.1250, device='cuda:0')
loss: tensor(0.1736, device='c

tensor([ 0.2633], device='cuda:0') tensor(0.5000, device='cuda:0')
loss: tensor(1.00000e-02 *
       5.6031, device='cuda:0')
tensor([ 0.2842], device='cuda:0') tensor(0.2500, device='cuda:0')
loss: tensor(1.00000e-03 *
       1.1689, device='cuda:0')
tensor([ 0.3015], device='cuda:0') tensor(0.1250, device='cuda:0')
loss: tensor(1.00000e-02 *
       3.1165, device='cuda:0')
tensor([ 0.3157], device='cuda:0') tensor(1.00000e-02 *
       6.2500, device='cuda:0')
loss: tensor(1.00000e-02 *
       6.4105, device='cuda:0')
tensor([ 0.3269], device='cuda:0') tensor(1.00000e-02 *
       3.1250, device='cuda:0')
loss: tensor(1.00000e-02 *
       8.7435, device='cuda:0')
tensor([ 0.3357], device='cuda:0') tensor(1.00000e-02 *
       3.1250, device='cuda:0')
loss: tensor(1.00000e-02 *
       9.2685, device='cuda:0')
tensor([ 0.3423], device='cuda:0') tensor(1.00000e-02 *
       3.1250, device='cuda:0')
loss: tensor(1.00000e-02 *
       9.6763, device='cuda:0')
tensor([ 0.3472], device='cuda:0')

tensor([ 0.2562], device='cuda:0') tensor(0.5000, device='cuda:0')
loss: tensor(1.00000e-02 *
       5.9458, device='cuda:0')
tensor([ 0.2769], device='cuda:0') tensor(0.2500, device='cuda:0')
loss: tensor(1.00000e-04 *
       7.2435, device='cuda:0')
tensor([ 0.2947], device='cuda:0') tensor(0.1250, device='cuda:0')
loss: tensor(1.00000e-02 *
       2.8808, device='cuda:0')
tensor([ 0.3097], device='cuda:0') tensor(1.00000e-02 *
       6.2500, device='cuda:0')
loss: tensor(1.00000e-02 *
       6.1118, device='cuda:0')
tensor([ 0.3220], device='cuda:0') tensor(1.00000e-02 *
       3.1250, device='cuda:0')
loss: tensor(1.00000e-02 *
       8.4534, device='cuda:0')
tensor([ 0.3318], device='cuda:0') tensor(1.00000e-02 *
       1.5625, device='cuda:0')
loss: tensor(1.00000e-02 *
       9.9944, device='cuda:0')
tensor([ 0.3393], device='cuda:0') tensor(1.00000e-02 *
       1.5625, device='cuda:0')
loss: tensor(0.1048, device='cuda:0')
tensor([ 0.3451], device='cuda:0') tensor(1.00000e-02 *

KeyboardInterrupt: 