In [93]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline 
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
from torch.autograd import Variable

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
    
class DecoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.lstm_1 = nn.LSTM(self.input_size, self.hidden_size)
        self.lstm_2 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_3 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_4 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_5 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_6 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_7 = nn.LSTM(self.hidden_size, self.hidden_size)
        
        self.out = nn.Linear(self.hidden_size, self.output_size)
          
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, input, ch_1, ch_2, ch_3, ch_4, ch_5, ch_6, ch_7):
        (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5) = ch_1, ch_2, ch_3, ch_4, ch_5
        (hidden_6, cell_6), (hidden_7, cell_7) = ch_6, ch_7
        
        
        output, (hidden_1, cell_1) = self.lstm_1(input.view(1,1,-1).float(), (hidden_1, cell_1))
        output_1 = output
        
        output, (hidden_2, cell_2) = self.lstm_2(output, (hidden_2, cell_2))
        output_2 = output
        
        output, (hidden_3, cell_3) = self.lstm_3(output + output_1, (hidden_3, cell_3)) # skip_connection 1
        output_3 = output
        
        output, (hidden_4, cell_4) = self.lstm_4(output + output_2, (hidden_4, cell_4)) # skip_connection 2
        output_4 = output
        
        output, (hidden_5, cell_5) = self.lstm_5(output + output_3, (hidden_5, cell_5)) # skip_connection 3
        output_5 = output
        
        output, (hidden_6, cell_6) = self.lstm_6(output + output_4, (hidden_6, cell_6)) # skip_connection 4
        
        output, (hidden_7, cell_7) = self.lstm_7(output + output_5, (hidden_7, cell_7)) # skip_connection 5
        
        output = self.out(output[0])
        output = self.softmax(output)
        return output, (hidden_1, cell_1),(hidden_2, cell_2),(hidden_3, cell_3),(hidden_4, cell_4),(hidden_5, cell_5),(hidden_6, cell_6),(hidden_7, cell_7)
    
    def init_hidden(self):
        return torch.rand((1, 1, self.hidden_size), device=device)/100
    
    def init_cell(self):
        return torch.rand((1, 1, self.hidden_size), device=device)/100
    
    
print(torch.cuda.is_available())



""" 
NOTE: 
Encoder RNN input of size (Sentence_length * input_feature)
Encoder RNN output of size (1 * 1 * hidden_size)  should be (num_sentences * 1 * hidden_size)

Decoder RNN input of size 0 (scalar value)
Decoder RNN output of size (1 * target_num)   should be (num_sentences * target_num)
"""

True


' \nNOTE: \nEncoder RNN input of size (Sentence_length * input_feature)\nEncoder RNN output of size (1 * 1 * hidden_size)  should be (num_sentences * 1 * hidden_size)\n\nDecoder RNN input of size 0 (scalar value)\nDecoder RNN output of size (1 * target_num)   should be (num_sentences * target_num)\n'

In [121]:
import pickle

# load data from file
with open("/home/yiqin/2018summer_project/DeepMusic/pitch_data.pkl", "rb") as f:
    dic = pickle.load(f)
    train_X = dic["X"]
    train_Y = dic["Y"]
    #time_X = dic["time"]
    
for i in range(train_Y.shape[0]):
    train_Y[i] = torch.from_numpy((train_Y[i] == 4).astype(int)).float()
    
    

In [122]:
"""def input_transform(train_x, time_x, i):
    output = torch.from_numpy(np.array([train_x[i], time_x[i]]))
    return output.transpose(1, 0).to(device)
"""

def input_transform(train_x, i):
    output = torch.from_numpy(np.array([train_x[i][:,1] - train_x[i][:,0], train_x[i][:,2]]))
    return output.squeeze(0).transpose(1,0).to(device)
    
    
def input_factorize(train_x):
    output = []
    for i in range(train_x.shape[0]):
        for item in np.array_split(train_x[i], train_x[i].shape[0] / 30):
            output.append(item)
    return output


def target_factorize(train_y):
    output = []
    for i in range(train_y.shape[0]):
        for item in np.array_split(train_y[i].numpy(), train_y[i].shape[0] / 30):
            output.append(torch.Tensor(item))
    return output

def target_transform(train_y):
    output = torch.zeros((1, 2))
    output[0, int(train_y)] = 1
    return output.unsqueeze(1).to(device)


train_X = input_factorize(train_X)
#time_X = input_factorize(time_X)
target_Tensor = target_factorize(train_Y)

maximum_target = len(target_Tensor)


In [None]:
    print()

In [96]:
def focal_loss(gamma, rescale, criterion, output, target):
    if int(target) == 1:
        p_negative = (1 - output[0,1])**gamma
        loss = rescale * p_negative * criterion(output, target.unsqueeze(0).long())
    else:
        p_negative = (1 - output[0,0])**gamma
        loss = p_negative * criterion(output, target.unsqueeze(0).long())
    return loss

In [97]:
import random
teacher_forcing_ratio = 1


def train(input_tensor, target_tensor, decoder, decoder_optimizer, criterion, verbose = False):
    decoder_optimizer.zero_grad()
    
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    
    hidden_1 = decoder.init_hidden()
    hidden_2 = decoder.init_hidden()
    hidden_3 = decoder.init_hidden()
    hidden_4 = decoder.init_hidden()
    hidden_5 = decoder.init_hidden()
    hidden_6 = decoder.init_hidden()
    hidden_7 = decoder.init_hidden()
    cell_1 = decoder.init_cell()
    cell_2 = decoder.init_cell()
    cell_3 = decoder.init_cell()
    cell_4 = decoder.init_cell()
    cell_5 = decoder.init_cell()
    cell_6 = decoder.init_cell()
    cell_7 = decoder.init_cell()
    
    loss = 0
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    temp = []
    temp_score = []
    
    decoder_input = input_tensor[0]
    
    if use_teacher_forcing:
        for di in range(0, target_length):
            decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7) = decoder(decoder_input, 
                            (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7))
            if verbose:
                temp.append(int(torch.argmax(decoder_output, dim = 1).cpu().numpy()))
                temp_score.append(decoder_output)
                #print("decoder_output:", decoder_output)
                
            #print(input_tensor[di])
            #temp = float(loss)
            loss += focal_loss(2, 5, criterion,decoder_output, target_tensor[di].squeeze())
            if verbose:
                print(decoder_output, target_tensor[di])
                print(focal_loss(2, 5, criterion,decoder_output, target_tensor[di].squeeze()))
            #loss += criterion(decoder_output, target_tensor[di].long())
            if di + 1 < target_length:
                decoder_input = input_tensor[di + 1]
                
    else:
        for di in range(1, input_length):
            decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5) = decoder(decoder_input, 
                            (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5))
            if verbose:
                temp.append(int(torch.argmax(decoder_output, dim = 1).cpu().numpy()))
            loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
            
            #print(loss)
            decoder_input = decoder_output

    loss.backward()
    """if verbose:
        for name, item in decoder.named_parameters():
            if item.requires_grad:
                print(name)
                print(item.shape)
                print(item.grad)
"""
    if verbose:
        print("Prediction :", temp) 
        print("Score :", temp_score)
        print("Target:", target_tensor.squeeze()) 
    

    decoder_optimizer.step()

    return loss.item() / target_length

In [98]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [125]:
def trainIters(decoder, n_iters, print_every = 1000, plot_every = 100, learning_rate = 0.01, CEL_weight=[1,5], total_batch = maximum_target):    
    start = time.time()
    
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0
    
    decoder_optimizer = optim.Adagrad(decoder.parameters(), lr = learning_rate)
    
    criterion = nn.CrossEntropyLoss(weight = torch.Tensor(CEL_weight).to(device))
    
    
    for iter in range(1, n_iters + 1):
        num = iter % total_batch
        verbose = (iter % print_every == 0)
        input_tensor = input_transform(train_X, num - 1).to(device)
        target_tensor = target_Tensor[num - 1].to(device)
        input_tensor = Variable(input_tensor, requires_grad = True)
        #print(input_tensor.shape, target_tensor.shape)
        if input_tensor.shape[0] != target_tensor.shape[0]:
            continue
        
        loss = train(input_tensor, target_tensor, decoder, 
                     decoder_optimizer, criterion, verbose = verbose)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [None]:
def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)


input_size = 2
hidden_size = 256
output_size = 2

decoder = DecoderRNN(input_size, hidden_size, output_size).to(device)

trainIters(decoder, 10000, print_every=100, learning_rate=1e-2)
    

tensor([[ 0.5832,  0.4168]], device='cuda:0') tensor([ 1.], device='cuda:0')
tensor(1.3264, device='cuda:0')
tensor([[ 0.5744,  0.4256]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1126, device='cuda:0')
tensor([[ 0.5567,  0.4433]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1254, device='cuda:0')
tensor([[ 0.5503,  0.4497]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1303, device='cuda:0')
tensor([[ 0.5478,  0.4522]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1322, device='cuda:0')
tensor([[ 0.5468,  0.4532]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1330, device='cuda:0')
tensor([[ 0.5464,  0.4536]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1333, device='cuda:0')
tensor([[ 0.5462,  0.4538]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1334, device='cuda:0')
tensor([[ 0.5461,  0.4539]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1335, device='cuda:0')
tensor([[ 0.5461,  

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5437,  0.4563]], device='cuda:0'), tensor([[ 0.5659,  0.4341]], device='cuda:0'), tensor([[ 0.5574,  0.4426]], device='cuda:0'), tensor([[ 0.5543,  0.4457]], device='cuda:0'), tensor([[ 0.5533,  0.4467]], device='cuda:0'), tensor([[ 0.5529,  0.4471]], device='cuda:0'), tensor([[ 0.5528,  0.4472]], device='cuda:0'), tensor([[ 0.5528,  0.4472]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5527,  0.4473]], device

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5276,  0.4724]], device='cuda:0'), tensor([[ 0.5509,  0.4491]], device='cuda:0'), tensor([[ 0.5487,  0.4513]], device='cuda:0'), tensor([[ 0.5446,  0.4554]], device='cuda:0'), tensor([[ 0.5412,  0.4588]], device='cuda:0'), tensor([[ 0.5390,  0.4610]], device='cuda:0'), tensor([[ 0.5377,  0.4623]], device='cuda:0'), tensor([[ 0.5369,  0.4631]], device='cuda:0'), tensor([[ 0.5365,  0.4635]], device='cuda:0'), tensor([[ 0.5362,  0.4638]], device='cuda:0'), tensor([[ 0.5360,  0.4640]], device='cuda:0'), tensor([[ 0.5359,  0.4641]], device='cuda:0'), tensor([[ 0.5358,  0.4642]], device='cuda:0'), tensor([[ 0.5358,  0.4642]], device='cuda:0'), tensor([[ 0.5358,  0.4642]], device='cuda:0'), tensor([[ 0.5357,  0.4643]], device='cuda:0'), tensor([[ 0.5357,  0.4643]], device='cuda:0'), tensor([[ 0.5357,  0.4643]], device='cuda:0'), tensor([[ 0.5357,  0.4643]], device='cuda

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5438,  0.4562]], device='cuda:0'), tensor([[ 0.5674,  0.4326]], device='cuda:0'), tensor([[ 0.5684,  0.4316]], device='cuda:0'), tensor([[ 0.5665,  0.4335]], device='cuda:0'), tensor([[ 0.5643,  0.4357]], device='cuda:0'), tensor([[ 0.5623,  0.4377]], device='cuda:0'), tensor([[ 0.5607,  0.4393]], device='cuda:0'), tensor([[ 0.5595,  0.4405]], device='cuda:0'), tensor([[ 0.5585,  0.4415]], device='cuda:0'), tensor([[ 0.5578,  0.4422]], device='cuda:0'), tensor([[ 0.5572,  0.4428]], device='cuda:0'), tensor([[ 0.5568,  0.4432]], device='cuda:0'), tensor([[ 0.5565,  0.4435]], device='cuda:0'), tensor([[ 0.5563,  0.4437]], device='cuda:0'), tensor([[ 0.5561,  0.4439]], device='cuda:0'), tensor([[ 0.5560,  0.4440]], device='cuda:0'), tensor([[ 0.5558,  0.4442]], device='cuda:0'), tensor([[ 0.5558,  0.4442]], device='cuda:0'), tensor([[ 0.5557,  0.4443]], dev

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5308,  0.4692]], device='cuda:0'), tensor([[ 0.5381,  0.4619]], device='cuda:0'), tensor([[ 0.5336,  0.4664]], device='cuda:0'), tensor([[ 0.5300,  0.4700]], device='cuda:0'), tensor([[ 0.5276,  0.4724]], device='cuda:0'), tensor([[ 0.5261,  0.4739]], device='cuda:0'), tensor([[ 0.5253,  0.4747]], device='cuda:0'), tensor([[ 0.5248,  0.4752]], device='cuda:0'), tensor([[ 0.5245,  0.4755]], device='cuda:0'), tensor([[ 0.5243,  0.4757]], device='cuda:0'), tensor([[ 0.5242,  0.4758]], device='cuda:0'), tensor([[ 0.5241,  0.4759]], device='cuda:0'), tensor([[ 0.5241,  0.4759]], device='cuda:0'), tensor([[ 0.5241,  0.4759]], device='cuda:0'), tensor([[ 0.5241,  0.4759]], device='cuda:0'), tensor([[ 0.5240,  0.4760]], device='cuda:0'), tensor([[ 0.5240,  0.4760]], device='cuda:0'), tensor([[ 0.5240,  0.4760]], device='cuda:0'), tensor([[ 0.5240,  0.4760]], device='cuda

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5481,  0.4519]], device='cuda:0'), tensor([[ 0.5617,  0.4383]], device='cuda:0'), tensor([[ 0.5583,  0.4417]], device='cuda:0'), tensor([[ 0.5550,  0.4450]], device='cuda:0'), tensor([[ 0.5524,  0.4476]], device='cuda:0'), tensor([[ 0.5505,  0.4495]], device='cuda:0'), tensor([[ 0.5491,  0.4509]], device='cuda:0'), tensor([[ 0.5482,  0.4518]], device='cuda:0'), tensor([[ 0.5475,  0.4525]], device='cuda:0'), tensor([[ 0.5471,  0.4529]], device='cuda:0'), tensor([[ 0.5468,  0.4532]], device='cuda:0'), tensor([[ 0.5466,  0.4534]], device='cuda:0'), tensor([[ 0.5464,  0.4536]], device='cuda:0'), tensor([[ 0.5463,  0.4537]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='c

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5454,  0.4546]], device='cuda:0'), tensor([[ 0.5633,  0.4367]], device='cuda:0'), tensor([[ 0.5609,  0.4391]], device='cuda:0'), tensor([[ 0.5577,  0.4423]], device='cuda:0'), tensor([[ 0.5552,  0.4448]], device='cuda:0'), tensor([[ 0.5535,  0.4465]], device='cuda:0'), tensor([[ 0.5524,  0.4476]], device='cuda:0'), tensor([[ 0.5518,  0.4482]], device='cuda:0'), tensor([[ 0.5514,  0.4486]], device='cuda:0'), tensor([[ 0.5512,  0.4488]], device='cuda:0'), tensor([[ 0.5510,  0.4490]], device='cuda:0'), tensor([[ 0.5509,  0.4491]], device='cuda:0'), tensor([[ 0.5509,  0.4491]], device='cuda:0'), tensor([[ 0.5509,  0.4491]], device='cuda:0'), tensor([[ 0.5508,  0.4492]], device='cuda:0'), tensor([[ 0.5508,  0.4492]], device='cuda:0'), tensor([[ 0.5508,  0.4492]], device='cuda:0'), tensor([[ 0.5508,  0.4492]], device='cuda:0'), tensor([[ 0.5508,  0.4492]], device='c

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5302,  0.4698]], device='cuda:0'), tensor([[ 0.5640,  0.4360]], device='cuda:0'), tensor([[ 0.5692,  0.4308]], device='cuda:0'), tensor([[ 0.5688,  0.4312]], device='cuda:0'), tensor([[ 0.5674,  0.4326]], device='cuda:0'), tensor([[ 0.5661,  0.4339]], device='cuda:0'), tensor([[ 0.5650,  0.4350]], device='cuda:0'), tensor([[ 0.5640,  0.4360]], device='cuda:0'), tensor([[ 0.5633,  0.4367]], device='cuda:0'), tensor([[ 0.5627,  0.4373]], device='cuda:0'), tensor([[ 0.5622,  0.4378]], device='cuda:0'), tensor([[ 0.5618,  0.4382]], device='cuda:0'), tensor([[ 0.5615,  0.4385]], device='cuda:0'), tensor([[ 0.5612,  0.4388]], device='cuda:0'), tensor([[ 0.5610,  0.4390]], device='cuda:0'), tensor([[ 0.5609,  0.4391]], device='cuda:0'), tensor([[ 0.5607,  0.4393]], device='cuda:0'), tensor([[ 0.5606,  0.4394]], device='cuda:0'), tensor([[ 0.5605,  0.4395]], device

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5305,  0.4695]], device='cuda:0'), tensor([[ 0.5531,  0.4469]], device='cuda:0'), tensor([[ 0.5547,  0.4453]], device='cuda:0'), tensor([[ 0.5529,  0.4471]], device='cuda:0'), tensor([[ 0.5509,  0.4491]], device='cuda:0'), tensor([[ 0.5493,  0.4507]], device='cuda:0'), tensor([[ 0.5480,  0.4520]], device='cuda:0'), tensor([[ 0.5471,  0.4529]], device='cuda:0'), tensor([[ 0.5465,  0.4535]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5457,  0.4543]], device='cuda:0'), tensor([[ 0.5455,  0.4545]], device='cuda:0'), tensor([[ 0.5454,  0.4546]], device='cuda:0'), tensor([[ 0.5453,  0.4547]], device='cuda:0'), tensor([[ 0.5452,  0.4548]], device='cuda:0'), tensor([[ 0.5451,  0.4549]], device='cuda:0'), tensor([[ 0.5451,  0.4549]], device='cuda:0'), tensor([[ 0.5451,  0.4549]], device='cuda:0'), tensor([[ 0.5451,  0.4549]], device

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5458,  0.4542]], device='cuda:0'), tensor([[ 0.5794,  0.4206]], device='cuda:0'), tensor([[ 0.5866,  0.4134]], device='cuda:0'), tensor([[ 0.5874,  0.4126]], device='cuda:0'), tensor([[ 0.5869,  0.4131]], device='cuda:0'), tensor([[ 0.5862,  0.4138]], device='cuda:0'), tensor([[ 0.5857,  0.4143]], device='cuda:0'), tensor([[ 0.5853,  0.4147]], device='cuda:0'), tensor([[ 0.5850,  0.4150]], device='cuda:0'), tensor([[ 0.5847,  0.4153]], device='cuda:0'), tensor([[ 0.5846,  0.4154]], device='cuda:0'), tensor([[ 0.5844,  0.4156]], device='cuda:0'), tensor([[ 0.5843,  0.4157]], device='cuda:0'), tensor([[ 0.5843,  0.4157]], device='cuda:0'), tensor([[ 0.5842,  0.4158]], device='cuda:0'), tensor([[ 0.5842,  0.4158]], device='cuda:0'), tensor([[ 0.5842,  0.4158]], device='cuda:0'), tensor([[ 0.5841,  0.4159]], device='cuda:0'), tensor([[ 0.5841,  0.4159]], device='c

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5578,  0.4422]], device='cuda:0'), tensor([[ 0.5726,  0.4274]], device='cuda:0'), tensor([[ 0.5719,  0.4281]], device='cuda:0'), tensor([[ 0.5704,  0.4296]], device='cuda:0'), tensor([[ 0.5690,  0.4310]], device='cuda:0'), tensor([[ 0.5681,  0.4319]], device='cuda:0'), tensor([[ 0.5675,  0.4325]], device='cuda:0'), tensor([[ 0.5671,  0.4329]], device='cuda:0'), tensor([[ 0.5669,  0.4331]], device='cuda:0'), tensor([[ 0.5667,  0.4333]], device='cuda:0'), tensor([[ 0.5667,  0.4333]], device='cuda:0'), tensor([[ 0.5666,  0.4334]], device='cuda:0'), tensor([[ 0.5666,  0.4334]], device='cuda:0'), tensor([[ 0.5666,  0.4334]], device='cuda:0'), tensor([[ 0.5665,  0.4335]], device='cuda:0'), tensor([[ 0.5665,  0.4335]], device='cuda:0'), tensor([[ 0.5665,  0.4335]], device='cuda:0'), tensor([[ 0.5665,  0.4335]], device='cuda:0'), tensor([[ 0.5665,  0.4335]], device='c

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5483,  0.4517]], device='cuda:0'), tensor([[ 0.5525,  0.4475]], device='cuda:0'), tensor([[ 0.5460,  0.4540]], device='cuda:0'), tensor([[ 0.5417,  0.4583]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5381,  0.4619]], device='cuda:0'), tensor([[ 0.5375,  0.4625]], device='cuda:0'), tensor([[ 0.5372,  0.4628]], device='cuda:0'), tensor([[ 0.5371,  0.4629]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='c

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5650,  0.4350]], device='cuda:0'), tensor([[ 0.5601,  0.4399]], device='cuda:0'), tensor([[ 0.5533,  0.4467]], device='cuda:0'), tensor([[ 0.5496,  0.4504]], device='cuda:0'), tensor([[ 0.5477,  0.4523]], device='cuda:0'), tensor([[ 0.5469,  0.4531]], device='cuda:0'), tensor([[ 0.5465,  0.4535]], device='cuda:0'), tensor([[ 0.5463,  0.4537]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='c

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5449,  0.4551]], device='cuda:0'), tensor([[ 0.5495,  0.4505]], device='cuda:0'), tensor([[ 0.5464,  0.4536]], device='cuda:0'), tensor([[ 0.5435,  0.4565]], device='cuda:0'), tensor([[ 0.5415,  0.4585]], device='cuda:0'), tensor([[ 0.5404,  0.4596]], device='cuda:0'), tensor([[ 0.5399,  0.4601]], device='cuda:0'), tensor([[ 0.5396,  0.4604]], device='cuda:0'), tensor([[ 0.5394,  0.4606]], device='cuda:0'), tensor([[ 0.5394,  0.4606]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device

tensor([[ 0.5409,  0.4591]], device='cuda:0') tensor([ 0.], device='cuda:0')
tensor(0.1377, device='cuda:0')
tensor([[ 0.5409,  0.4591]], device='cuda:0') tensor([ 1.], device='cuda:0')
tensor(1.0750, device='cuda:0')
Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5499,  0.4501]], device='cuda:0'), tensor([[ 0.5472,  0.4528]], device='cuda:0'), tensor([[ 0.5443,  0.4557]], device='cuda:0'), tensor([[ 0.5424,  0.4576]], device='cuda:0'), tensor([[ 0.5416,  0.4584]], device='cuda:0'), tensor([[ 0.5412,  0.4588]], device='cuda:0'), tensor([[ 0.5410,  0.4590]], device='cuda:0'), tensor([[ 0.5410,  0.4590]], device='cuda:0'), tensor([[ 0.5409,  0.4591]], device='cuda:0'), tensor([[ 0.5409,  0.4591]], device='cuda:0'), tensor([[ 0.5409,  0.4591]], device='cuda:0'), tensor([[ 0.5409,  0.4591]], device='cuda:0'), tensor([[ 0.5409,  0.4591]], device='cuda:0'), tensor([[ 0.5409,  0.4591]], device='cuda:0'), te

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5527,  0.4473]], device='cuda:0'), tensor([[ 0.5598,  0.4402]], device='cuda:0'), tensor([[ 0.5557,  0.4443]], device='cuda:0'), tensor([[ 0.5525,  0.4475]], device='cuda:0'), tensor([[ 0.5501,  0.4499]], device='cuda:0'), tensor([[ 0.5487,  0.4513]], device='cuda:0'), tensor([[ 0.5478,  0.4522]], device='cuda:0'), tensor([[ 0.5473,  0.4527]], device='cuda:0'), tensor([[ 0.5470,  0.4530]], device='cuda:0'), tensor([[ 0.5468,  0.4532]], device='cuda:0'), tensor([[ 0.5467,  0.4533]], device='cuda:0'), tensor([[ 0.5467,  0.4533]], device='cuda:0'), tensor([[ 0.5466,  0.4534]], device='cuda:0'), tensor([[ 0.5466,  0.4534]], device='cuda:0'), tensor([[ 0.5466,  0.4534]], device='cuda:0'), tensor([[ 0.5466,  0.4534]], device='cuda:0'), tensor([[ 0.5466,  0.4534]], device='cuda:0'), tensor([[ 0.5466,  0.4534]], device='cuda:0'), tensor([[ 0.5466,  0.4534]], device

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5774,  0.4226]], device='cuda:0'), tensor([[ 0.5669,  0.4331]], device='cuda:0'), tensor([[ 0.5546,  0.4454]], device='cuda:0'), tensor([[ 0.5494,  0.4506]], device='cuda:0'), tensor([[ 0.5473,  0.4527]], device='cuda:0'), tensor([[ 0.5465,  0.4535]], device='cuda:0'), tensor([[ 0.5462,  0.4538]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda:0'), tensor([[ 0.5461,  0.4539]], device='cuda

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5634,  0.4366]], device='cuda:0'), tensor([[ 0.5590,  0.4410]], device='cuda:0'), tensor([[ 0.5464,  0.4536]], device='cuda:0'), tensor([[ 0.5417,  0.4583]], device='cuda:0'), tensor([[ 0.5401,  0.4599]], device='cuda:0'), tensor([[ 0.5395,  0.4605]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5393,  0.4607]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='cuda:0'), tensor([[ 0.5392,  0.4608]], device='c

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5837,  0.4163]], device='cuda:0'), tensor([[ 0.5995,  0.4005]], device='cuda:0'), tensor([[ 0.5810,  0.4190]], device='cuda:0'), tensor([[ 0.5720,  0.4280]], device='cuda:0'), tensor([[ 0.5683,  0.4317]], device='cuda:0'), tensor([[ 0.5668,  0.4332]], device='cuda:0'), tensor([[ 0.5662,  0.4338]], device='cuda:0'), tensor([[ 0.5660,  0.4340]], device='cuda:0'), tensor([[ 0.5659,  0.4341]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device='cuda:0'), tensor([[ 0.5658,  0.4342]], device

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5340,  0.4660]], device='cuda:0'), tensor([[ 0.5519,  0.4481]], device='cuda:0'), tensor([[ 0.5481,  0.4519]], device='cuda:0'), tensor([[ 0.5437,  0.4563]], device='cuda:0'), tensor([[ 0.5406,  0.4594]], device='cuda:0'), tensor([[ 0.5388,  0.4612]], device='cuda:0'), tensor([[ 0.5378,  0.4622]], device='cuda:0'), tensor([[ 0.5373,  0.4627]], device='cuda:0'), tensor([[ 0.5370,  0.4630]], device='cuda:0'), tensor([[ 0.5368,  0.4632]], device='cuda:0'), tensor([[ 0.5367,  0.4633]], device='cuda:0'), tensor([[ 0.5367,  0.4633]], device='cuda:0'), tensor([[ 0.5366,  0.4634]], device='cuda:0'), tensor([[ 0.5366,  0.4634]], device='cuda:0'), tensor([[ 0.5366,  0.4634]], device='cuda:0'), tensor([[ 0.5366,  0.4634]], device='cuda:0'), tensor([[ 0.5366,  0.4634]], device='cuda:0'), tensor([[ 0.5366,  0.4634]], device='cuda:0'), tensor([[ 0.5366,  0.4634]], device='cuda

Prediction : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5300,  0.4700]], device='cuda:0'), tensor([[ 0.5386,  0.4614]], device='cuda:0'), tensor([[ 0.5355,  0.4645]], device='cuda:0'), tensor([[ 0.5333,  0.4667]], device='cuda:0'), tensor([[ 0.5323,  0.4677]], device='cuda:0'), tensor([[ 0.5319,  0.4681]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda:0'), tensor([[ 0.5317,  0.4683]], device='cuda

In [55]:
print(decoder.lstm_1.state_dict().keys())
print(decoder.lstm_1.state_dict()['weight_ih_l0'].grad)

odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0'])
None


In [68]:
def evaluate(decoder, test_X):
    input_length = input_tensor.size(0)
    
    loss = 0
        
    hidden_1 = decoder.init_hidden()
    hidden_2 = decoder.init_hidden()
    hidden_3 = decoder.init_hidden()
    hidden_4 = decoder.init_hidden()
    hidden_5 = decoder.init_hidden()
    hidden_6 = decoder.init_hidden()
    hidden_7 = decoder.init_hidden()
    cell_1 = decoder.init_cell()
    cell_2 = decoder.init_cell()
    cell_3 = decoder.init_cell()
    cell_4 = decoder.init_cell()
    cell_5 = decoder.init_cell()
    cell_6 = decoder.init_cell()
    cell_7 = decoder.init_cell()
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    temp = []
    temp_score = []
    
    decoder_input = input_tensor[0]
    
    for di in range(0, input_length):
        decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7) = decoder(decoder_input, 
                        (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7))
        temp.append(int(torch.argmax(decoder_output, dim = 1).cpu().numpy()))
        temp_score.append(decoder_output)
        if di + 1 < input_length:
            decoder_input = input_tensor[di+1]

    return temp

In [71]:
for i in range(5):
    target_tensor = target_Tensor[i-1].to(device)
    input_tensor = input_transform(train_X, i-1).to(device)
    print(evaluate(decoder, input_tensor))
    print(target_tensor.squeeze())

[1, 0, 0, 0, 0, 0, 1]
tensor([ 1.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
[0, 0, 0, 0, 0, 0, 0, 0]
tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], device='cuda:0')
[0, 0, 0, 1, 0, 0, 0, 0]
tensor([ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.], device='cuda:0')
[0, 0, 0, 0, 0, 0, 0, 1]
tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
[0, 0, 0, 0, 0, 0, 0, 0]
tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], device='cuda:0')
