In [1]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline 
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
from torch.autograd import Variable

MAX_LENGTH = 865
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")


class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        
        #self.embedding = nn.Embedding(input_size, hidden_size)
        self.embedding = nn.Linear(self.input_size, self.hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
    
    def forward(self, input, hidden):
        output = self.embedding(input.float()).view(1, 1,-1)
        output, hidden = self.gru(output, hidden)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
    
    
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.lstm_1 = nn.LSTM(self.output_size, self.hidden_size)
        self.lstm_2 = nn.LSTM(self.output_size, self.hidden_size)
        self.lstm_3 = nn.LSTM(self.output_size, self.hidden_size)
        self.lstm_4 = nn.LSTM(self.output_size, self.hidden_size)
        self.lstm_5 = nn.LSTM(self.output_size, self.hidden_size)
        
        self.out1 = nn.Linear(self.hidden_size, self.output_size)
        self.out2 = nn.Linear(self.hidden_size, self.output_size)
        self.out3 = nn.Linear(self.hidden_size, self.output_size)
        self.out4 = nn.Linear(self.hidden_size, self.output_size)
        self.out5 = nn.Linear(self.hidden_size, self.output_size)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, input, ch_1, ch_2, ch_3, ch_4, ch_5):
        (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5) = ch_1, ch_2, ch_3, ch_4, ch_5
    
        #output.register_hook(print)
        output, (hidden_1, cell_1) = self.lstm_1(input.view(1,1,-1).float(), (hidden_1, cell_1))
        #print(self.lstm_1.state_dict()['weight_ih_l0'])
        #output.register_hook(print)
        output = self.out1(output)
        output_1 = output
        output, (hidden_2, cell_2) = self.lstm_2(output, (hidden_2, cell_2))
        #output.register_hook(print)
        output = self.out2(output)
        output_2 = output
        output, (hidden_3, cell_3) = self.lstm_3(output + output_1, (hidden_3, cell_3)) # skip_connection 1
        #output.register_hook(print)
        output = self.out3(output)
        output_3 = output
        output, (hidden_4, cell_4) = self.lstm_4(output + output_2, (hidden_4, cell_4)) # skip_connection 2
        #output.register_hook(print)
        output = self.out4(output)
        output, (hidden_5, cell_5) = self.lstm_5(output + output_3, (hidden_5, cell_5)) # skip_connection 3
        #output.register_hook(print)
        output = self.out5(output[0])
        output = self.softmax(output)
        return output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5)
    
    def init_hidden(self):
        return torch.rand((1, 1, self.hidden_size), device=device)/100
    
    def init_cell(self):
        return torch.rand((1, 1, self.hidden_size), device=device)/100
    
    
print(torch.cuda.is_available())



""" 
NOTE: 
Encoder RNN input of size (Sentence_length * input_feature)
Encoder RNN output of size (1 * 1 * hidden_size)  should be (num_sentences * 1 * hidden_size)

Decoder RNN input of size 0 (scalar value)
Decoder RNN output of size (1 * target_num)   should be (num_sentences * target_num)
"""

True


' \nNOTE: \nEncoder RNN input of size (Sentence_length * input_feature)\nEncoder RNN output of size (1 * 1 * hidden_size)  should be (num_sentences * 1 * hidden_size)\n\nDecoder RNN input of size 0 (scalar value)\nDecoder RNN output of size (1 * target_num)   should be (num_sentences * target_num)\n'

In [2]:
import pickle

# load data from file
with open("pitch_data.pkl", "rb") as f:
    dic = pickle.load(f)
    train_X = dic["X"]
    train_Y = dic["Y"]
    time_X = dic["time"]
    
for i in range(train_Y.shape[0]):
    train_Y[i] = torch.from_numpy((train_Y[i] == 4).astype(int)).float()
    

In [3]:
def input_transform(train_x, time_x, i):
    output = torch.from_numpy(np.array([train_x[i], time_x[i]]))
    return output.transpose(1, 0).to(device)

def input_factorize(train_x):
    output = []
    for i in range(train_x.shape[0]):
        for item in np.array_split(train_x[i], train_x[i].shape[0] / 9):
            output.append(item)
    return output


def target_factorize(train_y):
    output = []
    for i in range(train_y.shape[0]):
        for item in np.array_split(train_y[i].numpy(), train_y[i].shape[0] / 9):
            output.append(torch.Tensor(item))
    return output

def target_transform(train_y):
    output = torch.zeros((1, 2))
    output[0, int(train_y)] = 1
    return output.unsqueeze(1).to(device)



train_X = input_factorize(train_X)
time_X = input_factorize(time_X)
target_Tensor = target_factorize(train_Y)


In [4]:
print(len(train_X))
print(target_Tensor[50000])

51954
tensor([ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.])


In [20]:
import random
teacher_forcing_ratio = 1


def train(input_tensor, target_tensor, decoder, decoder_optimizer, criterion, verbose = False):
    decoder_optimizer.zero_grad()
    
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    
    loss = 0
        
    hidden_1 = decoder.init_hidden()
    hidden_2 = decoder.init_hidden()
    hidden_3 = decoder.init_hidden()
    hidden_4 = decoder.init_hidden()
    hidden_5 = decoder.init_hidden()
    cell_1 = decoder.init_cell()
    cell_2 = decoder.init_cell()
    cell_3 = decoder.init_cell()
    cell_4 = decoder.init_cell()
    cell_5 = decoder.init_cell()
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    temp = []
    temp_score = []
    
    decoder_input = input_tensor[0]
    
    if use_teacher_forcing:
        for di in range(1, target_length):
            decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5) = decoder(decoder_input, 
                            (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5))
            if verbose:
                temp.append(int(torch.argmax(decoder_output, dim = 1).cpu().numpy()))
                temp_score.append(decoder_output)
                #print("decoder_output:", decoder_output)
                
            #print(input_tensor[di])
            loss += criterion(decoder_output, target_tensor[di].unsqueeze(0).long())
            decoder_input = input_tensor[di]
            """
            if di == 0:
                print("decoder input shape:", decoder_input.shape)
                print("decoder output shape:", decoder_output.shape)
            """
    else:
        for di in range(1, input_length):
            decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5) = decoder(decoder_input, 
                            (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5))
            if verbose:
                temp.append(int(torch.argmax(decoder_output, dim = 1).cpu().numpy()))
            loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
            
            #print(loss)
            decoder_input = decoder_output

    loss.backward()
    if verbose:
        for name, item in decoder.named_parameters():
            if item.requires_grad:
                print(name)
                print(item.grad)
    
    #print(decoder.lstm_1.state_dict()['weight_ih_l0'].grad)
    #print(decoder.lstm_1.state_dict()['weight_hh_l0'].grad)
    if verbose:
        print("Prediction :", temp) 
        print("Score :", temp_score)
        print("Target:", target_tensor) 
    

    decoder_optimizer.step()

    return loss.item() / target_length

In [21]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [22]:
def trainIters(decoder, n_iters, print_every = 1000, plot_every = 100, learning_rate = 0.01, CEL_weight=[1,5], total_batch = 51954):    
    start = time.time()
    
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0
    
    decoder_optimizer = optim.SGD(decoder.parameters(), lr = learning_rate)
    
    criterion = nn.CrossEntropyLoss(weight = torch.Tensor(CEL_weight).to(device))
    
    
    for iter in range(1, n_iters + 1):
        num = iter % total_batch
        verbose = (iter % print_every == 0)
        input_tensor = input_transform(train_X, time_X, num - 1).to(device)
        target_tensor = target_Tensor[num].to(device)
        input_tensor = Variable(input_tensor, requires_grad = True)
        #target_tensor = Variable(target_tensor, requires_grad = True)
        #print(input_tensor.shape)
        #print(target_tensor.shape)
        if input_tensor.shape[0] != target_tensor.shape[0]:
            continue
        
        loss = train(input_tensor, target_tensor, decoder, 
                     decoder_optimizer, criterion, verbose = verbose)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [23]:
def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)


input_size = 2
hidden_size = 256
output_size = 2

decoder = DecoderRNN(hidden_size, output_size).to(device)

trainIters(decoder, 10000, print_every=100, learning_rate=1e-2, CEL_weight = [0.1,0.9], total_batch=1)
    

lstm_1.weight_ih_l0
tensor([[ 8.3250e-07,  7.1012e-09],
        [-1.6254e-05, -1.3750e-07],
        [-7.1983e-07, -2.6957e-09],
        ...,
        [-1.4333e-05, -1.6311e-07],
        [-1.3797e-06, -1.7401e-08],
        [-7.3392e-07, -9.9628e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[-2.9980e-11,  8.6628e-10,  4.1271e-10,  ...,  1.9267e-09,
         -3.7470e-11, -2.4536e-11],
        [ 6.8653e-10, -1.8171e-08, -8.5776e-09,  ..., -4.0717e-08,
          7.9708e-10,  5.8661e-10],
        [ 7.4219e-12, -4.6283e-10, -1.6919e-10,  ..., -7.7438e-10,
          8.6862e-12, -1.4626e-11],
        ...,
        [ 7.7669e-10, -1.8933e-08, -9.7726e-09,  ..., -4.6173e-08,
          9.7713e-10,  9.2402e-10],
        [ 6.9977e-11, -1.7374e-09, -9.2068e-10,  ..., -4.3885e-09,
          9.0552e-11,  8.4834e-11],
        [ 3.4382e-11, -8.8687e-10, -4.9786e-10,  ..., -2.4118e-09,
          4.8087e-11,  4.5301e-11]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 1.3069e-08, -2.5390e-07, -1.1364e-08,

tensor(1.00000e-06 *
       [[-0.2813,  0.9051, -1.7549,  1.8439, -1.6810, -2.6171, -0.1227,
          1.2750, -0.3542,  0.7366, -2.2785,  2.6677,  2.8295, -0.7182,
          1.0088,  0.7154, -0.0261, -0.1924,  1.6794, -2.5303, -0.3602,
          1.7992, -1.6552,  0.2755, -1.2356,  0.0654, -0.2344, -0.6126,
         -0.3645,  4.3313, -1.1940,  2.2573, -2.8796, -3.2893, -2.4690,
         -2.8407, -2.0511,  1.4415, -1.2501, -0.4703,  0.2969,  2.8633,
          1.6703, -1.9383, -1.7340,  2.3815, -0.5094,  0.6949, -2.8210,
         -1.3398,  3.8274,  2.9595,  4.1424,  3.5409, -0.2646, -1.4431,
          2.5319, -1.7090,  1.9098,  1.9567,  2.8489,  2.4399, -1.9466,
         -1.4002,  0.3324,  0.6712,  2.3256, -0.6129,  1.2984, -0.7899,
         -2.8888,  3.5624, -2.1405,  0.9942, -1.0054,  0.4721,  0.9236,
          0.3737, -2.7010, -1.8590, -2.9864,  3.2828, -3.7826,  0.5678,
          1.2340, -3.6150, -1.7383, -2.0949,  1.3745, -1.6217, -3.1281,
          3.8303, -2.6300,  0.9808, -1.8133

lstm_1.weight_ih_l0
tensor([[ 5.6944e-07,  4.5176e-09],
        [-9.7540e-06, -7.9639e-08],
        [-3.5931e-07, -1.3285e-09],
        ...,
        [-8.0239e-06, -9.0095e-08],
        [-8.5446e-07, -1.0492e-08],
        [-4.2036e-07, -5.6381e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[-1.3546e-11,  5.5729e-10,  2.6350e-10,  ...,  1.1647e-09,
         -1.4637e-11, -1.8653e-11],
        [ 3.2327e-10, -1.0548e-08, -4.9590e-09,  ..., -2.2485e-08,
          3.3585e-10,  3.9572e-10],
        [-1.3063e-12, -2.2556e-10, -8.8881e-11,  ..., -3.6521e-10,
         -3.9509e-12, -1.4824e-12],
        ...,
        [ 3.9684e-10, -1.0490e-08, -5.3424e-09,  ..., -2.4332e-08,
          4.8286e-10,  5.3631e-10],
        [ 3.7645e-11, -1.0492e-09, -5.5026e-10,  ..., -2.5402e-09,
          4.6659e-11,  5.3999e-11],
        [ 1.6602e-11, -4.9406e-10, -2.7703e-10,  ..., -1.3001e-09,
          2.2720e-11,  2.7127e-11]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 8.9644e-09, -1.5277e-07, -5.6784e-09,

tensor(1.00000e-05 *
       [-6.3228, -1.0002], device='cuda:0')
out3.weight
tensor(1.00000e-04 *
       [[ 0.0932, -0.1421, -0.0861,  0.0563, -0.1732,  0.1052, -0.0284,
          0.0749, -0.0089, -0.0211, -0.0951,  0.0364, -0.1049,  0.1216,
         -0.1424,  0.1447,  0.0192,  0.0439,  0.0745, -0.0631, -0.0548,
          0.1023,  0.0056,  0.0232, -0.1094,  0.1755,  0.0501, -0.0721,
         -0.0996, -0.1870, -0.1745, -0.0675,  0.0559, -0.0364, -0.0391,
         -0.0323,  0.1135,  0.0394,  0.0117, -0.1424,  0.0987, -0.0701,
          0.0719, -0.1841, -0.0991, -0.0634,  0.1287, -0.1185, -0.1539,
         -0.0338, -0.1429, -0.0665, -0.0091,  0.0273,  0.0100, -0.1291,
         -0.0914,  0.0609,  0.1621,  0.0666, -0.0735,  0.0690, -0.1234,
          0.0364, -0.0294, -0.0101, -0.1198, -0.0441,  0.0154,  0.1199,
         -0.0630, -0.0641, -0.1432,  0.0306,  0.2056, -0.1589, -0.2045,
          0.0766,  0.0828, -0.2324,  0.0155, -0.0623, -0.1402,  0.0428,
          0.0397, -0.1650,  0.1174, -0

lstm_1.weight_ih_l0
tensor([[ 4.3929e-07,  3.3358e-09],
        [-7.1640e-06, -5.6653e-08],
        [-2.4441e-07, -8.9583e-10],
        ...,
        [-5.7579e-06, -6.2795e-08],
        [-6.3423e-07, -7.5523e-09],
        [-2.9686e-07, -3.9208e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[-4.7640e-12,  4.1725e-10,  1.8213e-10,  ...,  8.4100e-10,
         -1.2826e-11, -4.9978e-12],
        [ 1.6220e-10, -7.5857e-09, -3.3579e-09,  ..., -1.5722e-08,
          2.6744e-10,  1.6543e-10],
        [-4.9976e-12, -1.5168e-10, -4.9614e-11,  ..., -2.3442e-10,
         -3.3571e-13, -7.7704e-12],
        ...,
        [ 2.4805e-10, -7.4235e-09, -3.6490e-09,  ..., -1.6789e-08,
          3.4931e-10,  3.1931e-10],
        [ 2.2976e-11, -7.6490e-10, -3.8559e-10,  ..., -1.8098e-09,
          3.5245e-11,  3.1426e-11],
        [ 9.3269e-12, -3.4384e-10, -1.8618e-10,  ..., -8.8834e-10,
          1.6650e-11,  1.5134e-11]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 6.9234e-09, -1.1234e-07, -3.8652e-09,

tensor([[ 5.6130e-06, -8.0732e-06, -4.8336e-06,  3.1649e-06, -9.9768e-06,
          6.2413e-06, -1.7224e-06,  4.1611e-06, -3.7978e-07, -1.1099e-06,
         -5.5540e-06,  2.0946e-06, -6.3565e-06,  7.3603e-06, -8.3760e-06,
          8.5830e-06,  1.1820e-06,  2.4974e-06,  4.5824e-06, -3.8736e-06,
         -3.4653e-06,  5.7800e-06,  4.8229e-07,  1.2929e-06, -6.3655e-06,
          1.0209e-05,  3.1029e-06, -4.2441e-06, -5.7901e-06, -1.0820e-05,
         -1.0147e-05, -3.7995e-06,  3.2285e-06, -2.2429e-06, -2.3582e-06,
         -1.8650e-06,  6.4798e-06,  2.2134e-06,  7.9836e-07, -8.2991e-06,
          5.6167e-06, -4.1557e-06,  4.2130e-06, -1.0631e-05, -5.6215e-06,
         -3.8539e-06,  7.6061e-06, -6.9404e-06, -9.0817e-06, -2.0240e-06,
         -8.3650e-06, -3.5997e-06, -5.5408e-07,  1.6274e-06,  6.8926e-07,
         -7.4093e-06, -5.1852e-06,  3.6871e-06,  9.5088e-06,  3.9078e-06,
         -4.3643e-06,  3.8178e-06, -7.1124e-06,  1.8700e-06, -1.8072e-06,
         -5.6954e-07, -6.9229e-06, -2.

lstm_1.weight_ih_l0
tensor([[ 3.6188e-07,  2.6562e-09],
        [-5.7213e-06, -4.4095e-08],
        [-1.8732e-07, -6.8276e-10],
        ...,
        [-4.5166e-06, -4.8174e-08],
        [-4.9961e-07, -5.8633e-09],
        [-2.3097e-07, -3.0022e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[-1.0910e-11,  3.2331e-10,  1.4845e-10,  ...,  6.4792e-10,
         -8.0363e-12, -1.3830e-11],
        [ 2.1956e-10, -5.8059e-09, -2.6584e-09,  ..., -1.1948e-08,
          1.7989e-10,  2.5680e-10],
        [ 1.9615e-12, -1.0516e-10, -4.0663e-11,  ..., -1.6382e-10,
         -1.5886e-12,  2.2036e-12],
        ...,
        [ 2.3326e-10, -5.7004e-09, -2.8222e-09,  ..., -1.2733e-08,
          2.5390e-10,  3.0127e-10],
        [ 2.3858e-11, -5.9121e-10, -3.0272e-10,  ..., -1.3895e-09,
          2.5764e-11,  3.2403e-11],
        [ 1.0026e-11, -2.5983e-10, -1.4372e-10,  ..., -6.6937e-10,
          1.1877e-11,  1.5540e-11]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 5.7069e-09, -8.9771e-08, -2.9637e-09,

       [-3.1813,  0.1872], device='cuda:0')
out2.weight
tensor([[-1.4140e-07,  3.6491e-07, -5.6596e-07,  6.5599e-07, -6.1442e-07,
         -8.9823e-07, -5.0511e-08,  4.3346e-07, -1.0991e-07,  2.2055e-07,
         -7.7805e-07,  9.1385e-07,  9.4621e-07, -2.5205e-07,  3.3200e-07,
          2.3619e-07, -1.3821e-08, -4.4113e-08,  5.9031e-07, -8.9055e-07,
         -1.3332e-07,  6.4788e-07, -6.0830e-07,  5.9077e-08, -4.1242e-07,
         -1.6761e-08, -9.0861e-08, -2.2947e-07, -6.2803e-08,  1.4985e-06,
         -4.3045e-07,  7.7399e-07, -9.9743e-07, -1.1130e-06, -8.9286e-07,
         -1.0472e-06, -6.9702e-07,  4.4334e-07, -4.5593e-07, -1.8136e-07,
          8.5870e-08,  9.6025e-07,  5.4780e-07, -6.1161e-07, -6.0051e-07,
          8.2740e-07, -1.6901e-07,  2.2453e-07, -1.0256e-06, -4.3690e-07,
          1.2646e-06,  1.0300e-06,  1.4089e-06,  1.2065e-06, -8.8571e-08,
         -4.9253e-07,  8.2262e-07, -5.9918e-07,  6.5595e-07,  6.7960e-07,
          9.6492e-07,  8.2919e-07, -6.4361e-07, -4.9661e

lstm_1.weight_ih_l0
tensor([[ 3.0952e-07,  2.2075e-09],
        [-4.7933e-06, -3.6137e-08],
        [-1.5352e-07, -5.5669e-10],
        ...,
        [-3.7357e-06, -3.9069e-08],
        [-4.2287e-07, -4.8105e-09],
        [-1.9333e-07, -2.4462e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[ 1.0485e-11,  2.6310e-10,  1.3568e-10,  ...,  5.4275e-10,
          5.6860e-12,  5.0318e-12],
        [-6.3571e-11, -4.7095e-09, -2.3411e-09,  ..., -9.8570e-09,
         -5.1825e-12,  5.1176e-12],
        [-1.2330e-11, -8.0280e-11, -4.1748e-11,  ..., -1.3813e-10,
         -9.9645e-12, -9.9818e-12],
        ...,
        [ 8.1216e-11, -4.6337e-09, -2.3710e-09,  ..., -1.0357e-08,
          1.3886e-10,  1.5389e-10],
        [ 4.2282e-12, -4.8468e-10, -2.5916e-10,  ..., -1.1437e-09,
          1.1378e-11,  1.3661e-11],
        [ 4.9917e-13, -2.1099e-10, -1.2230e-10,  ..., -5.4723e-10,
          4.8051e-12,  6.1942e-12]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 4.8836e-09, -7.5240e-08, -2.4301e-09,

tensor(1.00000e-04 *
       [[ 0.0272, -0.0397, -0.0240,  0.0157, -0.0503,  0.0297, -0.0097,
          0.0213, -0.0015, -0.0060, -0.0279,  0.0111, -0.0306,  0.0352,
         -0.0409,  0.0422,  0.0052,  0.0116,  0.0217, -0.0189, -0.0169,
          0.0275,  0.0014,  0.0055, -0.0321,  0.0502,  0.0152, -0.0216,
         -0.0290, -0.0529, -0.0499, -0.0195,  0.0144, -0.0110, -0.0123,
         -0.0098,  0.0329,  0.0104,  0.0031, -0.0413,  0.0283, -0.0206,
          0.0200, -0.0517, -0.0281, -0.0185,  0.0380, -0.0347, -0.0442,
         -0.0104, -0.0414, -0.0183, -0.0028,  0.0075,  0.0030, -0.0362,
         -0.0267,  0.0179,  0.0460,  0.0197, -0.0210,  0.0202, -0.0355,
          0.0106, -0.0086, -0.0019, -0.0338, -0.0131,  0.0037,  0.0353,
         -0.0181, -0.0175, -0.0413,  0.0088,  0.0598, -0.0459, -0.0581,
          0.0227,  0.0232, -0.0655,  0.0049, -0.0178, -0.0399,  0.0117,
          0.0115, -0.0481,  0.0341, -0.0094,  0.0394,  0.0158,  0.0217,
          0.0199, -0.0633,  0.0360, -0.0239

lstm_1.weight_ih_l0
tensor([[ 2.7248e-07,  1.8951e-09],
        [-4.1497e-06, -3.0643e-08],
        [-1.3002e-07, -4.6969e-10],
        ...,
        [-3.1977e-06, -3.2825e-08],
        [-3.6406e-07, -4.0604e-09],
        [-1.6592e-07, -2.0551e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[ 4.7523e-12,  2.2982e-10,  1.1212e-10,  ...,  4.7099e-10,
         -4.0834e-12,  2.2362e-13],
        [-7.9318e-13, -4.0481e-09, -1.9326e-09,  ..., -8.4128e-09,
          1.0674e-10,  5.5626e-11],
        [-7.1444e-12, -6.8707e-11, -3.1665e-11,  ..., -1.1948e-10,
         -1.9605e-12, -5.2861e-12],
        ...,
        [ 9.2814e-11, -3.9452e-09, -1.9760e-09,  ..., -8.7404e-09,
          1.6636e-10,  1.5256e-10],
        [ 7.0428e-12, -4.1532e-10, -2.1623e-10,  ..., -9.7137e-10,
          1.6643e-11,  1.4830e-11],
        [ 2.0320e-12, -1.7947e-10, -1.0121e-10,  ..., -4.6160e-10,
          7.4815e-12,  6.7556e-12]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 4.3003e-09, -6.5157e-08, -2.0588e-09,

tensor([[-6.6697e-07,  2.3930e-06, -2.0108e-06, -4.3175e-07, -6.7021e-07,
         -1.3124e-06, -5.3744e-06, -4.5791e-07, -1.5107e-07, -1.4630e-06,
         -9.0447e-07,  5.9859e-07, -2.0822e-06,  1.0897e-06,  1.8587e-06,
          3.6808e-07, -4.2843e-07,  3.2168e-07,  1.3185e-06, -7.1611e-07,
         -4.1556e-06,  1.4497e-06, -2.2338e-06, -1.1490e-06,  3.0454e-06,
         -2.9205e-06,  1.1540e-06, -1.0508e-06,  1.1011e-06, -2.5097e-06,
          2.4682e-06,  2.0175e-09, -4.4378e-06, -1.3459e-06,  1.6278e-06,
          1.3819e-06, -4.2051e-06,  2.1267e-06,  4.0062e-06,  4.5316e-06,
          2.6514e-07, -2.5758e-06,  9.0144e-08, -2.6023e-06, -3.6182e-06,
          1.4252e-06,  3.2969e-06, -4.4083e-06, -2.1260e-06,  3.4374e-06,
          1.5088e-06, -1.1266e-06, -1.8403e-07,  7.4174e-07,  2.1001e-06,
         -1.6805e-06, -1.3139e-06, -2.3843e-06, -2.9664e-07,  1.2912e-06,
          2.7058e-06, -2.4190e-06,  4.3673e-06, -8.3984e-07, -1.8607e-06,
         -1.8401e-06, -2.0105e-06,  1.

lstm_1.weight_ih_l0
tensor([[ 2.4290e-07,  1.6551e-09],
        [-3.6803e-06, -2.6629e-08],
        [-1.1711e-07, -4.2094e-10],
        ...,
        [-2.8361e-06, -2.8399e-08],
        [-3.1947e-07, -3.5031e-09],
        [-1.4273e-07, -1.7539e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[-1.4154e-12,  2.1674e-10,  9.3172e-11,  ...,  4.0940e-10,
          6.6857e-12, -1.2122e-12],
        [ 6.9047e-11, -3.7349e-09, -1.6243e-09,  ..., -7.3148e-09,
         -3.3831e-11,  6.4702e-11],
        [-2.4362e-12, -7.1508e-11, -2.4990e-11,  ..., -1.0446e-10,
         -8.5406e-12, -3.5018e-12],
        ...,
        [ 1.1230e-10, -3.5459e-09, -1.6915e-09,  ..., -7.6132e-09,
          8.5303e-11,  1.3678e-10],
        [ 1.0562e-11, -3.7367e-10, -1.8355e-10,  ..., -8.4164e-10,
          6.5248e-12,  1.3765e-11],
        [ 3.9828e-12, -1.5986e-10, -8.4767e-11,  ..., -3.9459e-10,
          2.6486e-12,  6.3625e-12]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 3.8347e-09, -5.7804e-08, -1.8552e-09,

tensor(1.00000e-06 *
       [[-0.0685,  0.2353, -0.3660,  0.4063, -0.3614, -0.5617, -0.0285,
          0.2741, -0.0607,  0.1463, -0.4692,  0.5679,  0.6114, -0.1757,
          0.1819,  0.1703, -0.0207, -0.0184,  0.3554, -0.5464, -0.0722,
          0.3757, -0.3802,  0.0766, -0.2797, -0.0370, -0.0531, -0.1334,
         -0.0465,  0.9228, -0.2626,  0.4821, -0.6377, -0.7193, -0.5403,
         -0.6326, -0.4493,  0.3102, -0.2908, -0.1108,  0.0426,  0.6036,
          0.3901, -0.3827, -0.3786,  0.5283, -0.1166,  0.1101, -0.6255,
         -0.2927,  0.7868,  0.6176,  0.8874,  0.7771, -0.0537, -0.3074,
          0.5199, -0.3834,  0.4262,  0.3966,  0.6122,  0.5474, -0.4383,
         -0.3416,  0.0533,  0.1168,  0.4899, -0.1477,  0.3163, -0.1466,
         -0.6506,  0.7856, -0.4338,  0.2074, -0.2373,  0.0997,  0.2142,
          0.0609, -0.5805, -0.4240, -0.6513,  0.7070, -0.8294,  0.1055,
          0.2458, -0.8040, -0.3413, -0.4539,  0.3048, -0.3531, -0.6452,
          0.7689, -0.5384,  0.2079, -0.3960

lstm_1.weight_ih_l0
tensor([[ 2.1998e-07,  1.4699e-09],
        [-3.2905e-06, -2.3460e-08],
        [-1.0139e-07, -3.6432e-10],
        ...,
        [-2.4984e-06, -2.4800e-08],
        [-2.8797e-07, -3.0845e-09],
        [-1.3050e-07, -1.5538e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[ 6.6784e-12,  1.8474e-10,  8.2650e-11,  ...,  3.5679e-10,
          3.9776e-12, -4.4990e-12],
        [-3.5821e-11, -3.1988e-09, -1.4301e-09,  ..., -6.3482e-09,
         -4.5844e-12,  1.0087e-10],
        [-7.0177e-12, -5.5972e-11, -2.1504e-11,  ..., -8.6864e-11,
         -5.8559e-12, -7.7404e-13],
        ...,
        [ 5.5118e-11, -3.0657e-09, -1.4792e-09,  ..., -6.5983e-09,
          8.7590e-11,  1.4223e-10],
        [ 2.9823e-12, -3.2519e-10, -1.6192e-10,  ..., -7.3510e-10,
          7.0725e-12,  1.4917e-11],
        [ 3.5081e-13, -1.3982e-10, -7.5232e-11,  ..., -3.4690e-10,
          2.8772e-12,  6.9086e-12]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 3.4734e-09, -5.1690e-08, -1.6062e-09,

tensor(1.00000e-05 *
       [[ 0.1409, -0.2001, -0.1204,  0.0769, -0.2542,  0.1570, -0.0441,
          0.1085, -0.0096, -0.0317, -0.1373,  0.0593, -0.1528,  0.1761,
         -0.2035,  0.2130,  0.0252,  0.0589,  0.1114, -0.0911, -0.0825,
          0.1390,  0.0126,  0.0306, -0.1595,  0.2510,  0.0759, -0.1054,
         -0.1481, -0.2637, -0.2496, -0.0965,  0.0762, -0.0525, -0.0611,
         -0.0452,  0.1657,  0.0573,  0.0160, -0.2032,  0.1413, -0.1015,
          0.1051, -0.2645, -0.1407, -0.0972,  0.1879, -0.1689, -0.2228,
         -0.0490, -0.2059, -0.0856, -0.0182,  0.0445,  0.0098, -0.1850,
         -0.1347,  0.0899,  0.2289,  0.0995, -0.1037,  0.1007, -0.1769,
          0.0537, -0.0420, -0.0109, -0.1677, -0.0598,  0.0251,  0.1768,
         -0.0924, -0.0869, -0.1985,  0.0451,  0.2982, -0.2264, -0.2907,
          0.1090,  0.1134, -0.3349,  0.0221, -0.0928, -0.2045,  0.0606,
          0.0545, -0.2363,  0.1739, -0.0458,  0.2016,  0.0771,  0.1071,
          0.1038, -0.3159,  0.1854, -0.1137

lstm_1.weight_ih_l0
tensor([[ 2.0240e-07,  1.3261e-09],
        [-3.0081e-06, -2.1059e-08],
        [-9.3792e-08, -3.3530e-10],
        ...,
        [-2.2792e-06, -2.2147e-08],
        [-2.6247e-07, -2.7522e-09],
        [-1.1486e-07, -1.3643e-09]], device='cuda:0')
lstm_1.weight_hh_l0
tensor([[-2.1460e-12,  1.7107e-10,  8.0551e-11,  ...,  3.1888e-10,
          3.0033e-12,  2.3242e-12],
        [ 6.6509e-11, -2.9304e-09, -1.3594e-09,  ..., -5.6668e-09,
          2.8134e-12,  1.1446e-11],
        [-1.1542e-12, -5.2719e-11, -2.3085e-11,  ..., -7.6114e-11,
         -4.9383e-12, -4.6900e-12],
        ...,
        [ 9.3154e-11, -2.7852e-09, -1.3631e-09,  ..., -5.9040e-09,
          8.0701e-11,  8.8832e-11],
        [ 8.9499e-12, -2.9572e-10, -1.4997e-10,  ..., -6.5597e-10,
          6.6968e-12,  8.0648e-12],
        [ 3.4600e-12, -1.2445e-10, -6.8387e-11,  ..., -3.0408e-10,
          2.8981e-12,  3.7323e-12]], device='cuda:0')
lstm_1.bias_ih_l0
tensor([ 3.1964e-09, -4.7265e-08, -1.4867e-09,

tensor(1.00000e-07 *
       [[-0.5400,  1.8451, -2.8044,  3.0757, -2.9125, -4.5472, -0.4309,
          2.1378, -0.4325,  1.1858, -3.8311,  4.6410,  4.6064, -1.4119,
          1.4635,  1.4103,  0.1122, -0.1103,  2.8793, -4.5123, -0.5605,
          2.9605, -3.1099,  0.4183, -2.0049, -0.3220, -0.4815, -1.2592,
         -0.0218,  7.5659, -1.8330,  3.9444, -5.1154, -5.7202, -4.2081,
         -5.3319, -3.6988,  2.4561, -2.3009, -0.8834,  0.3658,  4.8710,
          2.9846, -3.0605, -3.0991,  4.2324, -1.0409,  0.8774, -5.3330,
         -2.2857,  6.3696,  5.2078,  7.1832,  6.3279, -0.5722, -2.2460,
          4.3950, -2.9864,  3.4860,  3.0720,  4.8619,  4.1956, -3.5003,
         -2.6455,  0.6549,  1.1894,  3.9626, -1.1847,  2.6317, -1.1548,
         -5.2796,  6.2912, -3.5911,  1.7216, -1.9838,  0.5372,  1.5833,
          0.3909, -4.7129, -3.6519, -5.2438,  5.7412, -6.8102,  0.5936,
          1.8169, -6.4580, -2.9867, -3.7021,  2.6429, -2.9024, -5.1299,
          6.1981, -4.4852,  1.5060, -3.3042

KeyboardInterrupt: 

In [44]:
a = decoder.lstm_1.parameters()
for item in decoder.parameters():
    print(item)

Parameter containing:
tensor(1.00000e-02 *
       [[-1.1262, -3.2115],
        [-3.8125, -0.5673],
        [-2.0100,  5.7254],
        ...,
        [-5.4596,  4.0230],
        [-4.4813,  2.8221],
        [-4.8098, -0.6205]], device='cuda:0')
Parameter containing:
tensor([[ 1.5239e-03,  2.1471e-02,  4.2950e-02,  ..., -1.0229e-02,
         -3.0215e-02,  5.8661e-02],
        [ 4.9033e-02, -5.2740e-03, -5.0049e-02,  ...,  5.5881e-02,
         -2.3985e-02, -2.8208e-02],
        [-1.2604e-02, -4.7180e-02,  6.1763e-02,  ..., -5.5139e-02,
          3.9513e-02,  6.1571e-02],
        ...,
        [-1.1464e-03, -1.0655e-02, -2.7396e-02,  ...,  1.7581e-02,
         -1.4270e-02, -1.0590e-02],
        [ 6.1139e-02,  4.3480e-02, -2.8529e-02,  ...,  7.2955e-03,
         -2.4781e-02, -4.5954e-02],
        [-5.7154e-03,  4.3949e-02, -4.8077e-02,  ..., -5.6575e-02,
          4.5376e-02, -1.6162e-02]], device='cuda:0')
Parameter containing:
tensor(1.00000e-02 *
       [ 1.0274,  5.6134, -1.1830,  ..., -3.

Parameter containing:
tensor(1.00000e-02 *
       [[-1.7433,  3.0465,  4.1394,  0.8468, -6.0819, -4.1277,  0.4873,
          6.2462,  0.9070,  5.8886, -5.5083, -3.7983,  2.4359, -1.1174,
          0.2732,  3.1073,  1.2329, -3.9964,  4.9802,  3.3545, -5.2311,
          2.9018,  2.2712, -2.5788, -5.6669, -3.6550, -1.0679, -5.6297,
         -3.0673, -2.2117, -3.6776,  2.3209,  4.6360,  5.4670, -4.7952,
          2.4506, -3.3564,  2.1152, -3.3683, -5.1464, -0.9345,  1.7412,
         -4.1439,  4.3508, -3.0886, -2.0961, -1.7894,  3.2653,  3.4638,
         -5.8292, -5.5223, -4.6850,  3.4052, -2.5883,  1.5004,  2.8889,
         -2.4864, -4.5988,  2.0998, -0.7780, -2.6235,  2.3226,  5.4168,
         -2.7971,  5.8116,  1.5632,  1.9761,  5.2812,  2.1607, -0.1341,
         -0.0833,  3.8719, -5.3772,  4.7640, -5.6540, -1.9479,  5.6922,
         -2.6203,  4.3403, -4.2189, -4.2526, -0.9619,  4.0780,  1.9594,
          3.5675, -5.4011, -1.3410, -5.9423, -0.3869, -5.8409, -0.6862,
         -0.6893, -1.

In [211]:
print(decoder.lstm_1.state_dict().keys())
print(decoder.lstm_1.state_dict()['weight_ih_l0'].grad)

odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0'])
None
