In [82]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline 
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
from torch.autograd import Variable

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
    
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.lstm_1 = nn.LSTM(self.output_size, self.hidden_size)
        self.lstm_2 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_3 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_4 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_5 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_6 = nn.LSTM(self.hidden_size, self.hidden_size)
        self.lstm_7 = nn.LSTM(self.hidden_size, self.hidden_size)
        
        self.out = nn.Linear(self.hidden_size, self.output_size)
          
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, input, ch_1, ch_2, ch_3, ch_4, ch_5, ch_6, ch_7):
        (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5) = ch_1, ch_2, ch_3, ch_4, ch_5
        (hidden_6, cell_6), (hidden_7, cell_7) = ch_6, ch_7
        
        
        output, (hidden_1, cell_1) = self.lstm_1(input.view(1,1,-1).float(), (hidden_1, cell_1))
        output_1 = output
        
        output, (hidden_2, cell_2) = self.lstm_2(output, (hidden_2, cell_2))
        output_2 = output
        
        output, (hidden_3, cell_3) = self.lstm_3(output + output_1, (hidden_3, cell_3)) # skip_connection 1
        output_3 = output
        
        output, (hidden_4, cell_4) = self.lstm_4(output + output_2, (hidden_4, cell_4)) # skip_connection 2
        output_4 = output
        
        output, (hidden_5, cell_5) = self.lstm_5(output + output_3, (hidden_5, cell_5)) # skip_connection 3
        output_5 = output
        
        output, (hidden_6, cell_6) = self.lstm_6(output + output_4, (hidden_6, cell_6)) # skip_connection 4
        
        output, (hidden_7, cell_7) = self.lstm_7(output + output_5, (hidden_7, cell_7)) # skip_connection 5
        
        output = self.out(output[0])
        output = self.softmax(output)
        return output, (hidden_1, cell_1),(hidden_2, cell_2),(hidden_3, cell_3),(hidden_4, cell_4),(hidden_5, cell_5),(hidden_6, cell_6),(hidden_7, cell_7)
    
    def init_hidden(self):
        return torch.rand((1, 1, self.hidden_size), device=device)/100
    
    def init_cell(self):
        return torch.rand((1, 1, self.hidden_size), device=device)/100
    
    
print(torch.cuda.is_available())



""" 
NOTE: 
Encoder RNN input of size (Sentence_length * input_feature)
Encoder RNN output of size (1 * 1 * hidden_size)  should be (num_sentences * 1 * hidden_size)

Decoder RNN input of size 0 (scalar value)
Decoder RNN output of size (1 * target_num)   should be (num_sentences * target_num)
"""

True


' \nNOTE: \nEncoder RNN input of size (Sentence_length * input_feature)\nEncoder RNN output of size (1 * 1 * hidden_size)  should be (num_sentences * 1 * hidden_size)\n\nDecoder RNN input of size 0 (scalar value)\nDecoder RNN output of size (1 * target_num)   should be (num_sentences * target_num)\n'

In [36]:
import pickle

# load data from file
with open("pitch_data.pkl", "rb") as f:
    dic = pickle.load(f)
    train_X = dic["X"]
    train_Y = dic["Y"]
    time_X = dic["time"]
    
for i in range(train_Y.shape[0]):
    train_Y[i] = torch.from_numpy((train_Y[i] == 4).astype(int)).float()
    

In [37]:
def input_transform(train_x, time_x, i):
    output = torch.from_numpy(np.array([train_x[i], time_x[i]]))
    return output.transpose(1, 0).to(device)

def input_factorize(train_x):
    output = []
    for i in range(train_x.shape[0]):
        for item in np.array_split(train_x[i], train_x[i].shape[0] / 7):
            output.append(item)
    return output


def target_factorize(train_y):
    output = []
    for i in range(train_y.shape[0]):
        for item in np.array_split(train_y[i].numpy(), train_y[i].shape[0] / 7):
            output.append(torch.Tensor(item))
    return output

def target_transform(train_y):
    output = torch.zeros((1, 2))
    output[0, int(train_y)] = 1
    return output.unsqueeze(1).to(device)



train_X = input_factorize(train_X)
time_X = input_factorize(time_X)
target_Tensor = target_factorize(train_Y)


In [38]:
print(len(train_X))
print(target_Tensor[50000])

66964
tensor([ 0.,  0.,  0.,  1.,  0.,  0.,  0.])


In [68]:
import random
teacher_forcing_ratio = 1


def train(input_tensor, target_tensor, decoder, decoder_optimizer, criterion, verbose = False):
    decoder_optimizer.zero_grad()
    
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    
    loss = 0
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    temp = []
    temp_score = []
    
    decoder_input = input_tensor[0]
    
    if use_teacher_forcing:
        for di in range(0, target_length):
            decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7) = decoder(decoder_input, 
                            (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7))
            if verbose:
                temp.append(int(torch.argmax(decoder_output, dim = 1).cpu().numpy()))
                temp_score.append(decoder_output)
                #print("decoder_output:", decoder_output)
                
            #print(input_tensor[di])
            if int(target_tensor[di]) == 1:
                #temp = float(loss)
                loss += 5 * criterion(decoder_output, target_tensor[di].unsqueeze(0).long())
                #print(decoder_output, target_tensor[di])
                #print(float(loss) - temp)
            else:
                #temp = float(loss)
                loss += criterion(decoder_output, target_tensor[di].unsqueeze(0).long())
                #print(decoder_output, target_tensor[di])
                #print(float(loss) - temp)
            if di + 1 < target_length:
                decoder_input = input_tensor[di + 1]
            """
            if di == 0:
                print("decoder input shape:", decoder_input.shape)
                print("decoder output shape:", decoder_output.shape)
            """
    else:
        for di in range(1, input_length):
            decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5) = decoder(decoder_input, 
                            (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5))
            if verbose:
                temp.append(int(torch.argmax(decoder_output, dim = 1).cpu().numpy()))
            loss += criterion(decoder_output, target_tensor[di].unsqueeze(0))
            
            #print(loss)
            decoder_input = decoder_output

    loss.backward()
    """if verbose:
        for name, item in decoder.named_parameters():
            if item.requires_grad:
                print(name)
                print(item.shape)
                print(item.grad)
"""
    if verbose:
        print("Prediction :", temp) 
        print("Score :", temp_score)
        print("Target:", target_tensor) 
    

    decoder_optimizer.step()

    return loss.item() / target_length

In [69]:
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [70]:
def trainIters(decoder, n_iters, print_every = 1000, plot_every = 100, learning_rate = 0.01, CEL_weight=[1,5], total_batch = 51954):    
    start = time.time()
    
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0
    
    decoder_optimizer = optim.Adagrad(decoder.parameters(), lr = learning_rate)
    
    criterion = nn.CrossEntropyLoss(weight = torch.Tensor(CEL_weight).to(device))
    
    
    for iter in range(1, n_iters + 1):
        num = iter % total_batch
        verbose = (iter % print_every == 0)
        input_tensor = input_transform(train_X, time_X, num - 1).to(device)
        target_tensor = target_Tensor[num].to(device)
        input_tensor = Variable(input_tensor, requires_grad = True)
        if input_tensor.shape[0] != target_tensor.shape[0]:
            continue
        
        loss = train(input_tensor, target_tensor, decoder, 
                     decoder_optimizer, criterion, verbose = verbose)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [71]:
def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)


input_size = 2
hidden_size = 256
output_size = 2

decoder = DecoderRNN(hidden_size, output_size).to(device)

trainIters(decoder, 10000, print_every=100, learning_rate=1e-3, CEL_weight = [1,10], total_batch=5)
    

Prediction : [0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5382,  0.4618]], device='cuda:0'), tensor([[ 0.5890,  0.4110]], device='cuda:0'), tensor([[ 0.6481,  0.3519]], device='cuda:0'), tensor([[ 0.7043,  0.2957]], device='cuda:0'), tensor([[ 0.7508,  0.2492]], device='cuda:0'), tensor([[ 0.7865,  0.2135]], device='cuda:0'), tensor([[ 0.8126,  0.1874]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
0m 8s (- 13m 37s) (100 1%) 0.9854
Prediction : [0, 0, 0, 0, 0, 0, 0]
Score : [tensor([[ 0.5438,  0.4562]], device='cuda:0'), tensor([[ 0.6010,  0.3990]], device='cuda:0'), tensor([[ 0.6651,  0.3349]], device='cuda:0'), tensor([[ 0.7230,  0.2770]], device='cuda:0'), tensor([[ 0.7686,  0.2314]], device='cuda:0'), tensor([[ 0.8019,  0.1981]], device='cuda:0'), tensor([[ 0.8254,  0.1746]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
0m 16s (- 13m 35s) (200 2%) 0.9752
Prediction : [0, 0, 0, 0, 0, 0, 0]
Score : [ten

Prediction : [0, 0, 0, 0, 0, 1, 1]
Score : [tensor([[ 0.5285,  0.4715]], device='cuda:0'), tensor([[ 0.5232,  0.4768]], device='cuda:0'), tensor([[ 0.5446,  0.4554]], device='cuda:0'), tensor([[ 0.7267,  0.2733]], device='cuda:0'), tensor([[ 0.8363,  0.1637]], device='cuda:0'), tensor([[ 0.3777,  0.6223]], device='cuda:0'), tensor([[ 0.0245,  0.9755]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
2m 37s (- 11m 11s) (1900 19%) 0.8244
Prediction : [0, 0, 0, 0, 0, 1, 1]
Score : [tensor([[ 0.5327,  0.4673]], device='cuda:0'), tensor([[ 0.5324,  0.4676]], device='cuda:0'), tensor([[ 0.5652,  0.4348]], device='cuda:0'), tensor([[ 0.7317,  0.2683]], device='cuda:0'), tensor([[ 0.5464,  0.4536]], device='cuda:0'), tensor([[ 0.0368,  0.9632]], device='cuda:0'), tensor([[ 0.0186,  0.9814]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
2m 45s (- 11m 3s) (2000 20%) 0.8125
Prediction : [0, 1, 1, 0, 0, 0, 1]
Score : 

Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [tensor([[ 0.7360,  0.2640]], device='cuda:0'), tensor([[ 0.9753,  0.0247]], device='cuda:0'), tensor([[ 0.9994,  0.0006]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 0.9999,  0.0001]], device='cuda:0'), tensor([[ 0.9925,  0.0075]], device='cuda:0'), tensor([[ 0.0004,  0.9996]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
5m 7s (- 8m 43s) (3700 37%) 0.5811
Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [tensor([[ 0.7378,  0.2622]], device='cuda:0'), tensor([[ 0.9781,  0.0219]], device='cuda:0'), tensor([[ 0.9995,  0.0005]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 0.9999,  0.0001]], device='cuda:0'), tensor([[ 0.9937,  0.0063]], device='cuda:0'), tensor([[ 0.0004,  0.9996]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
5m 15s (- 8m 35s) (3800 38%) 0.5796
Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [t

Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [tensor([[ 0.7115,  0.2885]], device='cuda:0'), tensor([[ 0.9819,  0.0181]], device='cuda:0'), tensor([[ 0.9998,  0.0002]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 0.9985,  0.0015]], device='cuda:0'), tensor([[ 0.0005,  0.9995]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
7m 31s (- 6m 9s) (5500 55%) 0.5709
Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [tensor([[ 0.7087,  0.2913]], device='cuda:0'), tensor([[ 0.9818,  0.0182]], device='cuda:0'), tensor([[ 0.9998,  0.0002]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 0.9984,  0.0016]], device='cuda:0'), tensor([[ 0.0004,  0.9996]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
7m 39s (- 6m 0s) (5600 56%) 0.5705
Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [te

Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [tensor([[ 0.6733,  0.3267]], device='cuda:0'), tensor([[ 0.9806,  0.0194]], device='cuda:0'), tensor([[ 0.9998,  0.0002]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 0.9990,  0.0010]], device='cuda:0'), tensor([[ 0.0003,  0.9997]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
9m 54s (- 3m 39s) (7300 73%) 0.5658
Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [tensor([[ 0.6727,  0.3273]], device='cuda:0'), tensor([[ 0.9807,  0.0193]], device='cuda:0'), tensor([[ 0.9998,  0.0002]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 0.9991,  0.0009]], device='cuda:0'), tensor([[ 0.0003,  0.9997]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
10m 2s (- 3m 31s) (7400 74%) 0.5656
Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [

Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [tensor([[ 0.6437,  0.3563]], device='cuda:0'), tensor([[ 0.9827,  0.0173]], device='cuda:0'), tensor([[ 0.9999,  0.0001]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 0.9993,  0.0007]], device='cuda:0'), tensor([[ 0.0002,  0.9998]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
12m 17s (- 1m 12s) (9100 91%) 0.5620
Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : [tensor([[ 0.6423,  0.3577]], device='cuda:0'), tensor([[ 0.9827,  0.0173]], device='cuda:0'), tensor([[ 0.9999,  0.0001]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 1.0000,  0.0000]], device='cuda:0'), tensor([[ 0.9994,  0.0006]], device='cuda:0'), tensor([[ 0.0002,  0.9998]], device='cuda:0')]
Target: tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
12m 24s (- 1m 4s) (9200 92%) 0.5618
Prediction : [0, 0, 0, 0, 0, 0, 1]
Score : 

In [55]:
print(decoder.lstm_1.state_dict().keys())
print(decoder.lstm_1.state_dict()['weight_ih_l0'].grad)

odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0'])
None


In [77]:
def evaluate(decoder, test_X):
    input_length = input_tensor.size(0)
    
    loss = 0
        
    hidden_1 = decoder.init_hidden()
    hidden_2 = decoder.init_hidden()
    hidden_3 = decoder.init_hidden()
    hidden_4 = decoder.init_hidden()
    hidden_5 = decoder.init_hidden()
    hidden_6 = decoder.init_hidden()
    hidden_7 = decoder.init_hidden()
    cell_1 = decoder.init_cell()
    cell_2 = decoder.init_cell()
    cell_3 = decoder.init_cell()
    cell_4 = decoder.init_cell()
    cell_5 = decoder.init_cell()
    cell_6 = decoder.init_cell()
    cell_7 = decoder.init_cell()
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    temp = []
    temp_score = []
    
    decoder_input = input_tensor[0]
    
    for di in range(0, input_length):
        decoder_output, (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3),  (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7) = decoder(decoder_input, 
                        (hidden_1, cell_1), (hidden_2, cell_2), (hidden_3, cell_3), (hidden_4, cell_4), (hidden_5, cell_5), (hidden_6, cell_6), (hidden_7, cell_7))
        temp.append(int(torch.argmax(decoder_output, dim = 1).cpu().numpy()))
        temp_score.append(decoder_output)
        if di + 1 < input_length:
            decoder_input = input_tensor[di+1]

    return temp

In [81]:
for i in range(5):
    target_tensor = target_Tensor[i].to(device)
    input_tensor = input_transform(train_X, time_X, i-1).to(device)
    print(evaluate(decoder, input_tensor))
    print(target_tensor)

[0, 0, 0, 0, 0, 0, 1]
tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.], device='cuda:0')
[0, 0, 0, 0, 0, 0, 0]
tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.], device='cuda:0')
[0, 0, 0, 0, 0, 0, 0]
tensor([ 1.,  0.,  0.,  0.,  0.,  0.,  0.], device='cuda:0')
[0, 0, 1, 0, 0, 0, 0]
tensor([ 0.,  0.,  1.,  0.,  0.,  0.,  0.], device='cuda:0')
[0, 0, 0, 1, 0, 0, 0]
tensor([ 0.,  0.,  0.,  1.,  0.,  0.,  0.], device='cuda:0')
