In [1]:
import numpy as np

from torchtext import data
from torchtext import datasets
from torchtext.vocab import Vectors, GloVe

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
from torch.nn import Linear, RNN, LSTM
from torch.nn.functional import softmax, relu

from sklearn.manifold import TSNE
import h5py

In [2]:
# Load data
with h5py.File('C:/Users/jovog/OneDrive/Documents/50000_dataset.hdf5', 'r') as hf:
    data = hf['data'][:]
    label = hf['label'][:]
data[:,1] += 1
label[:] += 1

In [3]:
seq = [] #Initalize
lab = []
current_user = -1
for i in range(data.shape[0]): # going through the data and sorting into user sequences
    if data[i,0]!=current_user:
        current_user = data[i,0]
        seq.append([data[i,1]])
        lab.append([label[i]])        
    else:
        seq[-1].append(data[i,1]) # new user line
        lab[-1].append(label[i])
        
seq.sort(key=lambda x:len(x))
lab.sort(key=lambda x:len(x))
seq_length = [len(x) for x in seq]

In [49]:
# Define network
output_size = 17770
num_input = 17770
hidden_size = 100

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.one_hot = nn.Embedding(num_input, num_input)
        self.one_hot.weight.data = torch.eye(num_input)
        self.one_hot.weight.detach_()
        self.rnn = LSTM(input_size=num_input,
                        hidden_size=100,
                        num_layers=1,
                        bidirectional=False,
                        batch_first = True)
        self.l_out = Linear(in_features=hidden_size,
                            out_features=output_size,
                            bias=False)
        self.criteria = nn.CrossEntropyLoss()
    
    def forward(self, x):
        out = {}
        x = self.one_hot(x)
        
        x, hn = self.rnn(x)
        
        out = softmax(self.l_out(x), dim=2)
        return out
    
    def MyLoss(self, Y_hat, Y):
        # create a mask by filtering out all tokens that ARE NOT the padding token
        mask = (Y > 0).float()
        #if mask[0,-1]==0:
        #    m = len(mask[0,:])-1
        mask = mask.view(-1,1)
        
        Y = Y.view(-1)
        
        Y_hat = Y_hat.view(-1, output_size)
        
        # count how many tokens we have
        #nb_tokens = int(torch.sum(mask).data[0])
        print('y_hat:',Y_hat.size(),Y_hat)
        print('y:',Y.size(),Y)
        #print('mask:',mask.size(),mask)
        # pick the values for the label and zero out the rest with the mask
        Y_hat = Y_hat * mask
        #print('2:',Y_hat.size(),Y_hat)
        #print('3:',Y_hat[m,:])
        #print(Y_hat.type())
        #print(Y.type())
        loss = self.criteria(Y_hat,Y)
        return loss
net = Net()
print(net)

Net(
  (one_hot): Embedding(17770, 17770)
  (rnn): LSTM(17770, 100, batch_first=True)
  (l_out): Linear(in_features=100, out_features=17770, bias=False)
  (criteria): CrossEntropyLoss()
)


In [50]:
{p[0]: p[1].requires_grad for p in net.named_parameters()}

{'one_hot.weight': False,
 'rnn.weight_ih_l0': True,
 'rnn.weight_hh_l0': True,
 'rnn.bias_ih_l0': True,
 'rnn.bias_hh_l0': True,
 'l_out.weight': True}

In [51]:
optimizer = optim.Adam(net.parameters(), lr=0.01)

In [52]:
# Train and validation loop
batch_size = 10
net.train()
for i in np.random.choice(len(seq)//batch_size,len(seq)//batch_size, replace=False):
    batch = seq[i*batch_size:(i+1)*batch_size]
    batch_length = seq_length[i*batch_size:(i+1)*batch_size]
    batch_label = lab[i*batch_size:(i+1)*batch_size]
    
    # Zero padding of the batch
    max_len_batch = max(batch_length)
    batch_padded = np.zeros((batch_size,max_len_batch))
    padded_label = np.zeros((batch_size,max_len_batch))
    for j in range(batch_size):
        batch_padded[j,0:batch_length[j]] = batch[j]
        padded_label[j,0:batch_length[j]] = batch_label[j]
    batch_padded = torch.from_numpy(batch_padded).long()
    padded_label = torch.from_numpy(padded_label).long()
    output = net(batch_padded)
    #print('batch_length:',batch_length)
    batch_loss = net.MyLoss(output,padded_label)
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()
    break

y_hat: torch.Size([390, 17770]) tensor([[0.0001, 0.0001, 0.0001,  ..., 0.0001, 0.0001, 0.0001],
        [0.0001, 0.0001, 0.0001,  ..., 0.0001, 0.0001, 0.0001],
        [0.0001, 0.0001, 0.0001,  ..., 0.0001, 0.0001, 0.0001],
        ...,
        [0.0001, 0.0001, 0.0001,  ..., 0.0001, 0.0001, 0.0001],
        [0.0001, 0.0001, 0.0001,  ..., 0.0001, 0.0001, 0.0001],
        [0.0001, 0.0001, 0.0001,  ..., 0.0001, 0.0001, 0.0001]],
       grad_fn=<ViewBackward>)
y: torch.Size([390]) tensor([ 7632, 13359, 13728,  1144,  3782,  4302,  4656,  5112,  6196,  6931,
         7590,  8181,  8393,  8915,  9205,  9471,  9617,  9645, 10375, 12047,
        12582, 12918, 13102, 13391, 14103, 14312, 14856, 14909, 15124, 15156,
        15436, 15472, 15563, 15844, 15922, 16139, 16339, 16879, 16901,  1542,
         2782,  9144,  6287,  7234,  8636,  8904,  9340, 13050, 14313, 14367,
        14590, 15124, 15156, 16872, 10583, 16380,  6007,  1409,  2372, 12155,
         4590,  6428, 12799,  7786, 14533,  1700, 

In [48]:
print(padded_label.size())

torch.Size([10, 197])
