In [171]:

import os.path
import pandas as pn

In [172]:
import numpy as np
from itertools import count
from collections import OrderedDict

In [173]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset, Dataset
import torch.utils.data

torch.set_default_dtype(torch.double)
torch.manual_seed(1)

<torch._C.Generator at 0x7f42e09eeb90>

In [174]:
import torch.nn.utils.rnn as rnn_utils

In [175]:
def tensor_seq_to_onehot(s,nlabels):
    bar = torch.zeros(s.shape[0],1,nlabels)
    bar.scatter_(-1,s.view(-1,1,1),1)
    return bar

In [176]:
class MyAuto(torch.nn.Module):
    def __init__(self,N_SYMBOLS):
        super(MyAuto,self).__init__()
        self.N_SYMBOLS = N_SYMBOLS
        self.N_hidden = 30
        self.D_embed = 20
        self.embedding = nn.Embedding(N_SYMBOLS,self.D_embed,padding_idx=0)
        self.encoder = nn.RNN(self.D_embed,self.N_hidden,2,dropout=0.1)
        #self.encoder = nn.Identity()
        self.d1 = nn.Linear(self.N_hidden,N_SYMBOLS)
        self.d2 = nn.Softmax(dim=-1)
    
    def forward(self, some_data):
        is_packed = isinstance(some_data,rnn_utils.PackedSequence)
        
        if is_packed:
            embedded_data = self.embedding(some_data.data)
        else:
            embedded_data = self.embedding(some_data)
        
        if len(embedded_data.shape) > 3: #Embedding adds an extra dimension
            embedded_data = torch.flatten(embedded_data,-2)
        
        assert len(embedded_data.shape) == 3 , "Data embedded with shape {}".format(embedded_data.shape)
        
        outputs,hidden = self.encoder(embedded_data)
        #outputs = self.encoder(embedded_data)
        x = self.d1(outputs)
        y = self.d2(x)
        if is_packed:
            return rnn_utils.PackedSequence(y,some_data.batch_sizes,some_data.sorted_indices, some_data.unsorted_indices)
        else:
            return y
        

In [177]:
model = MyAuto(20)

In [178]:
N_SYMBOLS = 20 #reserve the 1 and 0 symbol for start and stop
M_SEQS = 31
L_SEQS = (20,50)

import random as pyrand
#create sequences as 1-d long tensors
basic_sequence_lengths = torch.LongTensor(M_SEQS).random_(*L_SEQS)
basic_sequences = [torch.LongTensor(i.item()).random_(2,N_SYMBOLS) for i in basic_sequence_lengths]
for i in basic_sequences:
    i[0] = 1
    i[-1] = 0

In [179]:
#For individual sequences, it should only be two dimensions with the first dimension as time.
torch_shaped_sequences = [i.reshape(-1,1) for i in basic_sequences]

In [180]:
#embedding adds an additional dimension, rather than just expand into an existing dimension.
model.embedding(torch_shaped_sequences[0].reshape(-1,1)).shape

torch.Size([29, 1, 20])

In [181]:
#Experiments about padding

In [182]:
padded_torch_seqs = rnn_utils.pad_sequence(basic_sequences)
print("padded shape, basic: " , padded_torch_seqs.shape)
print("shape after embedding: ", model.embedding(padded_torch_seqs).shape)
print("Can run model on this?", model(padded_torch_seqs).shape)

padded shape, basic:  torch.Size([49, 31])
shape after embedding:  torch.Size([49, 31, 20])
Can run model on this? torch.Size([49, 31, 20])


In [183]:
padded_torch_seqs = rnn_utils.pad_sequence(torch_shaped_sequences)
print("padded shape, torch_shaped: ", padded_torch_seqs.shape)
print("shape after embedding: ", model.embedding(padded_torch_seqs).shape)
print("\t(Note the added extra dimension, handled in the model)")
print("Can run model on this?", model(padded_torch_seqs).shape) #Because the model automagically handles that extra coordinate

padded shape, torch_shaped:  torch.Size([49, 31, 1])
shape after embedding:  torch.Size([49, 31, 1, 20])
	(Note the added extra dimension, handled in the model)
Can run model on this? torch.Size([49, 31, 20])


In [184]:
#experiments about packing

In [185]:
packed_torch_seqs = rnn_utils.pack_sequence(basic_sequences,enforce_sorted=False)
print("packed shape :", packed_torch_seqs.data.shape)
print("shape after embedding :", model.embedding(packed_torch_seqs.data).shape)
try:
    print("Can run model on this?",model(packed_torch_seqs).data.shape)
except:
    print("\n\tCan't run model.")

packed shape : torch.Size([1083])
shape after embedding : torch.Size([1083, 20])

	Can't run model.


In [186]:
packed_torch_seqs = rnn_utils.pack_sequence(torch_shaped_sequences,enforce_sorted=False)
print("packed shape :", packed_torch_seqs.data.shape)
print("shape after embedding :", model.embedding(packed_torch_seqs.data).shape)
print("Can run model on this?",model(packed_torch_seqs).data.shape)

packed shape : torch.Size([1083, 1])
shape after embedding : torch.Size([1083, 1, 20])
Can run model on this? torch.Size([1083, 1, 20])


In [187]:
packed_torch_seqs = rnn_utils.pack_sequence(torch_shaped_sequences,enforce_sorted=False)
print("packed shape :", packed_torch_seqs.data.shape)
unpacked_torch_seqs, unpacked_lengths = rnn_utils.pad_packed_sequence(packed_torch_seqs)

print("shape after embedding :", model.embedding(unpacked_torch_seqs).shape)
print("Can run model on this?",model(packed_torch_seqs).data.shape)

packed shape : torch.Size([1083, 1])
shape after embedding : torch.Size([49, 31, 1, 20])
Can run model on this? torch.Size([1083, 1, 20])


In [188]:
unpacked_torch_seqs.shape

torch.Size([49, 31, 1])

In [189]:
blah = model.embedding(unpacked_torch_seqs)
repacked = rnn_utils.pack_padded_sequence(torch.flatten(blah,-2),unpacked_lengths,enforce_sorted=False)
output, hidden = model.encoder(repacked)

In [190]:
hidden.shape

torch.Size([2, 31, 30])

In [192]:
output.data.shape

torch.Size([1083, 30])