In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.utils.data import random_split, Dataset
import torch.optim as optim
import pandas as pd
import numpy as np
import random
import pickle
import matplotlib.pyplot as plt

In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)
print("Device:", device)

Device: cpu


In [3]:
with open('p5/train.pkl', 'rb') as f:
    data = pickle.load(f)

In [6]:
class VariableLenDataset(Dataset):
    def __init__(self, in_data, target):
        self.data = [(x, y) for x, y in zip(in_data, target)]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        in_data, target = self.data[idx]
        return in_data, target

In [5]:
data_attr = []
data_targets = []
for d in data:
    data_attr.append(d[0])
    data_targets.append(d[1])

In [8]:
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

pad = 0

def pad_collate(batch, pad_value=0):
    xx, yy = zip(*batch)
    x_lens = [len(x) for x in xx]
    y_lens = [len(y) for y in yy]

    xx_pad = pad_sequence(xx, batch_first=True, padding_value=pad_value)
    yy_pad = pad_sequence(yy, batch_first=True, padding_value=pad_value)

    return xx_pad, yy_pad, x_lens, y_lens

In [4]:
rng = np.random.default_rng(73512)

min_gen_val = 10
max_gen_val = 1001
samples = 1000
max_gen_len = 32

data = []
targets = []
max_val = -1
for _ in range(samples):
    seq_len = rng.integers(low=1, high=max_gen_len, size=1)
    data_in = rng.integers(low=min_gen_val, high=max_gen_val, size=seq_len)
    data_sum = np.array([data_in[:i + 1].sum() for i in range(len(data_in))])
    data.append(torch.from_numpy(data_in))
    targets.append(torch.from_numpy(data_sum))
    max_val = data_sum[-1] if data_sum[-1] > max_val else max_val

In [7]:
train_indices = int(len(data) * 0.7)
data = [(x / max_val).float() for x in data]
targets = [(x / max_val).float() for x in targets]
train_set = VariableLenDataset(data[:train_indices], targets[:train_indices])
test_set = VariableLenDataset(data[train_indices:], targets[train_indices:])

In [10]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=50, shuffle=True, collate_fn=pad_collate)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=50, shuffle=False, drop_last=False, collate_fn=pad_collate)

In [11]:
class LSTM_Seq_Regressor(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, out_size):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.proj_size = out_size
        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, proj_size = out_size)
        
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.proj_size)
        state = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        return hidden, state
    
    def forward(self, x, hidden):
        # x = torch.transpose(x, 0, 1)
        all_outputs, hidden = self.lstm(x, hidden)
        # all_outputs = torch.transpose(all_outputs, 0, 1)
        return all_outputs, hidden
    
model = LSTM_Seq_Regressor(1, 200, 1, 1).to(device)
model

LSTM_Seq_Regressor(
  (lstm): LSTM(1, 200, proj_size=1)
)

In [12]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fun = nn.MSELoss()

# Training loop
for epoch in range(101):
    for x, targets, x_len, target_len in train_loader:
        x = x.to(device).unsqueeze(2)
        targets = targets.to(device)
        hidden, state = model.init_hidden(x.size(0))
        hidden, state = hidden.to(device), state.to(device) 
        
        x = torch.transpose(x, 0, 1)
        preds, _ = model(x, (hidden, state))
        preds = torch.transpose(preds, 0, 1)
        
#         x_packed = pack_padded_sequence(x, x_len, batch_first=True, enforce_sorted=False)
#         preds_packed, _ = model(x_packed, (hidden, state))
#         preds, pred_len = pad_packed_sequence(preds_packed, batch_first=True, padding_value=pad)
        
        preds = preds.squeeze(2)
        optimizer.zero_grad()
        mask = targets != pad
        loss = loss_fun(preds[mask], targets[mask])
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():.3}")

  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


Epoch: 0, loss: 4.58e-06
Epoch: 10, loss: 9.87e-10
Epoch: 20, loss: 2.3e-10
Epoch: 30, loss: 9.27e-11
Epoch: 40, loss: 8.71e-11
Epoch: 50, loss: 1.32e-10
Epoch: 60, loss: 1.15e-10
Epoch: 70, loss: 8.29e-11
Epoch: 80, loss: 1.31e-10
Epoch: 90, loss: 1.86e-10
Epoch: 100, loss: 6.33e-08


In [27]:
with torch.no_grad():
    for x, targets, x_len, target_len in test_loader:
        x = x.to(device).unsqueeze(2)
        targets = targets.to(device)
        int_targets = []
        for target_1 in targets:
                for target_2 in target_1:
                    int_targets.append(int(target_2))
        print(set(list(int_targets)))
        hidden, state = model.init_hidden(x.shape[0])
        hidden, state = hidden.to(device), state.to(device)

#         x = torch.transpose(x, 0, 1)        
#         preds, _ = model(x, (hidden, state))
#         preds = torch.transpose(preds, 0, 1)
        
        x_packed = pack_padded_sequence(x, x_len, batch_first=True, enforce_sorted=False)
        preds_packed, _ = model(x_packed, (hidden, state))
        preds, pred_len = pad_packed_sequence(preds_packed, batch_first=True, padding_value=pad)
        
        preds = preds.squeeze(2)
        mask_tgt = targets != pad
        print(targets)
        print(preds)
        int_targets = []
        int_preds = []
        for i in range(len(targets)):
            for j in range(len(targets[i])):
                int_targets.append(int(round(float(targets[i][j]))))
                int_preds.append(int(preds[i][j]))
        print(set(int_preds))

{0}
tensor([[2.8939e-06, 5.4399e-06, 5.5305e-06,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.5726e-06, 1.6340e-06, 1.9351e-06,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.9906e-06, 2.4203e-06, 3.8234e-06,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        ...,
        [1.1020e-06, 1.4586e-06, 1.6194e-06,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.3329e-06, 3.4580e-06, 5.2528e-06,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.2716e-06, 2.7448e-06, 3.6860e-06,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00]])
tensor([[0.0002, 0.0004, 0.0004,  ..., 0.0000, 0.0000, 0.0000],
        [0.0002, 0.0004, 0.0004,  ..., 0.0000, 0.0000, 0.0000],
        [0.0002, 0.0004, 0.0004,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0002, 0.0004, 0.0004,  ..., 0.0000, 0.0000, 0.0000],
        [0.0002, 0.0004, 0.0004,  ..., 0.0000, 0.0000, 0.0000],
        [0.0002, 0.0004, 0.0004,  ..., 0.0000, 0.0000, 0.0000]])
{0