In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.utils.data import random_split, Dataset
import torch.optim as optim
import pandas as pd
import numpy as np
import random
import pickle
import matplotlib.pyplot as plt

In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)
print("Device:", device)

Device: cpu


In [3]:
with open('p5/train.pkl', 'rb') as f:
    data = pickle.load(f)

In [4]:
class VariableLenDataset(Dataset):
    def __init__(self, in_data, target):
        self.data = [(x, y) for x, y in zip(in_data, target)]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        in_data, target = self.data[idx]
        return in_data, target

In [5]:
data_attr = []
data_targets = []
for d in data:
    data_attr.append(d[0])
    data_targets.append(d[1])

In [28]:
def pad_collate(batch, pad_value=-100):
    try:
        xx, yy = zip(*batch)
    except ValueError as e:
        print(f"Error while unpacking batch: {e}")
        print(f"Batch content: {batch}")
        raise

    xx = [torch.tensor(x) for x in xx]
    x_lens = [len(x) for x in xx]
    yy_pad = torch.tensor(yy)
    y_lens = [1] * len(yy)

    xx_pad = pad_sequence(xx, batch_first=True, padding_value=pad_value)

    return xx_pad, yy_pad, x_lens, y_lens
    # return xx_pad, yy_pad

In [29]:
rng = np.random.default_rng(73512)

dataset_size = len(data)
train_size = int(0.8 * dataset_size)
test_size = dataset_size - train_size

train_set, test_set = random_split(data, [train_size, test_size])

print(train_set[6])

train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True, collate_fn=pad_collate)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False, drop_last=False, collate_fn=pad_collate)

(array([144., 144., 144., 144.,  32., 146.,  33.,  78.,  92.,   5., 185.,
       154.,  47.,  88.,  56., 113.,  37.,  60.,  33.,  38.,  47.,  12.,
        12.,  47.,  60., 159.,  13.,  79., 149.,  32., 121., 185.,   8.,
        47.,  92.,  65.,  92., 172.,  28.,  12.,  28., 172.,  47., 152.,
       152., 145., 145., 145., 159.,  65.,  73.,  28.,  88.,  41., 125.,
       119., 119.,  78.,  12.,  47.,  33.,  38.,  47., 145.,   5., 117.,
        47., 159., 190., 124., 124.,  12.,  44.,  56.,  56.,  33.,  47.,
       145., 145., 145., 159.,   5., 159.,  12., 125.,  47.,  28., 159.,
       125., 125.,  60.,  33.,  47.,  92.,  47.,  38., 172.,  92., 117.,
         8.,   8.,   0.,   0.,  67.,   0.,  65.,  65., 109.,  44.,  60.,
        92., 145.,  28.,  47.,  74.,  73.,  44.,  79., 149.,  32., 124.,
       185., 190., 114., 146.,  25., 132., 144., 185., 112., 112., 112.,
        12.,  88.,  45., 120., 124.,  92., 159.,  12.,  92., 172.,  47.,
        92.,  92., 145.,  88.,  41.,  40., 144.,  

In [25]:
class LSTMRegressor(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, out_size, bidirectional = False):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.bidirectional = 2 if bidirectional else 1
        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, bidirectional=bidirectional, dropout=0.4)
        self.fc = nn.Linear(hidden_size*self.bidirectional, out_size)
        
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers*self.bidirectional , batch_size, self.hidden_size)
        state = torch.zeros(self.num_layers*self.bidirectional , batch_size, self.hidden_size)
        return hidden, state
    
    def forward(self, x, hidden):
        x = torch.transpose(x,0,1)
        all_outputs, hidden = self.lstm(x, hidden)
        all_outputs = torch.transpose(all_outputs,0,1)
        # out = torch.flatten(all_outputs,1)
        out = all_outputs[-1]
        x = self.fc(out)
        return x, hidden
    
model = LSTMRegressor(1,5,2,16).to(device)
model

LSTMRegressor(
  (lstm): LSTM(1, 5, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=5, out_features=16, bias=True)
)

In [32]:
class LSTM_Seq_Regressor(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, out_size):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.proj_size = out_size
        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, proj_size = out_size)
        
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.proj_size)
        state = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        return hidden, state
    
    def forward(self, x, hidden):
        # x = torch.transpose(x, 0, 1)
        all_outputs, hidden = self.lstm(x, hidden)
        # all_outputs = torch.transpose(all_outputs, 0, 1)
        return all_outputs, hidden
    
model = LSTM_Seq_Regressor(1, 200, 1, 1).to(device)
model

LSTM_Seq_Regressor(
  (lstm): LSTM(1, 200, proj_size=1)
)

In [34]:
train_indices = int(len(data) * 0.7)
train_set = VariableLenDataset(data[:train_indices], targets[:train_indices])
test_set = VariableLenDataset(data[train_indices:], targets[train_indices:])

In [36]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=50, shuffle=True, collate_fn=pad_collate)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=50, shuffle=False, drop_last=False, collate_fn=pad_collate)

In [27]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
loss_fun = nn.CrossEntropyLoss()

print(train_loader)

# Training loop
for epoch in range(5):
    for x, targets in train_loader:
        print("done")
        x = x.to(device).unsqueeze(2).float()
        # x = torch.tensor(x, dtype=torch.float32)
#         x = x.unsqueeze(2)
        targets = targets.to(device).long()
        hidden, state = model.init_hidden(x.size(0))
        hidden, state = hidden.to(device), state.to(device) 
        hidden = torch.tensor(hidden, dtype=torch.float32)
        state = torch.tensor(state, dtype=torch.float32)
        preds, _ = model(x, (hidden,state))
        preds = preds.squeeze(1)
        optimizer.zero_grad() 
        loss = loss_fun(preds, targets)
        loss.backward()
        optimizer.step()
    if epoch % 1 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():.3}")

<torch.utils.data.dataloader.DataLoader object at 0x000001B459445F10>
done


  hidden = torch.tensor(hidden, dtype=torch.float32)
  state = torch.tensor(state, dtype=torch.float32)


ValueError: Expected input batch_size (2031) to match target batch_size (32).

In [37]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fun = nn.MSELoss()
pad = 0

# Training loop
for epoch in range(101):
    for x, targets, x_len, target_len in train_loader:
        x = x.to(device).unsqueeze(2).float()
        targets = targets.to(device)
        hidden, state = model.init_hidden(x.size(0))
        hidden, state = hidden.to(device), state.to(device) 
        
        x = torch.transpose(x, 0, 1)
        preds, _ = model(x, (hidden, state))
        preds = torch.transpose(preds, 0, 1)
        
#         x_packed = pack_padded_sequence(x, x_len, batch_first=True, enforce_sorted=False)
#         preds_packed, _ = model(x_packed, (hidden, state))
#         preds, pred_len = pad_packed_sequence(preds_packed, batch_first=True, padding_value=pad)
        
        preds = preds.squeeze(2)
        optimizer.zero_grad()
        mask = targets != pad
        loss = loss_fun(preds[mask], targets[mask])
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():.3}")

  xx = [torch.tensor(x) for x in xx]


TypeError: not a sequence