In [10]:
import gc
import os
import warnings

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset, Dataset
from tqdm import tqdm

from read_data import getData, LargeDataset, WindowedNormalizedDataset


In [12]:
# Load the train/test arrays; getData receives the string "data"
# (presumably the data directory -- confirm in read_data).
trainData, testData = getData("data")
# Last expression of the cell: a tuple with the shape of each split.
tuple(split.shape for split in (trainData, testData))

Training Data's shape is (10000, 50, 110, 6) and Test Data's is (10000, 50, 110, 6)


((10000, 50, 110, 6), (2100, 50, 50, 6))

In [19]:
class WindowedNormalizedDataset(Dataset):
    """Sliding-window forecasting dataset over a 4-D array.

    Every item is a pair ``(x, y)`` cut from one sample of ``data``:

    * ``x = data[sample, :, t:t + window_size, :]`` -- the observed window
      (all entries along axis 1, all features),
    * ``y = data[sample, 0, t + window_size:t + window_size + forecast_horizon, :2]``
      -- the next ``forecast_horizon`` steps of entry 0 on axis 1, first two
      features only.

    Args:
        data: Array indexed as ``[sample, entry, time, feature]`` (axis 2 is the
            one that is windowed). Must support ``.shape`` and NumPy-style slicing.
        window_size: Number of time steps in the input window.
        forecast_horizon: Number of time steps in the target.
        mean: Optional value subtracted from ``x``; must broadcast against ``x``.
        std: Optional value ``x`` is divided by; must broadcast against ``x``.

    Notes:
        Normalisation is applied to ``x`` only and only when *both* ``mean`` and
        ``std`` are given; ``y`` is always returned in the original units.
        If the time axis is shorter than ``window_size + forecast_horizon`` the
        dataset is empty and a ``UserWarning`` is emitted so the problem is not
        discovered only later (e.g. as a DataLoader error or an empty loop).
    """

    def __init__(self, data, window_size=40, forecast_horizon=10, mean=None, std=None):
        self.data = data
        self.window_size = window_size
        self.forecast_horizon = forecast_horizon
        self.mean = mean
        self.std = std

        # Number of valid window start positions per sample; clamp at zero so a
        # too-short time axis is handled explicitly instead of via range(negative).
        windows_per_sample = max(data.shape[2] - window_size - forecast_horizon + 1, 0)
        if windows_per_sample == 0:
            warnings.warn(
                f"Time axis has {data.shape[2]} steps but window_size + forecast_horizon "
                f"= {window_size + forecast_horizon}; the dataset contains no windows.",
                UserWarning,
                stacklevel=2,
            )

        # Precompute indices of valid (sample, t) combinations, sample-major.
        self.indices = [
            (sample, t)
            for sample in range(data.shape[0])
            for t in range(windows_per_sample)
        ]

    def __len__(self):
        """Return the total number of (sample, start) windows."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(x, y)`` float32 tensors for the ``idx``-th window."""
        sample_idx, t = self.indices[idx]
        window_end = t + self.window_size

        # With the defaults: x has shape (entries, 40, features), y has shape (10, 2).
        x = self.data[sample_idx, :, t:window_end, :]
        y = self.data[sample_idx, 0, window_end:window_end + self.forecast_horizon, :2]

        # Inputs are standardised only when both statistics were supplied.
        if self.mean is not None and self.std is not None:
            x = (x - self.mean) / self.std

        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)


# Both splits share one windowing configuration: 50 input steps, 20 target steps.
# NOTE(review): the recorded output reports testData with only 50 steps on axis 2;
# with 50 + 20 steps required per window that leaves no valid windows, so
# `testdataset` would be empty -- confirm the intended test-time windowing.
window_kwargs = dict(window_size=50, forecast_horizon=20)
dataset = WindowedNormalizedDataset(data=trainData, **window_kwargs)
testdataset = WindowedNormalizedDataset(data=testData, **window_kwargs)

In [20]:
class EncoderDecoderModel(nn.Module):
    """LSTM encoder/decoder that maps a 4-D input to a fixed-length forecast.

    Pipeline:
        1. ``Linear(infeatures -> 16)`` + GELU applied to the last axis.
        2. Axes 1 and 2 are flattened into a single sequence axis.
        3. 2-layer LSTM encoder (hidden size 32).
        4. Adaptive average pooling along the sequence axis down to
           ``forecast_horizon`` steps.
        5. 2-layer LSTM decoder (hidden size 16).
        6. ``Linear(16 -> outfeatures)`` with *no* output activation.

    Args:
        infeatures: Size of the last input axis.
        outfeatures: Size of the last output axis.
        forecast_horizon: Number of output steps (length the encoder sequence
            is pooled to). Defaults to 20, the previously hard-coded value.

    Notes:
        The head is linear on purpose: a trailing GELU would bound every output
        below at roughly -0.17, making negative regression targets unreachable.
        The activation module holds no parameters, so ``state_dict`` keys are
        the same as before.
    """

    def __init__(self, infeatures, outfeatures=0, forecast_horizon=20):
        super().__init__()
        self.layer1 = nn.Linear(in_features=infeatures, out_features=16)
        self.encoderlstm = nn.LSTM(input_size=16, hidden_size=32, num_layers=2,
                                   batch_first=True, dropout=0.2)

        self.pool = nn.AdaptiveAvgPool1d(forecast_horizon)

        self.decoderlstm = nn.LSTM(input_size=32, hidden_size=16, num_layers=2,
                                   batch_first=True, dropout=0.3)
        self.layer12 = nn.Linear(in_features=16, out_features=outfeatures)

        # Parameter-free activation, created once instead of on every forward pass.
        self.act = nn.GELU()

    def forward(self, x):
        """Run the model.

        Args:
            x: Tensor of shape ``(batch, d1, d2, infeatures)``
               (presumably d1 = entries/agents and d2 = time -- confirm with the
               dataset producing the batches).

        Returns:
            Tensor of shape ``(batch, forecast_horizon, outfeatures)``.

        Raises:
            ValueError: If ``x`` is not 4-dimensional.
        """
        if x.dim() != 4:
            raise ValueError(f"expected a 4-D input, got shape {tuple(x.shape)}")
        batch_size = x.shape[0]

        x = self.act(self.layer1(x))                # [batch, d1, d2, 16]
        x = x.reshape(batch_size, -1, x.size(-1))   # [batch, d1 * d2, 16]
        x, _ = self.encoderlstm(x)                  # [batch, d1 * d2, 32]

        # AdaptiveAvgPool1d pools the last axis, so move the sequence axis there
        # and back again afterwards.
        x = x.permute(0, 2, 1)                      # [batch, 32, d1 * d2]
        x = self.pool(x)                            # [batch, 32, forecast_horizon]
        x = x.permute(0, 2, 1)                      # [batch, forecast_horizon, 32]

        x, _ = self.decoderlstm(x)                  # [batch, forecast_horizon, 16]
        return self.layer12(x)                      # [batch, forecast_horizon, outfeatures]

# Build the model for 6 input features and 2 output features, then push one
# random batch through it as a shape check.
model = EncoderDecoderModel(infeatures=6, outfeatures=2)
test = torch.randn(10, 50, 110, 6)
out = model(test)
# Last expression of the cell: displays the output shape.
out.shape

torch.Size([10, 20, 2])

In [22]:
# Training runs on the CPU.
device = torch.device("cpu")

# Shuffled mini-batches of 32 windows, loaded in the main process.
trainDataLoader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0)

model.to(device)

# Training setup
epochs = 100
lossFn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# torch.save does not create missing parent directories; make sure the
# checkpoint folder exists before spending an epoch of compute.
os.makedirs("./models", exist_ok=True)

for each_epoch in range(epochs):
    model.train()
    runningLoss = 0.0
    loop = tqdm(trainDataLoader, desc=f"Epoch [{each_epoch+1}/{epochs}]")

    for batchX, batchY in loop:
        batchX, batchY = batchX.to(device, non_blocking=True), batchY.to(device, non_blocking=True)
        output = model(batchX)
        loss = lossFn(output, batchY)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        runningLoss += loss.item()

    # Mean of the per-batch losses over the epoch.
    avgLoss = runningLoss / len(trainDataLoader)

    # A checkpoint is written after every epoch; the file name carries the
    # epoch's average training loss.
    torch.save(model.state_dict(), f'./models/large_model_{avgLoss}.pth')
    print(f"Epoch {each_epoch + 1}, Training Loss: {avgLoss:.4f}")


Epoch [1/100]:   0%|          | 38/12813 [00:19<1:49:55,  1.94it/s]


KeyboardInterrupt: 