<a href="https://colab.research.google.com/github/FedericaBellini22/NeuralNetworks_Project24-25/blob/main/TSmixer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from urllib.request import urlretrieve
import zipfile

DATA_URL = "https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh1.csv"
DATA_DIR = "data"
FILE_PATH = os.path.join(DATA_DIR, "ETTh1.csv")

LOOKBACK_WINDOW = 512  # Number of input time steps
FORECAST_HORIZON = 96   # Number of time steps in output
BATCH_SIZE = 32

# Download dataset if not present
def download_dataset():
    if not os.path.exists(DATA_DIR):
        os.makedirs(DATA_DIR)
    if not os.path.exists(FILE_PATH):
        print(f"Downloading {DATA_URL} ...")
        urlretrieve(DATA_URL, FILE_PATH)
        print("Download completed!")

class ETTh1Dataset(Dataset):
    def __init__(self, file_path, lookback=LOOKBACK_WINDOW, horizon=FORECAST_HORIZON, split="train"):
        self.data = pd.read_csv(file_path, parse_dates=["date"], index_col="date")

        # Normalization min-max
        self.mean = self.data.mean()
        self.std = self.data.std()
        self.data = (self.data - self.mean) / self.std

        # Split train/val/test (70/20/10)
        num_samples = len(self.data)
        train_size = int(num_samples * 0.7)
        val_size = int(num_samples * 0.2)

        if split == "train":
            self.data = self.data.iloc[:train_size].values
        elif split == "val":
            self.data = self.data.iloc[train_size:train_size + val_size].values
        elif split == "test":
            self.data = self.data.iloc[train_size + val_size:].values

        self.lookback = lookback
        self.horizon = horizon

    def __len__(self):
        return len(self.data) - self.lookback - self.horizon

    def __getitem__(self, index):
        x = self.data[index : index + self.lookback]  # Input L
        y = self.data[index + self.lookback : index + self.lookback + self.horizon, 0]  # Output T (only target OT)
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Function to get the DataLoader
def get_dataloaders(batch_size=BATCH_SIZE):
    download_dataset()

    train_dataset = ETTh1Dataset(FILE_PATH, split="train")
    val_dataset = ETTh1Dataset(FILE_PATH, split="val")
    test_dataset = ETTh1Dataset(FILE_PATH, split="test")

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

if __name__ == "__main__":
    train_loader, val_loader, test_loader = get_dataloaders()
    x, y = next(iter(train_loader))
    print(f"Dataset ETTh1 ready! Input sizes: {x.shape}, output: {y.shape}")


Downloading https://raw.githubusercontent.com/zhouhaoyi/ETDataset/main/ETT-small/ETTh1.csv ...
Download completed!
Dataset ETTh1 ready! Input sizes: torch.Size([32, 512, 7]), output: torch.Size([32, 96])


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

class TimeMixingMLP(nn.Module):
    #MLP operating along the time dimension
    def __init__(self, input_dim, hidden_dim, dropout=0.1):
        super(TimeMixingMLP, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, input_dim)
        )
        self.norm = nn.LayerNorm(input_dim)

    def forward(self, x):
        batch_size, time_steps, features = x.shape
        x = x.view(batch_size * time_steps, features)
        out = self.mlp(x)
        out = out.view(batch_size, time_steps, features)
        return self.norm(out + x.view(batch_size, time_steps, features))

class FeatureMixingMLP(nn.Module):
    #MLP operating along the feature size
    def __init__(self, input_dim, hidden_dim, dropout=0.1):
        super(FeatureMixingMLP, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, input_dim)
        )
        self.norm = nn.LayerNorm(input_dim)

    def forward(self, x):
        batch_size, time_steps, features = x.shape
        x = x.reshape(-1, features)
        out = self.mlp(x)
        out = out.view(batch_size, time_steps, features)
        return self.norm(out + x.view(batch_size, time_steps, features))

class TSMixer(nn.Module):
    #TSMixer model with Time-Mixing and Feature-Mixing blocks
    def __init__(self, input_dim, hidden_dim, num_layers=3, output_size=96):
        super(TSMixer, self).__init__()
        self.output_size = output_size
        self.layers = nn.ModuleList()
        for _ in range(num_layers):
            self.layers.append(TimeMixingMLP(input_dim, hidden_dim))
            self.layers.append(FeatureMixingMLP(input_dim, hidden_dim))

        self.output_layer = nn.Linear(input_dim, 1)  # Final forecast

    def forward(self, x):
        print(f"[TSMixer] Input shape: {x.shape}")
        for layer in self.layers:
            x = layer(x)

        x = self.output_layer(x)  # (batch, time, 1)
        x = x.squeeze(-1)  # Output with shape (batch, time)

        x = x[:, -self.output_size:]
        print(f"[TSMixer] Output shape: {x.shape}")
        return x


# Configurations
EPOCHS = 50
BATCH_SIZE = 64
LEARNING_RATE = 5e-4
HIDDEN_DIM = 256
NUM_LAYERS = 6
DROPOUT_RATE = 0.1
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the DataLoader
train_loader, val_loader, test_loader = get_dataloaders(batch_size=BATCH_SIZE)

x, y = next(iter(train_loader))
print(f"Shape batch: {x.shape}")  # check the size

# Define the model
input_dim = next(iter(train_loader))[0].shape[-1]  # Number of features
model = TSMixer(input_dim=input_dim, hidden_dim=HIDDEN_DIM, num_layers=NUM_LAYERS).to(DEVICE)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training function
def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        output = model(x)  # Predictions
        loss = criterion(output, y)  # Compute the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the weights
        total_loss += loss.item()
    return total_loss / len(train_loader)

# Validation function
def evaluate(model, val_loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            loss = criterion(output, y)
            total_loss += loss.item()
    return total_loss / len(val_loader)

# Training Loop
best_val_loss = float("inf")
save_path = "best_tsmixer.pth"

for epoch in range(1, EPOCHS + 1):
    train_loss = train_epoch(model, train_loader, criterion, optimizer, DEVICE)
    val_loss = evaluate(model, val_loader, criterion, DEVICE)

    print(f"Epoch [{epoch}/{EPOCHS}] - Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

    # Save the template if it improves
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), save_path)
        print("Best saved model")

print("Training completed!")





[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMi

In [3]:
save_path = "tsmixer_trained.pth"
torch.save(model.state_dict(), save_path)
print(f"Model saved in {save_path}")


Model saved in tsmixer_trained.pth


In [4]:
pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.7.0-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.14.2-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->torchmetrics)
  D

In [5]:
from torchmetrics.functional import mean_absolute_error, mean_squared_error

# Upload the saved template
model.load_state_dict(torch.load("best_tsmixer.pth"))
model.eval()

# Compute MAE e MSE
y_true = []
y_pred = []

with torch.no_grad():
    for x, y in val_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        output = model(x)
        y_true.append(y.cpu())
        y_pred.append(output.cpu())

y_true = torch.cat(y_true, dim=0)
y_pred = torch.cat(y_pred, dim=0)

mae = mean_absolute_error(y_pred, y_true)
mse = mean_squared_error(y_pred, y_true)

print(f"MAE on validation set: {mae:.4f}")
print(f"MSE on validation set: {mse:.4f}")


[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape: torch.Size([64, 96])
[TSMixer] Input shape: torch.Size([64, 512, 7])
[TSMixer] Output shape

In [6]:
import json

results = {
    "MSE": float(mse.item()),
    "MAE": float(mae.item()),
    "Train Loss": float(train_loss),
    "Validation Loss": float(val_loss)
}

with open("training_results.json", "w") as f:
    json.dump(results, f, indent=4)

print("results saved in training_results.json")


results saved in training_results.json
