In [63]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [100]:
class ResNetBlock(nn.Module):
    def __init__(self, input_dim: int, kernel_size: int = 5, padding: int = 2):
        super(ResNetBlock, self).__init__()
        self.net = nn.Sequential(
            nn.Conv1d(input_dim, input_dim, kernel_size=kernel_size, padding=padding),
            nn.BatchNorm1d(input_dim),
            nn.ReLU(),
            nn.Conv1d(input_dim, input_dim, kernel_size=kernel_size, padding=padding),
            nn.BatchNorm1d(input_dim),
        )

    def forward(self, x):
        residual = x # Save the input for the skip connection
        out = self.net(x)
        out += residual
        return F.relu(out)


class RVolResNet(nn.Module):
    def __init__(self, input_dim: int = 24, hidden_dim: int = 64, num_blocks: int = 4):
        super(RVolResNet, self).__init__()
        layers = [
            nn.Conv1d(input_dim, hidden_dim, kernel_size=1),
        ]
        
        for _ in range(num_blocks):
            layers.append(ResNetBlock(hidden_dim))
            layers.append(nn.AdaptiveAvgPool1d(1))
        
        layers.append(nn.Flatten())
        layers.append(nn.Dropout())
        layers.append(nn.Linear(hidden_dim, 1))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        for i, layer in enumerate(self.net):
            x = layer(x)
            if torch.isnan(x).any():
                print(f"NaN detected after layer {i}: {layer}")
                break
        return x

        



In [101]:
from engineer import engineer_data
import numpy as np
import pandas as pd

df_train, df_test = engineer_data(load_local=True)

In [102]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

train, valid = train_test_split(df_train, test_size=0.2, random_state=42)

X_train = train.drop(columns=['row_id', 'target', 'time_id']).to_numpy()
y_train = train['target'].to_numpy()

X_valid = valid.drop(columns=['row_id', 'target', 'time_id']).to_numpy()
y_valid = valid['target'].to_numpy()

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)

In [103]:
from torch.utils.data import TensorDataset, DataLoader

dataset = TensorDataset(X_train, y_train)
batch_size = 64
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [104]:
def rmse_loss(y_pred, y_true):
    return torch.sqrt(F.mse_loss(y_pred, y_true))

def rmspe_loss(y_pred, y_true):
    epsilon = 1e-12
    percentage_error = ((y_true - y_pred) / (y_true + epsilon)) ** 2
    return torch.sqrt(torch.mean(percentage_error))

In [105]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = RVolResNet(input_dim=X_train.shape[1], hidden_dim=64, num_blocks=4).to(device)
criterion = nn.MSELoss() #rmspe_loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)  # Learning rate can be adjusted

In [106]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        inputs = inputs.unsqueeze(1)
        inputs = inputs.permute(0, 2, 1)
        optimizer.zero_grad()
        outputs = model(inputs).squeeze() 
        loss = criterion(outputs, targets)  # Use RMSPE or RMSE loss here
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)
    
    epoch_loss = running_loss / len(dataloader.dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24,

  return F.mse_loss(input, target, reduction=self.reduction)


NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24, 64, kernel_size=(1,), stride=(1,))
NaN detected after layer 0: Conv1d(24,

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (64) must match the size of tensor b (41) at non-singleton dimension 1

In [99]:
inputs.permute(0, 2, 1).shape

torch.Size([64, 1, 24])

In [96]:
outputs.shape

torch.Size([64])

In [90]:
inputs


tensor([[[-0.4048],
         [-0.3506],
         [-0.1245],
         ...,
         [ 0.0965],
         [-0.3538],
         [-0.4351]],

        [[-0.5480],
         [-0.4523],
         [-0.1334],
         ...,
         [-0.2538],
         [-0.3975],
         [-0.4335]],

        [[-0.6545],
         [-0.5207],
         [-0.0854],
         ...,
         [ 0.9855],
         [-0.1697],
         [-0.2144]],

        ...,

        [[ 1.4891],
         [ 2.1180],
         [-0.1236],
         ...,
         [ 1.2011],
         [-0.1704],
         [-0.0933]],

        [[-0.0259],
         [ 0.1983],
         [-0.0931],
         ...,
         [ 0.5006],
         [-0.0952],
         [-0.1179]],

        [[-0.2327],
         [-0.2036],
         [-0.1125],
         ...,
         [-0.3076],
         [ 0.0094],
         [ 0.0849]]], device='cuda:0')