In [None]:
import lightning as L
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import TensorDataset
import torch
from torch.utils.data import DataLoader
from lightning import Trainer
from icecream import ic


# Choose the compute device once; later cells reuse `device` (e.g. for the
# DataLoader random generator).
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'
if cuda:
    # `torch.set_default_tensor_type('torch.cuda.FloatTensor')` is deprecated.
    # The two calls below are its supported replacement: float32 as the default
    # dtype and the GPU as the default device for newly created tensors.
    torch.set_default_dtype(torch.float32)
    torch.set_default_device(device)

In [None]:
# Samples per batch, shared by the train / validation / test loaders.
BATCH_SIZE = 10024

# Load the tensors from disk and cast everything to float32.
train_x = torch.load('data/train_x.pt').to(torch.float32)
train_y = torch.load('data/train_y.pt').to(torch.float32)
holdout_x = torch.load('data/test_x.pt').to(torch.float32)
holdout_y = torch.load('data/test_y.pt').to(torch.float32)

# Split the held-out file in half: first half -> validation, second half -> test.
# Separate names for the unsplit tensors keep `test_x` / `test_y` from meaning
# two different things inside this cell.
split_idx = len(holdout_x) // 2
validation_x, test_x = holdout_x[:split_idx, :], holdout_x[split_idx:, :]
validation_y, test_y = holdout_y[:split_idx], holdout_y[split_idx:]

# NOTE(review): the generator is created on `device`, presumably so that it
# matches the default tensor device configured in the setup cell when the
# loaders draw random numbers -- confirm before removing.
generator = torch.Generator(device=device)

train_dataset = TensorDataset(train_x, train_y)
validation_dataset = TensorDataset(validation_x, validation_y)
test_dataset = TensorDataset(test_x, test_y)

loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=0, generator=generator)
# Only the training data is shuffled; validation and test are evaluated in a
# fixed order (Lightning warns when evaluation loaders have shuffling enabled).
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
validation_loader = DataLoader(validation_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

# Display the shapes of all six tensors as a sanity check.
train_x.shape, train_y.shape, validation_x.shape, validation_y.shape, test_x.shape, test_y.shape

(torch.Size([100000, 100]),
 torch.Size([10000, 100]),
 torch.Size([100000]),
 torch.Size([10000]))

In [None]:
class MyModel(L.LightningModule):
    """One-hidden-layer MLP regressor trained with mean-squared error.

    Args:
        partition_size: Number of input features per sample.
        hidden_size: Width of the hidden layer. Defaults to 64.
        lr: Learning rate passed to Adam. Defaults to 5e-3.
    """

    def __init__(self, partition_size, hidden_size=64, lr=5e-3):
        super().__init__()
        self.lr = lr
        self.model = nn.Sequential(
            nn.Linear(partition_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
        )
        self.loss_fn = nn.MSELoss()

    def forward(self, x):
        """Run the network on `x` and return its raw output (last dim is 1)."""
        return self.model(x)

    def _shared_step(self, batch, metric_name, prog_bar):
        """Compute the MSE loss for one batch and log it under `metric_name`."""
        x, y = batch
        # Drop only the trailing size-1 output dim. A bare `squeeze()` would also
        # collapse the batch dim for a one-sample batch, yielding a 0-d
        # prediction against a (1,) target.
        y_hat = self(x).squeeze(-1)
        loss = self.loss_fn(y_hat, y)
        self.log(metric_name, loss, prog_bar=prog_bar)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss; logged as ``train_loss`` on the progress bar."""
        return self._shared_step(batch, "train_loss", prog_bar=True)

    def validation_step(self, batch, batch_idx):
        """Return the validation loss; logged as ``val_loss`` on the progress bar."""
        return self._shared_step(batch, "val_loss", prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Return the test loss; logged as ``test_loss`` (not on the progress bar)."""
        return self._shared_step(batch, "test_loss", prog_bar=False)

    def configure_optimizers(self):
        """Optimise all parameters with Adam at the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)



In [None]:
# Size the network's input layer to the last dimension of the training inputs.
model = MyModel(partition_size=train_x.shape[-1])
trainer = Trainer(accelerator="auto", devices=1, max_epochs=50)

# Score the untrained model first so there is a baseline test loss to compare
# against after training.
trainer.test(model=model, dataloaders=test_loader)

# Train, running validation on the validation loader.
trainer.fit(
    model=model,
    train_dataloaders=train_loader,
    val_dataloaders=validation_loader,
)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Testing DataLoader 0: 100%|██████████| 10/10 [00:00<00:00, 74.43it/s]


  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | model   | Sequential | 6.5 K  | train
1 | loss_fn | MSELoss    | 0      | train
-----------------------------------------------
6.5 K     Trainable params
0         Non-trainable params
6.5 K     Total params
0.026     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode



────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss            6.600236892700195
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Epoch 28:  80%|████████  | 8/10 [16:22<04:05,  0.01it/s, v_num=22, train_loss=1.090]
Epoch 99: 100%|██████████| 10/10 [00:00<00:00, 39.55it/s, v_num=24, train_loss=0.912]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 10/10 [00:00<00:00, 38.62it/s, v_num=24, train_loss=0.912]


In [23]:
# Re-evaluate on the test loader now that training has run; the returned
# list of metric dicts is the cell output.
trainer.test(model=model, dataloaders=test_loader)

Testing DataLoader 0: 100%|██████████| 10/10 [00:00<00:00, 83.88it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss           0.9340535402297974
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.9340535402297974}]