In [1]:
# Step up one directory (the notebook is assumed to live in <repo>/notebooks) so that
# project packages such as `models` and `datasets` are importable from the working directory.
%cd ..

/home/tourloid/Desktop/PhD/Code/SPVD_Lightning


# Imports

In [2]:
#export
import torch
import torch.nn as nn
import torch.nn.functional as F
import lightning as L
from abc import ABC, abstractmethod

# Task
Using tasks allows for easy integration of similar tasks, such as Completion and Super-Resolution, as proposed in the SPVD publication.

In [3]:
#export
class Task(ABC):
    """Abstract description of a training task.

    A concrete task supplies two hooks: one that unpacks a batch into a
    network input and a target, and one that scores predictions against
    that target.
    """

    @abstractmethod
    def prep_data(self, batch):
        """Turn `batch` into an `(input, target)` pair."""

    @abstractmethod
    def loss_fn(self, pred, target):
        """Compute the loss of `pred` with respect to `target`."""

In [4]:
#export
class SparseGeneration(Task):
    """Task whose regression target is the noise stored in the batch."""

    def prep_data(self, batch):
        """Build the network input and the target from `batch`.

        Reads the 'input', 't' and 'noise' entries and returns
        `((input, t), noise.F)`.
        """
        net_input = (batch['input'], batch['t'])
        # `.F` is presumably the feature tensor of a sparse tensor - confirm against the dataset.
        target = batch['noise'].F
        return net_input, target

    def loss_fn(self, preds, target):
        """Mean squared error between `preds` and `target`."""
        return F.mse_loss(preds, target)

# DiffusionBase

In [5]:
#export 
class DiffusionBase(L.LightningModule):
    """Lightning module that trains a wrapped network on a `Task`.

    The task decides how a batch is unpacked into a network input and a
    target (`prep_data`) and how predictions are scored (`loss_fn`); this
    class only wires those hooks into Lightning's train/validation loop.

    Args:
        model: the network to train; called as `model(inp)` with the input
            produced by the task.
        task: a `Task` instance. When omitted (or `None`), a fresh
            `SparseGeneration()` is created for this module.
    """

    def __init__(self, model, task=None):
        super().__init__()
        self.model = model
        # Build the default per instance: a call in the signature would be
        # evaluated once at class-definition time and shared by every module.
        self.task = SparseGeneration() if task is None else task

    def forward(self, x):
        """Run the wrapped network on `x`."""
        return self.model(x)

    def _task_loss(self, batch):
        """Unpack `batch` with the task, run the network, and return the loss."""
        inp, target = self.task.prep_data(batch)
        preds = self(inp)
        return self.task.loss_fn(preds, target)

    def training_step(self, batch, batch_idx):
        """Return the task loss for one training batch."""
        # Training-loss logging is intentionally left disabled:
        # self.log('train_loss', loss)
        return self._task_loss(batch)

    def validation_step(self, batch, batch_idx):
        """Log the task loss for one validation batch as 'val_loss'."""
        loss = self._task_loss(batch)
        # NOTE(review): Lightning warns that it has to infer the batch size here;
        # pass `batch_size=` once the batch layout is confirmed.
        self.log('val_loss', loss)

    def configure_optimizers(self):
        """Return an AdamW optimizer over all parameters (lr=5e-4, weight_decay=0.05)."""
        opt = torch.optim.AdamW(params=self.parameters(), lr=0.0005, weight_decay=0.05)
        # No LR scheduler is enabled. A warmup + cosine schedule was sketched here:
        # sched = LinearWarmupCosineAnnealingLR(opt, warmup_epochs=10, max_epochs=self.trainer.max_epochs, warmup_start_lr=1e-6, eta_min=1e-6)
        # return [opt], [sched]
        return opt

# Train a model using Lightning

In [6]:
# Speed optimization: let float32 matrix multiplications trade some precision for throughput
# (see the torch.set_float32_matmul_precision documentation for what 'medium' permits).
torch.set_float32_matmul_precision('medium')

In [7]:
from models import SPVD

In [8]:
# Wrap the SPVD network in the Lightning module; no task is passed, so DiffusionBase's default task is used.
model = DiffusionBase(SPVD())

In [9]:
from datasets.shapenet_pointflow_sparse import get_dataloaders

# Root folder of the ShapeNetCore.v2.PC15k data on this machine.
# Alternative location (presumably another workstation), kept for reference:
#   "/home/vvrbeast/Desktop/Giannis/Data/ShapeNetCore.v2.PC15k"
path = '/home/tourloid/Desktop/PhD/Data/ShapeNetCore.v2.PC15k/'

# ShapeNet categories to load.
categories = ['car']

# Two loaders are returned; judging by the names, train first and test second.
tr_dl, te_dl = get_dataloaders(path, categories)

(1, 1, 1)
Total number of data:2458
Min number of points: (train)2048 (test)2048
(1, 1, 1)
Total number of data:352
Min number of points: (train)2048 (test)2048


In [10]:
# Trainer with all-default settings; per the log below, an unset `max_epochs` falls back to 1000 epochs.
trainer = L.Trainer()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/tourloid/anaconda3/envs/sparse/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


In [None]:
# Fit on the first loader and run validation on the second loader returned by get_dataloaders.
trainer.fit(model=model, train_dataloaders=tr_dl, val_dataloaders=te_dl)

/home/tourloid/anaconda3/envs/sparse/lib/python3.10/site-packages/lightning/pytorch/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
Missing logger folder: /home/tourloid/Desktop/PhD/Code/SPVD_Lightning/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type    | Params | Mode 
------------------------------------------
0 | model | SPVUnet | 32.9 M | train
------------------------------------------
32.9 M    Trainable params
0         Non-trainable params
32.9 M    Total params
131.576   Total estimated model params size (MB)


Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

/home/tourloid/anaconda3/envs/sparse/lib/python3.10/site-packages/lightning/pytorch/utilities/data.py:78: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 32. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Training: |                                               | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Exception ignored in: <function _releaseLock at 0x7e505ef31c60>
Traceback (most recent call last):
  File "/home/tourloid/anaconda3/envs/sparse/lib/python3.10/logging/__init__.py", line 228, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 
