In [1]:
import torch

import torch.nn.functional as F
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from torch.utils.data import random_split, DataLoader
from pytorch_lightning.loggers import TensorBoardLogger


import resnet 


In [2]:
# Central training configuration for this notebook.
# The later cells read 'batch size', 'val ratio', 'epochs', 'lr' and
# 'weight decay'; the remaining learning-rate schedule entries are not
# referenced by any cell shown here.
hyperparameters = {
    'batch size': 24,
    'val ratio': 0.1,
    'epochs': 35,
    'lr': 9.5e-6,
    'lr decay': 0.25,
    'lr decay threshold': 0.05,
    'lr warming up period': 50,
    'weight decay': 0.01,
}

In [48]:
class Dataset(pl.LightningDataModule):
    """Lightning data module that serves the pre-saved image dataset for training.

    Args:
        batch_size: Number of samples per batch produced by the training
            ``DataLoader``.
        val_ratio: Fraction of the data intended for validation. It is stored
            on the instance; no validation split is built from it yet.
        dataset_path: Directory that contains the ``dataset_images`` file.
    """

    def __init__(self, batch_size, val_ratio, dataset_path):
        # The base class keeps its own internal state; initialise it before
        # adding attributes so the Trainer can rely on it.
        super().__init__()
        self.batch_size = batch_size
        self.val_ratio = val_ratio  # previously accepted but silently dropped
        self.dataset_path = dataset_path
        self.images = None  # populated by setup()

    def setup(self, stage=None):
        """Load ``<dataset_path>/dataset_images`` once and keep it on the instance.

        Args:
            stage: Stage name supplied by the Trainer; not used here.
        """
        if self.images is not None:
            # setup() may be invoked more than once; avoid re-reading the file.
            return
        # map_location='cpu' keeps the stored tensors in host memory even if
        # the file was saved from a GPU; the Trainer moves each batch to the
        # training device on its own.
        # NOTE(review): torch.load unpickles the file - only load trusted data.
        self.images = torch.load(
            f'{self.dataset_path}/dataset_images', map_location='cpu'
        )

    def train_dataloader(self):
        """Return a ``DataLoader`` over the loaded dataset using ``batch_size``."""
        return DataLoader(self.images, batch_size=self.batch_size)


In [69]:
class Net(pl.LightningModule):
    """Lightning module that regresses stored "dna" target vectors from images.

    Args:
        model: Network applied to the image batch to produce predictions.
        dataset_path: Directory that contains the ``dataset_dna`` file.
        learning_rate: Optimizer learning rate. Defaults to
            ``hyperparameters['lr']`` when omitted.
        weight_decay: Optimizer weight decay. Defaults to
            ``hyperparameters['weight decay']`` when omitted.
    """

    def __init__(self, model, dataset_path, learning_rate=None, weight_decay=None):
        super().__init__()
        self.model = model
        # Keep the target lookup in host memory regardless of the device it was
        # saved from; training_step moves only the current batch's targets.
        # NOTE(review): torch.load unpickles the file - only load trusted data.
        self.dna = torch.load(f'{dataset_path}/dataset_dna', map_location='cpu')
        # Fall back to the notebook-level configuration when no explicit value
        # is given, so existing two-argument callers behave as before.
        self.learning_rate = (
            hyperparameters['lr'] if learning_rate is None else learning_rate
        )
        self.weight_decay = (
            hyperparameters['weight decay'] if weight_decay is None else weight_decay
        )

    def mse(self, x, y):
        """Return the mean squared error between ``x`` and ``y``."""
        return F.mse_loss(x, y)

    def training_step(self, batch, batch_idx):
        """Compute the MSE loss for one batch.

        Args:
            batch: Pair ``(images, dnas)``; each element of ``dnas`` is used as
                a key/index into ``self.dna`` to fetch the target tensor.
            batch_idx: Index of the batch within the epoch; not used here.

        Returns:
            Dict with the scalar training loss under ``'loss'``.
        """
        images, dnas = batch
        predictions = self.model(images)

        # Targets are looked up from the host-side table, so move them next to
        # the predictions before computing the loss (no-op if already there).
        targets = torch.stack([self.dna[i] for i in dnas]).to(predictions.device)

        loss = self.mse(predictions, targets)
        return {'loss': loss}

    def training_epoch_end(self, outputs):
        """Log the square root of the epoch-mean training loss under ``'loss'``.

        This hook was previously misspelled ``trainning_epoch_end`` and was
        therefore never invoked by Lightning. It returns ``None`` because
        Lightning 1.x rejects a return value from this hook.

        Args:
            outputs: List of the dicts returned by ``training_step``.
        """
        if not outputs:
            # Nothing to average (e.g. an epoch with zero batches).
            return
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        self.log('loss', torch.sqrt(avg_loss), prog_bar=True)

    def trainning_epoch_end(self, outputs):
        """Backward-compatible wrapper for the old, misspelled method name.

        Delegates to ``training_epoch_end`` and returns the epoch-mean loss in
        a dict, as the original method did.
        """
        self.training_epoch_end(outputs)
        return {'loss': torch.stack([x['loss'] for x in outputs]).mean()}

    def configure_optimizers(self):
        """Return an RMSprop optimizer over the wrapped model's parameters."""
        optimizer = torch.optim.RMSprop(
            self.model.parameters(),
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
        )
        return optimizer

                                                   

In [70]:
# Build the data module, the Lightning-wrapped ResNet50 and the trainer that
# the training cell below consumes.
data_module = Dataset(
    batch_size=hyperparameters['batch size'],
    val_ratio=hyperparameters['val ratio'],
    dataset_path='./dataset',
)
model = Net(
    model=resnet.ResNet50(img_channel=3, num_features=101),
    dataset_path='./dataset',
)
trainer = Trainer(max_epochs=hyperparameters['epochs'], gpus=1)

  torch.nn.init.xavier_uniform(layer)


Epoch 0:   0%|          | 0/1110 [01:13<?, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs





In [71]:
# Start training; the data module is handed over through the explicit keyword.
trainer.fit(model, datamodule=data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 23.7 M
---------------------------------
23.7 M    Trainable params
0         Non-trainable params
23.7 M    Total params
94.860    Total estimated model params size (MB)
  rank_zero_warn(


Epoch 0:   0%|          | 0/1110 [00:00<?, ?it/s] 

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 4.00 GiB total capacity; 2.50 GiB already allocated; 0 bytes free; 2.58 GiB reserved in total by PyTorch)