<a href="https://colab.research.google.com/github/E1250/other-support-ref/blob/main/PyTorch_Lightning_Course.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

* Youtube Playlist - https://www.youtube.com/playlist?list=PLhhyoLH6IjfyL740PTuXef4TstxAK6nGP
* ML Collections Github Repo - https://github.com/aladdinpersson/Machine-Learning-Collection
* Deep Learning for Computer Vision (Stanford) - http://cs231n.stanford.edu/
* Natural Language Processing with Deep Learning (Stanford) - https://web.stanford.edu/class/cs224n/

# Why Lightning
* Youtube Video - https://www.youtube.com/watch?v=XbIN9LaQycQ&list=PLhhyoLH6IjfyL740PTuXef4TstxAK6nGP
* PyTorch Lightning - https://lightning.ai/
> When you are considering training on a cluster with **multiple GPUs**, or you have your **own setup with multiple GPUs** and want to do **distributed training** or **TPU training**, it's a good time to use PyTorch Lightning.

# Lightning Module
* Youtube Video - https://www.youtube.com/watch?v=HGF2iyThWT8&list=PLhhyoLH6IjfyL740PTuXef4TstxAK6nGP&index=2

In [None]:
!pip install lightning

In [24]:
import lightning.pytorch as pl
from torch import nn , optim
import torch
from torchvision import datasets , transforms
from torch.utils.data import random_split , DataLoader
from tqdm import tqdm

In [None]:
??trainer

In [None]:
??pl.LightningModule

In [7]:
class NN(pl.LightningModule):
  """Two-layer fully connected classifier packaged as a LightningModule.

  Args:
    input_size: Number of features per sample once the input is flattened.
    num_classes: Number of output classes (length of the score vector).
    learning_rate: Learning rate given to Adam in ``configure_optimizers``.
      Defaults to 0.001, the value that was previously hard-coded.
  """

  def __init__(self, input_size, num_classes, learning_rate=0.001):
    super().__init__()
    self.fc1 = nn.Linear(input_size, 50)
    self.fc2 = nn.Linear(50, num_classes)
    self.loss_fn = nn.CrossEntropyLoss()
    self.learning_rate = learning_rate

  def forward(self, x):
    """Return the raw class scores for a batch of flattened inputs."""
    x = nn.functional.relu(self.fc1(x))
    return self.fc2(x)

  @staticmethod
  def _flatten(x):
    """Collapse every dimension after the batch dimension into one."""
    return x.reshape(x.size(0), -1)

  def training_step(self, batch, batch_idx):
    """Compute the loss for one training batch and log it as ``train_loss``."""
    loss, scores, y = self._common_step(batch, batch_idx)
    self.log("train_loss", loss)
    return loss

  def validation_step(self, batch, batch_idx):
    """Compute the loss for one validation batch and log it as ``validation_loss``."""
    loss, scores, y = self._common_step(batch, batch_idx)
    self.log("validation_loss", loss)
    return loss

  def test_step(self, batch, batch_idx):
    """Compute the loss for one test batch and log it as ``test_loss``."""
    loss, scores, y = self._common_step(batch, batch_idx)
    self.log("test_loss", loss)
    return loss

  def _common_step(self, batch, batch_idx):
    """Shared forward pass: returns ``(loss, scores, targets)`` for a batch."""
    x, y = batch
    # Call the module itself (not ``forward``) so registered hooks still run.
    scores = self(self._flatten(x))
    loss = self.loss_fn(scores, y)
    return loss, scores, y

  def predict_step(self, batch, batch_idx):
    """Return the predicted class index for every sample in the batch."""
    x, y = batch
    scores = self(self._flatten(x))
    return torch.argmax(scores, dim=1)

  def configure_optimizers(self):
    """Build the Adam optimizer over all parameters of this module."""
    return optim.Adam(self.parameters(), lr=self.learning_rate)

In [8]:
# Choose the compute device: CUDA when a GPU is available, otherwise the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = "cpu"
device

'cuda'

In [None]:
# Hyperparameters
input_size = 784  # features per sample after flattening
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 3

# Load data: the MNIST training set is split 50000/10000 into train/validation
# subsets; the MNIST test set is loaded separately.
entire_dataset = datasets.MNIST(
    root='dataset/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
train_ds, val_ds = random_split(entire_dataset, [50000, 10000])
test_ds = datasets.MNIST(
    root='dataset/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

# Initialize the network and move it to the selected device
model = NN(input_size=input_size, num_classes=num_classes).to(device)

# Loss and optimizer.
# NOTE(review): the Trainer cells in this notebook do not reference these two
# objects; the module defines its own loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Trainer

In [None]:
# Trainer on GPU 0 with 16-bit precision, running between 1 and 3 epochs.
trainer = pl.Trainer(
    accelerator='gpu',
    devices=[0],
    min_epochs=1,
    max_epochs=3,
    precision=16,
)
# trainer.tune()  # left disabled: would search for good hyperparameters

# Train, then evaluate on the validation and test loaders.
trainer.fit(model, train_loader, val_loader)
trainer.validate(model, val_loader)
trainer.test(model, test_loader)  # Run before deploying

INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO:lightning.pytorch.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name    | Type             | Params
---------------------------------------------
0 | fc1     | Linear           | 39.2 K
1 | fc2     | Linear           | 510   
2 | loss_fn | CrossEntropyLoss | 0     
-------------------

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=3` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.


# Metrics
* Youtube video - https://www.youtube.com/watch?v=e6Nw01v2X4s&list=PLhhyoLH6IjfyL740PTuXef4TstxAK6nGP&index=4

In [None]:
!pip install torchmetrics

In [34]:
from torchmetrics import Metric
import torchmetrics

In [35]:
class MyAccuracy(Metric):
  """Accuracy metric: share of samples whose highest-scoring class equals the target.

  Keeps two running counters, ``correct`` and ``total``, registered with
  ``add_state`` and summed when states are reduced across processes.
  """

  def __init__(self):
    super().__init__()
    self.add_state("correct", default=torch.tensor(0), dist_reduce_fx='sum')
    self.add_state("total", default=torch.tensor(0), dist_reduce_fx='sum')

  def update(self, preds, target):
    """Accumulate counts for one batch.

    Args:
      preds: Per-class scores; the arg-max over dim 1 is taken as the prediction.
      target: Ground-truth class indices, same shape as the arg-max result.

    Raises:
      ValueError: If the predicted labels and ``target`` differ in shape.
    """
    preds = torch.argmax(preds, dim=1)
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if preds.shape != target.shape:
      raise ValueError(
          f"preds and target must have the same shape, got {tuple(preds.shape)} and {tuple(target.shape)}"
      )
    self.correct += torch.sum(preds == target)
    self.total += target.numel()

  def compute(self):
    """Return ``correct / total`` as a float tensor."""
    return self.correct.float() / self.total.float()

In [36]:
class NN(pl.LightningModule):
  """Fully connected classifier that also reports training metrics.

  Besides the cross-entropy loss, the training step logs the custom
  ``MyAccuracy`` metric and the torchmetrics multiclass F1 score.
  """

  def __init__(self, input_size, num_classes):
    super().__init__()
    hidden_units = 50
    self.fc1 = nn.Linear(input_size, hidden_units)
    self.fc2 = nn.Linear(hidden_units, num_classes)
    self.loss_fn = nn.CrossEntropyLoss()

    self.my_accuracy = MyAccuracy()
    # Alternative: torchmetrics.Accuracy(task='multiclass', num_classes=num_classes)
    self.f1_score = torchmetrics.F1Score(task='multiclass', num_classes=num_classes)

  def forward(self, x):
    """Map flattened inputs to raw class scores."""
    hidden = nn.functional.relu(self.fc1(x))
    return self.fc2(hidden)

  def training_step(self, batch, batch_idx):
    """Run one training batch and log loss, accuracy and F1 once per epoch."""
    loss, scores, y = self._common_step(batch, batch_idx)
    # Evaluating the metrics on every batch adds noticeable per-step compute.
    train_metrics = {
        "train_loss": loss,
        'train_accuracy': self.my_accuracy(scores, y),
        'train_f1_score': self.f1_score(scores, y),
    }
    self.log_dict(train_metrics, logger=True, prog_bar=True, on_epoch=True, on_step=False)
    return loss

  def validation_step(self, batch, batch_idx):
    """Run one validation batch and log ``validation_loss``."""
    loss, _scores, _y = self._common_step(batch, batch_idx)
    self.log("validation_loss", loss)
    return loss

  def test_step(self, batch, batch_idx):
    """Run one test batch and log ``test_loss``."""
    loss, _scores, _y = self._common_step(batch, batch_idx)
    self.log("test_loss", loss)
    return loss

  def _common_step(self, batch, batch_idx):
    """Flatten the inputs, score them and return ``(loss, scores, targets)``."""
    inputs, targets = batch
    flat = inputs.reshape(inputs.size(0), -1)
    logits = self.forward(flat)
    return self.loss_fn(logits, targets), logits, targets

  def predict_step(self, batch, batch_idx):
    """Return the arg-max class index for each sample in the batch."""
    inputs, _ = batch
    logits = self.forward(inputs.reshape(inputs.size(0), -1))
    return torch.argmax(logits, dim=1)

  def configure_optimizers(self):
    """Adam over all parameters with a fixed learning rate of 0.001."""
    return optim.Adam(self.parameters(), lr=0.001)

In [37]:
# Fresh model and trainer for the metrics-enabled module.
model = NN(input_size=input_size, num_classes=num_classes).to(device)
trainer = pl.Trainer(
    accelerator='gpu',
    devices=[0],
    min_epochs=1,
    max_epochs=3,
    precision=16,
)

# Train, then evaluate on the validation and test loaders.
trainer.fit(model, train_loader, val_loader)
trainer.validate(model, val_loader)
trainer.test(model, test_loader)  # Run before deploying

  rank_zero_warn(
INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO:lightning.pytorch.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name        | Type              | Params
--------------------------------------------------
0 | fc1         | Linear            | 39.2 K
1 | fc2         | Linear            | 510   
2 | loss_fn     | Cro

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=3` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: 0it [00:00, ?it/s]

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.1565074771642685}]

# Data Module

In [None]:
# class CustomDataset(Dataset):  # i.e. torch.utils.data.Dataset
#   def __init__(self):
#     pass

#   def __len__(self):
#     pass

#   def __getitem__(self, index):
#     pass

In [38]:
class MNISTDataModule(pl.LightningDataModule):
  """LightningDataModule that downloads MNIST and serves train/val/test loaders.

  Args:
    data_dir: Directory where MNIST is downloaded to and read from.
    batch_size: Batch size used by every dataloader.
    num_workers: Number of worker processes used by every dataloader.
  """

  # Fixed seed so every ``setup`` call reproduces the same train/val split.
  _SPLIT_SEED = 42

  def __init__(self, data_dir, batch_size, num_workers):
    super().__init__()
    self.data_dir = data_dir
    self.batch_size = batch_size
    self.num_workers = num_workers

  def prepare_data(self):
    """Download both MNIST splits; nothing is assigned to ``self`` here."""
    datasets.MNIST(self.data_dir, train=True, download=True)
    datasets.MNIST(self.data_dir, train=False, download=True)

  def setup(self, stage=None):
    """Create the train/val/test datasets from the already-downloaded files.

    ``setup`` runs again for each of fit/validate/test (and on each process
    when training on several devices). The split is therefore seeded:
    an unseeded ``random_split`` would produce a different partition on each
    call and let training samples end up in the validation subset.
    """
    entire_dataset = datasets.MNIST(
        self.data_dir, train=True, download=False, transform=transforms.ToTensor()
    )
    self.train_ds, self.val_ds = random_split(
        entire_dataset,
        [50000, 10000],
        generator=torch.Generator().manual_seed(self._SPLIT_SEED),
    )
    self.test_ds = datasets.MNIST(
        self.data_dir, train=False, download=False, transform=transforms.ToTensor()
    )

  def _make_loader(self, dataset, shuffle):
    """Build a DataLoader with this module's batch size and worker count."""
    return DataLoader(
        dataset,
        batch_size=self.batch_size,
        num_workers=self.num_workers,
        shuffle=shuffle,
    )

  def train_dataloader(self):
    """Shuffled loader over the training subset."""
    return self._make_loader(self.train_ds, shuffle=True)

  def val_dataloader(self):
    """Loader over the validation subset; evaluation data is not shuffled."""
    return self._make_loader(self.val_ds, shuffle=False)

  def test_dataloader(self):
    """Loader over the test set; evaluation data is not shuffled."""
    return self._make_loader(self.test_ds, shuffle=False)

In [39]:
# Same training run as before, but the data now comes from the data module.
model = NN(input_size=input_size, num_classes=num_classes).to(device)
dm = MNISTDataModule(data_dir='dataset/', num_workers=4, batch_size=64)

trainer = pl.Trainer(
    accelerator='gpu',
    devices=[0],
    min_epochs=1,
    max_epochs=3,
    precision=16,
)
trainer.fit(model, dm)
trainer.validate(model, dm)
trainer.test(model, dm)  # Run before deploying

INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO:lightning.pytorch.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name        | Type              | Params
--------------------------------------------------
0 | fc1         | Linear            | 39.2 K
1 | fc2         | Linear            | 510   
2 | loss_fn     | CrossEntropyLoss  | 0

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=3` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: 0it [00:00, ?it/s]

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.16052767634391785}]

# Code Structure
* Youtube Video - https://www.youtube.com/watch?v=UtQoZ_v57uI&list=PLhhyoLH6IjfyL740PTuXef4TstxAK6nGP&index=6

## Dataset.py

In [40]:
# %%writefile dataset.py

class MNISTDataModule(pl.LightningDataModule):
  """LightningDataModule that downloads MNIST and serves train/val/test loaders.

  Args:
    data_dir: Directory where MNIST is downloaded to and read from.
    batch_size: Batch size used by every dataloader.
    num_workers: Number of worker processes used by every dataloader.
  """

  # Fixed seed so every ``setup`` call reproduces the same train/val split.
  _SPLIT_SEED = 42

  def __init__(self, data_dir, batch_size, num_workers):
    super().__init__()
    self.data_dir = data_dir
    self.batch_size = batch_size
    self.num_workers = num_workers

  def prepare_data(self):
    """Download both MNIST splits; nothing is assigned to ``self`` here."""
    datasets.MNIST(self.data_dir, train=True, download=True)
    datasets.MNIST(self.data_dir, train=False, download=True)

  def setup(self, stage=None):
    """Create the train/val/test datasets from the already-downloaded files.

    ``setup`` runs again for each of fit/validate/test (and on each process
    when training on several devices). The split is therefore seeded:
    an unseeded ``random_split`` would produce a different partition on each
    call and let training samples end up in the validation subset.
    """
    entire_dataset = datasets.MNIST(
        self.data_dir, train=True, download=False, transform=transforms.ToTensor()
    )
    self.train_ds, self.val_ds = random_split(
        entire_dataset,
        [50000, 10000],
        generator=torch.Generator().manual_seed(self._SPLIT_SEED),
    )
    self.test_ds = datasets.MNIST(
        self.data_dir, train=False, download=False, transform=transforms.ToTensor()
    )

  def _make_loader(self, dataset, shuffle):
    """Build a DataLoader with this module's batch size and worker count."""
    return DataLoader(
        dataset,
        batch_size=self.batch_size,
        num_workers=self.num_workers,
        shuffle=shuffle,
    )

  def train_dataloader(self):
    """Shuffled loader over the training subset."""
    return self._make_loader(self.train_ds, shuffle=True)

  def val_dataloader(self):
    """Loader over the validation subset; evaluation data is not shuffled."""
    return self._make_loader(self.val_ds, shuffle=False)

  def test_dataloader(self):
    """Loader over the test set; evaluation data is not shuffled."""
    return self._make_loader(self.test_ds, shuffle=False)

## Model.py

In [41]:
# %%writefile model.py

class NN(pl.LightningModule):
  """Fully connected classifier with a configurable learning rate.

  The training step logs the loss together with torchmetrics multiclass
  accuracy and F1; validation and test steps log their loss only.
  """

  def __init__(self, input_size, learning_rate, num_classes):
    super().__init__()
    hidden_units = 50
    self.fc1 = nn.Linear(input_size, hidden_units)
    self.fc2 = nn.Linear(hidden_units, num_classes)
    self.loss_fn = nn.CrossEntropyLoss()
    self.learning_rate = learning_rate

    metric_kwargs = {'task': 'multiclass', 'num_classes': num_classes}
    self.accuracy = torchmetrics.Accuracy(**metric_kwargs)
    self.f1_score = torchmetrics.F1Score(**metric_kwargs)

  def forward(self, x):
    """Map flattened inputs to raw class scores."""
    hidden = nn.functional.relu(self.fc1(x))
    return self.fc2(hidden)

  def training_step(self, batch, batch_idx):
    """Run one training batch and log loss, accuracy and F1 once per epoch."""
    loss, scores, y = self._common_step(batch, batch_idx)
    # Evaluating the metrics on every batch adds noticeable per-step compute.
    train_metrics = {
        "train_loss": loss,
        'train_accuracy': self.accuracy(scores, y),
        'train_f1_score': self.f1_score(scores, y),
    }
    self.log_dict(train_metrics, logger=True, prog_bar=True, on_epoch=True, on_step=False)
    return loss

  def validation_step(self, batch, batch_idx):
    """Run one validation batch and log ``validation_loss``."""
    loss, _scores, _y = self._common_step(batch, batch_idx)
    self.log("validation_loss", loss)
    return loss

  def test_step(self, batch, batch_idx):
    """Run one test batch and log ``test_loss``."""
    loss, _scores, _y = self._common_step(batch, batch_idx)
    self.log("test_loss", loss)
    return loss

  def _common_step(self, batch, batch_idx):
    """Flatten the inputs, score them and return ``(loss, scores, targets)``."""
    inputs, targets = batch
    flat = inputs.reshape(inputs.size(0), -1)
    logits = self.forward(flat)
    return self.loss_fn(logits, targets), logits, targets

  def predict_step(self, batch, batch_idx):
    """Return the arg-max class index for each sample in the batch."""
    inputs, _ = batch
    logits = self.forward(inputs.reshape(inputs.size(0), -1))
    return torch.argmax(logits, dim=1)

  def configure_optimizers(self):
    """Adam over all parameters using the learning rate given at construction."""
    return optim.Adam(self.parameters(), lr=self.learning_rate)

## Config.py

In [42]:
# %%writefile config.py
# Training hyperparameters
INPUT_SIZE = 784
NUM_CLASSES = 10
LEARNING_RATE = 0.001
BATCH_SIZE = 64
NUM_EPOCHS = 3

# Dataset location and loader workers
DATA_DIR = 'dataset/'
NUM_WORKERS = 4

# Compute settings
# Use CUDA when a GPU is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = "cpu"
print(device)

# Values passed to the Trainer: accelerator type, device count, precision.
ACCELERATOR = 'gpu'
DEVICES = 2
PRECISION = 16

cuda


## Metrics.py

In [43]:
# %%writefile metrics.py

class MyAccuracy(Metric):
  """Accuracy metric: share of samples whose highest-scoring class equals the target.

  Keeps two running counters, ``correct`` and ``total``, registered with
  ``add_state`` and summed when states are reduced across processes.
  """

  def __init__(self):
    super().__init__()
    self.add_state("correct", default=torch.tensor(0), dist_reduce_fx='sum')
    self.add_state("total", default=torch.tensor(0), dist_reduce_fx='sum')

  def update(self, preds, target):
    """Accumulate counts for one batch.

    Args:
      preds: Per-class scores; the arg-max over dim 1 is taken as the prediction.
      target: Ground-truth class indices, same shape as the arg-max result.

    Raises:
      ValueError: If the predicted labels and ``target`` differ in shape.
    """
    preds = torch.argmax(preds, dim=1)
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if preds.shape != target.shape:
      raise ValueError(
          f"preds and target must have the same shape, got {tuple(preds.shape)} and {tuple(target.shape)}"
      )
    self.correct += torch.sum(preds == target)
    self.total += target.numel()

  def compute(self):
    """Return ``correct / total`` as a float tensor."""
    return self.correct.float() / self.total.float()

## Train.py

In [44]:
# %%writefile train.py
# Build the model, data module and trainer from the values in `config`.
model = NN(
    input_size=config.INPUT_SIZE,
    learning_rate=config.LEARNING_RATE,
    num_classes=config.NUM_CLASSES,
).to(device)
dm = MNISTDataModule(
    data_dir=config.DATA_DIR,
    num_workers=config.NUM_WORKERS,
    batch_size=config.BATCH_SIZE,
)

trainer = pl.Trainer(
    accelerator=config.ACCELERATOR,
    devices=config.DEVICES,
    min_epochs=1,
    max_epochs=config.NUM_EPOCHS,
    precision=config.PRECISION,
)
trainer.fit(model, dm)
trainer.validate(model, dm)
trainer.test(model, dm)  # Run before deploying

  rank_zero_warn(
INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO:lightning.pytorch.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name     | Type               | Params
------------------------------------------------
0 | fc1      | Linear             | 39.2 K
1 | fc2      | Linear             | 510   
2 | loss_fn  | CrossEntropyLo

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=3` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: 0it [00:00, ?it/s]

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.16116371750831604}]

In [44]:
# to reformat all files
# !pip install black
# !black .

# Callbacks
* Youtube Video - https://www.youtube.com/watch?v=Wcze6oGch1g&list=PLhhyoLH6IjfyL740PTuXef4TstxAK6nGP&index=7
* Bolts - https://lightning.ai/docs/pytorch/stable/ecosystem/bolts.html

In [48]:
??lightning

Object `lightning` not found.


In [49]:
# %%writefile callbacks.py

from lightning.pytorch.callbacks import EarlyStopping , Callback

class MyPrintingCallbacks(Callback):
  """Callback that prints a message when training starts and when it ends."""

  def __init__(self):
    # Was spelled ``___init`` before, so it never acted as the constructor.
    super().__init__()

  def on_train_start(self, trainer, pl_module):
    """Announce the start of training."""
    print('Starting to train!.')

  def on_train_end(self, trainer, pl_module):
    """Announce the end of training."""
    print("Training is Done.")

In [None]:
# %%writefile train.py
torch.set_float32_matmul_precision('medium')  # To make lightning happy
model = NN(
    input_size=config.INPUT_SIZE,
    learning_rate=config.LEARNING_RATE,
    num_classes=config.NUM_CLASSES,
).to(device)
dm = MNISTDataModule(
    data_dir=config.DATA_DIR,
    num_workers=config.NUM_WORKERS,
    batch_size=config.BATCH_SIZE,
)

trainer = pl.Trainer(
    accelerator=config.ACCELERATOR,
    devices=config.DEVICES,
    min_epochs=1,
    max_epochs=config.NUM_EPOCHS,
    precision=config.PRECISION,
    callbacks=[
        MyPrintingCallbacks(),
        # Must match the key logged in NN.validation_step ("validation_loss");
        # no metric named 'val_loss' is ever logged.
        EarlyStopping(monitor='validation_loss'),
    ],
)
trainer.fit(model, dm)
trainer.validate(model, dm)
trainer.test(model, dm)  # Run before deploying

# Logging with TensorBoard

In [None]:
# %%writefile dataset.py
from torchvision.transforms import RandomHorizontalFlip , RandomVerticalFlip

class MNISTDataModule(pl.LightningDataModule):
  """LightningDataModule for MNIST with random-flip augmentation on the training data.

  Args:
    data_dir: Directory where MNIST is downloaded to and read from.
    batch_size: Batch size used by every dataloader.
    num_workers: Number of worker processes used by every dataloader.
  """

  # Fixed seed so every ``setup`` call reproduces the same train/val split.
  _SPLIT_SEED = 42

  def __init__(self, data_dir, batch_size, num_workers):
    super().__init__()
    self.data_dir = data_dir
    self.batch_size = batch_size
    self.num_workers = num_workers

  def prepare_data(self):
    """Download both MNIST splits; nothing is assigned to ``self`` here."""
    datasets.MNIST(self.data_dir, train=True, download=True)
    datasets.MNIST(self.data_dir, train=False, download=True)

  def setup(self, stage=None):
    """Create the train/val/test datasets from the already-downloaded files."""
    # Compose needs transform *instances*; passing the classes themselves
    # makes the pipeline fail as soon as a sample is loaded.
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
    ])
    entire_dataset = datasets.MNIST(
        self.data_dir, train=True, download=False, transform=train_transform
    )
    # Seeded so repeated ``setup`` calls (fit/validate/test, or several
    # processes) agree on the partition. NOTE: the validation subset wraps the
    # same dataset object, so it receives the augmenting transform as well.
    self.train_ds, self.val_ds = random_split(
        entire_dataset,
        [50000, 10000],
        generator=torch.Generator().manual_seed(self._SPLIT_SEED),
    )
    # The test set is evaluated without random augmentation.
    self.test_ds = datasets.MNIST(
        self.data_dir, train=False, download=False, transform=transforms.ToTensor()
    )

  def _make_loader(self, dataset, shuffle):
    """Build a DataLoader with this module's batch size and worker count."""
    return DataLoader(
        dataset,
        batch_size=self.batch_size,
        num_workers=self.num_workers,
        shuffle=shuffle,
    )

  def train_dataloader(self):
    """Shuffled loader over the training subset."""
    return self._make_loader(self.train_ds, shuffle=True)

  def val_dataloader(self):
    """Loader over the validation subset; evaluation data is not shuffled."""
    return self._make_loader(self.val_ds, shuffle=False)

  def test_dataloader(self):
    """Loader over the test set; evaluation data is not shuffled."""
    return self._make_loader(self.test_ds, shuffle=False)

In [None]:
# %%writefile model.py

class NN(pl.LightningModule):
  """Fully connected classifier that also logs sample input images.

  Args:
    input_size: Number of features per sample once the input is flattened.
    learning_rate: Learning rate given to Adam in ``configure_optimizers``.
    num_classes: Number of output classes.
  """

  def __init__(self, input_size, learning_rate, num_classes):
    super().__init__()
    self.fc1 = nn.Linear(input_size, 50)
    self.fc2 = nn.Linear(50, num_classes)
    self.loss_fn = nn.CrossEntropyLoss()
    self.learning_rate = learning_rate

    self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes)
    self.f1_score = torchmetrics.F1Score(task='multiclass', num_classes=num_classes)

  def forward(self, x):
    """Return the raw class scores for a batch of flattened inputs."""
    x = nn.functional.relu(self.fc1(x))
    x = self.fc2(x)
    return x

  def training_step(self, batch, batch_idx):
    """Run one training batch, log metrics, and every 100th batch log an image grid."""
    x, y = batch
    loss, scores, y = self._common_step(batch, batch_idx)
    # Evaluating the metrics on every batch adds noticeable per-step compute.
    accuracy = self.accuracy(scores, y)
    f1_score = self.f1_score(scores, y)
    self.log_dict(
        {"train_loss": loss, 'train_accuracy': accuracy, 'train_f1_score': f1_score},
        logger=True, prog_bar=True, on_epoch=True, on_step=False,
    )

    if batch_idx % 100 == 0:
      # Imported here because the file only imports `datasets` and
      # `transforms` from torchvision, not the package name itself.
      from torchvision.utils import make_grid

      # First 8 samples of the batch, viewed as 1-channel 28x28 images.
      grid = make_grid(x[:8].view(-1, 1, 28, 28))
      # `global_step` (was misspelled `gobal_step`) is the logging step index.
      self.logger.experiment.add_image('mnist_images', grid, self.global_step)
    return loss

  def validation_step(self, batch, batch_idx):
    """Run one validation batch and log ``validation_loss``."""
    loss, scores, y = self._common_step(batch, batch_idx)
    self.log("validation_loss", loss)
    return loss

  def test_step(self, batch, batch_idx):
    """Run one test batch and log ``test_loss``."""
    loss, scores, y = self._common_step(batch, batch_idx)
    self.log("test_loss", loss)
    return loss

  def _common_step(self, batch, batch_idx):
    """Flatten the inputs, score them and return ``(loss, scores, targets)``."""
    x, y = batch
    x = x.reshape(x.size(0), -1)
    scores = self.forward(x)
    loss = self.loss_fn(scores, y)
    return loss, scores, y

  def predict_step(self, batch, batch_idx):
    """Return the arg-max class index for each sample in the batch."""
    x, y = batch
    x = x.reshape(x.size(0), -1)
    scores = self.forward(x)
    preds = torch.argmax(scores, dim=1)
    return preds

  def configure_optimizers(self):
    """Adam over all parameters using the learning rate given at construction."""
    return optim.Adam(self.parameters(), lr=self.learning_rate)

In [None]:
# %%writefile train.py
from lightning.pytorch.loggers import TensorBoardLogger

# TensorBoard logger writing runs under tb_logs/my_model.
logger = TensorBoardLogger('tb_logs', name='my_model')
torch.set_float32_matmul_precision('medium')  # To make lightning happy
model = NN(
    input_size=config.INPUT_SIZE,
    learning_rate=config.LEARNING_RATE,
    num_classes=config.NUM_CLASSES,
).to(device)
dm = MNISTDataModule(
    data_dir=config.DATA_DIR,
    num_workers=config.NUM_WORKERS,
    batch_size=config.BATCH_SIZE,
)

trainer = pl.Trainer(
    accelerator=config.ACCELERATOR,
    logger=logger,
    devices=config.DEVICES,
    min_epochs=1,
    max_epochs=config.NUM_EPOCHS,
    precision=config.PRECISION,
    callbacks=[
        MyPrintingCallbacks(),
        # Must match the key logged in NN.validation_step ("validation_loss");
        # no metric named 'val_loss' is ever logged.
        EarlyStopping(monitor='validation_loss'),
    ],
)
trainer.fit(model, dm)
trainer.validate(model, dm)
trainer.test(model, dm)  # Run before deploying

# Profiler
* Youtube Video - https://www.youtube.com/watch?v=MHjlaNoWy78&list=PLhhyoLH6IjfyL740PTuXef4TstxAK6nGP&index=9

In [None]:
# Profiler with tensorboard
# !pip install torch-tb-profiler

In [None]:
# %%writefile train.py
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.profilers import PyTorchProfiler

# PyTorch profiler whose traces are written for the TensorBoard profiler plugin.
profiler = PyTorchProfiler(
    # output_filename = 'profiler.txt',
    on_trace_ready=torch.profiler.tensorboard_trace_handler('tb_logs/profiler0'),
    # `profile_memory` is the torch.profiler option for memory tracking; the
    # previous misspelled `trace_memroy` keyword never enabled it.
    profile_memory=True,
    schedule=torch.profiler.schedule(skip_first=10, wait=1, warmup=1, active=20),
)

logger = TensorBoardLogger('tb_logs', name='my_model')
torch.set_float32_matmul_precision('medium')  # To make lightning happy
model = NN(
    input_size=config.INPUT_SIZE,
    learning_rate=config.LEARNING_RATE,
    num_classes=config.NUM_CLASSES,
).to(device)
dm = MNISTDataModule(
    data_dir=config.DATA_DIR,
    num_workers=config.NUM_WORKERS,
    batch_size=config.BATCH_SIZE,
)

trainer = pl.Trainer(
    # profiler = 'simple', # ['simple' ,"advanced"]
    profiler=profiler,  # the missing comma after this argument was a SyntaxError
    accelerator=config.ACCELERATOR,
    logger=logger,
    devices=config.DEVICES,
    min_epochs=1,
    max_epochs=config.NUM_EPOCHS,
    precision=config.PRECISION,
    callbacks=[
        MyPrintingCallbacks(),
        # Must match the key logged in NN.validation_step ("validation_loss");
        # no metric named 'val_loss' is ever logged.
        EarlyStopping(monitor='validation_loss'),
    ],
)
trainer.fit(model, dm)
trainer.validate(model, dm)
trainer.test(model, dm)  # Run before deploying

In [None]:
# %%writefile model.py

class NN(pl.LightningModule):
  """Fully connected classifier that computes training metrics once per epoch.

  The training step only logs the loss and stores its scores and targets;
  accuracy and F1 are computed over the stored values in
  ``on_train_epoch_end`` instead of on every batch.

  Args:
    input_size: Number of features per sample once the input is flattened.
    learning_rate: Learning rate given to Adam in ``configure_optimizers``.
    num_classes: Number of output classes.
  """

  def __init__(self, input_size, learning_rate, num_classes):
    super().__init__()
    self.fc1 = nn.Linear(input_size, 50)
    self.fc2 = nn.Linear(50, num_classes)
    self.loss_fn = nn.CrossEntropyLoss()
    self.learning_rate = learning_rate

    self.accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes)
    self.f1_score = torchmetrics.F1Score(task='multiclass', num_classes=num_classes)

    # Lightning 2.x no longer passes step outputs to the epoch-end hook, so
    # the module keeps them itself between the start and end of an epoch.
    self._epoch_scores = []
    self._epoch_targets = []

  def forward(self, x):
    """Return the raw class scores for a batch of flattened inputs."""
    x = nn.functional.relu(self.fc1(x))
    x = self.fc2(x)
    return x

  def training_step(self, batch, batch_idx):
    """Run one training batch, log the loss and stash outputs for epoch-end metrics."""
    x, y = batch
    loss, scores, y = self._common_step(batch, batch_idx)

    self.log_dict({"train_loss": loss}, logger=True, prog_bar=True, on_epoch=True, on_step=False)

    # Detach so the stored scores do not keep the autograd graph alive.
    self._epoch_scores.append(scores.detach())
    self._epoch_targets.append(y)

    if batch_idx % 100 == 0:
      # Imported here because the file only imports `datasets` and
      # `transforms` from torchvision, not the package name itself.
      from torchvision.utils import make_grid

      # First 8 samples of the batch, viewed as 1-channel 28x28 images.
      grid = make_grid(x[:8].view(-1, 1, 28, 28))
      # `global_step` (was misspelled `gobal_step`) is the logging step index.
      self.logger.experiment.add_image('mnist_images', grid, self.global_step)
    return loss

  def on_train_epoch_end(self):
    """Compute accuracy and F1 over everything collected during the epoch."""
    if not self._epoch_scores:
      return
    scores = torch.cat(self._epoch_scores)
    y = torch.cat(self._epoch_targets)
    self._epoch_scores.clear()
    self._epoch_targets.clear()

    self.log_dict(
        {
            "train_acc": self.accuracy(scores, y),
            "train_f1": self.f1_score(scores, y),
        },
        on_step=False,
        on_epoch=True,
        prog_bar=True,
    )
    # Plain tensors were logged, so clear the metric objects' accumulated
    # state by hand before the next epoch.
    self.accuracy.reset()
    self.f1_score.reset()

  def validation_step(self, batch, batch_idx):
    """Run one validation batch and log ``validation_loss``."""
    loss, scores, y = self._common_step(batch, batch_idx)
    self.log("validation_loss", loss)
    return loss

  def test_step(self, batch, batch_idx):
    """Run one test batch and log ``test_loss``."""
    loss, scores, y = self._common_step(batch, batch_idx)
    self.log("test_loss", loss)
    return loss

  def _common_step(self, batch, batch_idx):
    """Flatten the inputs, score them and return ``(loss, scores, targets)``."""
    x, y = batch
    x = x.reshape(x.size(0), -1)
    scores = self.forward(x)
    loss = self.loss_fn(scores, y)
    return loss, scores, y

  def predict_step(self, batch, batch_idx):
    """Return the arg-max class index for each sample in the batch."""
    x, y = batch
    x = x.reshape(x.size(0), -1)
    scores = self.forward(x)
    preds = torch.argmax(scores, dim=1)
    return preds

  def configure_optimizers(self):
    """Adam over all parameters using the learning rate given at construction."""
    return optim.Adam(self.parameters(), lr=self.learning_rate)

# Multi GPU Training
* Youtube Video - https://www.youtube.com/watch?v=70oXdSMcj9c&list=PLhhyoLH6IjfyL740PTuXef4TstxAK6nGP&index=10

In [None]:
# !pip install deepspeed

In [None]:
# %%writefile train.py

from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.profilers import PyTorchProfiler
# Trainer strategies live under `lightning.pytorch.strategies`.
from lightning.pytorch.strategies import DeepSpeedStrategy

# PyTorch profiler whose traces are written for the TensorBoard profiler plugin.
profiler = PyTorchProfiler(
    # output_filename = 'profiler.txt',
    on_trace_ready=torch.profiler.tensorboard_trace_handler('tb_logs/profiler0'),
    # `profile_memory` is the torch.profiler option for memory tracking; the
    # previous misspelled `trace_memroy` keyword never enabled it.
    profile_memory=True,
    schedule=torch.profiler.schedule(skip_first=10, wait=1, warmup=1, active=20),
)

strategy = DeepSpeedStrategy()
logger = TensorBoardLogger('tb_logs', name='my_model')
torch.set_float32_matmul_precision('medium')  # To make lightning happy
model = NN(
    input_size=config.INPUT_SIZE,
    learning_rate=config.LEARNING_RATE,
    num_classes=config.NUM_CLASSES,
).to(device)
dm = MNISTDataModule(
    data_dir=config.DATA_DIR,
    num_workers=config.NUM_WORKERS,
    batch_size=config.BATCH_SIZE,
)

trainer = pl.Trainer(
    # strategy = 'ddp',
    strategy=strategy,
    # profiler = 'simple', # ['simple' ,"advanced"]
    profiler=profiler,
    accelerator=config.ACCELERATOR,
    logger=logger,
    devices=config.DEVICES,
    min_epochs=1,
    max_epochs=config.NUM_EPOCHS,
    precision=config.PRECISION,
    callbacks=[
        MyPrintingCallbacks(),
        # Must match the key logged in NN.validation_step ("validation_loss");
        # no metric named 'val_loss' is ever logged.
        EarlyStopping(monitor='validation_loss'),
    ],
)
trainer.fit(model, dm)
trainer.validate(model, dm)
trainer.test(model, dm)  # Run before deploying