<a href="https://colab.research.google.com/github/TirendazAcademy/PyTorch-Lightning-Tutorials/blob/main/Comet_Logging_with_Lightning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing lightning

In [1]:
!pip install lightning -q 
!pip install comet-ml -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.5/55.5 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m55.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.9/66.9 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 kB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.9/129.9 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━

# Importing required libraries

In [2]:
import os
import comet_ml
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import nn, optim
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import pytorch_lightning as pl
import torchmetrics
from torchmetrics import Metric
from pytorch_lightning.callbacks import EarlyStopping, Callback
from torchvision.transforms import RandomHorizontalFlip, RandomVerticalFlip
import torchvision
from pytorch_lightning.loggers import TensorBoardLogger
from lightning.pytorch.loggers import CometLogger

In [3]:
print("Torch version:", torch.__version__)
print("Pytorch ligthening version:", pl.__version__)
print("Comet-ML version:", comet_ml.__version__)

Torch version: 2.0.0+cu118
Pytorch ligthening version: 2.0.2
Comet-ML version: 3.32.9


# Loading the dataset

In [4]:
class MnistDataModule(pl.LightningDataModule):
    def __init__(self, data_dir, batch_size, num_workers):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers

    def prepare_data(self):
        datasets.MNIST(self.data_dir, train=True, download=True)
        datasets.MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            entire_dataset = datasets.MNIST(
                root=self.data_dir,  
                train=True,
                transform=transforms.Compose([
                  transforms.RandomVerticalFlip(),
                  transforms.RandomHorizontalFlip(),
                  transforms.ToTensor()
                ]),
                download=False,
            )
            self.train_ds, self.val_ds = random_split(entire_dataset, [50000, 10000])       

        # Assign test dataset for use in dataloader(s)     
        if stage == "test" or stage is None:            
            self.test_ds = datasets.MNIST(
                root=self.data_dir,
                train=False,
                transform=transforms.ToTensor(),
                download=False,
            )

    def train_dataloader(self):
        return DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=True,
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
        )

# Building the Model

In [5]:
class NN(pl.LightningModule):
    def __init__(self, input_size, learning_rate, num_classes):
        super().__init__()
        
        self.save_hyperparameters()
        self.lr = learning_rate
        self.loss_fn = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)
        self.f1_score = torchmetrics.F1Score(task="multiclass", num_classes=num_classes)

        # Neural Network architecture
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(p=0.5))
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(p=0.5))
        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
            torch.nn.Dropout(p=0.5))
        
        self.fc1 = torch.nn.Linear(4 * 4 * 128, 625, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        self.dense1_bn = torch.nn.BatchNorm1d(625)
        self.fc2 = torch.nn.Linear(625, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        self.dense2_bn = torch.nn.BatchNorm1d(10)

    def forward(self,x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.view(x.size(0), -1)
        x = self.dense1_bn(self.fc1(x))
        x = self.dense2_bn(self.fc2(x))
        x = F.softmax(x, dim=1)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss, scores, y = self._common_step(batch, batch_idx)
        accuracy = self.accuracy(scores, y)
        f1_score = self.f1_score(scores, y)   
        self.log_dict(
            {
                "train_loss": loss,
                "train_accuracy": accuracy,
                "train_f1_score": f1_score,
            },
            on_step=False,
            on_epoch=True,
            prog_bar=True,
        )      
        return {"loss": loss, "scores": scores, "y": y}

    def validation_step(self, batch, batch_idx):
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log("val_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log("test_loss", loss)
        return loss

    def _common_step(self, batch, batch_idx):
        x, y = batch
        scores = self.forward(x)
        loss = self.loss_fn(scores, y)
        return loss, scores, y

    def predict_step(self, batch, batch_idx):
        x, y = batch
        scores = self.forward(x)
        preds = torch.argmax(scores, dim=1)
        return preds

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.lr)

# Callbacks

In [6]:
class MyPrintingCallback(Callback):
    def __init__(self):
        super().__init__()

    def on_train_start(self, trainer, pl_module):
        print("Starting to train!")

    def on_train_end(self, trainer, pl_module):
        print("Training is done.")

In [7]:
comet_logger = CometLogger(
    api_key=os.environ.get("api_key"),
    workspace=os.environ.get("workspace"),
    project_name=os.environ.get("project_name"),
)

INFO: CometLogger will be initialized in online mode
INFO:lightning.pytorch.loggers.comet:CometLogger will be initialized in online mode


# Setting the hyperparameters

In [8]:
hyper_params = {
    # Training hyperparameters
    "INPUT_SIZE": 784, 
    "NUM_CLASSES": 10, 
    "LEARNING_RATE": 0.001, 
    "BATCH_SIZE": 64,
    "NUM_EPOCHS": 10,
    # Dataset
    "DATA_DIR": "dataset/",
    "NUM_WORKERS": os.cpu_count(),
    # Compute related
    "ACCELERATOR": "auto",
    "DEVICES": "auto",
    }

# Logging hyperparamters
comet_logger.experiment.log_parameters(hyper_params)

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/content' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/tirendaz-test/lightning-demo/fbab4d10b0a1494585c16a9a96c3d620



# Training the model

In [9]:
model = NN(
    input_size=hyper_params["INPUT_SIZE"],
    learning_rate=hyper_params["LEARNING_RATE"],
    num_classes=hyper_params["NUM_CLASSES"]
)
dm = MnistDataModule(
    data_dir=hyper_params["DATA_DIR"],
    batch_size=hyper_params["BATCH_SIZE"],
    num_workers=hyper_params["NUM_WORKERS"],
)
trainer = pl.Trainer(
    logger=comet_logger,
    accelerator=hyper_params["ACCELERATOR"],
    devices=hyper_params["DEVICES"],
    min_epochs=1,
    max_epochs=hyper_params["NUM_EPOCHS"],
    callbacks=[MyPrintingCallback(), EarlyStopping(monitor="val_loss")]
)

trainer.fit(model, dm)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 325023540.79it/s]


Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 43201032.03it/s]


Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 165527186.92it/s]

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz



100%|██████████| 4542/4542 [00:00<00:00, 7669295.00it/s]


Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type               | Params
-------------------------------------------------
0 | loss_fn   | CrossEntropyLoss   | 0     
1 | accuracy  | MulticlassAccuracy | 0     
2 | f1_score  | MulticlassF1Score  | 0     
3 | layer1    | Sequential         | 320   
4 | layer2    | Sequential         | 18.5 K
5 | layer3    | Sequential         | 73.9 K
6 | fc1       | Linear             | 1.3 M 
7 | dense1_bn | BatchNorm1d        | 1.2 K 
8 | fc2       | Linear             | 6.3 K 
9 | dense2_bn | BatchNorm1d        | 20    
-------------------------------------------------
1.4 M     Trainable params
0         Non-trainable params
1.4 M     Total params
5.523     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Starting to train!


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=10` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


Training is done.


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/tirendaz-test/lightning-demo/fbab4d10b0a1494585c16a9a96c3d620
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     loss [782]          : (1.5125749111175537, 2.3072807788848877)
[1;38;5;39mCOMET INFO:[0m     train_accuracy [10] : (0.6470000147819519, 0.9036399722099304)
[1;38;5;39mCOMET INFO:[0m     train_f1_score [10] : (0.6470000147819519, 0.9036399722099304)
[1;38;5;39mCOMET INFO:[0m     train_loss [10]     : (1.5792583227157593, 1.9541436433792114)
[1;38;5;39mCOMET INFO:[0m     val_loss

In [10]:
# Saving the model 
my_model = NN.load_from_checkpoint(
    "/content/lightning-demo/fbab4d10b0a1494585c16a9a96c3d620/checkpoints/epoch=9-step=7820.ckpt")

In [11]:
# Saving Model in Comet-ML
from comet_ml.integration.pytorch import log_model
log_model(comet_logger.experiment, my_model, model_name="my_pl_model")

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/content' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/tirendaz-test/lightning-demo/fbab4d10b0a1494585c16a9a96c3d620



In [12]:
# Ending our experiment
comet_logger.experiment.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml ExistingExperiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/tirendaz-test/lightning-demo/fbab4d10b0a1494585c16a9a96c3d620
[1;38;5;39mCOMET INFO:[0m   Others:
[1;38;5;39mCOMET INFO:[0m     Created from : pytorch-lightning
[1;38;5;39mCOMET INFO:[0m   Uploads:
[1;38;5;39mCOMET INFO:[0m     model-element : 2 (5.28 MB)
[1;38;5;39mCOMET INFO:[0m 
[1;38;5;39mCOMET INFO:[0m Uploading 1 metrics, params and output messages


# Resource

- [CometLogger](https://lightning.ai/docs/pytorch/latest/extensions/generated/lightning.pytorch.loggers.CometLogger.html#lightning.pytorch.loggers.comet.CometLogger.log_hyperparams)


🔗 Let's connect [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [Instagram](https://www.instagram.com/tirendazacademy) | [GitHub](http://github.com/tirendazacademy) | [Linkedin](https://www.linkedin.com/in/tirendaz-academy) | [Kaggle](https://www.kaggle.com/tirendazacademy) 😎