In [1]:
# This notebook fine-tunes pre-trained models (EfficientNetV2).

In [1]:
# Importing the necessary libraries #
import torch
import numpy as np
import torch.nn as nn
import torchvision.transforms.functional as F
import lightning as L
from typing import List
from lightning.pytorch import Trainer
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import precision_score
from torchmetrics import Accuracy
import utils
import os

In [2]:
# Runtime setup, done once before any model is created.
# "medium" lets float32 matrix multiplications use reduced internal precision,
# trading a little numerical accuracy for speed on supporting GPUs.
torch.set_float32_matmul_precision("medium")
# Hand cached-but-unused CUDA memory back to the driver.
torch.cuda.empty_cache()

In [3]:
import wandb
from lightning.pytorch.loggers import WandbLogger

# Authenticate with Weights & Biases exactly once.
# No key is passed on purpose: wandb resolves the credential from the
# WANDB_API_KEY environment variable or the local netrc file, and otherwise
# prompts interactively.
# SECURITY: an API key used to be hard-coded in this cell. Treat that key as
# leaked, revoke it in the W&B account settings, and never commit credentials
# to a notebook.
wandb.login()

# Initializing wandb logger #
wandb_logger = WandbLogger(
    entity="A2_DA6401_DL",
    project="EfficientNet",
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mae21b105[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/joel/.netrc


True

In [None]:
import torch.nn as nn
from torchvision import models
from torchvision.models import EfficientNet_V2_L_Weights, EfficientNet_V2_M_Weights

# Start from EfficientNetV2-L initialised with its IMAGENET1K_V1 weights.
model = models.efficientnet_v2_l(weights=EfficientNet_V2_L_Weights.IMAGENET1K_V1)

# Step 1: freeze every parameter of the network.
model.requires_grad_(False)

# Step 2: re-enable gradients for the last three children of `model.features`.
for block in list(model.features.children())[-3:]:
    block.requires_grad_(True)

# Step 3: replace the stock classifier with a small MLP head that ends in
# 10 output logits. The layers stay positional (no names) so the state-dict
# keys of the head are `classifier.0`, `classifier.1`, ...
in_features = model.classifier[1].in_features
head_layers = [
    nn.Linear(in_features, 512),
    nn.BatchNorm1d(512),
    nn.SiLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 10),
]
model.classifier = nn.Sequential(*head_layers)

# Step 4: explicitly mark the new head as trainable.
model.classifier.requires_grad_(True)


In [6]:
# Seed Python, NumPy and PyTorch (plus dataloader workers) so that shuffling,
# augmentation and dropout are repeatable from run to run.
# NOTE(review): presumably utils.create_dataset_image_folder draws a random
# train/val split; seeding before the call keeps that split stable - verify in utils.
L.seed_everything(42, workers=True)

# Defining the dataloaders to be built
train_dir = os.path.join(os.path.abspath(""), "nature_12K/inaturalist_12K/train/")
train_dataset, val_dataset, data_transforms = utils.create_dataset_image_folder(
    path_=train_dir,
    input_size=(224, 224),
)
train_loader, val_loader = utils.create_dataloaders(
    batch_size=8,
    num_workers=2,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    is_data_aug=True,
    data_transforms=data_transforms,
)

# Setting up the callbacks to be used
# Stop when `val_acc` has not improved for 7 validation runs.
early_stopping = EarlyStopping('val_acc', patience=7, mode="max")
# Keep only the single checkpoint with the highest `val_acc`.
checkpoint_callback = ModelCheckpoint(
    monitor="val_acc",
    dirpath="checkpoints/",
    filename="best-checkpoint_eff_net_unf",
    save_top_k=1,
    mode="max",
)

# Defining the model
lit_model = utils.Lightning_CNN(model=model)

# Training the Model
# batch_size=8 with accumulate_grad_batches=4 gives an effective batch of 32 samples
# per optimizer step.
trainer = Trainer(
    max_epochs=50,
    precision="16-mixed",
    accelerator="auto",
    logger=wandb_logger,
    accumulate_grad_batches=4,
    callbacks=[early_stopping, checkpoint_callback],
)
trainer.fit(lit_model, train_dataloaders=train_loader, val_dataloaders=val_loader)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


/home/joel/Pytorch_CUDA/virt_env/lib/python3.12/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/joel/Pytorch_CUDA/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name       | Type               | Params | Mode 
----------------------------------------------------------
0 | model      | EfficientNet       | 117 M  | train
1 | loss_fn    | CrossEntropyLoss   | 0      | train
2 | acc_metric | MulticlassAccuracy | 0      | train
----------------------------------------------------------
96.4 M    Trainable params
21.5 M    Non-trainable params
117 M     Total params
471.585   Total estimated model params size (MB)
1414      Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [7]:
# Recalling the model weights from the callbacks
best_model_path = checkpoint_callback.best_model_path
if not best_model_path:
    # The callback only records a path once a fit using it has saved a checkpoint;
    # fail with a clear message instead of passing an empty path to the loader.
    raise RuntimeError(
        "No best checkpoint recorded; run the training cell (trainer.fit) before evaluating."
    )
lit_model = utils.Lightning_CNN.load_from_checkpoint(best_model_path, model=model)

# Getting the test dataloader (the dataset's `val/` folder is used as the held-out test set)
test_loader = utils.get_test_dataloader(os.path.join(os.path.abspath(""), "nature_12K/inaturalist_12K/val/"), data_transforms)
# A separate Trainer with default settings, used only for evaluation and logging to W&B
trainer = Trainer(logger=wandb_logger)
# Running the test loop. Note: `trainer.test` returns a list of metric dictionaries
# (one per test dataloader), not per-sample predictions; the variable name is kept
# so later cells that may reference it keep working.
predictions = trainer.test(model=lit_model, dataloaders=test_loader)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

# Extra training: continue fine-tuning from the saved best checkpoint

In [5]:
# Fresh W&B logger for the extra-training run, pointing at the
# "EfficientNet" project of the "A2_DA6401_DL" entity.
wandb_logger = WandbLogger(entity="A2_DA6401_DL", project="EfficientNet")

In [6]:
# Seed Python, NumPy and PyTorch (plus dataloader workers) so this run is repeatable.
# NOTE(review): if utils.create_dataset_image_folder splits train/val randomly, an
# unseeded re-split here can place images the checkpoint was already trained on into
# the validation set and inflate `val_acc` - verify how utils performs the split.
L.seed_everything(42, workers=True)

# Defining the dataloaders to be built
train_dir = os.path.join(os.path.abspath(""), "nature_12K/inaturalist_12K/train/")
train_dataset, val_dataset, data_transforms = utils.create_dataset_image_folder(
    path_=train_dir,
    input_size=(224, 224),
)
train_loader, val_loader = utils.create_dataloaders(
    batch_size=8,
    num_workers=2,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    is_data_aug=True,
    data_transforms=data_transforms,
)

# Resume from the checkpoint written by the earlier training run. The path is built
# relative to the notebook's working directory (like the dataset path and the
# `checkpoints/` dirpath below) instead of a machine-specific absolute path.
resume_ckpt_path = os.path.join(os.path.abspath(""), "checkpoints", "best-checkpoint_eff_net_unf-v1.ckpt")
lit_model = utils.Lightning_CNN.load_from_checkpoint(resume_ckpt_path, model=model)

# Stop when `val_acc` has not improved for 7 validation runs.
early_stopping = EarlyStopping('val_acc', patience=7, mode="max")
# Keep only the best `val_acc` checkpoint, under a new file name so the checkpoint
# being resumed from is not overwritten.
checkpoint_callback = ModelCheckpoint(
    monitor="val_acc",
    dirpath="checkpoints/",
    filename="best-checkpoint_eff_net_unf_retrained",
    save_top_k=1,
    mode="max",
)

# Only the model weights come from the checkpoint; this is a new Trainer, so the
# fit below starts a fresh run of up to 20 epochs.
trainer = Trainer(
    max_epochs=20,
    precision="16-mixed",
    accelerator="auto",
    logger=wandb_logger,
    accumulate_grad_batches=4,
    callbacks=[early_stopping, checkpoint_callback],
)
trainer.fit(lit_model, train_dataloaders=train_loader, val_dataloaders=val_loader)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


/home/joel/Pytorch_CUDA/virt_env/lib/python3.12/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /home/joel/Pytorch_CUDA/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name       | Type               | Params | Mode 
----------------------------------------------------------
0 | model      | EfficientNet       | 117 M  | train
1 | loss_fn    | CrossEntropyLoss   | 0      | train
2 | acc_metric | MulticlassAccuracy | 0      | train
----------------------------------------------------------
96.4 M    Trainable params
21.5 M    Non-trainable params
117 M     Total params
471.585   Total estimated model params size (MB)
1414      Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [7]:
# Recalling the model weights from the callbacks
best_model_path = checkpoint_callback.best_model_path
if not best_model_path:
    # The callback only records a path once a fit using it has saved a checkpoint;
    # fail with a clear message instead of passing an empty path to the loader.
    raise RuntimeError(
        "No best checkpoint recorded; run the training cell (trainer.fit) before evaluating."
    )
lit_model = utils.Lightning_CNN.load_from_checkpoint(best_model_path, model=model)

# Getting the test dataloader (the dataset's `val/` folder is used as the held-out test set)
test_loader = utils.get_test_dataloader(os.path.join(os.path.abspath(""), "nature_12K/inaturalist_12K/val/"), data_transforms)
# A separate Trainer with default settings, used only for evaluation and logging to W&B
trainer = Trainer(logger=wandb_logger)
# Running the test loop. Note: `trainer.test` returns a list of metric dictionaries
# (one per test dataloader), not per-sample predictions; the variable name is kept
# so later cells that may reference it keep working.
predictions = trainer.test(model=lit_model, dataloaders=test_loader)

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]