In [1]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.2.4-py3-none-any.whl.metadata (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.4/53.4 kB[0m [31m749.7 kB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
Downloading lightning-2.2.4-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hInstalling collected packages: lightning
Successfully installed lightning-2.2.4


In [2]:
# Basic libraries
import os
import numpy
import torch
from torch import nn
import pandas
import matplotlib.pyplot as plt

# PyTorch data processing libraries
from torchvision import transforms, datasets
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader, random_split

# PyTorch Model Implementation Libraries
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
from torchmetrics import Accuracy

# PyTorch Lightning
from lightning import LightningDataModule, LightningModule
import lightning as L
from lightning.pytorch.callbacks.early_stopping import EarlyStopping

In [3]:
# Set up train and test paths
data_path = '/kaggle/input/agricultural-crops-image-classification/Agricultural-crops'

In [23]:
class CropsDataModule(L.LightningDataModule):
    def __init__(self, data_dir: str, batch_size: int = 32):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.base_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def setup(self, stage: str = None):
    # Use the ImageFolder method to arrange the dataset
        full_dataset = datasets.ImageFolder(root=self.data_dir, transform=self.base_transform)

        if stage == "fit" or stage is None:
            # Split for training and validation
            train_size = int(len(full_dataset) * 0.7)
            val_size = int(len(full_dataset) * 0.15)
            test_size = len(full_dataset) - train_size - val_size
            self.train_set, self.val_set, self.test_set = random_split(
                full_dataset, [train_size, val_size, test_size], generator=torch.Generator().manual_seed(42)
                )
            self.train_set.dataset.transform = transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                self.base_transform
            ])
            self.val_set.dataset.transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                self.base_transform
            ])

        if stage == "test" or stage == "predict" or stage is None:
            # Ensure the test set uses a less aggressive transformation
            self.test_set.dataset.transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                self.base_transform
            ])

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=self.batch_size, num_workers=4, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=self.batch_size, num_workers=4)

    def test_dataloader(self):
        return DataLoader(self.test_set, batch_size=self.batch_size, num_workers=4)

In [24]:
# Create an instance of the data class
dm = CropsDataModule(data_dir=data_path, batch_size=16)

In [6]:
# Setup the data module
dm.setup()
train_loader = dm.train_dataloader()
val_loader = dm.val_dataloader()
test_loader = dm.test_dataloader()

# Function to check a few batches
def check_dataloader(dataloader, name):
    print(f"Checking {name} DataLoader:")
    for i, (data, target) in enumerate(dataloader):
        print(f"  Batch {i + 1}:")
        print(f"    Data shape: {data.shape}, Type: {data.dtype}")
        print(f"    Target shape: {target.shape}, Type: {target.dtype}")
        if i >= 2:  # Check only the first 3 batches
            break

check_dataloader(train_loader, "Training")
check_dataloader(val_loader, "Validation")
check_dataloader(test_loader, "Testing")

Checking Training DataLoader:
  Batch 1:
    Data shape: torch.Size([16, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([16]), Type: torch.int64
  Batch 2:
    Data shape: torch.Size([16, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([16]), Type: torch.int64
  Batch 3:
    Data shape: torch.Size([16, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([16]), Type: torch.int64
Checking Validation DataLoader:
  Batch 1:
    Data shape: torch.Size([16, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([16]), Type: torch.int64
  Batch 2:
    Data shape: torch.Size([16, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([16]), Type: torch.int64
  Batch 3:
    Data shape: torch.Size([16, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([16]), Type: torch.int64
Checking Testing DataLoader:
  Batch 1:
    Data shape: torch.Size([16, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([16]), Typ

In [16]:
class ImageNetTL(L.LightningModule):
    def __init__(self, num_target_classes: int, learning_rate: float = 0.001):
        super().__init__()
        self.save_hyperparameters()  # This saves learning_rate and num_target_classes as part of model hyperparameters
        self.validation_step_outputs = [] # Create an empty list to store the validation step outputs
        self.test_step_outputs = []
        
        backbone = models.resnet50(weights="DEFAULT")
        num_features = backbone.fc.in_features # Save the features of the last layer
        layers = list(backbone.children())[:-1] # Remove the last layer
        self.feature_extractor = nn.Sequential(*layers) # Wrap the remaining layers
        
        self.num_target_classes = num_target_classes
        self.classifier = nn.Linear(num_features, num_target_classes)
        
        # Evaluation metrics
        self.train_acc = Accuracy(task='multiclass', num_classes=num_target_classes)
        self.valid_acc = Accuracy(task='multiclass', num_classes=num_target_classes)
        self.test_acc = Accuracy(task='multiclass', num_classes=num_target_classes)
        
    def forward(self, x):
        representations = self.feature_extractor(x).flatten(1)
        return self.classifier(representations)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        preds = self(x)
        loss = F.cross_entropy(preds, y)
        self.train_acc(preds, y)
        
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        train_acc_val = self.train_acc.compute()
        self.log('train_acc', train_acc_val, on_step=False, on_epoch=True, prog_bar=True)
        return {'loss': loss, 'train_acc': train_acc_val}
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        preds = self(x)
        val_loss = F.cross_entropy(preds, y)
        self.valid_acc(preds, y)
        self.validation_step_outputs.append(val_loss)
        self.log('val_loss', val_loss, on_step=False, on_epoch=True)
        return {'val_loss': val_loss}
    
    # Added to make use of the outputs from each `validation_step`
    def on_validation_epoch_end(self):
        loss_average = torch.stack(self.validation_step_outputs).mean()
        self.log("validation_loss_average", loss_average)
        self.validation_step_outputs.clear()  # free memory
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        preds = self(x)
        test_loss = F.cross_entropy(preds, y)
        self.test_step_outputs.append(test_loss)
        self.log("test_loss", test_loss, on_step=False, on_epoch=True)
        return {'test_loss': test_loss}
    
    # Added to make use of the outputs from each `test_step`
    def on_test_epoch_end(self):
        loss_average = torch.stack(self.test_step_outputs).mean()
        self.log("test_loss_average", loss_average)
        self.test_step_outputs.clear()  # free memory
        
    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.hparams.learning_rate)

In [17]:
model = ImageNetTL(num_target_classes=30)

In [9]:
# Run the model once quickly to check if everything is good
trainer = L.Trainer(fast_dev_run=True) # By default it runs (5 batches of train, validation, and test)

INFO: Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO: Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.


In [10]:
# Run a sanity check in the validation loop
trainer = L.Trainer(num_sanity_val_steps=2)

INFO: Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs


In [25]:
# Check the model for one epoch
dm.setup('fit')
trainer = L.Trainer(max_epochs=1, log_every_n_steps=10)
#print(next(iter(dm.train_dataloader())))  # Test the iterability directly here
trainer.fit(model, datamodule=dm)

INFO: Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name              | Type               | Params
---------------------------------------------------------
0 | feature_extractor | Sequential         | 23.5 M
1 | classifier        | Linear             | 61.5 K
2 | train_acc         | MulticlassAccuracy | 0     
3 | valid_acc         | MulticlassAccuracy | 0     
4 | test_acc          | MulticlassAccuracy | 0     
---------------------------------------------------------
23.6 M    Trainable params
0         Non-trainable pa

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=1` reached.


In [26]:
# Initialize EarlyStopping callback to monitor 'val_loss' for a patience of 3 epochs
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    min_delta=0.00,
    patience=3,
    verbose=False,
    mode='min'
)

trainer = L.Trainer(max_epochs=10,
                    profiler='simple', log_every_n_steps=10, callbacks=early_stop_callback)
# Profiler is added to check if there are bottlenecks in the code
trainer.fit(model, datamodule=dm)

INFO: Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name              | Type               | Params
---------------------------------------------------------
0 | feature_extractor | Sequential         | 23.5 M
1 | classifier        | Linear             | 61.5 K
2 | train_acc         | MulticlassAccuracy | 0     
3 | valid_acc         | MulticlassAccuracy | 0     
4 | test_acc          | MulticlassAccuracy | 0     
---------------------------------------------------------
23.6 M    Trainable params
0         Non-trainable pa

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=10` reached.
INFO: FIT Profiler Report

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                         	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                                      

In [27]:
trainer.test(model, datamodule=dm)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: |          | 0/? [00:00<?, ?it/s]

INFO: TEST Profiler Report

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                         	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                                                          	|  -              	|  17500     

[{'test_loss': 1.7627671957015991, 'test_loss_average': 1.7628612518310547}]

In [28]:
# Launch TensorBoard to display outputs
%reload_ext tensorboard
%tensorboard --logdir=lightning_logs/

Reusing TensorBoard on port 6006 (pid 149), started 0:20:45 ago. (Use '!kill 149' to kill it.)