In [1]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.2.4-py3-none-any.whl.metadata (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.4/53.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Downloading lightning-2.2.4-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: lightning
Successfully installed lightning-2.2.4


In [17]:
# Basic libraries
import os
import numpy
import torch
from torch import nn
import pandas
import matplotlib.pyplot as plt

# PyTorch data processing libraries
from torchvision import transforms, datasets
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader, random_split

# PyTorch Model Implementation Libraries
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
from torchmetrics import Accuracy

# PyTorch Lightning
from lightning import LightningDataModule, LightningModule
import lightning as L
from lightning.pytorch.callbacks.early_stopping import EarlyStopping

In [3]:
# Root folder of the Kaggle "Agricultural-crops" image dataset (a single path; the
# train/validation/test split is made later from this one folder).
# It is passed to CropsDataModule as `data_dir`, which reads it with ImageFolder
# (presumably one sub-folder per crop class — confirm against the dataset layout).
data_path = '/kaggle/input/agricultural-crops-image-classification/Agricultural-crops'

In [36]:
class CropsDataModule(L.LightningDataModule):
    """Lightning data module serving train/validation/test splits of an image folder.

    The folder at ``data_dir`` is read with ``ImageFolder`` and split 70% / 15% / 15%
    (the test split absorbs the rounding remainder) with a fixed seed, so every call
    to ``setup`` yields the same partition. Training images receive random
    crop/flip augmentation; validation and test images receive a deterministic
    resize followed by a centre crop.

    Args:
        data_dir: Root directory handed to ``ImageFolder``.
        batch_size: Batch size used by all three dataloaders.
        num_workers: Worker process count used by all three dataloaders.
    """

    def __init__(self, data_dir: str, batch_size: int = 32, num_workers: int = 3):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers

    def prepare_data(self):
        """No-op: the data is already available on disk, so nothing is downloaded."""
        # If the data ever has to be downloaded, fetch/unzip/verify it here.
        pass

    def setup(self, stage=None):
        """Build ``train_set``, ``valid_set`` and ``test_set``.

        Args:
            stage: Accepted for compatibility with the Lightning hook. All three
                splits are built regardless of its value, so every dataloader is
                usable after a single call.
        """
        # Tensor conversion + channel-wise normalisation shared by both pipelines
        to_normalized_tensor = [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]

        # Training pipeline: random crop + flip for data augmentation
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            *to_normalized_tensor,
        ])

        # Validation/test pipeline: deterministic resize + centre crop
        eval_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            *to_normalized_tensor,
        ])

        # Subsets produced by random_split all point at the SAME underlying dataset,
        # so assigning `subset.dataset.transform` would change the transform of every
        # split at once. Use one ImageFolder per pipeline instead; both scan the same
        # directory, so an index refers to the same image in either of them.
        eval_dataset = datasets.ImageFolder(root=self.data_dir, transform=eval_transform)
        train_dataset = datasets.ImageFolder(root=self.data_dir, transform=train_transform)

        total = len(eval_dataset)
        train_size = int(total * 0.7)
        valid_size = int(total * 0.15)
        test_size = total - train_size - valid_size

        # Seeded generator so the partition is reproducible across setup() calls
        generator = torch.Generator().manual_seed(42)
        train_split, self.valid_set, self.test_set = random_split(
            eval_dataset,
            [train_size, valid_size, test_size],
            generator=generator,
        )
        # Re-point the training indices at the dataset carrying the augmentation pipeline
        self.train_set = torch.utils.data.Subset(train_dataset, train_split.indices)

    def train_dataloader(self):
        """Return the shuffled dataloader over the training split."""
        print("Creating training dataloader")
        return DataLoader(self.train_set, batch_size=self.batch_size,
                          num_workers=self.num_workers, shuffle=True)

    def val_dataloader(self):
        """Return the (unshuffled) dataloader over the validation split."""
        print("Creating validation dataloader")
        return DataLoader(self.valid_set, batch_size=self.batch_size,
                          num_workers=self.num_workers)

    def test_dataloader(self):
        """Return the (unshuffled) dataloader over the test split."""
        print("Creating test dataloader")
        return DataLoader(self.test_set, batch_size=self.batch_size,
                          num_workers=self.num_workers)

In [None]:
# Build the data module for the crops dataset with a batch size of 16.
# NOTE(review): the saved dataloader-check output further down reports batches of 32, so that
# output appears to come from an earlier run with a different batch size — re-run to confirm.
dm = CropsDataModule(data_dir=data_path, batch_size=16)

In [37]:
# The data module only creates its train/valid/test splits inside setup(), so it must be
# called before requesting dataloaders by hand (a Trainer would normally do this for us).
dm.setup(stage=None)

train_loader = dm.train_dataloader()
val_loader = dm.val_dataloader()
test_loader = dm.test_dataloader()

# Print shape/dtype information for the leading batches of a dataloader
def check_dataloader(dataloader, name):
    """Report the shape and dtype of the first (at most three) batches.

    Args:
        dataloader: Iterable yielding ``(data, target)`` pairs whose elements
            expose ``.shape`` and ``.dtype``.
        name: Label used in the printed header.
    """
    print(f"Checking {name} DataLoader:")
    # Pairing with range(1, 4) caps the inspection at three batches without
    # pulling a fourth one from the loader.
    for batch_number, (inputs, labels) in zip(range(1, 4), dataloader):
        print(f"  Batch {batch_number}:")
        print(f"    Data shape: {inputs.shape}, Type: {inputs.dtype}")
        print(f"    Target shape: {labels.shape}, Type: {labels.dtype}")

# Inspect the first batches of every split, in train -> validation -> test order
for loader, split_label in (
    (train_loader, "Training"),
    (val_loader, "Validation"),
    (test_loader, "Testing"),
):
    check_dataloader(loader, split_label)

Creating training dataloader
Creating validation dataloader
Creating test dataloader
Checking Training DataLoader:
  Batch 1:
    Data shape: torch.Size([32, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([32]), Type: torch.int64
  Batch 2:
    Data shape: torch.Size([32, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([32]), Type: torch.int64
  Batch 3:
    Data shape: torch.Size([32, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([32]), Type: torch.int64
Checking Validation DataLoader:
  Batch 1:
    Data shape: torch.Size([32, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([32]), Type: torch.int64
  Batch 2:
    Data shape: torch.Size([32, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([32]), Type: torch.int64
  Batch 3:
    Data shape: torch.Size([32, 3, 224, 224]), Type: torch.float32
    Target shape: torch.Size([32]), Type: torch.int64
Checking Testing DataLoader:
  Batch 1:
    Data shape: torch

In [42]:
class ImageNetTL(L.LightningModule):
    """Transfer-learning image classifier built on a pretrained ResNet-50.

    The torchvision ResNet-50 (default pretrained weights) is used without its final
    fully connected layer as a feature extractor, and a new linear layer maps the
    pooled features to ``num_target_classes`` logits. No layer is frozen: the
    optimizer receives every parameter of the module.

    Args:
        num_target_classes: Number of output classes of the new classification head.
        learning_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, num_target_classes: int, learning_rate: float = 0.001):
        super().__init__()
        # Stores num_target_classes and learning_rate under self.hparams
        self.save_hyperparameters()
        # Per-batch validation losses collected during one validation epoch
        self.validation_step_outputs = []

        backbone = models.resnet50(weights="DEFAULT")
        num_features = backbone.fc.in_features  # input width of the original head
        layers = list(backbone.children())[:-1]  # everything except the final fc layer
        self.feature_extractor = nn.Sequential(*layers)

        self.num_target_classes = num_target_classes
        self.classifier = nn.Linear(num_features, num_target_classes)

        # One metric object per stage so their accumulated states never mix
        self.train_acc = Accuracy(task='multiclass', num_classes=num_target_classes)
        self.valid_acc = Accuracy(task='multiclass', num_classes=num_target_classes)
        self.test_acc = Accuracy(task='multiclass', num_classes=num_target_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        representations = self.feature_extractor(x).flatten(1)
        return self.classifier(representations)

    def training_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one training batch and log loss/accuracy."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.train_acc(logits, y)
        self.log('train_loss', loss)
        # Log the metric object itself (not .compute()): Lightning then computes the
        # epoch-level value and resets the metric state at the end of every epoch.
        self.log('train_acc', self.train_acc, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the loss for one validation batch and log loss/accuracy per epoch."""
        x, y = batch
        logits = self(x)
        val_loss = F.cross_entropy(logits, y)
        self.valid_acc(logits, y)
        self.validation_step_outputs.append(val_loss)
        self.log('val_loss', val_loss, on_step=False, on_epoch=True)
        # Previously the validation accuracy was accumulated but never reported or reset
        self.log('val_acc', self.valid_acc, on_step=False, on_epoch=True, prog_bar=True)
        return {'val_loss': val_loss}

    def on_validation_epoch_end(self):
        """Log the mean of the per-batch validation losses, then clear the buffer."""
        if not self.validation_step_outputs:
            # Nothing was collected (no validation batch ran); torch.stack would fail on []
            return
        epoch_average = torch.stack(self.validation_step_outputs).mean()
        self.log("validation_epoch_average", epoch_average)
        self.validation_step_outputs.clear()  # free memory

    def test_step(self, batch, batch_idx):
        """Compute the loss for one test batch and accumulate the test accuracy."""
        x, y = batch
        logits = self(x)
        test_loss = F.cross_entropy(logits, y)
        # Accumulate state here so on_test_epoch_end has something to compute
        self.test_acc.update(logits, y)
        self.log("test_loss", test_loss, on_step=False, on_epoch=True)
        return {'test_loss': test_loss}

    def on_test_epoch_end(self):
        """Log the accuracy accumulated over the whole test epoch and reset the metric.

        The hook takes no ``outputs`` argument: Lightning 2.x calls it without
        arguments, so the former ``(self, outputs)`` signature raised a TypeError.
        """
        self.log('test_acc', self.test_acc.compute(), on_epoch=True, prog_bar=True)
        self.test_acc.reset()

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using the saved learning rate."""
        return optim.Adam(self.parameters(), lr=self.hparams.learning_rate)

In [43]:
# Instantiate the transfer-learning model with a 30-way classification head.
# NOTE(review): 30 is assumed to equal the number of class folders in the dataset —
# confirm against the classes ImageFolder discovers under `data_path`.
model = ImageNetTL(num_target_classes=30)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth

  0%|          | 0.00/97.8M [00:00<?, ?B/s][A
  4%|▍         | 4.12M/97.8M [00:00<00:02, 43.2MB/s][A
 17%|█▋        | 16.7M/97.8M [00:00<00:00, 95.4MB/s][A
 30%|███       | 29.6M/97.8M [00:00<00:00, 114MB/s] [A
 44%|████▍     | 42.9M/97.8M [00:00<00:00, 124MB/s][A
 58%|█████▊    | 57.2M/97.8M [00:00<00:00, 133MB/s][A
 73%|███████▎  | 71.5M/97.8M [00:00<00:00, 139MB/s][A
100%|██████████| 97.8M/97.8M [00:00<00:00, 130MB/s][A


In [44]:
# Smoke run: train for a single epoch to check that the model and the data module
# work together end to end.
dm.setup(stage='fit')
trainer = L.Trainer(
    log_every_n_steps=10,
    max_epochs=1,
)
trainer.fit(model=model, datamodule=dm)


INFO: GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO: 
  | Name              | Type               | Params
---------------------------------------------------------
0 | feature_extractor | Sequential         | 23.5 M
1 | classifier        | Linear             | 61.5 K
2 | train_acc         | MulticlassAccuracy | 0     
3 | valid_acc         | MulticlassAccuracy | 0     
4 | test_acc          | MulticlassAccuracy | 0     
---------------------------------------------------------
23.6 M    Trainable params
0         Non-trainable params
23.6 M    Total params
94.278    Total estimated model params size (MB)


Training set size: 580
Validation set size: 124
Test set size: 125
Training set size: 580
Validation set size: 124
Test set size: 125


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Creating validation dataloader


/opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Creating training dataloader


/opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

/opt/conda/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


In [50]:
# Early stopping: halt training once 'val_loss' (logged per epoch by the model's
# validation_step) has failed to improve for 3 consecutive validation checks.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    min_delta=0.00,
    patience=3,
    verbose=False,
    mode='min'
)

# The callback only takes effect when it is registered with the Trainer; it was
# previously created but never passed in, so training always ran all 10 epochs.
trainer = L.Trainer(max_epochs=10,
                    profiler='simple', log_every_n_steps=10,
                    callbacks=[early_stop_callback])
trainer.fit(model, datamodule=dm)

DataLoaders are iterable and working as expected.


INFO: Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
INFO: GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
INFO: 
  | Name              | Type               | Params
---------------------------------------------------------
0 | feature_extractor | Sequential         | 23.5 M
1 | classifier        | Linear             | 61.5 K
2 | train_acc         | MulticlassAccuracy | 0     
3 | valid_acc         | MulticlassAccuracy | 0     
4 | test_acc          | MulticlassAccuracy | 0     
---------------------------------------------------------
23.6 M    Trainable params
0         Non-trainable pa

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

TypeError: An invalid dataloader was passed to `Trainer.fit(train_dataloaders=...)`. Found <__main__.CropsDataModule object at 0x790c9c9f8b20>.

In [None]:
# Load (or reload) the TensorBoard notebook extension and open the dashboard on the
# lightning_logs/ directory (presumably where the Trainer runs above wrote their logs —
# confirm the logger's save directory).
%reload_ext tensorboard
%tensorboard --logdir=lightning_logs/

In [37]:
!pip show pytorch-lightning
!pip install --upgrade pytorch-lightning

Name: pytorch-lightning
Version: 2.2.2
Summary: PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate.
Home-page: https://github.com/Lightning-AI/lightning
Author: Lightning AI et al.
Author-email: pytorch@lightning.ai
License: Apache-2.0
Location: /opt/conda/lib/python3.10/site-packages
Requires: fsspec, lightning-utilities, numpy, packaging, PyYAML, torch, torchmetrics, tqdm, typing-extensions
Required-by: lightning
Collecting pytorch-lightning
  Downloading pytorch_lightning-2.2.4-py3-none-any.whl.metadata (21 kB)
Downloading pytorch_lightning-2.2.4-py3-none-any.whl (802 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.2/802.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pytorch-lightning
  Attempting uninstall: pytorch-lightning
    Found existing installation: pytorch-lightning 2.2.2
    Uninstalling pytorch-lightning-2.2.2:
      Successfully u