In [19]:
%pip install split-folders

Note: you may need to restart the kernel to use updated packages.


In [20]:
%pip install wandb -qU

In [21]:
import wandb
wandb.login()

True

In [29]:
import os
from glob import glob
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import splitfolders

def count_images_per_class(data_path):
    classes = os.listdir(data_path)
    print(classes)

    for i in classes:
        new_loc = os.path.join(data_path, i)
        images_jpg = glob(os.path.join(new_loc, '*.jpg'))
        images_JPG =glob(os.path.join(new_loc, '*.JPG'))

        # images_jpg = glob(new_jpg)
        # images_JPG = glob(new_JPG)

        total_images = len(images_jpg) + len(images_JPG)

        print(f"{i}: {total_images} images")

#         wandb.log({f"Class {i} Count": total_images})

def perform_data_split(data_path):
    splitfolders.ratio(data_path, seed=1337, output='train-test-splitted', ratio=(0.6, 0.2, 0.2))
#     wandb.log({"Data Split": "Completed"})

def create_transforms():
    transform_train = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.ColorJitter(brightness=0, contrast=0.2, saturation=0, hue=0)
    ])

    transform_tests = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    
    transform_vals = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    return transform_train, transform_tests, transform_vals

def create_data_loaders(train_data_dir, test_data_dir, val_data_dir):
    transform_train, transform_tests, transforms_vals = create_transforms()

    train_data = datasets.ImageFolder(train_data_dir, transform=transform_train)
    test_data = datasets.ImageFolder(test_data_dir, transform=transform_tests)
    val_data = datasets.ImageFolder(val_data_dir, transform=transforms_vals)

#     wandb.log({"Train Dataset Size": len(train_data)})
#     wandb.log({"Validation Dataset Size": len(test_data)})

    train_loader = DataLoader(train_data, batch_size=32, drop_last=True, shuffle=True, num_workers=0)
    validation_loader = DataLoader(test_data, batch_size=32, drop_last=True, shuffle=True, num_workers=0)
    test_loader = DataLoader(val_data, batch_size=32, drop_last=True, shuffle=True, num_workers=0)

    return train_loader, validation_loader, test_loader, train_data, test_data

In [24]:
%pip install git+https://github.com/PyTorchLightning/pytorch-lightning
import pytorch_lightning as pl
print(pl.__version__)

Collecting git+https://github.com/PyTorchLightning/pytorch-lightning
  Cloning https://github.com/PyTorchLightning/pytorch-lightning to /tmp/pip-req-build-pv7xc_jq
  Running command git clone --filter=blob:none --quiet https://github.com/PyTorchLightning/pytorch-lightning /tmp/pip-req-build-pv7xc_jq
  Resolved https://github.com/PyTorchLightning/pytorch-lightning to commit 48c39ce24f814609c02aad208bc8c2df27145beb
  Running command git submodule update --init --recursive -q
  Encountered 22 file(s) that should have been pointers, but weren't:
        .notebooks/course_UvA-DL/01-introduction-to-pytorch.ipynb
        .notebooks/course_UvA-DL/02-activation-functions.ipynb
        .notebooks/course_UvA-DL/03-initialization-and-optimization.ipynb
        .notebooks/course_UvA-DL/04-inception-resnet-densenet.ipynb
        .notebooks/course_UvA-DL/05-transformers-and-MH-attention.ipynb
        .notebooks/course_UvA-DL/06-graph-neural-networks.ipynb
        .notebooks/course_UvA-DL/07-deep-ener

In [25]:
import torch.nn as nn
import torch
import pytorch_lightning as pl

class ModifiedCNN(nn.Module):
    def __init__(self, num_classes, dropout_rate):
        super(ModifiedCNN, self).__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2)
        )

        self.dense_layers = nn.Sequential(
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)

        x = x.mean([2, 3])

        x = self.dense_layers(x)

        return x

In [26]:
%pip install torchmetrics

Note: you may need to restart the kernel to use updated packages.


In [27]:
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
from typing import Any, Optional, Union
import torchmetrics
from torchmetrics import Accuracy
# from src.model.modelCNN import ModifiedCNN

# STEP_OUTPUT = Union[float, torch.Tensor]

class Classifier(pl.LightningModule):
    def __init__(self, n_classes: int = 8) -> None:
        super().__init__()
        self.model = ModifiedCNN(num_classes=n_classes, dropout_rate=0.2)
        self.accuracy = torchmetrics.classification.Accuracy(task="multiclass", num_classes=n_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.model(x)

    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.model(inputs)
        loss = F.cross_entropy(outputs, labels)
        return loss

    def validation_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.model(inputs)
        loss = F.cross_entropy(outputs, labels)
        preds = torch.argmax(outputs, dim=1)
        acc = torch.sum(preds == labels).item() / len(labels)
        self.log('val_loss', loss, on_epoch=True, prog_bar=True)
        self.log('val_acc', acc, on_epoch=True, prog_bar=True)
        
    def test_step(self, batch, batch_idx):
        inputs, labels = batch
        outputs = self.model(inputs)
        loss = F.cross_entropy(outputs, labels)
        preds = torch.argmax(outputs, dim=1)
        acc = self.accuracy(preds, labels)
        self.log('test_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('test_acc', acc, on_step=True, on_epoch=True, prog_bar=True)

    # def on_validation_epoch_end(self) -> None:
    #     avg_loss = torch.stack([x['val_loss'] for x in self.outputs]).mean()
    #     avg_acc = torch.stack([x['val_acc'] for x in self.outputs]).mean()
    #     self.log('val_loss', avg_loss)
    #     self.log('val_acc', avg_acc)

    def configure_optimizers(self) -> Any:
        return torch.optim.Adam(self.parameters(), lr=1e-3)

    # def on_validation_epoch_end(self) -> None:
    #     pass


In [35]:
import torch
import torch.nn as nn
import torch
import pytorch_lightning as pl
from typing import Dict
from pytorch_lightning.callbacks import ModelCheckpoint
import wandb
from pytorch_lightning.loggers import WandbLogger
from pathlib import Path

# from scripts.model import get_model
# from dataset.dataset import create_data_loaders, count_images_per_class, perform_data_split

class TrainConfig:
    def __init__(self, dataset_root: Path, max_epochs: int, batch_size: int, optimizer: Dict):
        self.dataset_root = dataset_root
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.optimizer = optimizer

def train(cfg: TrainConfig) -> None:
    wandb.finish()

    wandb.init(
        project="pytorch-plant-disease",
        config={
            "batch_size": cfg.batch_size,
            "learning_rate": cfg.optimizer["learning_rate"],
            "epochs": cfg.max_epochs,
            "optimizer": cfg.optimizer["type"]
        },
    )

    logger = WandbLogger(name="Wandb", project="pytorch-plant-disease")

    checkpoint_callback = ModelCheckpoint(
        dirpath = '/kaggle/working/scripts/checkpoints',
        filename = "epoch={epoch}-step={global_step}",
    )
    

    trainer = pl.Trainer(
        max_epochs = cfg.max_epochs,
        log_every_n_steps = 5,
        callbacks = [checkpoint_callback],
        accelerator = "cuda",
        devices = 1,
        strategy = "auto",
        enable_model_summary = True,
        logger = logger,
    )

    model = Classifier(n_classes=8)
    data_path = Path("/kaggle/input/plant-disease-dataset/plant_disease")
    
    
    perform_data_split(data_path)

    # Define data directories
    train_data_dir = '/kaggle/working/train-test-splitted/train'
    test_data_dir = '/kaggle/working/train-test-splitted/test'
    val_data_dir = '/kaggle/working/train-test-splitted/val'

    # Create datasets and data loaders
    train_loader, validation_loader, test_loader, train_data, test_data = create_data_loaders(train_data_dir, test_data_dir, val_data_dir)

    trainer.fit(model, train_loader, validation_loader)
    torch.save(
        model.state_dict(), "/kaggle/working/Modified_CNN.pt"
    )
    
    trainer.test(model, test_loader)

    wandb.finish()
    

if __name__ == "__main__":
    config_values = {
        "max_epochs": 20,
        "dataset_root": Path("/kaggle/input/plant-disease-dataset/plant_disease"),
        "batch_size": 32,
        "optimizer": {
            "type": "Adam",
            "learning_rate": 1e-3
        }
    }

    train_cfg = TrainConfig(**config_values)
    train(train_cfg)





Copying files: 0 files [00:00, ? files/s][A
Copying files: 1 files [00:00,  1.42 files/s][A
Copying files: 152 files [00:00, 255.11 files/s][A
Copying files: 296 files [00:00, 484.04 files/s][A
Copying files: 426 files [00:01, 656.55 files/s][A
Copying files: 545 files [00:01, 290.49 files/s][A
Copying files: 627 files [00:02, 314.96 files/s][A
Copying files: 695 files [00:02, 322.65 files/s][A
Copying files: 752 files [00:02, 327.44 files/s][A
Copying files: 802 files [00:02, 330.33 files/s][A
Copying files: 847 files [00:02, 349.82 files/s][A
Copying files: 892 files [00:02, 353.21 files/s][A
Copying files: 935 files [00:02, 347.30 files/s][A
Copying files: 975 files [00:02, 356.82 files/s][A
Copying files: 1015 files [00:03, 145.05 files/s][A
Copying files: 1168 files [00:03, 309.10 files/s][A
Copying files: 1317 files [00:03, 479.82 files/s][A
Copying files: 1466 files [00:04, 652.78 files/s][A
Copying files: 1575 files [00:04, 321.63 files/s][A
Copying files: 1

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇▇██
test_acc_epoch,▁
test_acc_step,▇▃▅▄▃█▅▂▅▆▅▅▂▂▆▂▃▅▆▇▂▅▁▄▅
test_loss_epoch,▁
test_loss_step,▃▅▅▄▆▁▃▆▄▂▃▆▇▆▃█▄▅▃▃█▄█▅▃
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▆▆▆▇▇██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_acc,▄▁▆▁▅▄▅▆█▅▃█▄▆█▆▄▆▇▇
val_loss,▅█▃█▄▅▄▄▁▄▇▁▅▂▁▂▅▃▂▃

0,1
epoch,20.0
test_acc_epoch,0.82
test_acc_step,0.84375
test_loss_epoch,0.51325
test_loss_step,0.3906
trainer/global_step,1500.0
val_acc,0.825
val_loss,0.51866
