In [None]:
#Checking the directory of dataset
!ls 'drive/MyDrive/plant_disease_detection'

colab_notebooks  dataset  logs	models


In [23]:

!pip install timm



In [31]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import timm
import logging
torch.cuda.empty_cache()


# Import PyTorch XLA and TPU libraries

import torch.cuda.amp as amp
from torch.cuda.amp import autocast, GradScaler

import cProfile
import pstats

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np


In [None]:
# Setup logging
os.makedirs('drive/MyDrive/plant_disease_detection/logs', exist_ok=True)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[
    logging.FileHandler("drive/MyDrive/plant_disease_detection/logs/efficientnet_v2_s_logs.log"),
    logging.StreamHandler()
])
logger = logging.getLogger()

In [25]:
#Defining data transformations

#transforms.RandomResizedCrop(224)=>Randomly crops the image to a size of 224x224 pixels
#reason:Helps in augmenting the dataset by introducing variations in the input images.

#transforms.RandomHorizontalFlip()=>Randomly flips the image horizontally with a probability of 0.5
#reason:Introduces more variations and helps in making the model invariant to the orientation of the images.

#transforms.ToTensor()=>Converts the image to a PyTorch tensor
#reason:Changes the data type from PIL Image or numpy.ndarray to torch.Tensor

#transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])=>Normalizes the image by subtracting the mean [0.485, 0.456, 0.406] and dividing by the standard deviation [0.229, 0.224, 0.225] for each channel (RGB)
#reason:This normalization helps in speeding up convergence during training by standardizing the input

# Define data transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=30),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
}



In [26]:
#Loading the images from train and validation folder and transform them
data_dir = 'drive/MyDrive/plant_disease_detection/dataset'
image_datasets = {x: datasets.ImageFolder(root=f"{data_dir}/{x}", transform=data_transforms[x]) for x in ['train', 'valid']}

# Optimized DataLoader setup
batch_size = 64
num_workers = 8

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=batch_size, shuffle=True, num_workers=num_workers, prefetch_factor=2, pin_memory=True),
    'valid': DataLoader(image_datasets['valid'], batch_size=batch_size, shuffle=False, num_workers=num_workers, prefetch_factor=2, pin_memory=True),
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes
print(len(dataloaders['train']))
print(class_names)
print(len(class_names))

437
['Pepper,_bell___Bacterial_spot', 'Pepper,_bell___healthy', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy', 'Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___Late_blight', 'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot', 'Tomato___Spider_mites Two-spotted_spider_mite', 'Tomato___Target_Spot', 'Tomato___Tomato_Yellow_Leaf_Curl_Virus', 'Tomato___Tomato_mosaic_virus', 'Tomato___healthy']
15


In [None]:
# Profile the DataLoader
def profile_dataloader(dataloader, num_batches=10):
    import cProfile, pstats
    profiler = cProfile.Profile()
    profiler.enable()

    for i, _ in enumerate(dataloader):
        if i >= num_batches:
            break

    profiler.disable()
    stats = pstats.Stats(profiler).sort_stats('cumtime')
    stats.print_stats(10)  # Print top 10 bottlenecks

# Profile the training DataLoader
print("Profiling DataLoader...")
profile_dataloader(dataloaders['train'])

Profiling DataLoader...
         7339 function calls (7337 primitive calls) in 54.540 seconds

   Ordered by: cumulative time
   List reduced from 246 to 10 due to restriction <10>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       47    0.001    0.000   43.610    0.928 /usr/lib/python3.10/threading.py:288(wait)
      319   43.610    0.137   43.610    0.137 {method 'acquire' of '_thread.lock' objects}
       11    0.000    0.000   43.592    3.963 /usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:626(__next__)
       11    0.000    0.000   43.589    3.963 /usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:1299(_next_data)
       15    0.000    0.000   43.588    2.906 /usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:1266(_get_data)
       21    0.000    0.000   43.587    2.076 /usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:1120(_try_get_data)
       21    0.000    0.000   4

In [27]:
# Initialize EfficientNetV2-L model
model = timm.create_model('tf_efficientnetv2_s', pretrained=True, num_classes=len(class_names))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()
model = model.to(device)
print(device)

# Define criterion, optimizer, and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0025)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# AMP GradScaler initialization
scaler = GradScaler()


cuda


In [None]:
# Training function with AMP and Gradient Accumulation
def train_model(model, criterion, optimizer, scheduler, dataloaders, dataset_sizes, device, num_epochs=25):
    since = time.time()
    best_model_wts = model.state_dict()
    best_acc = 0.0
    accumulation_steps = 4  # Number of batches to accumulate gradients over

    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs - 1}")
        print('-' * 10)
        logger.info(f"Epoch {epoch}/{num_epochs - 1}")
        logger.info('-' * 10)

        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            optimizer.zero_grad()
            y=0

            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs, labels = inputs.to(device), labels.to(device)
                print(f"Epoch {epoch}/{num_epochs - 1}"+"-iterating-"+phase+str(y))
                y=y+1

                with autocast():
                    outputs = model(inputs)
                    loss = criterion(outputs, labels) / accumulation_steps

                if phase == 'train':
                    scaler.scale(loss).backward()

                    if (i + 1) % accumulation_steps == 0:
                        scaler.step(optimizer)
                        scaler.update()
                        optimizer.zero_grad()

                running_loss += loss.item() * inputs.size(0) * accumulation_steps
                _, preds = torch.max(outputs, 1)
                running_corrects += torch.sum(preds == labels.data)

                # Free up memory
                del inputs, labels, outputs
                torch.cuda.empty_cache()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
            logger.info(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

        scheduler.step()
        print()

    time_elapsed = time.time() - since
    print(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best val Acc: {best_acc:.4f}")
    logger.info(f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    logger.info(f"Best val Acc: {best_acc:.4f}")


    return model

In [None]:


# Train the model
model = train_model(model, criterion, optimizer, scheduler,dataloaders,dataset_sizes,device, num_epochs=25)

# Saving the model
os.makedirs('drive/MyDrive/plant_disease_detection/models', exist_ok=True)
torch.save(model.state_dict(), 'drive/MyDrive/plant_disease_detection/models/efficientnet_v2_S_nightshade_plant_disease_detection_model.pth')
print("Model saved successfully.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 15/24-iterating-valid83
Epoch 15/24-iterating-valid84
Epoch 15/24-iterating-valid85
Epoch 15/24-iterating-valid86
Epoch 15/24-iterating-valid87
Epoch 15/24-iterating-valid88
Epoch 15/24-iterating-valid89
Epoch 15/24-iterating-valid90
Epoch 15/24-iterating-valid91
Epoch 15/24-iterating-valid92
Epoch 15/24-iterating-valid93
Epoch 15/24-iterating-valid94
Epoch 15/24-iterating-valid95
Epoch 15/24-iterating-valid96
Epoch 15/24-iterating-valid97
Epoch 15/24-iterating-valid98
Epoch 15/24-iterating-valid99
Epoch 15/24-iterating-valid100
Epoch 15/24-iterating-valid101
Epoch 15/24-iterating-valid102
Epoch 15/24-iterating-valid103
Epoch 15/24-iterating-valid104
Epoch 15/24-iterating-valid105
Epoch 15/24-iterating-valid106
Epoch 15/24-iterating-valid107
Epoch 15/24-iterating-valid108
Epoch 15/24-iterating-valid109
valid Loss: 0.0062 Acc: 0.9989

Epoch 16/24
----------
Epoch 16/24-iterating-train0
Epoch 16/24-iterating-train1
Ep

In [28]:
# Load the saved model weights
model.load_state_dict(torch.load('drive/MyDrive/plant_disease_detection/models/efficientnet_v2_S_nightshade_plant_disease_detection_model.pth'))

# Set the model to evaluation mode
model.eval()

EfficientNet(
  (conv_stem): Conv2dSame(3, 24, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNormAct2d(
    24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): ConvBnAct(
        (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (drop_path): Identity()
      )
      (1): ConvBnAct(
        (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (drop_path):

In [29]:
# Evaluate the model on the validation set
def evaluate_model(model, dataloader, criterion, device):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    running_corrects = 0
    all_labels = []
    all_preds = []
    dataset_size = len(dataloader.dataset)

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects.double() / dataset_size

    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)

    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')
    accuracy = accuracy_score(all_labels, all_preds)

    print(f"Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
    print(f"Precision: {precision:.4f} Recall: {recall:.4f} F1 Score: {f1:.4f} Accuracy: {accuracy:.4f}")

    return epoch_loss, epoch_acc, precision, recall, f1, accuracy

In [32]:
# Evaluate the model
evaluate_model(model, dataloaders['valid'], criterion, device)

Loss: 0.0055 Acc: 0.9990
Precision: 0.9990 Recall: 0.9990 F1 Score: 0.9990 Accuracy: 0.9990


(0.00548334737413265,
 tensor(0.9990, device='cuda:0', dtype=torch.float64),
 0.999001296853927,
 0.998997995991984,
 0.9989982830732032,
 0.998997995991984)