In [1]:
!pip install opendatasets --quiet
import opendatasets as od
od.download('https://www.kaggle.com/datasets/emmarex/plantdisease')

Skipping, found downloaded files in ".\plantdisease" (use force=True to force download)



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import torch
from torch import nn
from torch.optim import Adam
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import pandas as pd
import os

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [4]:
root_path = 'plantdisease/PlantVillage/'
img_path = []
labels_path = []

for label in os.listdir(root_path):
    for item in os.listdir(f'{root_path}/{label}'):
        img_path.append(f'{root_path}/{label}/{item}')
        labels_path.append(label)
        
print(f'Number of Images: {len(img_path)}')

Number of Images: 20639


In [5]:
# Creation of our Dataframe
data_df = pd.DataFrame(zip(img_path, labels_path), columns = ['image_path', 'label'])

# Print the distribution of data among classes and the format of our DataFrame.
print(data_df['label'].value_counts())
data_df.head()

label
Tomato__Tomato_YellowLeaf__Curl_Virus          3209
Tomato_Bacterial_spot                          2127
Tomato_Late_blight                             1909
Tomato_Septoria_leaf_spot                      1771
Tomato_Spider_mites_Two_spotted_spider_mite    1676
Tomato_healthy                                 1591
Pepper__bell___healthy                         1478
Tomato__Target_Spot                            1404
Potato___Early_blight                          1000
Potato___Late_blight                           1000
Tomato_Early_blight                            1000
Pepper__bell___Bacterial_spot                   997
Tomato_Leaf_Mold                                952
Tomato__Tomato_mosaic_virus                     373
Potato___healthy                                152
Name: count, dtype: int64


Unnamed: 0,image_path,label
0,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot
1,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot
2,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot
3,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot
4,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot


In [7]:
train = data_df.sample(frac=0.8)
val = data_df.drop(train.index)
test = val.sample(frac=0.5)
val = val.drop(test.index)

print(f'Train size: {len(train)}, Validation size: {len(val)}, Test size: {len(test)}')

Train size: 16511, Validation size: 2064, Test size: 2064


In [8]:
# Create a LabelEncoder for the Labels
label_encoder = LabelEncoder()
label_encoder.fit(data_df['label'])

# Create a transform for transforming the images in the same - appropriate form
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.ConvertImageDtype(dtype=float)
])

In [56]:
class PlantsDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.labels = torch.tensor(label_encoder.transform(dataframe['label'])).to(device)
        
    def __len__(self):
        return self.dataframe.shape[0]
    
    def __getitem__(self, indx):
        image = Image.open(self.dataframe.iloc[indx, 0]).convert('RGB')
        
        if self.transform:
            image = self.transform(image).to(device)
        
        label = self.labels[indx]
        
        return image, label

In [58]:
train_data = PlantsDataset(train, transform=transform)
val_data = PlantsDataset(val, transform=transform)
test_data = PlantsDataset(test, transform=transform)

print(val_data.__getitem__(1500))

(tensor([[[0.4588, 0.6039, 0.5843,  ..., 0.6745, 0.7451, 0.6314],
         [0.6471, 0.4863, 0.4784,  ..., 0.6745, 0.7098, 0.6039],
         [0.6510, 0.6275, 0.5686,  ..., 0.5451, 0.6824, 0.7098],
         ...,
         [0.4039, 0.4784, 0.6392,  ..., 0.7098, 0.7412, 0.7922],
         [0.4431, 0.5451, 0.4784,  ..., 0.6588, 0.5608, 0.6902],
         [0.4510, 0.4039, 0.3451,  ..., 0.7608, 0.6941, 0.6667]],

        [[0.3451, 0.4902, 0.4706,  ..., 0.6000, 0.6706, 0.5569],
         [0.5333, 0.3725, 0.3647,  ..., 0.6000, 0.6353, 0.5294],
         [0.5373, 0.5137, 0.4549,  ..., 0.4706, 0.6078, 0.6353],
         ...,
         [0.2980, 0.3725, 0.5333,  ..., 0.6392, 0.6706, 0.7216],
         [0.3373, 0.4392, 0.3725,  ..., 0.5882, 0.4902, 0.6196],
         [0.3451, 0.2980, 0.2392,  ..., 0.6902, 0.6235, 0.5961]],

        [[0.3137, 0.4588, 0.4392,  ..., 0.5451, 0.6157, 0.5020],
         [0.5020, 0.3412, 0.3333,  ..., 0.5451, 0.5804, 0.4745],
         [0.5059, 0.4824, 0.4235,  ..., 0.4157, 0.5529, 0

In [69]:
train_loader = DataLoader(dataset=train_data, shuffle=True)
val_loader = DataLoader(dataset=val_data, shuffle=True)
test_loader = DataLoader(dataset=test_data, shuffle=True)

In [None]:
#odel
class Pla