In [39]:
!pip install opendatasets --quiet
import opendatasets as od
od.download('https://www.kaggle.com/datasets/emmarex/plantdisease')

Skipping, found downloaded files in ".\plantdisease" (use force=True to force download)



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [40]:
import torch
from torch import nn
from torch.optim import Adam
from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import pandas as pd
import os

In [41]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [42]:
root_path = 'plantdisease/PlantVillage/'
img_path = []
labels_path = []

for label in os.listdir(root_path):
    for item in os.listdir(f'{root_path}/{label}'):
        img_path.append(f'{root_path}/{label}/{item}')
        labels_path.append(label)
        
print(f'Number of Images: {len(img_path)}')

Number of Images: 20638


In [43]:
# Creation of our Dataframe
data_df = pd.DataFrame(zip(img_path, labels_path), columns = ['image_path', 'label'])

# Print the distribution of data among classes and the format of our DataFrame.
print(data_df['label'].value_counts())
data_df.head()

label
Tomato__Tomato_YellowLeaf__Curl_Virus          3208
Tomato_Bacterial_spot                          2127
Tomato_Late_blight                             1909
Tomato_Septoria_leaf_spot                      1771
Tomato_Spider_mites_Two_spotted_spider_mite    1676
Tomato_healthy                                 1591
Pepper__bell___healthy                         1478
Tomato__Target_Spot                            1404
Potato___Early_blight                          1000
Potato___Late_blight                           1000
Tomato_Early_blight                            1000
Pepper__bell___Bacterial_spot                   997
Tomato_Leaf_Mold                                952
Tomato__Tomato_mosaic_virus                     373
Potato___healthy                                152
Name: count, dtype: int64


Unnamed: 0,image_path,label
0,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot
1,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot
2,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot
3,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot
4,plantdisease/PlantVillage//Pepper__bell___Bact...,Pepper__bell___Bacterial_spot


In [44]:
train = data_df.sample(frac=0.8)
val = data_df.drop(train.index)
test = val.sample(frac=0.5)
val = val.drop(test.index)

print(f'Train size: {len(train)}, Validation size: {len(val)}, Test size: {len(test)}')

Train size: 16510, Validation size: 2064, Test size: 2064


In [45]:
# Create a LabelEncoder for the Labels
label_encoder = LabelEncoder()
label_encoder.fit(data_df['label'])

# Create a transform for transforming the images in the same - appropriate form
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.ConvertImageDtype(dtype=torch.float)
])

In [46]:
class PlantsDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.labels = torch.tensor(label_encoder.transform(dataframe['label']), dtype=torch.long).to(device)
        
    def __len__(self):
        return self.dataframe.shape[0]
    
    def __getitem__(self, indx):
        image = Image.open(self.dataframe.iloc[indx, 0]).convert('RGB')
        
        if self.transform:
            image = self.transform(image).to(device)
        
        label = self.labels[indx]
        
        return image, label

In [47]:
train_data = PlantsDataset(train, transform=transform)
val_data = PlantsDataset(val, transform=transform)
test_data = PlantsDataset(test, transform=transform)

print(val_data.__getitem__(1500))

(tensor([[[0.7804, 0.7882, 0.7373,  ..., 0.6824, 0.6078, 0.6980],
         [0.7216, 0.7412, 0.7451,  ..., 0.7529, 0.6824, 0.6706],
         [0.7216, 0.7255, 0.7608,  ..., 0.6627, 0.6392, 0.5725],
         ...,
         [0.4706, 0.4627, 0.4706,  ..., 0.5020, 0.5098, 0.4824],
         [0.4235, 0.4471, 0.4627,  ..., 0.4706, 0.4706, 0.4510],
         [0.3569, 0.4118, 0.4471,  ..., 0.4392, 0.5255, 0.4275]],

        [[0.7412, 0.7490, 0.6980,  ..., 0.6588, 0.5843, 0.6745],
         [0.6824, 0.7020, 0.7059,  ..., 0.7294, 0.6588, 0.6471],
         [0.6824, 0.6863, 0.7216,  ..., 0.6392, 0.6157, 0.5490],
         ...,
         [0.4157, 0.4078, 0.4157,  ..., 0.4471, 0.4549, 0.4275],
         [0.3686, 0.3922, 0.4078,  ..., 0.4157, 0.4157, 0.3961],
         [0.3020, 0.3569, 0.3922,  ..., 0.3843, 0.4706, 0.3725]],

        [[0.7451, 0.7529, 0.7020,  ..., 0.6667, 0.5922, 0.6824],
         [0.6863, 0.7059, 0.7098,  ..., 0.7373, 0.6667, 0.6549],
         [0.6863, 0.6902, 0.7255,  ..., 0.6471, 0.6235, 0

In [48]:
Learning_Rate = 1e-3
Batch_Size = 16
Epochs = 1000

train_loader = DataLoader(dataset=train_data, batch_size=Batch_Size, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=Batch_Size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=Batch_Size, shuffle=True)

In [49]:
#Model
class Plants(nn.Module):
    
    def __init__(self, number_of_classes):
        super().__init__()

        # Convolutions
        self.conv2d1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2d2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv2d3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv2d4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Maxpooling
        self.maxpooling = nn.MaxPool2d(kernel_size=2, stride=2)

        # Activation Function
        self.activation = nn.LeakyReLU()

        # Flatten Layer
        self.flatten = nn.Flatten()

        # Dense Layers
        self.dense1 = nn.Linear((128*16*16), 256)
        self.dense2 = nn.Linear(256, 128)
        self.dense3 = nn.Linear(128, 64)
        self.output = nn.Linear(64, number_of_classes)

    def forward(self, x):
                                    # x = (3, 256, 256)
        # Stage 1
        x = self.conv2d1(x)        # (16, 256, 256)
        x = self.maxpooling(x)     # (16, 128, 128)
        x = self.activation(x)     # (16, 128, 128)
        
        # Stage 2
        x = self.conv2d2(x)        # (32, 128, 128)
        x = self.maxpooling(x)     # (32, 64, 64)
        x = self.activation(x)     # (32, 64, 64)
    
        # Stage 3
        x = self.conv2d3(x)        # (64, 64, 64)
        x = self.maxpooling(x)     # (64, 32, 32)
        x = self.activation(x)     # (64, 32, 32)

        # Stage 4
        x = self.conv2d4(x)        # (128, 32, 32)
        x = self.maxpooling(x)     # (128, 16, 16)
        x = self.activation(x)     # (128, 16, 16)

        # Stage 5
        x = self.flatten(x)
        
        # Stage 6
        x = self.dense1(x)
        x = self.activation(x)
        
        # Stage 7
        x = self.dense2(x)
        x = self.activation(x)
        
        # Stage 8
        x = self.dense3(x)
        x = self.activation(x)
        
        # Stage 9
        x = self.output(x)
        
        return x

In [50]:
model =  Plants(len(data_df['label'].unique())).to(device)

In [51]:
from torchsummary import summary
summary(model, (3, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 256, 256]             448
         MaxPool2d-2         [-1, 16, 128, 128]               0
         LeakyReLU-3         [-1, 16, 128, 128]               0
            Conv2d-4         [-1, 32, 128, 128]           4,640
         MaxPool2d-5           [-1, 32, 64, 64]               0
         LeakyReLU-6           [-1, 32, 64, 64]               0
            Conv2d-7           [-1, 64, 64, 64]          18,496
         MaxPool2d-8           [-1, 64, 32, 32]               0
         LeakyReLU-9           [-1, 64, 32, 32]               0
           Conv2d-10          [-1, 128, 32, 32]          73,856
        MaxPool2d-11          [-1, 128, 16, 16]               0
        LeakyReLU-12          [-1, 128, 16, 16]               0
          Flatten-13                [-1, 32768]               0
           Linear-14                  [

In [52]:
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=Learning_Rate)

In [54]:
import time
from tqdm import tqdm

total_loss_train_plot = []
total_loss_validation_plot = []
total_acc_train_plot = []
total_acc_validation_plot = []
time_plot = []

for epoch in range(Epochs):
    epoch_start = time.time()
    total_acc_train = 0
    total_loss_train = 0
    total_loss_val = 0
    total_acc_val = 0
    
    for images, labels in tqdm(train_loader):
        optimizer.zero_grad()
        
        outputs = model(images)
        
        train_loss = criterion(outputs, labels)
        total_loss_train += train_loss.item()
        
        train_loss.backward()
        train_acc = (torch.argmax(outputs, axis=1) == labels).sum().item()
        
        total_acc_train += train_acc
        optimizer.step()
        
    with torch.no_grad():
        for images, labels in tqdm(val_loader):
            outputs = model(images)
            val_loss = criterion(outputs, labels)
            total_loss_val += val_loss.item()
            
            val_acc = (torch.argmax(outputs, axis=1) == labels).sum().item()
            total_acc_val += val_acc
            
    total_loss_train_plot.append(round(total_loss_train/1000, 4))
    total_loss_validation_plot.append(round(total_loss_val/1000, 4))
    
    total_acc_train_plot.append(round((total_acc_train/train_data.__len__()) * 100, 4))
    total_acc_validation_plot.append(round((total_acc_val/val_data.__len__()) * 100, 4))
      
    epoch_finish = round((time.time() - epoch_start)/60, 2)
        
    print(f'''Epoch {epoch+1}/{Epochs} Time: {epoch_finish} min, Train Loss: {round(total_loss_train/1000, 4)} Train Accuracy: {round((total_acc_train/train_data.__len__()) * 100, 4)}
              Validation Loss: {round(total_loss_val/1000, 4)} Validation Accuracy {round((total_acc_val/val_data.__len__()) * 100, 4)}
              ''')

100%|██████████████████████████████████████████████████████████████████████████████| 1032/1032 [07:36<00:00,  2.26it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 129/129 [00:26<00:00,  4.90it/s]


NameError: name 'EPOCHS' is not defined

with torch.no_grad():
    total_loss_test = 0
    total_acc_test = 0
    for images, labels in test_loader:
        predictions = model(predictions)
        
        acc = (torch.argmax(predictions, axis=1) == labels).sum().item()
        loss = criterion(predictions, labels)
        total_loss_test += loss.item()
        