# Import the Dataset

In [None]:
import kaggle

kaggle.api.authenticate()

try:
    files = kaggle.api.dataset_list_files('vipoooool/new-plant-diseases-dataset')
    print("Files in dataset:", files)
    
    kaggle.api.dataset_download_files('vipoooool/new-plant-diseases-dataset', path='.', unzip=True)
    print("Dataset downloaded successfully.")
except Exception as e:
    print("An error occurred:", e)

Files in dataset: 
Dataset URL: https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset
Dataset downloaded successfully.


# Load the dataset

In [None]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

train_dir = r'D:\GUVI\CODE\Plant_Diseases_Detection\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\train'
valid_dir = r'D:\GUVI\CODE\Plant_Diseases_Detection\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\valid'

train_transforms = transforms.Compose([
    transforms.Resize((64, 64)),                            
    transforms.ToTensor(),                      
    transforms.Normalize([0.5],[0.5]),  
])

valid_transforms = transforms.Compose([
    transforms.Resize((64, 64)),                
    transforms.ToTensor(),                            
    transforms.Normalize([0.5],[0.5]),  
])

train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
valid_dataset = datasets.ImageFolder(root=valid_dir, transform=valid_transforms)


num_workers = os.cpu_count() or 1 

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=32, num_workers=num_workers)


print(f'Training samples: {len(train_dataset)}')
print(f'Validation samples: {len(valid_dataset)}')

print(len(train_loader))

Training samples: 70295
Validation samples: 17572
2197


# Data Pruning

In [None]:
import random
from torch.utils.data import Subset

# Random sample of 75% of the dataset
num_samples = int(len(train_dataset) * 0.75)  
print(num_samples)
indices = random.sample(range(len(train_dataset)), num_samples)
train_subset = Subset(train_dataset, indices)


reducedtrain_loader = DataLoader(train_subset, batch_size=32, shuffle=True, num_workers=num_workers)
print(len(reducedtrain_loader))

52721
1648


# Train the model - Using Pre-trained model - ResNet18

In [1]:
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset


train_dir = r'D:\GUVI\CODE\Plant_Diseases_Detection\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\train'
valid_dir = r'D:\GUVI\CODE\Plant_Diseases_Detection\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\valid'


train_transforms = transforms.Compose([
    transforms.Resize((64, 64)),                
    transforms.RandomHorizontalFlip(),             
    transforms.ColorJitter(brightness=0.2, contrast=0.2), 
    transforms.ToTensor(),                          
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  
])

valid_transforms = transforms.Compose([
    transforms.Resize((64, 64)),                
    transforms.ToTensor(),                         
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 
])

train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
valid_dataset = datasets.ImageFolder(root=valid_dir, transform=valid_transforms)

num_samples = int(len(train_dataset) * 0.5)  
indices = random.sample(range(len(train_dataset)), num_samples)
train_subset = Subset(train_dataset, indices)

batch_size = 64  
num_workers = os.cpu_count() or 1

train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, num_workers=num_workers)


model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1) 


num_classes = len(train_dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)


device = torch.device("cpu")  
model.to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


best_val_loss = float('inf')
patience = 3
patience_counter = 0


num_epochs = 2

for epoch in range(num_epochs):
    model.train()  
    running_loss = 0.0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')
    
    model.eval()  
    val_running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for val_images, val_labels in valid_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)
            
            val_outputs = model(val_images)
            val_loss = criterion(val_outputs, val_labels)
            val_running_loss += val_loss.item()
            _, predicted = torch.max(val_outputs.data, 1)
            total += val_labels.size(0)
            correct += (predicted == val_labels).sum().item()

    avg_val_loss = val_running_loss / len(valid_loader)
    print(f'Validation Loss: {avg_val_loss:.4f}, Accuracy: {100 * correct / total:.2f}%')


    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_model.pth') 
        print("Best model saved.")
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print("Early stopping triggered.")
        break 

Epoch [1/2], Loss: 0.5554
Validation Loss: 0.4119, Accuracy: 87.68%
Best model saved.
Epoch [2/2], Loss: 0.2491
Validation Loss: 0.1946, Accuracy: 93.84%
Best model saved.


In [None]:
from PIL import Image
import streamlit as st


class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(32 * 16 * 16, 64)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.pool(nn.ReLU()(self.conv1(x)))
        x = self.pool(nn.ReLU()(self.conv2(x)))
        x = torch.flatten(x, start_dim=1)
        x = nn.ReLU()(self.fc1(x))
        return self.fc2(x)

model = models.resnet18(weights=None)
num_classes = len(train_dataset.classes)  
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.load_state_dict(torch.load(r"D:\GUVI\CODE\Plant_Diseases_Detection\best_model.pth", map_location=torch.device('cpu')))
model.eval() 


transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class_names = train_dataset.classes


st.title("Plant Disease Detection")
st.write("Upload a plant leaf image to detect disease")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    
    image_tensor = transform(image).unsqueeze(0) 

    with torch.no_grad():
        outputs = model(image_tensor)
        _, predicted_idx = torch.max(outputs.data, 1)
        predicted_class_name = class_names[predicted_idx.item()]

    
    st.write(f"Prediction: **{predicted_class_name}**")

  model.load_state_dict(torch.load(r"D:\GUVI\CODE\Plant_Diseases_Detection\best_model.pth", map_location=torch.device('cpu')))
2024-11-09 16:09:42.527 
  command:

    streamlit run d:\GUVI\CODE\Plant_Diseases_Detection\.venv\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


# Simple CNN Model

In [None]:
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

train_dir = r'D:\GUVI\CODE\Plant_Diseases_Detection\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\train'
valid_dir = r'D:\GUVI\CODE\Plant_Diseases_Detection\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\valid'


train_transforms = transforms.Compose([
    transforms.Resize((64, 64)),         
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2), 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

valid_transforms = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
valid_dataset = datasets.ImageFolder(root=valid_dir, transform=valid_transforms)

num_samples = int(len(train_dataset) * 0.5)  
indices = random.sample(range(len(train_dataset)), num_samples)
train_subset = Subset(train_dataset, indices)

num_workers = os.cpu_count() or 1

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=32, num_workers=num_workers)

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1) 
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(32 * 16 * 16, 64)
        self.fc2 = nn.Linear(64, len(train_dataset.classes))  

    def forward(self, x):
        x = self.pool(nn.ReLU()(self.conv1(x)))
        x = self.pool(nn.ReLU()(self.conv2(x)))
        x = torch.flatten(x, start_dim=1)
        x = nn.ReLU()(self.fc1(x))
        return self.fc2(x)

model = SimpleCNN()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device("cpu")  
model.to(device)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()  
    running_loss = 0.0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    model.eval()  
    val_running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for val_images, val_labels in valid_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)
            
            val_outputs = model(val_images)
            val_loss = criterion(val_outputs, val_labels)
            val_running_loss += val_loss.item()
            _, predicted = torch.max(val_outputs.data, 1)
            total += val_labels.size(0)
            correct += (predicted == val_labels).sum().item()

    print(f'Validation Loss: {val_running_loss/len(valid_loader):.4f}, Accuracy: {100 * correct / total:.2f}%')

Epoch [1/10], Loss: 1.4376
Validation Loss: 0.8393, Accuracy: 73.25%
Epoch [2/10], Loss: 0.6855
Validation Loss: 0.5838, Accuracy: 81.61%
Epoch [3/10], Loss: 0.4883
Validation Loss: 0.4408, Accuracy: 85.86%


KeyboardInterrupt: 

In [None]:
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset


train_dir = r'D:\GUVI\CODE\Plant_Diseases_Detection\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\train'
valid_dir = r'D:\GUVI\CODE\Plant_Diseases_Detection\New Plant Diseases Dataset(Augmented)\New Plant Diseases Dataset(Augmented)\valid'

train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

valid_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
valid_dataset = datasets.ImageFolder(root=valid_dir, transform=valid_transforms)


num_samples = int(len(train_dataset) * 0.5)
indices = random.sample(range(len(train_dataset)), num_samples)
train_subset = Subset(train_dataset, indices)

num_workers = os.cpu_count() or 1

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=32, num_workers=num_workers)

print(f'Training samples: {len(train_subset)}')
print(f'Validation samples: {len(valid_dataset)}')

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 56 * 56, 128) 
        self.fc2 = nn.Linear(128, len(train_dataset.classes))

    def forward(self, x):
        x = self.pool(nn.ReLU()(self.conv1(x)))
        x = self.pool(nn.ReLU()(self.conv2(x)))
        
        x = torch.flatten(x, start_dim=1)
        
        x = nn.ReLU()(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device("cpu")  
model.to(device)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    
    model.eval()
    val_running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for val_images, val_labels in valid_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)
            
            val_outputs = model(val_images)
            val_loss = criterion(val_outputs, val_labels)
            
            val_running_loss += val_loss.item()
            _, predicted = torch.max(val_outputs.data, 1)
            total += val_labels.size(0)
            correct += (predicted == val_labels).sum().item()

    print(f'Validation Loss: {val_running_loss/len(valid_loader):.4f}, Accuracy: {100 * correct / total:.2f}%')

Training samples: 35147
Validation samples: 17572
