<a href="https://colab.research.google.com/github/Rajdeep183/Real-Estate-Predictor/blob/main/disease_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!git clone https://github.com/spMohanty/PlantVillage-Dataset.git

Cloning into 'PlantVillage-Dataset'...
remote: Enumerating objects: 163235, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 163235 (delta 2), reused 1 (delta 0), pack-reused 163229 (from 1)[K
Receiving objects: 100% (163235/163235), 2.00 GiB | 28.17 MiB/s, done.
Resolving deltas: 100% (101/101), done.
Updating files: 100% (182401/182401), done.


In [3]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define paths
base_dir = "./PlantVillage-Dataset/raw/color"
train_dir = os.path.join(base_dir, "train")
valid_dir = os.path.join(base_dir, "valid")

# Create train and valid directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(valid_dir, exist_ok=True)

# Get all class (disease) folders, excluding 'train' and 'valid'
disease_classes = [d for d in os.listdir(base_dir)
                   if os.path.isdir(os.path.join(base_dir, d)) and d not in ['train', 'valid']]

# Split images into train (80%) and valid (20%)
for disease in disease_classes:
    disease_path = os.path.join(base_dir, disease)
    images = os.listdir(disease_path)

    # Skip if there are no images in the folder
    if len(images) == 0:
        continue

    # Split the dataset
    train_images, valid_images = train_test_split(images, test_size=0.2, random_state=42)

    # Create new class folders inside train and valid directories
    os.makedirs(os.path.join(train_dir, disease), exist_ok=True)
    os.makedirs(os.path.join(valid_dir, disease), exist_ok=True)

    # Move training images to the class subfolder within train_dir
    for img in train_images:
        src = os.path.join(disease_path, img)
        dst = os.path.join(train_dir, disease, img)
        shutil.move(src, dst)

    # Move validation images to the class subfolder within valid_dir
    for img in valid_images:
        src = os.path.join(disease_path, img)
        dst = os.path.join(valid_dir, disease, img)
        shutil.move(src, dst)

print("Dataset successfully split into train and valid folders!")

Dataset successfully split into train and valid folders!


In [4]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define transformations (resize + convert to tensor)
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

# Load train and validation datasets
train_dir = "./PlantVillage-Dataset/raw/color/train"
valid_dir = "./PlantVillage-Dataset/raw/color/valid"

train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
valid_dataset = datasets.ImageFolder(root=valid_dir, transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

print(f"Train size: {len(train_dataset)}, Valid size: {len(valid_dataset)}")

Train size: 43429, Valid size: 10876


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224 (for pretrained models)
    transforms.RandomHorizontalFlip(),  # Data Augmentation
    transforms.RandomRotation(10),
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize for pretrained models
])

# Load datasets
train_dir = "/content/PlantVillage-Dataset/raw/color/train"
valid_dir = "/content/PlantVillage-Dataset/raw/color/valid"

train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
valid_dataset = datasets.ImageFolder(root=valid_dir, transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

# Get class names
class_names = train_dataset.classes
print(f"Classes: {class_names}, Total: {len(class_names)}")


Classes: ['Apple___Apple_scab', 'Apple___Black_rot', 'Apple___Cedar_apple_rust', 'Apple___healthy', 'Blueberry___healthy', 'Cherry_(including_sour)___Powdery_mildew', 'Cherry_(including_sour)___healthy', 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot', 'Corn_(maize)___Common_rust_', 'Corn_(maize)___Northern_Leaf_Blight', 'Corn_(maize)___healthy', 'Grape___Black_rot', 'Grape___Esca_(Black_Measles)', 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)', 'Grape___healthy', 'Orange___Haunglongbing_(Citrus_greening)', 'Peach___Bacterial_spot', 'Peach___healthy', 'Pepper,_bell___Bacterial_spot', 'Pepper,_bell___healthy', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy', 'Raspberry___healthy', 'Soybean___healthy', 'Squash___Powdery_mildew', 'Strawberry___Leaf_scorch', 'Strawberry___healthy', 'Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___Late_blight', 'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot', 'Tomato___Spider_mites Two-spotted_spider_mite', 'Tomato___

In [10]:
import torch
import torchvision.models as models
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Use EfficientNet-B3 for better performance
model = models.efficientnet_b3(pretrained=True)

# Freeze early layers for transfer learning
for param in model.features[:-3].parameters():
    param.requires_grad = False

# Custom classifier with dropout for regularization
num_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(num_features, 512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, len(class_names))
)

model = model.to(device)
print(f"EfficientNet-B3 model ready with {len(class_names)} classes!")


EfficientNet-B3 model ready with 38 classes!


In [11]:
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # Label smoothing for better generalization
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)  # AdamW with weight decay

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=3
)


In [12]:
# Replace training loop with validation tracking, early stopping, and progress bars
from tqdm import tqdm
import time

num_epochs = 5
best_val_acc = 0.0
patience = 5
patience_counter = 0

for epoch in range(num_epochs):
    print(f"\n{'='*60}")
    print(f"EPOCH {epoch+1}/{num_epochs}")
    print(f"{'='*60}")

    # Training phase
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    # Training progress bar
    train_pbar = tqdm(train_loader, desc=f"Training Epoch {epoch+1}",
                     ncols=100, colour='blue', leave=False)

    for batch_idx, (images, labels) in enumerate(train_pbar):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

        # Update progress bar with current metrics
        current_acc = 100 * correct / total
        current_loss = running_loss / (batch_idx + 1)
        train_pbar.set_postfix({
            'Loss': f'{current_loss:.4f}',
            'Acc': f'{current_acc:.2f}%'
        })

    train_acc = 100 * correct / total
    avg_loss = running_loss / len(train_loader)

    # Validation phase
    model.eval()
    val_correct, val_total = 0, 0
    val_loss = 0.0

    # Validation progress bar
    val_pbar = tqdm(valid_loader, desc=f"Validation Epoch {epoch+1}",
                   ncols=100, colour='green', leave=False)

    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(val_pbar):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

            # Update progress bar with current metrics
            current_val_acc = 100 * val_correct / val_total
            current_val_loss = val_loss / (batch_idx + 1)
            val_pbar.set_postfix({
                'Loss': f'{current_val_loss:.4f}',
                'Acc': f'{current_val_acc:.2f}%'
            })

    val_acc = 100 * val_correct / val_total
    val_loss_avg = val_loss / len(valid_loader)

    # Epoch summary
    print(f"\n📊 EPOCH {epoch+1} SUMMARY:")
    print(f"   🔵 Train - Loss: {avg_loss:.4f}, Accuracy: {train_acc:.2f}%")
    print(f"   🟢 Valid - Loss: {val_loss_avg:.4f}, Accuracy: {val_acc:.2f}%")

    # Learning rate scheduling
    old_lr = optimizer.param_groups[0]['lr']
    scheduler.step(val_acc)
    new_lr = optimizer.param_groups[0]['lr']

    if new_lr != old_lr:
        print(f"   📉 Learning rate reduced: {old_lr:.6f} → {new_lr:.6f}")

    # Early stopping and model saving
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_plant_disease_model.pth")
        patience_counter = 0
        print(f"   ⭐ NEW BEST MODEL! Validation Accuracy: {val_acc:.2f}%")
        print(f"   💾 Model saved as 'best_plant_disease_model.pth'")
    else:
        patience_counter += 1
        print(f"   ⏳ No improvement. Patience: {patience_counter}/{patience}")

    if patience_counter >= patience:
        print(f"\n🛑 Early stopping triggered after {epoch+1} epochs")
        print(f"   Best validation accuracy achieved: {best_val_acc:.2f}%")
        break

    # Add a small delay for better visualization
    time.sleep(0.5)

print(f"\n🎉 TRAINING COMPLETED!")
print(f"   🏆 Best validation accuracy: {best_val_acc:.2f}%")
print(f"   📁 Best model saved as: 'best_plant_disease_model.pth'")


EPOCH 1/5





📊 EPOCH 1 SUMMARY:
   🔵 Train - Loss: 0.9440, Accuracy: 92.71%
   🟢 Valid - Loss: 0.7621, Accuracy: 97.95%
   ⭐ NEW BEST MODEL! Validation Accuracy: 97.95%
   💾 Model saved as 'best_plant_disease_model.pth'

EPOCH 2/5





📊 EPOCH 2 SUMMARY:
   🔵 Train - Loss: 0.7814, Accuracy: 97.76%
   🟢 Valid - Loss: 0.7208, Accuracy: 98.82%
   ⭐ NEW BEST MODEL! Validation Accuracy: 98.82%
   💾 Model saved as 'best_plant_disease_model.pth'

EPOCH 3/5





📊 EPOCH 3 SUMMARY:
   🔵 Train - Loss: 0.7575, Accuracy: 98.36%
   🟢 Valid - Loss: 0.7312, Accuracy: 98.80%
   ⏳ No improvement. Patience: 1/5

EPOCH 4/5





📊 EPOCH 4 SUMMARY:
   🔵 Train - Loss: 0.7455, Accuracy: 98.67%
   🟢 Valid - Loss: 0.7249, Accuracy: 98.57%
   ⏳ No improvement. Patience: 2/5

EPOCH 5/5





📊 EPOCH 5 SUMMARY:
   🔵 Train - Loss: 0.7331, Accuracy: 99.00%
   🟢 Valid - Loss: 0.7084, Accuracy: 98.99%
   ⭐ NEW BEST MODEL! Validation Accuracy: 98.99%
   💾 Model saved as 'best_plant_disease_model.pth'

🎉 TRAINING COMPLETED!
   🏆 Best validation accuracy: 98.99%
   📁 Best model saved as: 'best_plant_disease_model.pth'


In [14]:
import torch

# Set model to evaluation mode
model.eval()
correct, total = 0, 0

# Disable gradient calculation (faster inference)
with torch.no_grad():
    for images, labels in valid_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to GPU/CPU

        # Get model predictions
        outputs = model(images)
        _, preds = torch.max(outputs, 1)

        # Update accuracy calculation
        correct += torch.sum(preds == labels).item()
        total += labels.size(0)

# Compute validation accuracy
valid_acc = 100.0 * correct / total
print(f" Validation Accuracy: {valid_acc:.2f}%")



 Validation Accuracy: 98.92%


In [16]:
torch.save(model.state_dict(), "potato_disease_model.pth")


In [17]:
model.load_state_dict(torch.load("potato_disease_model.pth"))


<All keys matched successfully>

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import torch.nn as nn
import torchvision.models as models

# Assuming device is already defined (e.g., device = torch.device("cuda" if torch.cuda.is_available() else "cpu"))
# Assuming valid_loader is already defined
# Assuming class_names is already defined with 38 class names

# Placeholder for class_names - REPLACE WITH YOUR ACTUAL LIST OF 38 CLASS NAMES
class_names = [f'class_{i}' for i in range(38)]


# Enhanced final evaluation with test-time augmentation
def test_time_augmentation(model, image, device, num_augmentations=5):
    """Apply test-time augmentation for better predictions"""
    model.eval()
    predictions = []

    # Original prediction
    with torch.no_grad():
        output = model(image)
        predictions.append(torch.softmax(output, dim=1))

    # Augmented predictions
    for _ in range(num_augmentations):
        # Apply random transformations
        aug_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=5),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        # Convert back to PIL and apply augmentation
        img_pil = transforms.ToPILImage()(image.squeeze(0).cpu())
        aug_img = aug_transform(img_pil).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(aug_img)
            predictions.append(torch.softmax(output, dim=1))

    # Average all predictions
    avg_prediction = torch.mean(torch.stack(predictions), dim=0)
    return avg_prediction

# Load best model and evaluate
# Re-define the model architecture to match the saved state_dict
model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.IMAGENET1K_V1) # Use weights instead of pretrained
num_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(num_features, 512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, len(class_names)) # Ensure the output layer size matches the number of classes (38)
)
model = model.to(device)

# Print state dict keys and shapes for comparison (optional, but good for debugging)
print("Model state_dict keys and shapes:")
for key, value in model.state_dict().items():
    print(f"{key}: {value.shape}")

saved_state_dict = torch.load("best_plant_disease_model.pth")
print("\nSaved state_dict keys and shapes:")
for key, value in saved_state_dict.items():
    print(f"{key}: {value.shape}")


model.load_state_dict(saved_state_dict)
model.eval()

correct, total = 0, 0
all_predictions = []
all_labels = []

with torch.no_grad():
    for images, labels in valid_loader:
        images, labels = images.to(device), labels.to(device)

        # Use test-time augmentation for better accuracy
        batch_predictions = []
        for i in range(images.size(0)):
            single_image = images[i:i+1]
            avg_pred = test_time_augmentation(model, single_image, device)
            batch_predictions.append(avg_pred)

        batch_predictions = torch.cat(batch_predictions, dim=0)
        _, preds = torch.max(batch_predictions, 1)

        correct += (preds == labels).sum().item()
        total += labels.size(0)

        all_predictions.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

final_accuracy = 100.0 * correct / total
print(f"Final Test Accuracy with TTA: {final_accuracy:.2f}%")

# Classification report
from sklearn.metrics import classification_report, confusion_matrix
print("\nDetailed Classification Report:")
print(classification_report(all_labels, all_predictions, target_names=class_names))

Model state_dict keys and shapes:
features.0.0.weight: torch.Size([40, 3, 3, 3])
features.0.1.weight: torch.Size([40])
features.0.1.bias: torch.Size([40])
features.0.1.running_mean: torch.Size([40])
features.0.1.running_var: torch.Size([40])
features.0.1.num_batches_tracked: torch.Size([])
features.1.0.block.0.0.weight: torch.Size([40, 1, 3, 3])
features.1.0.block.0.1.weight: torch.Size([40])
features.1.0.block.0.1.bias: torch.Size([40])
features.1.0.block.0.1.running_mean: torch.Size([40])
features.1.0.block.0.1.running_var: torch.Size([40])
features.1.0.block.0.1.num_batches_tracked: torch.Size([])
features.1.0.block.1.fc1.weight: torch.Size([10, 40, 1, 1])
features.1.0.block.1.fc1.bias: torch.Size([10])
features.1.0.block.1.fc2.weight: torch.Size([40, 10, 1, 1])
features.1.0.block.1.fc2.bias: torch.Size([40])
features.1.0.block.2.0.weight: torch.Size([24, 40, 1, 1])
features.1.0.block.2.1.weight: torch.Size([24])
features.1.0.block.2.1.bias: torch.Size([24])
features.1.0.block.2.1.r

In [41]:
import torch
import torchvision.transforms as transforms
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


model = models.efficientnet_b0(pretrained=True)


num_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_features, 2)  # 2 classes: Diseased, Healthy

model = model.to(device)
model.eval()  # Set model to evaluation mode

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match EfficientNet input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

image_path = input("Enter image path: ")
image = Image.open(image_path).convert("RGB")  # Convert to RGB
image = transform(image).unsqueeze(0).to(device)  # Add batch dimension

# Make prediction
with torch.no_grad():
    output = model(image)
    predicted_class = torch.argmax(output, dim=1).item()

class_names = ["Healthy", "Diseased"]
print(f"Prediction: {class_names[predicted_class]}")



Enter image path: /content/disease.jpg
Prediction: Healthy
