In [1]:
from google.colab import files

# Upload kaggle.json (downloaded from your Kaggle account)
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"finalyears","key":"641083736e668c4375a0e70bd4c5b8af"}'}

In [2]:
import os
import zipfile

# Make kaggle directory and move the uploaded kaggle.json there
os.makedirs('/root/.kaggle', exist_ok=True)
!mv kaggle.json /root/.kaggle/

# Give proper permissions
!chmod 600 /root/.kaggle/kaggle.json

In [3]:
!kaggle datasets download -d vipoooool/new-plant-diseases-dataset

Dataset URL: https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset
License(s): copyright-authors
Downloading new-plant-diseases-dataset.zip to /content
 99% 2.68G/2.70G [00:14<00:00, 194MB/s]
100% 2.70G/2.70G [00:14<00:00, 199MB/s]


In [4]:
with zipfile.ZipFile('new-plant-diseases-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('plant_disease_dataset')

In [5]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
import os

In [6]:
train_dir = '/content/plant_disease_dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train'
valid_dir = '/content/plant_disease_dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid'

# Transforms
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

valid_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load Data
train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
valid_data = datasets.ImageFolder(valid_dir, transform=valid_transforms)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=32, shuffle=False)

# Class names
class_names = train_data.classes
print(f"Number of classes: {len(class_names)}")

Number of classes: 38


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

# Replace classifier
num_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, len(class_names)),
    nn.LogSoftmax(dim=1)
)

model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 194MB/s]


In [8]:
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

epochs = 5
for epoch in range(epochs):
    model.train()
    running_loss = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validation
    model.eval()
    val_loss = 0
    accuracy = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = model(inputs)
            val_loss += criterion(logps, labels).item()

            ps = torch.exp(logps)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

    print(f"Epoch {epoch+1}/{epochs}.. "
          f"Train loss: {running_loss/len(train_loader):.3f}.. "
          f"Validation loss: {val_loss/len(valid_loader):.3f}.. "
          f"Validation accuracy: {accuracy/len(valid_loader):.3f}")

Epoch 1/5.. Train loss: 0.627.. Validation loss: 0.207.. Validation accuracy: 0.932
Epoch 2/5.. Train loss: 0.330.. Validation loss: 0.154.. Validation accuracy: 0.949
Epoch 3/5.. Train loss: 0.292.. Validation loss: 0.151.. Validation accuracy: 0.948
Epoch 4/5.. Train loss: 0.267.. Validation loss: 0.126.. Validation accuracy: 0.957
Epoch 5/5.. Train loss: 0.256.. Validation loss: 0.132.. Validation accuracy: 0.955


In [9]:
torch.save(model.state_dict(), 'plant_disease_model.pth')

In [10]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import joblib

# Function to extract features using ResNet18 (without final layer)
def extract_features(data_loader):
    feature_model = models.resnet18(pretrained=True)
    feature_model.fc = nn.Identity()  # remove final classifier
    feature_model = feature_model.to(device)
    feature_model.eval()

    features = []
    labels = []

    with torch.no_grad():
        for inputs, label in data_loader:
            inputs = inputs.to(device)
            feature_vector = feature_model(inputs)
            features.append(feature_vector.cpu().numpy())
            labels.append(label.numpy())

    features = np.concatenate(features)
    labels = np.concatenate(labels)
    return features, labels

In [11]:
train_features, train_labels = extract_features(train_loader)
valid_features, valid_labels = extract_features(valid_loader)

rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(train_features, train_labels)

# Save the Random Forest model
joblib.dump(rf_model, 'RandomForest.pkl')



['RandomForest.pkl']

In [15]:
from PIL import Image
def predict_with_cnn(image_path, model_path):
    image = Image.open(image_path)
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
    image = transform(image).unsqueeze(0).to(device)

    model = models.resnet18(pretrained=True)
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_features, 256),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(256, len(class_names)),
        nn.LogSoftmax(dim=1)
    )
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model.eval()

    with torch.no_grad():
        logps = model(image)
        ps = torch.exp(logps)
        top_p, top_class = ps.topk(1, dim=1)

    predicted_class = class_names[top_class]
    return predicted_class

In [16]:
def predict_with_rf(image_path, rf_model_path):
    from PIL import Image

    image = Image.open(image_path)
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
    image = transform(image).unsqueeze(0).to(device)

    feature_model = models.resnet18(pretrained=True)
    feature_model.fc = nn.Identity()  # Remove final classifier
    feature_model = feature_model.to(device)
    feature_model.eval()

    with torch.no_grad():
        features = feature_model(image).cpu().numpy()

    rf_model = joblib.load(rf_model_path)
    prediction = rf_model.predict(features)
    predicted_class = class_names[prediction[0]]
    return predicted_class

In [18]:
print(predict_with_cnn('/content/plant_disease_dataset/new plant diseases dataset(augmented)/New Plant Diseases Dataset(Augmented)/train/Blueberry___healthy/00fee259-67b7-4dd7-8b36-12503bbdba14___RS_HL 2681_180deg.JPG', 'plant_disease_model.pth'))
print(predict_with_rf('/content/plant_disease_dataset/new plant diseases dataset(augmented)/New Plant Diseases Dataset(Augmented)/train/Blueberry___healthy/00fee259-67b7-4dd7-8b36-12503bbdba14___RS_HL 2681_180deg.JPG', 'RandomForest.pkl'))

  model.load_state_dict(torch.load(model_path))


Blueberry___healthy
Blueberry___healthy
