<a href="https://colab.research.google.com/github/MeghP89/Plant_Disease_Predictor_CNN/blob/main/Plant_Disease_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Some Kaggle data sources are private, so authenticate with Kaggle first.
# Run this cell before the cell that imports the Kaggle data sources.
import kagglehub
kagglehub.login()


In [None]:
# Run this cell to import the Kaggle data sources used by this notebook;
# it can be deleted afterwards.
# NOTE: this notebook environment differs from Kaggle's Python environment,
# so libraries used by the notebook may be missing.
# NOTE(review): the download location is stored in
# plant_pathology_2020_fgvc7_path, but the cells below read from the
# hard-coded '../input/plant-pathology-2020-fgvc7/' directory instead --
# confirm both point at the same data in the environment you run in.

plant_pathology_2020_fgvc7_path = kagglehub.competition_download('plant-pathology-2020-fgvc7')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from torch.utils.data import Dataset, DataLoader, TensorDataset
from PIL import Image
import torch
from torchvision import transforms

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [None]:
# Load the training table (train.csv) from the competition's input directory.
train_data = pd.read_csv('../input/plant-pathology-2020-fgvc7/train.csv')

In [None]:
# Preview the first rows of the training table.
train_data.head()

In [None]:
# (number of rows, number of columns) of the training table.
train_data.shape

In [None]:
# Column labels of the training table.
train_data.columns

In [None]:
# Print a per-column summary of the training table.
train_data.info()

In [None]:
# Value in the first column of the third row (positions are zero-based).
train_data.iloc[2, 0]

In [None]:
# Count the missing values in each column.
train_data.isnull().sum()  # one count per column

In [None]:
# Preprocessing shared by every image; built once instead of on every call.
_IMAGE_TRANSFORM = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


def transform_img(img_path, image_dir="../input/plant-pathology-2020-fgvc7/images"):
    """Load one JPEG by image id and return it as a normalized tensor.

    Args:
        img_path: Image id, i.e. the file name without the ``.jpg`` suffix.
        image_dir: Directory containing the image files. Defaults to the
            competition's image folder used throughout this notebook.

    Returns:
        The image converted to RGB, resized to 224x224, turned into a tensor
        and normalized with mean 0.5 and std 0.5 per channel.
    """
    # The context manager closes the underlying file as soon as the pixel
    # data has been converted, instead of leaving the handle open.
    with Image.open(f"{image_dir}/{img_path}.jpg") as img:
        return _IMAGE_TRANSFORM(img.convert("RGB"))

In [None]:
# Preprocess every training image up front and keep one tensor per row in a
# new "image_tensor" column (all image tensors are held in memory at once).
train_data["image_tensor"] = train_data["image_id"].apply(transform_img)

In [None]:
# Persist only the tabular part of the training frame. The "image_tensor"
# column is left out on purpose: to_csv() would write each tensor as its
# printed text form, which cannot be loaded back as image data.
train_data.drop(columns=["image_tensor"], errors="ignore").to_csv("/kaggle/working/processed_train.csv")

In [None]:
# Preview the training table again, now that "image_tensor" has been added.
train_data.head()

In [None]:
# Remove the "image_id" column from the frame in place; being the last
# expression of the cell, the removed column is also displayed.
train_data.pop('image_id')

In [None]:
class MultiLabelImageDataset(Dataset):
    """Dataset pairing preprocessed image tensors with multi-hot label vectors.

    Args:
        df: DataFrame with an ``image_tensor`` column holding one
            ``torch.Tensor`` per row, plus one numeric column per label.
        label_columns: Names of the label columns, in the order they should
            appear in the label vector. Defaults to the four classes used
            by this notebook.

    Note:
        The label columns are converted to one float32 tensor when the
        dataset is built, so later edits to those columns of ``df`` are not
        picked up. Images are still read from ``df`` on every access.
    """

    DEFAULT_LABEL_COLUMNS = ('healthy', 'multiple_diseases', 'rust', 'scab')

    def __init__(self, df, label_columns=DEFAULT_LABEL_COLUMNS):
        self.df = df
        self.label_columns = list(label_columns)
        # Convert the labels once here instead of slicing a mixed-dtype row
        # and re-converting it on every __getitem__ call.
        label_matrix = df[self.label_columns].to_numpy(dtype=np.float32)
        self._labels = torch.tensor(label_matrix, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples (rows of the frame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for the row at position ``idx``.

        Both tensors are float32 copies, so callers may modify them without
        touching the data stored in the dataset.
        """
        image = self.df['image_tensor'].iloc[idx].clone().detach().float()
        labels = self._labels[idx].clone()
        return image, labels


In [None]:
# Wrap the preprocessed training frame in the dataset class defined above.
train_dataset = MultiLabelImageDataset(train_data)

In [None]:
# Shuffled batches of four training samples, loaded without worker processes.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0,
)

In [None]:
# Pull a single batch to sanity-check what the loader produces.
# Expected shapes: images (4, 3, H, W), then labels (4, 4).
images, labels = next(iter(train_loader))
for batch_part in (images, labels):
    print(batch_part.shape)

In [None]:
class MultiLabelCNN(nn.Module):
    """Small three-stage CNN that scores each label independently.

    Three ``conv -> ReLU -> 2x2 max-pool`` stages (16, 32 and 64 channels)
    are followed by a 128-unit hidden layer and a sigmoid output layer, so
    the forward pass returns one probability per label.

    Args:
        num_labels: Number of output labels.
        input_size: Spatial size of the input images, either a single int
            for square images or a ``(height, width)`` pair. Defaults to
            224, which gives the original ``64 * 28 * 28`` feature width.

    Raises:
        ValueError: If ``input_size`` is too small to survive the three
            pooling stages (either side below 8 pixels).
    """

    def __init__(self, num_labels=4, input_size=224):
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Each of the three 2x2 max-pools halves both sides, rounding down.
        pooled_h, pooled_w = height // 8, width // 8
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: both sides must be at least 8"
            )

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(64 * pooled_h * pooled_w, 128)
        self.fc2 = nn.Linear(128, num_labels)

    def forward(self, x):
        """Return per-label probabilities of shape ``(batch, num_labels)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # flatten() also copes with non-contiguous activations, unlike view().
        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        # Sigmoid rather than softmax: every label is scored on its own.
        x = torch.sigmoid(self.fc2(x))

        return x

In [None]:
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = MultiLabelCNN(num_labels=4).to(device)

# The network already emits sigmoid probabilities, so plain binary
# cross-entropy is applied directly to its output.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 7

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.float32)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, then backpropagate and update the weights.
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        # Scale the batch loss by the batch size so the epoch figure below
        # is an average per sample rather than per batch.
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

In [None]:
# Load the test table (test.csv) from the competition's input directory.
test_data = pd.read_csv('../input/plant-pathology-2020-fgvc7/test.csv')

In [None]:
# Preprocess every test image up front and keep one tensor per row in a new
# "image_tensor" column (all image tensors are held in memory at once).
test_data["image_tensor"] = test_data["image_id"].apply(transform_img)

In [None]:
# Persist only the tabular part of the test frame. The "image_tensor" column
# is left out on purpose: to_csv() would write each tensor as its printed
# text form, which cannot be loaded back as image data.
test_data.drop(columns=["image_tensor"], errors="ignore").to_csv("/kaggle/working/processed_test.csv")

In [None]:
# Remove the "image_id" column from the frame in place; being the last
# expression of the cell, the removed column is also displayed.
test_data.pop("image_id")

In [None]:
class TestImageDataset(Dataset):
    """Label-free dataset over a frame that has an 'image_tensor' column."""

    def __init__(self, df):
        self.df = df

    def __len__(self):
        """Number of samples, i.e. rows in the wrapped frame."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return a detached float32 copy of the image stored at row ``idx``."""
        stored = self.df.iloc[idx]['image_tensor']
        return stored.clone().detach().to(torch.float32)

In [None]:
# No shuffling: batches follow the row order of the test frame.
test_dataset = TestImageDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=0)

In [None]:
# Score every test batch with the model in evaluation mode and with
# gradient tracking switched off.
model.eval()
with torch.no_grad():
    all_outputs = [
        model(images.to(device, dtype=torch.float32))
        for images in test_loader
    ]

# Stack the per-batch outputs into one (num_images, num_labels) tensor.
all_outputs_tensor = torch.cat(all_outputs, dim=0)

# Example access
first_row = all_outputs_tensor[0]
print(first_row)
print(first_row[2])

# Move the scores to host memory as a NumPy array.
all_outputs_np = all_outputs_tensor.cpu().numpy()
print(all_outputs_np[0])


In [None]:
# Threshold every score independently at 0.5, so a row may end up with no
# positive label or with several.
pred_labels = (all_outputs_tensor > 0.5).int()
print(pred_labels)  # 2-D tensor of 0/1 flags, one row per test image, e.g. a row [0, 0, 1, 0]

In [None]:
# Shape of the thresholded predictions.
pred_labels.shape

In [None]:
# Save the state dict of `model` (not the model object itself) to the
# Kaggle working directory.
torch.save(model.state_dict(), "/kaggle/working/Crop_Disease_Classifier.pth")
