In [None]:
# Week 1 - Data Processing for Crop Disease Detection

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2
from sklearn.model_selection import train_test_split

# Path to dataset (update this after downloading from Kaggle)
data_dir = "PlantVillage"   # Example: "./dataset/PlantVillage"

# List categories (folder names are labels).
# Only sub-folders that directly contain at least one file are treated as
# classes: stray files, and folders holding nothing but other folders (for
# example a nested copy of the dataset), would otherwise become empty labels.
# Names are sorted because os.listdir() returns entries in arbitrary order and
# the position in this list is used as the numeric class index.
categories = []
for name in sorted(os.listdir(data_dir)):
    class_dir = os.path.join(data_dir, name)
    if not os.path.isdir(class_dir):
        continue
    if any(
        os.path.isfile(os.path.join(class_dir, entry))
        for entry in os.listdir(class_dir)
    ):
        categories.append(name)

# Fail early with a clear message instead of producing an empty dataset later.
if not categories:
    raise FileNotFoundError(
        f"No class folders containing files were found in {data_dir!r}"
    )
print("Classes found:", categories)

# Parameters
img_size = 128          # resize images to 128x128
max_per_class = 200     # cap per class so the demo stays fast
X = []                  # images, one (img_size, img_size, 3) RGB array each
y = []                  # integer label = index of the class in `categories`
skipped = 0             # files that could not be loaded as images

# Load dataset (only first 200 per class for demo)
for idx, category in enumerate(categories):
    folder = os.path.join(data_dir, category)
    if not os.path.isdir(folder):
        # A stray file at the top level is not a class folder.
        continue

    # Sort so the same files are selected on every run, and keep regular files
    # only so sub-folders do not use up part of the per-class quota.
    file_names = sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )

    for img in file_names[:max_per_class]:
        img_path = os.path.join(folder, img)

        # cv2.imread does not raise on failure; it returns None.
        img_array = cv2.imread(img_path)
        if img_array is None:
            skipped += 1
            continue

        try:
            # OpenCV loads pixels in BGR order; convert to RGB so the arrays
            # display correctly with matplotlib.
            img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
            img_array = cv2.resize(img_array, (img_size, img_size))
        except cv2.error as e:
            # Stay best-effort, but report the problem instead of hiding it.
            print("Skipping", img_path, "-", e)
            skipped += 1
            continue

        X.append(img_array)
        y.append(idx)

if skipped:
    print("Skipped unreadable files:", skipped)

# Convert to numpy arrays
# Stop here with a clear message if nothing was loaded; otherwise the failure
# only surfaces later inside train_test_split.
if len(X) == 0:
    raise ValueError("No images were loaded; check data_dir and its class folders")

# Normalize pixel values to 0-1. Building the array as float32 keeps the
# division in float32, which needs half the memory of the default float64.
X = np.array(X, dtype=np.float32) / 255.0
y = np.array(y)

print("Dataset Shape:", X.shape, y.shape)

# Hold out 20% of the samples for testing. The split is stratified on the
# labels and uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

# Report the shape of each image subset.
for caption, subset in (("Training data:", X_train), ("Testing data:", X_test)):
    print(caption, subset.shape)

# Bar chart of how many samples each class index has.
plt.figure(figsize=(8, 5))
count_ax = sns.countplot(x=y)   # countplot returns the Axes it drew on
count_ax.set_title("Class Distribution")
count_ax.set_xlabel("Class Index")
count_ax.set_ylabel("Count")
plt.show()

# Show the first nine training images in a 3x3 grid, each titled with its
# class name.
sample_fig = plt.figure(figsize=(10, 5))
for slot in range(9):
    cell = sample_fig.add_subplot(3, 3, slot + 1)
    cell.imshow(X_train[slot])
    cell.set_title(categories[y_train[slot]])
    cell.axis('off')
plt.show()


Classes found: ['Pepper__bell___Bacterial_spot', 'Pepper__bell___healthy', 'PlantVillage', 'Potato___Early_blight', 'Potato___healthy', 'Potato___Late_blight', 'Tomato_Bacterial_spot', 'Tomato_Early_blight', 'Tomato_healthy', 'Tomato_Late_blight', 'Tomato_Leaf_Mold', 'Tomato_Septoria_leaf_spot', 'Tomato_Spider_mites_Two_spotted_spider_mite', 'Tomato__Target_Spot', 'Tomato__Tomato_mosaic_virus', 'Tomato__Tomato_YellowLeaf__Curl_Virus']
