# Optimized VGG16 Fine-tuning for Plant Disease Classification

This notebook implements an optimized approach for fine-tuning VGG16 on a hierarchical plant disease classification task.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard
from tensorflow.keras.metrics import Precision, Recall
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


# Seed the NumPy and TensorFlow global random generators with the same fixed value.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

ModuleNotFoundError: No module named 'pandas'

## 1. Data Loading and Preprocessing

In [2]:
# Read the training and validation tables; both paths are relative to the working directory.
TRAIN_CSV, VALID_CSV = "Train.csv", "Valid.csv"
df_train = pd.read_csv(TRAIN_CSV)
df_valid = pd.read_csv(VALID_CSV)

In [3]:
# Number of training rows per plant, most frequent first (shows the class imbalance).
plant_counts = df_train["plant"].value_counts()
plant_counts

plant
Tomato                     18345
Apple                       7771
Corn_(maize)                7316
Grape                       7222
Potato                      5702
Pepper,_bell                3901
Strawberry                  3598
Peach                       3566
Cherry_(including_sour)     3509
Soybean                     2022
Orange                      2010
Blueberry                   1816
Raspberry                   1781
Squash                      1736
Name: count, dtype: int64

In [4]:
# Build the label hierarchy from the training table: the sorted plant names and,
# for each plant, the sorted disease labels that occur together with it.
# Missing labels are dropped before sorting because sorted() cannot order NaN
# (a float) against string labels and would raise TypeError.
plants = sorted(df_train['plant'].dropna().unique())
num_plants = len(plants)

# Group once up front instead of re-filtering the whole frame for every plant.
diseases_by_plant = df_train.groupby('plant')['disease']

diseases_per_plant = {}
for plant in plants:
    diseases = sorted(diseases_by_plant.get_group(plant).dropna().unique())
    diseases_per_plant[plant] = diseases
    print(f"{plant} has {len(diseases)} diseases: {diseases}")

# Largest number of disease labels under any single plant; 0 when the table has no plants.
max_diseases = max((len(diseases) for diseases in diseases_per_plant.values()), default=0)

Apple has 4 diseases: ['Apple_scab', 'Black_rot', 'Cedar_apple_rust', 'healthy']
Blueberry has 1 diseases: ['healthy']
Cherry_(including_sour) has 2 diseases: ['Powdery_mildew', 'healthy']
Corn_(maize) has 4 diseases: ['Cercospora_leaf_spot Gray_leaf_spot', 'Common_rust_', 'Northern_Leaf_Blight', 'healthy']
Grape has 4 diseases: ['Black_rot', 'Esca_(Black_Measles)', 'Leaf_blight_(Isariopsis_Leaf_Spot)', 'healthy']
Orange has 1 diseases: ['Haunglongbing_(Citrus_greening)']
Peach has 2 diseases: ['Bacterial_spot', 'healthy']
Pepper,_bell has 2 diseases: ['Bacterial_spot', 'healthy']
Potato has 3 diseases: ['Early_blight', 'Late_blight', 'healthy']
Raspberry has 1 diseases: ['healthy']
Soybean has 1 diseases: ['healthy']
Squash has 1 diseases: ['Powdery_mildew']
Strawberry has 2 diseases: ['Leaf_scorch', 'healthy']
Tomato has 10 diseases: ['Bacterial_spot', 'Early_blight', 'Late_blight', 'Leaf_Mold', 'Septoria_leaf_spot', 'Spider_mites Two-spotted_spider_mite', 'Target_Spot', 'Tomato_Yell