# Potato Disease Classification

This notebook is a clone of the original tutorial notebook, updated to use this project's folder layout:
- Data source: `data/PlantVillage`
- Split output: `training/dataset/{train,val,test}`
- Saved artifacts (model + class names): `saved_models/`

Note: This notebook has been adapted to save the trained H5 model and class names into the project's `saved_models` directory.

In [None]:
# Project-specific paths and setup
import json
import os
from pathlib import Path

# Root of the repository. Set the POTATO_DISEASE_ROOT environment variable to
# point the notebook at a different checkout; when it is unset, the original
# workspace location is used as the fallback.
ROOT = Path(os.environ.get('POTATO_DISEASE_ROOT', r"C:/Learn Programming/Machine Learning/potato_disease"))
DATA_ROOT = ROOT / 'data' / 'PlantVillage'  # original dataset location
SPLIT_DIR = ROOT / 'training' / 'dataset'   # where train/val/test will be created/expected
SAVED_MODELS = ROOT / 'saved_models'        # output directory for the model and class names
# Create the output directory up front so later save cells cannot fail on a missing folder
SAVED_MODELS.mkdir(parents=True, exist_ok=True)
SAVED_H5 = SAVED_MODELS / 'potato_disease_model.h5'
CLASS_NAMES_PATH = SAVED_MODELS / 'class_names.json'

print('ROOT:', ROOT)
print('DATA_ROOT:', DATA_ROOT)
print('SPLIT_DIR:', SPLIT_DIR)
print('SAVED_H5:', SAVED_H5)
print('CLASS_NAMES_PATH:', CLASS_NAMES_PATH)

: 

If you haven't already split the PlantVillage dataset into train/val/test folders, the cell below will use `splitfolders` to create `training/dataset/train`, `training/dataset/val`, and `training/dataset/test`. This is optional if you already have those folders.

In [None]:
# Optional: split the dataset into train/val/test under `training/dataset`
# Requires: pip install split-folders
if SPLIT_DIR.exists():
    print('Split dataset already exists at', SPLIT_DIR)
else:
    # Import lazily: the dependency is only needed when a split has to be created.
    # Fail with an actionable error instead of a later NameError on `splitfolders`.
    try:
        import splitfolders
    except ImportError as exc:
        raise ImportError('splitfolders not installed. Run: pip install split-folders') from exc

    print('Splitting dataset from', DATA_ROOT, 'into', SPLIT_DIR)
    # Convert to strings for splitfolders API; the fixed seed keeps the split reproducible
    splitfolders.ratio(str(DATA_ROOT), output=str(SPLIT_DIR), seed=1337, ratio=(0.8, 0.1, 0.1))

Dataset credits: https://www.kaggle.com/arjuntejaswi/plant-village

### Import all the Dependencies

In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
from IPython.display import HTML

### Import data into tensorflow dataset object

The `splitfolders` tool is used to split the dataset into training, validation and test directories. The equivalent command-line invocation is shown below.

$ pip install split-folders

$ splitfolders --output ./training/dataset --ratio 0.8 0.1 0.1 -- ./data/PlantVillage/


In [None]:
# Input geometry shared by the data generators and the model:
# images are resized to IMAGE_SIZE x IMAGE_SIZE pixels with CHANNELS colour channels.
IMAGE_SIZE, CHANNELS = 256, 3

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training pipeline: scale pixels by 1/255 and apply light augmentation
# (rotations up to 10 degrees and random horizontal flips).
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=10, horizontal_flip=True)

# Stream batches of 32 images from the `train` split; "sparse" yields integer class labels.
train_generator = train_datagen.flow_from_directory(
    directory=str(SPLIT_DIR / 'train'),
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse",
)

In [None]:
# Display the class-name -> label-index mapping discovered by the training generator
train_generator.class_indices

In [None]:
# Persist the class names as JSON (in the generator's order) so the API can
# translate predicted indices back into human-readable labels.
import json
class_names = [name for name in train_generator.class_indices]
CLASS_NAMES_PATH.write_text(json.dumps(class_names))
print('Saved class names to', CLASS_NAMES_PATH)

In [None]:
# Class names in the order of the generator's class_indices mapping
class_names = [*train_generator.class_indices]
class_names

In [None]:
# Peek at the first image of one training batch to confirm that pixel values
# have been rescaled by 1/255. Only a single batch is drawn, hence the break.
for image_batch, label_batch in train_generator:
    print(image_batch[0])
    break

In [None]:
# Validation data must only be rescaled, never randomly augmented: random
# rotations/flips would make the validation metrics noisy and non-repeatable.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Stream batches of 32 images from the `val` split with integer ("sparse") labels
validation_generator = validation_datagen.flow_from_directory(
        str(SPLIT_DIR / 'val'),
        target_size=(IMAGE_SIZE,IMAGE_SIZE),
        batch_size=32,
        class_mode="sparse"
)

In [None]:
# Test data must only be rescaled, never randomly augmented: evaluation should
# measure the model on the unmodified held-out images.
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream batches of 32 images from the `test` split with integer ("sparse") labels
test_generator = test_datagen.flow_from_directory(
        str(SPLIT_DIR / 'test'),
        target_size=(IMAGE_SIZE,IMAGE_SIZE),
        batch_size=32,
        class_mode="sparse"
)

In [None]:
# Print the first image of one test batch as a sanity check on the pixel values.
# Only a single batch is drawn from the generator, hence the break.
for image_batch, label_batch in test_generator:
    print(image_batch[0])
    break

## Building the Model

In [None]:
# Build a simple CNN: three Conv2D + MaxPooling2D stages followed by a dense classifier head
input_shape = (IMAGE_SIZE, IMAGE_SIZE, CHANNELS)

# Use the class names collected earlier when available; otherwise assume 3 classes
if 'class_names' in globals():
    n_classes = len(class_names)
else:
    n_classes = 3

model = models.Sequential()
model.add(layers.InputLayer(input_shape=input_shape))

# Convolutional feature extractor
model.add(layers.Conv2D(32, kernel_size=(3,3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Classifier head: flatten, one hidden dense layer, softmax over the classes
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(n_classes, activation='softmax'))

In [None]:
# Print the layer-by-layer architecture of the model
model.summary()

### Compiling the Model
We use the `adam` optimizer, `SparseCategoricalCrossentropy` as the loss, and `accuracy` as the metric.

In [None]:
# Configure training. The sparse loss matches the integer labels produced by
# the generators (class_mode="sparse"); from_logits=False because the model's
# final layer already applies softmax.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

In [None]:
# Report how many images each generator found ('unknown' if the attribute is missing)
print('Train samples:', train_generator.samples if hasattr(train_generator, 'samples') else 'unknown')
print('Validation samples:', validation_generator.samples if hasattr(validation_generator, 'samples') else 'unknown')
print('Test samples:', test_generator.samples if hasattr(test_generator, 'samples') else 'unknown')

In [None]:
# Report the batch size used by the train/validation generators ('unknown' if the attribute is missing)
print('Train batch_size:', getattr(train_generator, 'batch_size', 'unknown'))
print('Validation batch_size:', getattr(validation_generator, 'batch_size', 'unknown'))

In [None]:
# Derive the number of batches per epoch from the generators instead of
# hard-coding values that only fit one particular dataset size.
# Floor division skips the final partial batch; max(1, ...) keeps very small
# datasets trainable.
steps_per_epoch = max(1, train_generator.samples // train_generator.batch_size)
validation_steps = max(1, validation_generator.samples // validation_generator.batch_size)

# `batch_size` is intentionally not passed: the generators already yield batches.
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    verbose=1,
    epochs=20,
)

In [None]:
# Evaluate the trained model on the held-out test generator
scores = model.evaluate(test_generator)

In [None]:
# Display the evaluation result returned by model.evaluate
scores

`scores` is simply a list containing the loss and accuracy values measured on the test set.

### Plotting the Accuracy and Loss Curves

In [None]:
# Display the object returned by model.fit
history

You can read documentation on history object here: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History

In [None]:
# Display the training parameters recorded on the history object
history.params

In [None]:
# List the metric names that were recorded during training
history.history.keys()

**`loss`, `accuracy`, `val_loss`, etc. are Python lists containing the value of each metric at the end of every epoch**

In [None]:
# Show the container type used to store the recorded loss values
type(history.history['loss'])

In [None]:
# Number of recorded loss values
len(history.history['loss'])

In [None]:
history.history['loss'][:5] # training loss recorded for the first 5 epochs

In [None]:
# Pull the recorded training/validation curves out of the history for plotting
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

In [None]:
# Recorded validation accuracy values
val_acc

In [None]:
# Recorded training accuracy values
acc

In [None]:
# Take the epoch count from the recorded history rather than hard-coding it,
# so the x-axis always matches the curves even if the number of training
# epochs is changed.
EPOCHS = len(acc)
epochs_range = range(EPOCHS)

plt.figure(figsize=(8, 8))

# Left panel: accuracy curves
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

### Run prediction on a sample image

In [None]:
import numpy as np

# Draw a single batch from the test generator and classify its first image
for image_batch, label_batch in test_generator:
    first_image, first_label = image_batch[0], int(label_batch[0])

    print('first image to predict')
    plt.imshow(first_image)
    print('actual label:', class_names[first_label])

    # The model expects a batch, so prepend an axis of size 1 to the single image
    batch_prediction = model.predict(first_image[np.newaxis, ...])
    print('predicted label:', class_names[np.argmax(batch_prediction[0])])

    break

### Write a function for inference

In [None]:
import numpy as np
def predict(model, img, labels=None):
    """Classify a single image and report the confidence of the prediction.

    Args:
        model: Object exposing ``predict(batch)`` that returns one row of
            class probabilities per input image.
        img: Numpy array for one image, scaled between 0 and 1 (as produced
            by ImageDataGenerator).
        labels: Optional sequence mapping class index to class name. Defaults
            to the notebook-level ``class_names``.

    Returns:
        Tuple ``(predicted_class, confidence)`` where ``confidence`` is a
        Python float percentage rounded to two decimals.
    """
    if labels is None:
        # Fall back to the notebook-level class list for existing callers
        labels = class_names

    # The model works on batches, so wrap the image in a batch of size 1
    img_array = np.expand_dims(img, axis=0)
    probabilities = model.predict(img_array)[0]

    predicted_class = labels[int(np.argmax(probabilities))]
    # Convert to a built-in float before rounding so the result is a plain
    # Python number (JSON-serialisable) rather than a NumPy scalar
    confidence = round(100 * float(np.max(probabilities)), 2)
    return predicted_class, confidence

**Now run inference on a few sample images**

In [None]:
# Show a 3x3 grid of test images with their actual and predicted classes
plt.figure(figsize=(15, 15))
for images, labels in test_generator:
    # Plot at most 9 images; slicing keeps this safe when the batch holds
    # fewer than 9 images (a fixed range(9) would raise IndexError).
    for i, (image, label) in enumerate(zip(images[:9], labels[:9])):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(image)

        predicted_class, confidence = predict(model, image)
        actual_class = class_names[int(label)]

        plt.title(f"Actual: {actual_class},\n Predicted: {predicted_class}.\n Confidence: {confidence}%")

        plt.axis("off")
    # Only the first batch is needed for the grid
    break

### Saving the Model

Save the model in H5 format so that it is stored as a single file that can be conveniently uploaded to GCP.

In [None]:
# Save model to the project's saved_models so the API can load it
model.save(str(SAVED_H5))
print('Saved model to', SAVED_H5)