## Potat Disease Classification

#### Import all the Dependencies

In [1]:
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
from IPython.display import HTML
import scipy

In [2]:
print(tf.__version__)

2.12.0


## Import data into tensorflow dataset object

#### Used splitfolders tool to split dataset into training, validation and test directories.
#### $ pip install split-folders

#### $ splitfolders --ratio 0.8 0.1 0.1 -- ./training/PlantVillage/

In [3]:
IMAGE_SIZE = 256
CHANNELS = 3

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True
)
train_generator = train_datagen.flow_from_directory(
    '../dataset/train',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse",
 #         save_to_dir="C:\\Code\\potato-disease-classification\\training\\AugmentedImages"
)

Found 1506 images belonging to 3 classes.


In [5]:
train_generator.class_indices

{'Potato___Early_blight': 0, 'Potato___Late_blight': 1, 'Potato___healthy': 2}

In [6]:
class_names = list(train_generator.class_indices.keys())
class_names

['Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy']

In [7]:
for image_batch, label_batch in train_generator:
    print(image_batch[0])
    break


[[[0.49095592 0.48703435 0.5497795 ]
  [0.4815682  0.4745091  0.53882295]
  [0.49630326 0.48453856 0.5512052 ]
  ...
  [0.6498323  0.64591074 0.716499  ]
  [0.65321887 0.6492973  0.7198855 ]
  [0.6566054  0.65268385 0.7232721 ]]

 [[0.49157164 0.48765007 0.5503952 ]
  [0.48249182 0.47604844 0.54005444]
  [0.4919931  0.4802284  0.5468951 ]
  ...
  [0.6785408  0.67461926 0.7452075 ]
  [0.67730933 0.67338777 0.743976  ]
  [0.67607784 0.6721563  0.7427445 ]]

 [[0.49218738 0.4882658  0.55101097]
  [0.48341542 0.47758776 0.5412859 ]
  [0.48768294 0.4759182  0.5425849 ]
  ...
  [0.6591392  0.65521765 0.7258059 ]
  [0.6566763  0.6527547  0.72334296]
  [0.6542133  0.65029174 0.72088   ]]

 ...

 [[0.6256553  0.6217337  0.69232196]
  [0.61980575 0.6158842  0.6864724 ]
  [0.6139563  0.6100347  0.68062294]
  ...
  [0.672628   0.67654955 0.7471378 ]
  [0.69232106 0.69624263 0.76683086]
  [0.71282136 0.71674293 0.78733116]]

 [[0.60078406 0.5968625  0.6674507 ]
  [0.6013998  0.5974782  0.66806644]


In [8]:
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True
)
validation_generator = validation_datagen.flow_from_directory(
    '../dataset/val',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse"
)

Found 215 images belonging to 3 classes.


In [9]:
test_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True
)
test_generator = test_datagen.flow_from_directory(
    '../dataset/test',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse"
)

Found 431 images belonging to 3 classes.


In [10]:
for image_batch, label_batch in test_generator:
    print(image_batch[0])
    break

[[[0.45095715 0.38429046 0.4078199 ]
  [0.51567453 0.44900784 0.47253725]
  [0.5274629  0.46079627 0.48432568]
  ...
  [0.43875483 0.3799313  0.39953914]
  [0.4644276  0.40560406 0.4252119 ]
  [0.43411535 0.37529182 0.39489967]]

 [[0.44334006 0.3766734  0.4002028 ]
  [0.46898383 0.40231714 0.42584655]
  [0.46686476 0.4001981  0.4237275 ]
  ...
  [0.44275582 0.3839323  0.40354013]
  [0.4585303  0.39970678 0.41931462]
  [0.4351016  0.37627807 0.3958859 ]]

 [[0.4570543  0.39038762 0.41391703]
  [0.447639   0.38097233 0.40450174]
  [0.43001533 0.36334866 0.38687807]
  ...
  [0.4767833  0.41795978 0.43756762]
  [0.4620002  0.40317667 0.4227845 ]
  [0.42333692 0.3645134  0.38412124]]

 ...

 [[0.70999646 0.6707808  0.67470235]
  [0.7176904  0.6784747  0.6823963 ]
  [0.72596097 0.6867453  0.69066685]
  ...
  [0.67556465 0.6324274  0.6559568 ]
  [0.6672246  0.62408733 0.64761674]
  [0.67152065 0.6283834  0.6519128 ]]

 [[0.7084326  0.6692169  0.67313844]
  [0.7144625  0.67524683 0.6791684 ]


## Building the model

In [11]:
input_shape = (IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 3

model = models.Sequential([
    layers.InputLayer(input_shape=input_shape),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64,  kernel_size = (3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64,  kernel_size = (3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
])

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 127, 127, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 62, 62, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 60, 64)        36928     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 30, 30, 64)       0

### Compiling the Model

#### We use adam Optimizer, SparseCategoricalCrossentropy for losses, accuracy as a metric

In [13]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

In [14]:
1506/32

47.0625

In [15]:
215/32

6.71875

In [None]:
history = model.fit(
    train_generator,
    steps_per_epoch=47,
    batch_size=32,
    validation_data=validation_generator,
    validation_steps=6,
    verbose=1,
    epochs=20
)

Epoch 1/20

In [None]:
scores = model.evaluate(test_generator)

In [None]:
# Scores is just a list containing loss and accuracy value
scores

### Plotting the Accuracy and Loss Curves

In [None]:
history

#### You can read documentation on history object here: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/History

In [None]:
history.params

In [None]:
history.history.keys()

In [None]:
len(history.history['loss'])

In [None]:
history.history['loss'][:5] # show loss for first 5 epochs

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
acc

In [None]:
val_acc

In [None]:
EPOCHS = 20

plt.figure(figsize=(10, 6))
plt.subplot(1, 2, 1)
plt.plot(range(EPOCHS), acc, label='Training Accuracy')
plt.plot(range(EPOCHS), val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')


plt.subplot(1, 2, 2)
plt.plot(range(EPOCHS), loss, label='Training Loss')
plt.plot(range(EPOCHS), val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

### Run prediction on sample image

In [None]:
import numpy as np

for image_batch, label_batch in test_generator:
    first_image = image_batch[0]
    first_label = label_batch[0]

    print('first image to predict')
    plt.imshow(first_image)
    print(f'actual label: {class_names[int(first_label)]}')

    batch_prediction = model.predict(image_batch)
    print(f'predicted label: {class_names[np.argmax(batch_prediction[0])]}')

    break

### Write a function for inference

In [None]:
def predict(model, img):
    img_array = tf.keras.preprocessing.image.img_to_array(images[i])
    img_array = tf.expand_dims(img_array, 0)

    predictions = model.predict(img_array)

    predicted_class = class_names[np.argmax(predictions[0])]
    confidence = round(100 * (np.max(predictions[0])), 2)
    return predicted_class, confidence

#### Now run inference on few sample images

In [None]:
plt.figure(figsize=(15, 15))
for images, labels in test_generator:
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i])

        predicted_class, confidence = predict(model, images[i])
        actual_class = class_names[int(labels[i])]

        plt.title(f"Actual: {actual_class} \n predicted:{predicted_class} \n confidence: {confidence}")
        plt.axis("off")

    break

## Saving the Model
#### Save model in h5 format so that there is just one file and we can upload that to GCP conveniently

In [None]:
model.save("../models/potatoes.h5")