# Bean Disease Image Classification

This tutorial is adapted from the one [available at meritocracy.is](https://meritocracy.is/blog/2020/06/24/object-classification-in-tensorflow/).

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

# Report the library versions in use, one per line.
print(
    f"TensorFlow version: {tf.__version__}",
    f"TensorFlow-Datasets version: {tfds.__version__}",
    sep="\n",
)
# Count the GPU devices TensorFlow can see; 0 means everything runs on the CPU.
n_gpus = len(tf.config.experimental.list_physical_devices('GPU'))
print(f"Number of available GPUs: {n_gpus}")

In [None]:
# Load the train/validation/test splits of the TFDS "beans" dataset.
# as_supervised=True makes every element an (image, label) pair, and
# with_info=True additionally returns the dataset metadata object as `info`.
(train_data, val_data, test_data), info = tfds.load(
    name='beans',
    split=['train', 'validation', 'test'],
    with_info=True,
    as_supervised=True,
)

In [None]:
# Read the first four (image, label) pairs in a single pass over the dataset so
# each image is guaranteed to stay paired with its own label (and the data is
# only read once).
sample_pairs = list(train_data.take(4))
sample_images = [image for image, _ in sample_pairs]
sample_labels = tfds.as_numpy([label for _, label in sample_pairs])

# Human-readable names for the integer class labels.
label_dict = {0: 'angular leaf spot', 1: 'bean rust', 2: 'healthy'}

# Show the four samples side by side, each captioned with its class name.
fig, axs = plt.subplots(1, 4, figsize=(12, 4))
for ax, image, label in zip(axs, sample_images, sample_labels):
    ax.imshow(image)
    ax.set_xlabel(label_dict[int(label)])
plt.tight_layout()
# Use plt.show() rather than fig.show(): Figure.show() only emits a warning on
# non-GUI backends such as the notebook inline backend.
plt.show()

### Preprocess data set

In [None]:
# Spatial size (in pixels) of the images fed to the network.
height, width = 500, 500

# Number of colour channels per image.
n_channels = 3

# Number of target classes.
n_classes = 3

# Number of examples per batch in the input pipelines.
batch_size = 32

def normalize_image(image, label, target_height = 500, target_width = 500):
    """
    Prepare one (image, label) example for training.

    The image is cast to float32 and divided by 255, then cropped and/or
    padded to ``target_height`` x ``target_width``. The content is never
    rescaled, so the aspect ratio is preserved and nothing is distorted.

    Args:
        image: Image tensor (expected to hold uint8 pixel values).
        label: Label belonging to the image; passed through unchanged.
        target_height: Output height in pixels.
        target_width: Output width in pixels.

    Returns:
        A ``(image, label)`` tuple with the processed float32 image.
    """
    scaled = tf.cast(image, tf.float32)/255.
    fitted = tf.image.resize_with_crop_or_pad(scaled, target_height, target_width)
    return fitted, label

In [None]:
# Training input pipeline:
#   map      -> preprocess every example in parallel
#   cache    -> keep the preprocessed examples so the map runs only once
#   shuffle  -> buffer as large as the whole training split
#   batch    -> group examples into batches of `batch_size`
#   prefetch -> prepare upcoming batches while the current one is consumed
train_data = (
    train_data
    .map(normalize_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .cache()
    .shuffle(info.splits['train'].num_examples)
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
# Validation input pipeline: preprocess, batch, cache the finished batches and
# prefetch. No shuffling is applied to this split.
val_data = (
    val_data
    .map(normalize_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(batch_size)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
# Test input pipeline: preprocess, batch, cache the finished batches and
# prefetch. No shuffling is applied to this split.
test_data = (
    test_data
    .map(normalize_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(batch_size)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)

### Build classifier

In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

# hyperparameters

# Convolution layers: number of filters, kernel size and stride.
n_filters = 15
filter_size = 10
strides = 2

# Max-pooling window size.
pool_size = 4

# Number of units in the fully connected hidden layer.
fc_output = 128

# Dropout rate applied after each pooling layer.
drop_prob = 0.25

# Learning rate handed to the optimizer.
lr = 0.001

In [None]:
# Small CNN: two (convolution -> max-pooling -> dropout) stages followed by a
# fully connected classification head.
model = tf.keras.models.Sequential([
    # Stage 1. The input shape uses the declared n_channels constant instead of
    # a hard-coded 3 so the image geometry is defined in one place only.
    Conv2D(n_filters, filter_size, input_shape=(height, width, n_channels), strides=strides, padding='same', activation='relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(drop_prob),
    # Stage 2.
    Conv2D(n_filters, filter_size, strides=strides, padding='same', activation='relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(drop_prob),
    # Classification head: flatten the feature maps, one hidden dense layer,
    # then a softmax over the n_classes outputs.
    Flatten(),
    Dense(fc_output, activation='relu'),
    Dense(n_classes, activation='softmax'),
])

# Print a layer-by-layer overview of the network.
model.summary()

In [None]:
# Configure training: Adam optimizer with learning rate `lr`, sparse
# categorical cross-entropy as the loss (labels are integer class ids), and
# accuracy as the reported metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

This tutorial only trains the model for five epochs for the sake of simplicity. To achieve higher accuracy, train the model for more epochs and consider upgrading your Galileo account to include GPU capabilities.

In [None]:
# Number of passes over the training data.
n_epochs = 5

# Time the training run with perf_counter(): it is a monotonic clock intended
# for measuring durations, so system clock adjustments cannot skew the result.
# `start` and `end` are in seconds; only their difference is meaningful.
start = time.perf_counter()
history = model.fit(
    train_data,
    epochs=n_epochs,
    validation_data=val_data
)
end = time.perf_counter()

In [None]:
# Convert the measured duration from seconds to minutes. One decimal place is
# kept so that short runs are not reported as "0 minutes".
elapsed = round((end - start) / 60, 1)
print(f"Training time: {elapsed} minutes")

### Visualize training history

In [None]:
# Per-epoch metrics recorded during model.fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Build the x-axis from the number of epochs actually recorded rather than
# from n_epochs, so the plot still works if training stopped early or
# n_epochs was changed after the model was fitted.
epochs_range = list(range(1, len(acc) + 1))

plt.figure(figsize=(12, 6))

# Left panel: accuracy curves.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label="Training Accuracy")
plt.plot(epochs_range, val_acc, label="Validation Accuracy")
plt.legend(loc='lower right')
plt.title("Training and Validation Accuracy")

# Right panel: loss curves.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

# Save before showing so the written file contains the drawn figure.
plt.savefig("training_validation_history.png")
plt.show()

### Evaluate model performance

In [None]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the metrics configured in compile() (here: accuracy).
test_loss, test_acc = model.evaluate(x=test_data)
print(f"Final test accuracy: {test_acc}")