### Objectives
---
**Task**
* to classify different types of rice with a Convolutional Neural Network
* to implement transfer learning with a pre-trained model, MobileNetV2

**Result**
* achieved 99% accuracy on the test set
* reduced the remaining 1% error by fine-tuning the model

### Libraries
---

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.random import set_seed
from tensorflow.keras.utils import load_img, img_to_array

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import classification_report

### Exploratory Data Analysis
---

In [None]:
# Root directory of the image dataset.
PTH = "/kaggle/input/rice-image-dataset/Rice_Image_Dataset/"

# Walk the directory tree and report how many ".jpg" files each folder holds;
# folders without any ".jpg" file are not reported.
for folder, _subdirs, filenames in os.walk(PTH):
    jpg_count = len([name for name in filenames if name.endswith(".jpg")])
    if jpg_count > 0:
        print(f"{jpg_count:,} images are found in '{folder}'.")

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>images are well-balanced per class</li>
        <li>each rice type has 15,000 images, which is more than enough data, so no data augmentation is applied</li>
    </ul>
</div>

In [None]:
# Shared loading configuration.
BATCH_SIZE = 32
IMG_SIZE = (224, 224)
SEED = 42

# One generator serves both subsets: it multiplies pixel values by 1/255 and
# reserves 20% of the images for the "validation" subset.
img_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Options common to the train and test iterators.
flow_kwargs = dict(
    directory=PTH,
    batch_size=BATCH_SIZE,
    target_size=IMG_SIZE,
    class_mode="categorical",
    seed=SEED,
)

data_train = img_gen.flow_from_directory(subset="training", **flow_kwargs)

# The test iterator is not shuffled so that its batch order stays aligned
# with `data_test.classes` / `data_test.filenames` at prediction time.
data_test = img_gen.flow_from_directory(
    subset="validation",
    shuffle=False,
    **flow_kwargs,
)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>8:2 split for train and test set</li>
        <li>omit a separate development (validation) set for simplicity</li>
    </ul>
</div>

In [None]:
def view_random_images(data, title, nr=2, nc=3, seed=SEED):
    """Plot a random sample of images taken from the next batch of *data*.

    Args:
        data: Batch iterator yielding ``(images, one_hot_labels)`` and
            exposing ``class_indices`` (e.g. the object returned by
            ``flow_from_directory``). One batch is pulled from it.
        title: Figure-level title.
        nr: Number of rows in the image grid.
        nc: Number of columns in the image grid.
        seed: Seed for TensorFlow and for the NumPy sampler that picks
            which images of the batch are shown.

    Returns:
        None. The figure is drawn with matplotlib.
    """
    set_seed(seed)
    # `set_seed` only seeds TensorFlow; the sample below is drawn by NumPy,
    # so it needs its own seeded generator to be reproducible.
    rng = np.random.default_rng(seed)

    # `next(data)` works on iterators that no longer expose `.next()`.
    img, lbl = next(data)
    class_names = list(data.class_indices.keys())

    # Never request more images than the batch contains (the last batch of
    # an epoch can be smaller than the grid).
    n_show = min(nr * nc, len(lbl))
    smp = rng.choice(len(lbl), n_show, replace=False)

    plt.figure(figsize=(6, 3))
    for pos, idx in enumerate(smp):
        yi = class_names[np.argmax(lbl[idx])]
        title_sub = f"Label: {yi}"
        plt.subplot(nr, nc, pos + 1)
        plt.imshow(img[idx], cmap="binary")
        plt.title(title_sub, fontsize=8)
        plt.axis("off")

    plt.suptitle(title, fontsize=12)
    plt.tight_layout()

    return None

In [None]:
# Preview a few labelled images from one training batch.
view_random_images(data_train, "Sample Images from Train Set")

In [None]:
# Preview a few labelled images from one test batch.
view_random_images(data_test, "Sample Images from Test Set")

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>not an expert in rice varieties, but the grain shapes clearly differ from one variety to another</li>
        <li>having a single grain of rice per image undoubtedly makes learning much easier</li>
    </ul>
</div>

### Model Implementation
---

In [None]:
# Input shape is the image size plus three colour channels.
IMG_SHAPE = IMG_SIZE + (3,)
N_CLASSES = data_train.num_classes

# Pre-trained backbone without its classification head; frozen so that only
# the new head is trained at first.
base_model = MobileNetV2(
    input_shape=IMG_SHAPE,
    include_top=False,
    weights="imagenet",
)
base_model.trainable = False

# NOTE(review): Keras documents MobileNetV2 as expecting inputs scaled to
# [-1, 1] (mobilenet_v2.preprocess_input), while the generators feeding this
# model rescale by 1/255 -- confirm whether the scaling should be aligned.
inputs = Input(shape=IMG_SHAPE)
# training=False keeps the backbone's BatchNormalization layers in inference
# mode, so their moving statistics are not overwritten when the backbone is
# unfrozen later for fine-tuning.
x = base_model(inputs, training=False)
x = GlobalAveragePooling2D()(x)
x = Dropout(rate=0.2)(x)
outputs = Dense(units=N_CLASSES, activation="softmax")(x)

model = Model(inputs=inputs, outputs=outputs)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>apply transfer learning with MobileNetV2</li>
        <li>add a dropout layer before the output layer to reduce overfitting</li>
    </ul>
</div>

In [None]:
# Fix TensorFlow's global seed before the first training run.
set_seed(SEED)

# Hyper-parameters of the first training run.
LEARNING_RATE = 0.001
EPOCHS = 3

adam = Adam(learning_rate=LEARNING_RATE)
model.compile(
    loss="categorical_crossentropy",
    optimizer=adam,
    metrics=["accuracy"],
)

# Each epoch goes through every batch of the train and test iterators once.
fit_options = dict(
    epochs=EPOCHS,
    steps_per_epoch=len(data_train),
    validation_data=data_test,
    validation_steps=len(data_test),
)
history = model.fit(data_train, **fit_options)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>due to limited computing resource, only train for 3 epochs</li>
        <li>high-quality images indeed make learning easier</li>
    </ul>
</div>

### Model Evaluation
---

In [None]:
def plot_loss_curves(hist, init_epochs=0):
    """Plot loss and accuracy histories as two stacked panels of one figure.

    Args:
        hist: Mapping with the keys ``"loss"``, ``"val_loss"``,
            ``"accuracy"`` and ``"val_accuracy"``, each a per-epoch sequence.
        init_epochs: Number of epochs run before fine-tuning. When non-zero,
            a vertical marker is drawn at ``init_epochs - 1``, the index of
            the last epoch before fine-tuning.

    Returns:
        None. The figure is drawn with matplotlib.
    """
    marker_x = [init_epochs - 1, init_epochs - 1]

    # A single figure holds both panels; opening a second figure for the
    # accuracy panel would leave two half-empty figures.
    _, (ax_loss, ax_acc) = plt.subplots(2, 1, figsize=(6, 6))

    ax_loss.plot(hist["loss"], label="Loss (train)")
    ax_loss.plot(hist["val_loss"], label="Loss (test)")
    if init_epochs:
        ax_loss.plot(
            marker_x,
            [0, max(ax_loss.get_ylim())],
            label="Start fine-tuning",
        )
    ax_loss.legend(loc="upper right")
    ax_loss.set_ylabel("Cross Entropy")
    ax_loss.set_ylim([0, max(ax_loss.get_ylim())])
    ax_loss.set_title("Loss History")

    ax_acc.plot(hist["accuracy"], label="Accuracy (train)")
    ax_acc.plot(hist["val_accuracy"], label="Accuracy (test)")
    if init_epochs:
        ax_acc.plot(
            marker_x,
            [min(ax_acc.get_ylim()), 1],
            label="Start fine-tuning",
        )
    ax_acc.legend(loc="lower right")
    ax_acc.set_ylabel("Accuracy")
    ax_acc.set_ylim([min(ax_acc.get_ylim()), 1])
    ax_acc.set_xlabel("Epoch")
    ax_acc.set_title("Accuracy History")

    # Keep the upper panel's title from colliding with the lower panel.
    plt.tight_layout()

    return None

In [None]:
# Plot the loss/accuracy curves of the first training run.
plot_loss_curves(history.history)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>curves in train and test set are close to each other</li>
        <li>almost perfect fit with the first training</li>
    </ul>
</div>

In [None]:
def get_prediction(model, data=data_test):
    """Run *model* on *data* and collect everything needed for evaluation.

    NOTE: assumes *data* was created with ``shuffle=False`` so that the
    prediction order matches ``data.classes`` and ``data.filenames``.

    Args:
        model: Model exposing ``predict``.
        data: Directory iterator exposing ``filenames``, ``class_indices``
            and ``classes``. Defaults to the test iterator.

    Returns:
        A tuple ``(img_pth, lbl, y, y_pr, y_hat)``: image paths under
        ``PTH``, class names, true class indices, predicted probabilities
        and predicted class indices.
    """
    # os.path.join avoids the doubled separator that plain string
    # concatenation produces when PTH already ends with "/".
    img_pth = [os.path.join(PTH, file) for file in data.filenames]
    lbl = list(data.class_indices.keys())
    y = data.classes
    y_pr = model.predict(data)
    y_hat = np.argmax(y_pr, axis=1)

    return img_pth, lbl, y, y_pr, y_hat

In [None]:
def show_classification_summary(y, y_hat, lbl):
    """Print scikit-learn's classification report for the predictions.

    Args:
        y: True class indices.
        y_hat: Predicted class indices.
        lbl: Class names, passed to the report as ``target_names``.

    Returns:
        None. The report is printed.
    """
    print(classification_report(y, y_hat, target_names=lbl))

    return None

In [None]:
def show_confusion_matrix(y, y_hat, lbl):
    """Display a styled confusion matrix of actual vs. predicted classes.

    Args:
        y: True class indices.
        y_hat: Predicted class indices.
        lbl: Class names; position ``i`` names class index ``i``.

    Returns:
        None. The styled table is rendered with ``display``.
    """
    # Translate class indices into class names for readable axes.
    idx_to_name = dict(enumerate(lbl))
    actual = pd.Series(y, name="actual").map(idx_to_name)
    predicted = pd.Series(y_hat, name="predicted").map(idx_to_name)

    styled = pd.crosstab(actual, predicted).style
    styled = styled.background_gradient("binary")
    styled = styled.set_properties(**{"width": "8em", "text-align": "center"})
    styled = styled.set_table_styles(
        [dict(selector="th", props=[("text-align", "center")])]
    )
    display(styled)

    return None

In [None]:
def get_image(pth, img_size=IMG_SIZE):
    """Load the image at *pth*, resize it and divide its values by 255.

    Args:
        pth: Path of the image file.
        img_size: Target size passed to ``load_img``.

    Returns:
        The image as an array divided by 255.
    """
    pil_img = load_img(pth, target_size=img_size)

    return img_to_array(pil_img) / 255.

In [None]:
def view_random_predictions(title, pth, nr=2, nc=3, seed=SEED):
    """Plot random test images with their actual and predicted labels.

    Reads the module-level names ``y``, ``y_hat``, ``y_pr`` and ``lbl``.
    Titles are green for correct predictions and red for wrong ones.

    Args:
        title: Figure-level title.
        pth: Sequence of image paths, indexed like ``y``.
        nr: Number of rows in the image grid.
        nc: Number of columns in the image grid.
        seed: Seed for TensorFlow and for the NumPy sampler.

    Returns:
        None. The figure is drawn with matplotlib.
    """
    set_seed(seed)
    # `set_seed` only seeds TensorFlow; seed NumPy's sampler explicitly so
    # the same images are shown on every run.
    rng = np.random.default_rng(seed)

    # Never request more images than are available.
    n_show = min(nr * nc, len(y))
    smp = rng.choice(len(y), n_show, replace=False)

    plt.figure(figsize=(6, 3))
    for pos, idx in enumerate(smp):
        img = get_image(pth[idx])
        yi = lbl[y[idx]]
        yi_hat = lbl[y_hat[idx]]
        yi_pr = y_pr[idx].max()
        tit_sub = f"Actual: {yi}\nPrediction: {yi_hat} ({yi_pr:.1%})"
        col = "green" if yi_hat == yi else "red"

        plt.subplot(nr, nc, pos + 1)
        plt.imshow(img, cmap="binary")
        plt.title(tit_sub, fontsize=8, color=col)
        plt.axis("off")

    plt.suptitle(title, fontsize=12)
    plt.tight_layout()

    return None

In [None]:
# Predict on the test set; the results are kept as module-level names that
# the plotting helpers below read directly.
img_pth, lbl, y, y_pr, y_hat = get_prediction(model)

In [None]:
# Print the per-class classification report for the test predictions.
show_classification_summary(y, y_hat, lbl)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>achieved 99% accuracy on test set</li>
        <li>other metrics also close to 1</li>
    </ul>
</div>

In [None]:
# Show actual vs. predicted class counts for the test predictions.
show_confusion_matrix(y, y_hat, lbl)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>majority fall into the diagonal</li>
        <li>model slightly underperforms on "Arborio", which it sometimes predicts as "Karacadag"</li>
    </ul>
</div>

In [None]:
# Show a random sample of test images with actual and predicted labels.
view_random_predictions("Sample Predictions from Test Set", img_pth)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>not only correct predictions, but also high predicted probabilities</li>
    </ul>
</div>

### Error Analysis
---

In [None]:
def view_random_false_predictions(title, pth=img_pth, nr=2, nc=3, seed=SEED):
    """Plot a random sample of misclassified test images.

    Reads the module-level names ``y``, ``y_hat``, ``y_pr`` and ``lbl``.
    Prints a message and draws nothing when every prediction is correct.

    Args:
        title: Figure-level title.
        pth: Sequence of image paths, indexed like ``y``.
        nr: Number of rows in the image grid.
        nc: Number of columns in the image grid.
        seed: Seed for TensorFlow and for the NumPy sampler.

    Returns:
        None. The figure is drawn with matplotlib.
    """
    set_seed(seed)
    # `set_seed` only seeds TensorFlow; seed NumPy's sampler explicitly so
    # the same errors are shown on every run.
    rng = np.random.default_rng(seed)

    # Positions where the predicted class differs from the actual class.
    idx_fp = np.flatnonzero(np.asarray(y) != np.asarray(y_hat))
    if idx_fp.size == 0:
        print("All predictions are valid!")
        return None

    # Show at most one grid's worth of errors.
    n_show = min(nr * nc, idx_fp.size)
    smp = rng.choice(idx_fp, n_show, replace=False)

    plt.figure(figsize=(6, 3))
    for pos, idx in enumerate(smp):
        # Use the `pth` argument rather than the global `img_pth`, so a
        # caller-supplied path list is honoured.
        img = get_image(pth[idx])
        yi = lbl[y[idx]]
        yi_hat = lbl[y_hat[idx]]
        yi_pr = y_pr[idx].max()
        tit_sub = f"Actual: {yi}\nPrediction: {yi_hat} ({yi_pr:.1%})"

        plt.subplot(nr, nc, pos + 1)
        plt.imshow(img, cmap="binary")
        plt.title(tit_sub, fontsize=8, color="red")
        plt.axis("off")

    plt.suptitle(title, fontsize=12)
    plt.tight_layout()

    return None

In [None]:
# Show a random sample of the misclassified test images.
view_random_false_predictions("Random False Predictions from Test Set")

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>high predicted probabilities on false predictions</li>
    </ul>
</div>

### Model Fine-Tuning
---

In [None]:
# Index of the first backbone layer that is fine-tuned.
FINE_TUNE_AT = 120

# The pre-trained backbone is the second layer of the functional model
# (index 0 is the input layer).
base_model = model.layers[1]
base_model.trainable = True

# Keep the first FINE_TUNE_AT backbone layers frozen so that only the later
# layers are updated. Setting them to True here would leave the whole
# backbone trainable and make FINE_TUNE_AT meaningless.
for layer in base_model.layers[:FINE_TUNE_AT]:
    layer.trainable = False

# Recompile so the changed trainable flags take effect, with a learning rate
# 100 times smaller than in the first training run.
model.compile(
    optimizer=Adam(learning_rate=0.01*LEARNING_RATE),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>fine-tune the model with very small learning rate</li>
        <li>all other settings remain unchanged</li>
    </ul>
</div>

In [None]:
set_seed(SEED)

# Number of epochs already completed in the first training run. Using the
# count (not the index of the last epoch) makes the fine-tuning epochs
# continue the numbering without repeating an epoch number.
initial_epochs = len(history.epoch)

# `epochs` is the final epoch number, so this runs EPOCHS additional epochs.
history_2 = model.fit(
    data_train,
    epochs=initial_epochs + EPOCHS,
    initial_epoch=initial_epochs,
    steps_per_epoch=len(data_train),
    validation_data=data_test,
    validation_steps=len(data_test),
)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>train with additional 3 epochs</li>
        <li>indeed outperforms the un-tuned model on accuracy</li>
    </ul>
</div>

In [None]:
# Concatenate, metric by metric, the histories of the first training run and
# of the fine-tuning run.
hist = {
    metric: values + history_2.history[metric]
    for metric, values in history.history.items()
}

# EPOCHS marks where fine-tuning started on the combined curves.
plot_loss_curves(hist, EPOCHS)

In [None]:
# Re-compute the test predictions after the second `fit` call, overwriting
# the module-level results from the first evaluation.
img_pth, lbl, y, y_pr, y_hat = get_prediction(model)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>train with additional 3 epochs</li>
        <li>indeed outperforms the un-tuned model on accuracy</li>
    </ul>
</div>

In [None]:
# Print the per-class classification report for the refreshed predictions.
show_classification_summary(y, y_hat, lbl)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>train with additional 3 epochs</li>
        <li>indeed outperforms the un-tuned model on accuracy</li>
    </ul>
</div>

In [None]:
# Show actual vs. predicted class counts for the refreshed predictions.
show_confusion_matrix(y, y_hat, lbl)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>resolved the problem of predicting "Arborio" as "Karacadag"</li>
    </ul>
</div>

In [None]:
# Show a random sample of test images with the refreshed predictions.
view_random_predictions("Sample Predictions from Test Set", img_pth)

<div style="color:dark; display:fill; border-radius:5px; background-color:#EAEAEA; font-size:16px; padding:0px 5px;">
    Comment:
    <ul>
        <li>perfect!</li>
    </ul>
</div>