In [1]:
import os
# Set in its own cell, ahead of the cell that imports TensorFlow, so the variable is
# already present in the environment at import time. Presumably '3' silences
# TensorFlow's native (C++) log output -- TODO confirm the intended verbosity level.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [10]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow_hub as hub
import io

from tqdm.notebook import tqdm
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten, Activation, BatchNormalization, Rescaling
from keras.layers import RandomContrast, RandomZoom, RandomFlip, RandomRotation, RandomTranslation, RandomCrop, RandomBrightness
from keras.optimizers import Adam, RMSprop, SGD
from keras.models import Sequential
from keras.losses import SparseCategoricalCrossentropy
from keras.callbacks import ModelCheckpoint, LambdaCallback
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from google.colab import files

# Shorthand for tf.data's autotune sentinel; passed to `prefetch()` in later cells.
AUTOTUNE = tf.data.AUTOTUNE
# Print NumPy floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)

#### Loading Dataset


In [9]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="QzWRLvI8sgZh0N2qBYaC")
project = rf.workspace("ignatius-pandu-adityawan-m038dsx0594-rzdje").project("paddysicknessclassification")
dataset = project.version(3).download("multiclass")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
loading Roboflow workspace...
loading Roboflow project...
Downloading Dataset Version Zip in PaddySicknessClassification-3 to multiclass: 98% [589725696 / 596210925] bytes

Extracting Dataset Version Zip to PaddySicknessClassification-3 in multiclass:: 100%|██████████| 8297/8297 [00:05<00:00, 1483.80it/s]


In [16]:
# Build the {class index: class name} lookup from the header of the training label CSV.
df = pd.read_csv("/content/PaddySicknessClassification-3/train/_classes.csv")
dict_disease = {}

for number, name in enumerate(df.columns):
    if name == 'filename':
        continue                                # skip the 1st column (image file name)
    # Header names may carry surrounding whitespace, hence the strip();
    # `number - 1` re-bases the index because column 0 is 'filename'.
    dict_disease[number-1] = name.strip()

# Drop the placeholder class. The previous filter compared the integer *keys*
# against the string 'Unlabeled', so it could never remove anything; the class
# *name* is what has to be checked.
dict_disease = {key: name for key, name in dict_disease.items() if name != 'Unlabeled'}

# Edge length (pixels) every image is resized to before being fed to the model.
img_size = 224

In [27]:
def loadImg(filenames, labels, size, resize, rescale):
    """Load images from disk, optionally resizing and rescaling their pixels.

    Args:
        filenames: Iterable of image file paths.
        labels: Array of labels whose first axis lines up with ``filenames``.
        size: Target edge length in pixels; only used when ``resize`` is truthy.
        resize: When truthy, every image is resized to ``(size, size)``.
        rescale: Pixel post-processing mode:
            ``1``  -> divide by 255,
            ``2``  -> ``(x / 127.5) - 1``,
            ``0``  -> cast to ``uint8`` (``False`` compares equal to 0 and
                      therefore takes this branch as well),
            anything else -> leave the float32 values untouched.

    Returns:
        ``(images, labels)`` where ``images`` is a list with one tensor per
        successfully loaded file and ``labels`` has the rows of unreadable
        files removed (its remaining dimensions are preserved).
    """
    images = []
    skipped_rows = []
    for i, name in enumerate(nameMsg := tqdm(filenames)):
        nameMsg.set_postfix_str(name)
        try:
            img = tf.keras.preprocessing.image.load_img(name)
        except Exception as err:
            # Best-effort loading: remember the row and move on. `Exception`
            # (instead of a bare except) lets KeyboardInterrupt through.
            print(f"Skipping unreadable image {name}: {err}")
            skipped_rows.append(i)
            continue
        img = tf.cast(img, tf.float32)

        if resize:
            img = tf.image.resize(img, (size, size))

        if rescale == 1:
            img = img / 255
        elif rescale == 2:
            img = (img / 127.5) - 1
        elif rescale == 0:
            img = tf.cast(img, tf.uint8)

        images.append(img)

    if skipped_rows:
        # Remove all failed rows in ONE call using the original indices.
        # Deleting inside the loop shifted later indices, and omitting `axis`
        # flattened the (N, 1) label array.
        labels = np.delete(labels, skipped_rows, axis=0)
    return images, labels


def parseData(filename, delimiter=',', size=False, resize=False, rescale=1, return_format='dataset'):
    """Read one dataset split (a folder with images and a ``_classes.csv`` file).

    Args:
        filename: Directory of the split; it must contain ``_classes.csv`` and
            the image files listed in that CSV's ``filename`` column.
        delimiter: Field delimiter of the CSV file.
        size: Target edge length in pixels; required when ``resize`` is truthy.
        resize: When truthy, images are resized to ``(size, size)``.
        rescale: Pixel post-processing mode forwarded to ``loadImg``.
        return_format: ``'dataset'`` to get a ``tf.data.Dataset`` of
            ``(image, label)`` pairs, ``'split'`` to get ``(images, labels)``.
            Matching is case-insensitive.

    Returns:
        A ``tf.data.Dataset`` or an ``(images, labels)`` tuple, depending on
        ``return_format``. ``labels`` has shape ``(n, 1)``.

    Raises:
        Exception: If ``resize`` is requested without ``size`` or if
            ``return_format`` is not one of the supported values.
    """
    if resize and not size:
        raise Exception('Size must be specified when resize is true.')

    # Normalise once so validation and dispatch agree; previously 'Split' passed
    # validation but failed at the very end, after every image had been loaded.
    return_format = return_format.lower()
    if return_format not in ('dataset', 'split'):
        raise Exception("Return format unspecified.")

    # os.path.join for the CSV as well, so the directory works with or without
    # a trailing separator (the image paths below were already joined this way).
    loaded_dataset = pd.read_csv(os.path.join(str(filename), "_classes.csv"), delimiter=delimiter)
    img_dir = [os.path.join(filename, img_path) for img_path in loaded_dataset.pop('filename')]

    # The remaining columns are per-class indicator columns: take the column
    # holding the row maximum and encode its (stripped) name as a category code.
    # NOTE(review): the codes are derived from the classes present in THIS CSV,
    # so a split that lacks a class would be numbered differently -- confirm
    # that every split contains every class.
    labels = np.array(loaded_dataset.idxmax(axis=1).str.strip().astype(
        'category').cat.codes).reshape(-1, 1)

    images, labels = loadImg(img_dir, labels, size, resize, rescale)

    if return_format == 'split':
        # Skip building the Dataset: it would copy every image a second time.
        return images, labels
    return tf.data.Dataset.from_tensor_slices((images, labels))


def confusionMatrix(epoch, logs):
    """Keras ``on_epoch_end`` hook: save a confusion-matrix image for this epoch.

    Predictions are computed with the notebook-level ``model`` on
    ``test_images`` and compared against ``test_labels``; the figure is written
    to ``"Epoch {epoch}.jpg"`` in the working directory.

    Args:
        epoch: Epoch index supplied by Keras.
        logs: Metrics dictionary supplied by Keras; ``'val_accuracy'`` is read
            for the title when available.
    """
    yhat = model.predict(test_images)
    yhat = np.argmax(yhat, axis=1)
    cm = confusion_matrix(test_labels, yhat)

    val_accuracy = (logs or {}).get('val_accuracy')
    # Avoid a formatting TypeError when the metric is absent from `logs`.
    val_accuracy_text = f"{val_accuracy:.2f}" if val_accuracy is not None else "n/a"

    fig, ax = plt.subplots(figsize=(6,6))
    ax.matshow(cm)
    # np.ndenumerate yields (row, col) while text() expects (x, y) = (col, row);
    # passing them unswapped drew every count in the transposed cell.
    for (row, col), value in np.ndenumerate(cm):
        ax.text(col, row, f"{value:.2f}", va="center", ha="center", color='white')
    ax.set_title(f"Confusion Matrix on epoch {epoch}\nVal accuracy: {val_accuracy_text}")
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.xaxis.set_ticks_position('bottom')
    fig.savefig(f"Epoch {epoch}.jpg")
    # One figure is created per epoch; release it once saved so long trainings
    # do not accumulate open figures in memory.
    plt.close(fig)

In [28]:
# `rescale` modes understood by the loader: 1 normalises pixels to [0, 1],
# 2 normalises to [-1, 1], 0 keeps displayable uint8 images, any other value
# leaves the pixels unprocessed. `False` compares equal to 0, so the images
# loaded here stay uint8.
with tf.device('/cpu:0'):
    train_dataset, valid_dataset = [
        parseData(split_dir, size=img_size, resize=True, rescale=False)
        for split_dir in (
            "/content/PaddySicknessClassification-3/train/",
            "/content/PaddySicknessClassification-3/valid/",
        )
    ]

  0%|          | 0/6308 [00:00<?, ?it/s]

  0%|          | 0/1194 [00:00<?, ?it/s]

In [29]:
# The test split is loaded as raw (images, labels) so the labels stay available
# for the confusion matrix; the images alone are wrapped in a batched pipeline.
# `rescale` modes: 1 -> [0, 1], 2 -> [-1, 1], 0 -> displayable uint8 images,
# anything else -> unprocessed (`False` compares equal to 0).
with tf.device('/cpu:0'):
    test_images_data, test_labels = parseData(
        "/content/PaddySicknessClassification-3/test/",
        size=img_size,
        resize=True,
        rescale=False,
        return_format='split',
    )

    test_images = (
        tf.data.Dataset.from_tensor_slices(test_images_data)
        .cache()
        .batch(32)
        .prefetch(AUTOTUNE)
    )

  0%|          | 0/787 [00:00<?, ?it/s]

In [30]:
# Training pipeline: cache (to a file named 'train_dataset'), shuffle with a
# 128-element buffer, batch by 32 and prefetch.
train = (
    train_dataset
    .cache('train_dataset')
    .shuffle(128)
    .batch(32)
    .prefetch(AUTOTUNE)
)

# Validation pipeline: cached in memory, no shuffling.
valid = (
    valid_dataset
    .cache()
    .batch(32)
    .prefetch(AUTOTUNE)
)

#### Buat Model


In [31]:
# Feature extractor fetched from TF Hub; created with trainable=False so only
# the layers stacked on top of it are trained.
base_model = hub.KerasLayer(
    handle="https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/feature_vector/5",
    trainable=False,
)

In [32]:
# Layer stack: input -> rescale to [0, 1] -> random augmentations -> frozen
# feature extractor -> softmax classifier with one unit per entry in dict_disease.
# (Use Rescaling(1/127.5, offset=-1) instead to feed the range [-1, 1].)
model_layers = [
    Input(shape=(img_size, img_size, 3)),
    Rescaling(scale=1/255),
    RandomFlip(),
    RandomRotation(factor=0.3),
    RandomZoom(height_factor=(-0.1, 0.1)),
    RandomBrightness(factor=(-0.1,0.1), value_range=[0,1]),
    base_model,
    Dense(len(dict_disease), activation='softmax'),
]
model = Sequential(model_layers)

# Labels are integer class indices, hence the sparse categorical loss.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Keep only the checkpoint with the highest validation accuracy seen so far.
bestCB = ModelCheckpoint(
    filepath='./checkpoint/best/',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Save a confusion-matrix image at the end of every epoch.
cmCB = LambdaCallback(on_epoch_end=confusionMatrix)

model.build([None, img_size, img_size, 3])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_1 (Rescaling)     (None, 224, 224, 3)       0         
                                                                 
 random_flip_1 (RandomFlip)  (None, 224, 224, 3)       0         
                                                                 
 random_rotation_1 (RandomRo  (None, 224, 224, 3)      0         
 tation)                                                         
                                                                 
 random_zoom_1 (RandomZoom)  (None, 224, 224, 3)       0         
                                                                 
 random_brightness_1 (Random  (None, 224, 224, 3)      0         
 Brightness)                                                     
                                                                 
 keras_layer_1 (KerasLayer)  (None, 1024)             

In [None]:
# Train on the first GPU for 100 epochs, checkpointing the best model and
# saving a confusion matrix after every epoch.
with tf.device('/gpu:0'):
    history = model.fit(
        x=train,
        epochs=100,
        validation_data=valid,
        callbacks=[bestCB, cmCB],
    )

Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.48157, saving model to ./checkpoint/best/
Epoch 2/100
Epoch 2: val_accuracy improved from 0.48157 to 0.60469, saving model to ./checkpoint/best/
Epoch 3/100
Epoch 3: val_accuracy improved from 0.60469 to 0.64070, saving model to ./checkpoint/best/
Epoch 4/100
Epoch 4: val_accuracy improved from 0.64070 to 0.65745, saving model to ./checkpoint/best/
Epoch 5/100
Epoch 5: val_accuracy improved from 0.65745 to 0.67755, saving model to ./checkpoint/best/
Epoch 6/100
Epoch 6: val_accuracy improved from 0.67755 to 0.69095, saving model to ./checkpoint/best/
Epoch 7/100
Epoch 7: val_accuracy improved from 0.69095 to 0.70436, saving model to ./checkpoint/best/
Epoch 8/100
Epoch 8: val_accuracy improved from 0.70436 to 0.71524, saving model to ./checkpoint/best/
Epoch 9/100
Epoch 9: val_accuracy improved from 0.71524 to 0.72446, saving model to ./checkpoint/best/
Epoch 10/100
Epoch 10: val_accuracy improved from 0.72446 to 0.73367, saving

In [None]:
# Persist the model as it is after the last executed training epoch.
model.save(filepath="./checkpoint/latest/")

In [None]:
# Per-epoch training / validation curves recorded by model.fit().
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

epochs = history.epoch

# One figure per metric: (train curve, validation curve, labels, y-axis, title).
for train_curve, valid_curve, train_label, valid_label, y_label, title in (
    (acc, val_acc, 'Train Accuracy', 'Validation Accuracy', "Accuracy", "Model's Accuracy"),
    (loss, val_loss, 'Train Loss', 'Validation Loss', "Loss", "Model's Loss"),
):
    plt.plot(epochs, train_curve, label=train_label)
    plt.plot(epochs, valid_curve, label=valid_label)
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.show()

#### Evaluasi Model


In [None]:
# Evaluate with the best checkpoint (written by the ModelCheckpoint callback).
# To evaluate the model from the last training iteration instead, use:
# model = tf.keras.models.load_model("./checkpoint/latest/")
model = tf.keras.models.load_model(filepath="./checkpoint/best/")

In [None]:
# Overall metrics on the held-out test split.
test_dataset = tf.data.Dataset.from_tensor_slices((test_images_data, test_labels)).cache().batch(32).prefetch(AUTOTUNE)
eval_res = model.evaluate(test_dataset, verbose=0)
print(f"Test Dataset\nAccuracy: {eval_res[1]*100:.3f}%\nLoss: {eval_res[0]:.3f}")

topK = 3 # Number of highest-scoring categories shown to the user
for image, label in test_dataset.unbatch().shuffle(128).take(5):
    plt.imshow(tf.cast(image, tf.uint8))
    plt.show()

    # Feed the image exactly as model.evaluate() received it. The model built
    # in this notebook starts with Rescaling(1/255), so dividing by 255 here as
    # well would rescale the pixels twice and distort the predictions.
    batch           = tf.expand_dims(image, axis=0)
    yhat            = model.predict(batch)
    # argsort gives the top-k indices ordered from most to least probable
    # (argpartition leaves them unordered) and honours `topK` instead of a
    # hardcoded 3.
    yhat_topk       = np.argsort(-yhat[0])[:topK]
    # ravel() accepts both (1,)-shaped and scalar label tensors.
    true_label      = dict_disease[int(np.ravel(label.numpy())[0])]
    prob_res        = []
    disease_types   = []

    for disease in yhat_topk:
        disease_types.append(dict_disease[disease])
        prob_res.append(yhat[0][disease])

    print(f"Label asli: {true_label}.")
    for index_disease, disease in enumerate(disease_types):
        res = "benar" if true_label == disease else "salah"
        print(f"Prediksi {index_disease+1} adalah: {disease} ({res}) dengan probabilitas {prob_res[index_disease] * 100:.2f}%.")