# Tomato Leaf Disease: Binary Classification Code

## 1 Preparation

### Library Preparation

In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score

import tensorflow as tf
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from tensorflow.keras.metrics import Precision, Recall, F1Score, AUC
from tensorflow.keras.optimizers import Adam

import os
from IPython.display import FileLink

from dataset_builder import BinaryTomatoLeafDiseaseDataset, QuinaryTomatoLeafDiseaseDataset, MainTomatoLeafDiseaseDataset
from utils import one_hot_encode, preprocess, augment, split_data, predict
from plots import plot_confusion_matrix, plot_learning_rate, plot_training_log, dual_plot_training_log

import warnings
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'

### Directory Preparation

In [None]:
# Locations (relative to the working directory) used by the rest of the notebook.
TFDS_DIR = './tfds/'
MODEL_DIR = './models/'
LOG_DIR = './logs/'
OUTPUT_DIR = './images/'

# Published through the environment; presumably read by the dataset builder /
# tensorflow_datasets to locate the data -- confirm against dataset_builder.
os.environ['TFDS_DATA_DIR'] = TFDS_DIR

# exist_ok=True keeps the cell re-runnable without a separate existence check.
# The loop variable is deliberately not named `dir`, so the builtin of that
# name is not shadowed for the rest of the session.
for directory in (TFDS_DIR, MODEL_DIR, LOG_DIR, OUTPUT_DIR):
    os.makedirs(directory, exist_ok=True)

### Data Preparation

In [3]:
# Build the binary dataset through the project's builder class.
# download_dir=None: per the original note, nothing is downloaded here; the
# call only prepares paths.
builder = BinaryTomatoLeafDiseaseDataset()
builder.download_and_prepare(download_dir=None)

# Label metadata: class count from the 'label' feature, names from the builder.
label_feature = builder.info.features['label']
num_classes = label_feature.num_classes
class_names = builder._get_label_names()

# Only a 'train' split is requested; as_supervised=True asks for
# (image, label) pairs rather than feature dictionaries.
ds = builder.as_dataset(split='train', as_supervised=True)

# Echo the label metadata as the cell output.
(num_classes, class_names)

(2, ['diseased', 'healthy'])

### Data Splitting

In [4]:
# Partition the full dataset into train / validation / test subsets using the
# project helper `split_data`. The recorded output below shows that the helper
# also prints the split shapes and reports where the class-distribution data
# was saved.
train_ds, val_ds, test_ds = split_data(ds, is_binary=True, class_names=class_names)
# del ds  # optional: drop the unsplit dataset once it is no longer needed

(3391, 256, 256, 3) (3391, 1)

Overall Split Sizes:
Train set shape: (2373, 256, 256, 3), (2373, 1)
Validation set shape: (509, 256, 256, 3), (509, 1)
Test set shape: (509, 256, 256, 3), (509, 1)

Class distribution data has been saved to: f:\Kuliah\Pembelajaran Mesin dan Kecerdasan Buatan\RBL\SK5004_RBL_10121063_10121089


In [None]:
# Load the class-distribution table from the log directory and display it.
class_dist_path = os.path.join(LOG_DIR, 'class_distribution_binary.csv')
class_dist = pd.read_csv(class_dist_path)
class_dist

### Data Preprocessing

In [None]:
# All three splits go through `preprocess`; `augment` is chained onto the
# training split only, so validation and test data are never augmented.
train_ds = (
    train_ds
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .map(augment, num_parallel_calls=tf.data.AUTOTUNE)
)
val_ds = val_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
# Preview a 4x4 grid of augmented training samples.
#
# At this point `train_ds` has not been batched yet (batching happens in the
# "Data Shuffling, Batching, and Prefetching" cell), so every dataset element
# is a single (image, label) pair. Taking 16 elements therefore yields 16
# images; taking one element and indexing into it would slice rows of a single
# image instead.
print('Visualizing augmented training images:')
plt.figure(figsize=(12, 12))
for i, (image, label) in enumerate(train_ds.take(16)):  # up to 16 samples
    plt.subplot(4, 4, i + 1)
    # NOTE(review): assumes preprocess/augment leave the image in a range that
    # imshow can display (floats in 0-1 or ints in 0-255) -- confirm.
    plt.imshow(image.numpy())
    plt.title(f'Label: {label.numpy()}')
    plt.axis('off')
plt.tight_layout()
plt.show()

### Data Shuffling, Batching, and Prefetching

In [None]:
BATCH_SIZE = 32

# Training data is shuffled (buffer of 1000 elements) before batching;
# every split is batched and prefetched with an auto-tuned buffer.
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Evaluation splits keep their order: batch + prefetch only.
val_ds, test_ds = (
    split.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    for split in (val_ds, test_ds)
)

## 2 Model Building

### Model Compilation

In [None]:
# Callbacks used during training:
#   * ModelCheckpoint   - keep only the model with the lowest validation loss.
#   * ReduceLROnPlateau - multiply the learning rate by 0.95 after 3 epochs
#                         without improvement in validation loss.
#   * CSVLogger         - write the per-epoch metrics to a CSV file.
#
# The checkpoint is stored as 'best_model_binary.keras' so that it matches the
# file read back by the "Model Loading" cell and the '_binary' suffix used by
# this notebook's other artifacts (previously it was written under the
# '_main' name, which the loading cell never reads).
callbacks = [
    ModelCheckpoint(os.path.join(MODEL_DIR, 'best_model_binary.keras'),
                    monitor='val_loss', mode='min',
                    save_best_only=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=3, verbose=1),
    CSVLogger(os.path.join(LOG_DIR, 'training_log_binary.csv'))
]

class CustomCallback(Callback):
    """Print a one-line validation summary at the end of every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Report validation loss and macro F1 for the epoch that just ended.

        Args:
            epoch: Epoch index as passed in by the training loop.
            logs: Mapping of metric names to values for this epoch; may be
                None, in which case the values are reported as ``nan``.
        """
        logs = logs or {}
        val_loss = logs.get('val_loss', float('nan'))
        val_f1_macro = logs.get('val_f1_macro', float('nan'))
        # Values are pulled into locals first: reusing the enclosing quote
        # character inside an f-string expression is a SyntaxError before
        # Python 3.12.
        print(f'Epoch {epoch} ended. Val Loss: {val_loss:.4f} | Val F1-Macro: {val_f1_macro:.4f}')


callbacks.append(CustomCallback())

# Small CNN for binary classification: one convolutional stage, three
# 2x2 max-pooling steps, then a 16-unit dense head with a sigmoid output.
# Conv and hidden Dense layers omit their bias because each is followed
# directly by BatchNormalization.
layers = tf.keras.layers

model = tf.keras.Sequential([
    layers.Input(shape=(256, 256, 3)),

    # Convolutional stage: conv -> batch norm -> ReLU -> pool.
    layers.Conv2D(filters=16, kernel_size=(3, 3), use_bias=False),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Dropout(0.1),

    # Two further down-sampling steps without additional convolutions.
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.MaxPooling2D(pool_size=2, strides=2),

    # Classifier head.
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(units=16, use_bias=False),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Dense(units=1, activation='sigmoid'),
])

# Compile for binary classification with a single sigmoid output.
#
# F1Score is given an explicit threshold of 0.5. With threshold=None Keras
# marks the arg-max entry of each prediction row as positive; the model has
# only one output column, so every sample would be counted as a positive
# prediction and the F1 values (including the 'val_f1_macro' that is printed
# and plotted) would not reflect the model's decisions. 0.5 matches the
# default threshold used by Precision() and Recall().
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['binary_accuracy',
                       Precision(), Recall(),
                       F1Score(average=None, threshold=0.5, name='f1'),
                       F1Score(average='micro', threshold=0.5, name='f1_micro'),
                       F1Score(average='macro', threshold=0.5, name='f1_macro'),
                       F1Score(average='weighted', threshold=0.5, name='f1_weighted'),
                       AUC()])
model.summary()

### Model Training

In [None]:
# Train for up to 100 epochs, validating on val_ds after every epoch.
history_obj = model.fit(train_ds, validation_data=val_ds, epochs=100, callbacks=callbacks)
params = history_obj.params

# `History.history` is a plain dict of per-epoch lists, which has no `.tail()`.
# Wrapping it in a DataFrame makes the preview below work and gives `history`
# the same type as the CSV-loaded log in the "Model Loading" cell, so the
# plotting cells behave the same either way.
history = pd.DataFrame(history_obj.history)
history.tail()

### Model Loading (for easy reuse)

In [None]:
# Resolve both artifact locations up front, then load them.
log_path = os.path.join(LOG_DIR, 'training_log_binary.csv')
model_path = os.path.join(MODEL_DIR, 'best_model_binary.keras')

# Per-epoch training log as a DataFrame.
history = pd.read_csv(log_path)
# history.tail()  # uncomment to inspect the last epochs

# Saved binary model.
model = tf.keras.models.load_model(model_path)
model.summary()

### Model Training Log

In [None]:
# One train-vs-validation plot per tracked metric, followed by the
# learning-rate schedule.
for train_key, val_key, title, tag in (
    ('loss', 'val_loss', 'Binary Cross Entropy Loss', 'loss_binary'),
    ('f1_macro', 'val_f1_macro', 'F1 Macro Average', 'f1_binary'),
):
    plot_training_log(history[train_key], history[val_key], title, tag)

plot_learning_rate(history['learning_rate'], 'Learning Rate', 'lr_binary')

In [None]:
# Combined figure: loss curves and macro-F1 curves passed as two
# (train, validation, title) groups.
loss_panel = (history['loss'], history['val_loss'], 'Binary Cross Entropy Loss')
f1_panel = (history['f1_macro'], history['val_f1_macro'], 'F1 Macro Average')

dual_plot_training_log('Binary Model Training Log', 'log_binary', *loss_panel, *f1_panel)

### Model Evaluation

In [None]:
# Run the model over the test split with the project helper `predict`, which
# returns the ground-truth labels and the predictions.
# NOTE(review): presumably y_pred already holds hard class labels (it is fed
# straight into classification_report below) -- confirm in utils.predict.
y_true, y_pred = predict(model, test_ds, is_binary=True)

In [None]:
# Per-class precision / recall / F1 on the test split.
report = classification_report(y_true, y_pred, target_names=class_names)
print(report)

# Confusion matrix, rendered with the project's plotting helper.
cm = confusion_matrix(y_true, y_pred)
plot_confusion_matrix(
    cm,
    class_names=class_names,
    figsize=(3, 2.8),
    cmap='cool',
    filename='cm_binary',
)