##### Imports & Setup

In [1]:
import json
import os
import sys

# Make the project root importable so `src.*` resolves from the notebooks folder.
sys.path.append(os.path.abspath(".."))

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50

from src.data_loader import collect_image_paths, stratified_split


##### Dataset & Encoding

In [2]:
# Tunable configuration shared by the cells below.
DATASET_PATH = "../data/raw/PlantVillage"
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# Gather every image path with its class label, then split into
# train / validation / test partitions via `stratified_split`.
image_paths, labels = collect_image_paths(DATASET_PATH)
splits = stratified_split(image_paths, labels)
X_train, y_train, X_val, y_val, X_test, y_test = splits

# Sorting the distinct class names gives a stable class -> index mapping.
class_names = sorted(set(labels))
class_to_index = dict(zip(class_names, range(len(class_names))))


def encode_labels(names):
    """Return a NumPy array of the `class_to_index` values for `names`."""
    return np.array([class_to_index[name] for name in names])


# Replace the class-name labels of each partition with their integer indices.
y_train = encode_labels(y_train)
y_val = encode_labels(y_val)
y_test = encode_labels(y_test)


##### Build tf.data Pipelines

In [3]:
from tensorflow.keras.applications.resnet50 import preprocess_input

def build_dataset(image_paths, labels, shuffle=False):
    """Build a batched, prefetched ``tf.data`` pipeline of preprocessed images.

    Args:
        image_paths: Sequence of image file paths (must support ``len``).
        labels: Sequence of labels aligned one-to-one with ``image_paths``.
        shuffle: When True, the (path, label) pairs are shuffled across the
            whole dataset before the images are loaded.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches of at most
        ``BATCH_SIZE`` elements, where each image has been resized to
        ``IMAGE_SIZE`` and passed through ``preprocess_input``.
    """
    ds = tf.data.Dataset.from_tensor_slices((image_paths, labels))

    if shuffle:
        # Shuffle the lightweight (path, label) pairs *before* decoding, with a
        # buffer spanning the full dataset. Shuffling after `map` with a small
        # fixed buffer keeps that many decoded images in memory and mixes
        # poorly when the input arrives grouped by class.
        ds = ds.shuffle(
            buffer_size=max(len(image_paths), 1),  # shuffle() rejects a 0 buffer
            reshuffle_each_iteration=True,
        )

    def load_image(path, label):
        """Read one image file and return the preprocessed image with its label."""
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, IMAGE_SIZE)
        img = tf.cast(img, tf.float32)
        # Same preprocessing function that ships with the ResNet50 application.
        img = preprocess_input(img)
        return img, label

    ds = ds.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)

    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


In [4]:
# Only the training pipeline is shuffled; validation and test keep their order.
train_ds = build_dataset(image_paths=X_train, labels=y_train, shuffle=True)
val_ds = build_dataset(image_paths=X_val, labels=y_val)
test_ds = build_dataset(image_paths=X_test, labels=y_test)


##### Build ResNet50 (FROZEN BACKBONE)

In [5]:
# ImageNet-pretrained ResNet50 without its classification head. The input
# shape is derived from IMAGE_SIZE so the model always matches the images
# produced by `build_dataset`, instead of duplicating the literal size here.
base_model = ResNet50(
    weights="imagenet",
    include_top=False,
    input_shape=(*IMAGE_SIZE, 3),
)

# Freeze the backbone so that only the new head below is trained.
base_model.trainable = False

# New classification head: pooled backbone features -> hidden layer ->
# dropout -> one softmax unit per class.
x = layers.GlobalAveragePooling2D()(base_model.output)
x = layers.Dense(256, activation="relu")(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(len(class_names), activation="softmax")(x)

model = models.Model(inputs=base_model.input, outputs=outputs)


In [6]:
# The sparse loss is used because the labels are integer class indices.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer architecture and parameter counts.
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                


                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                                  
 conv1_bn (BatchNormalization)  (None, 112, 112, 64  256         ['conv1_conv[0][0]']             
                                )                                                                 
                                                                                                  
 conv1_relu (Activation)        (None, 112, 112, 64  0           ['conv1_bn[0][0]']               
                                )                                                                 
                                                                                                  
 pool1_pad

In [7]:
# Train for 10 epochs, evaluating on the validation pipeline after each epoch.
fit_options = {"validation_data": val_ds, "epochs": 10}
history = model.fit(train_ds, **fit_options)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# Persist the ordered class names so predicted indices can be mapped back to
# names later. The output directory is created first so this cell also works
# on a fresh checkout where it does not exist yet.
os.makedirs("../results/metrics", exist_ok=True)

with open("../results/metrics/class_names.json", "w") as f:
    json.dump(class_names, f)


In [9]:
# Save the exact test samples that `test_ds` was built from.
#
# X_test / y_test from the dataset cell are reused directly instead of
# re-running the split: this does not depend on `stratified_split` being
# deterministic, and it leaves X_test / y_test untouched so re-running the
# dataset-building cell afterwards still receives integer-encoded labels.

# Absolute paths keep the saved samples usable from any working directory.
test_paths_abs = [os.path.abspath(p) for p in X_test]

# y_test is already integer-encoded; convert NumPy integers to plain ints
# because the json module cannot serialise NumPy scalar types.
y_test_encoded = [int(label) for label in y_test]

os.makedirs("../results/metrics", exist_ok=True)

with open("../results/metrics/test_samples.json", "w") as f:
    json.dump(
        {
            "paths": test_paths_abs,
            "labels": y_test_encoded,
        },
        f,
    )


In [10]:
# Evaluate once on the held-out test pipeline; the result unpacks into the
# loss followed by the accuracy metric configured at compile time.
evaluation = model.evaluate(test_ds)
test_loss, test_acc = evaluation
print(f"Test accuracy: {test_acc:.4f}")


Test accuracy: 0.9616


In [11]:
# Write the trained model to disk as a single HDF5 file.
model_output_path = "../results/metrics/resnet50_tl_model.h5"
model.save(model_output_path)
