In [None]:
# Cell A: imports and quick checks
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight
from collections import Counter

# Dataset root plus the train/val folders for the ripe and green tomato crops.
base = '/content/drive/MyDrive/Tomato_dataset/cnn_crops'  # change if needed
ripe_train_dir, ripe_val_dir, green_train_dir, green_val_dir = (
    os.path.join(base, split)
    for split in ('ripe/train', 'ripe/val', 'green/train', 'green/val')
)

# Quick helper to print counts
def folder_counts(p):
    """Count the files sitting directly inside each subdirectory of `p`.

    Args:
        p: Path of a directory whose immediate subdirectories are to be counted.

    Returns:
        dict mapping each subdirectory name (in sorted order) to the number of
        entries inside it for which `os.path.isfile` is true. Entries of `p`
        that are not directories are ignored, and nested folders are neither
        counted nor descended into.
    """
    counts = {}
    for name in sorted(os.listdir(p)):
        class_dir = os.path.join(p, name)
        if not os.path.isdir(class_dir):
            continue
        candidates = (os.path.join(class_dir, entry) for entry in os.listdir(class_dir))
        counts[name] = len([path for path in candidates if os.path.isfile(path)])
    return counts

# Report per-class file counts for every split (labels keep their padding so
# the train/val lines stay aligned).
for label, directory in (
    ("Ripe train counts:", ripe_train_dir),
    ("Ripe val counts:  ", ripe_val_dir),
    ("Green train counts:", green_train_dir),
    ("Green val counts:  ", green_val_dir),
):
    print(label, folder_counts(directory))

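# Optional: if you decide to exclude the 'R_pests' and 'G_pests' classes, remove
# or relocate their folders now, before the training cells below are run.
# For example, move a folder with os.rename(src, dst), or delete it outright.
# Be careful: deletion cannot be undone, so double-check the path first, e.g.:
# import shutil
# shutil.rmtree('/path/to/R_pests')  # only if you're sure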


In [None]:
# Cell B: create model/training function using MobileNetV2
# Side length, in pixels, of the square images; used as the default for the
# generators' target_size and for the model's input_shape.
IMG_SIZE = 160           # smaller for CPU, change to 224 if desired
# Default number of images per batch produced by the data generators.
BATCH_SIZE = 12          # small to fit CPU
# Default epoch budget for the first training phase of train_with_transfer.
EPOCHS = 30
# tf.data autotuning constant; not referenced by the cells visible here.
AUTOTUNE = tf.data.AUTOTUNE

def make_data_generators(train_dir, val_dir, img_size=IMG_SIZE, batch_size=BATCH_SIZE):
    """Create the augmented training iterator and the plain validation iterator.

    Both iterators read class sub-folders via `flow_from_directory`, resize
    images to `img_size` x `img_size`, yield one-hot (`categorical`) labels and
    multiply pixel values by 1/255.

    NOTE(review): Keras documents `mobilenet_v2.preprocess_input` as scaling
    inputs to [-1, 1], whereas these generators only apply the 1/255 rescale.
    Confirm that range is intended for the ImageNet-pretrained base, and keep
    inference-time preprocessing identical to whatever is used here.

    Args:
        train_dir: Directory with one sub-folder per class (training split).
        val_dir: Directory with one sub-folder per class (validation split).
        img_size: Target height and width in pixels.
        batch_size: Number of images per yielded batch.

    Returns:
        (train_gen, val_gen): the training iterator is shuffled and randomly
        augmented; the validation iterator is unshuffled and only rescaled.
    """
    # Settings common to both directory iterators.
    flow_kwargs = dict(
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='categorical',
    )

    # Random geometric and brightness augmentation is applied to training only.
    augmenting = ImageDataGenerator(
        rescale=1./255,
        rotation_range=25,
        width_shift_range=0.1,
        height_shift_range=0.1,
        brightness_range=(0.7,1.3),
        shear_range=0.1,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    rescale_only = ImageDataGenerator(rescale=1./255)

    train_gen = augmenting.flow_from_directory(train_dir, shuffle=True, **flow_kwargs)
    # Validation keeps directory order (shuffle=False).
    val_gen = rescale_only.flow_from_directory(val_dir, shuffle=False, **flow_kwargs)
    return train_gen, val_gen

def compute_class_weights(generator):
    """Compute 'balanced' class weights for the samples held by a generator.

    Args:
        generator: Object exposing `.classes`, a sequence with the integer
            class index of every sample (as a Keras directory iterator does).

    Returns:
        dict mapping each class index that occurs in `generator.classes`
        (as a plain `int`) to its balanced weight (as a plain `float`).
    """
    sample_labels = np.asarray(generator.classes)
    present = np.unique(sample_labels)
    weights = compute_class_weight(class_weight='balanced', classes=present, y=sample_labels)
    # `weights[k]` belongs to `present[k]`, so pair them by position. Indexing
    # the weight array by the label value is only correct when the labels are
    # exactly 0..n-1; a class index with no samples (e.g. an empty class
    # folder) would otherwise raise IndexError or pick the wrong weight.
    return {int(label): float(weight) for label, weight in zip(present, weights)}

def build_transfer_model(num_classes, img_size=IMG_SIZE, base_trainable=False):
    """Build and compile a MobileNetV2-based image classifier.

    The network is an ImageNet-initialised MobileNetV2 (no top, global average
    pooling) followed by Dense(256, relu), Dropout(0.5) and a softmax layer
    with `num_classes` units.

    Args:
        num_classes: Number of units in the softmax output layer.
        img_size: Height and width of the square RGB input.
        base_trainable: Value assigned to the MobileNetV2 base's `trainable`
            flag (False keeps the pretrained weights frozen).

    Returns:
        A `tf.keras.Model` compiled with Adam (learning rate 1e-3),
        categorical cross-entropy loss and the accuracy metric.
    """
    keras = tf.keras
    input_shape = (img_size, img_size, 3)

    backbone = keras.applications.MobileNetV2(
        input_shape=input_shape, include_top=False, weights='imagenet', pooling='avg'
    )
    backbone.trainable = base_trainable  # frozen unless the caller opts in

    image_in = keras.Input(shape=input_shape)
    # The backbone is always called with training=False, regardless of its
    # `trainable` flag.
    features = backbone(image_in, training=False)
    hidden = keras.layers.Dense(256, activation='relu')(features)
    hidden = keras.layers.Dropout(0.5)(hidden)
    class_probs = keras.layers.Dense(num_classes, activation='softmax')(hidden)

    classifier = keras.Model(image_in, class_probs)
    classifier.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    )
    return classifier

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

def train_with_transfer(train_dir, val_dir, out_path, img_size=IMG_SIZE, batch_size=BATCH_SIZE,
                        epochs=EPOCHS, fine_tune_epochs=10):
    """Train a MobileNetV2 classifier in two phases, checkpointing the best model.

    Phase 1 trains the classification head on top of a frozen base. Phase 2
    unfreezes the last ~20% of the base's layers and continues training with a
    learning rate of 1e-5. In both phases the model with the highest
    `val_accuracy` seen so far is written to `out_path`.

    Args:
        train_dir: Training directory with one sub-folder per class.
        val_dir: Validation directory with the same class sub-folders.
        out_path: File path the best model is saved to.
        img_size: Height/width of the square input images.
        batch_size: Batch size used by both data generators.
        epochs: Maximum number of epochs for phase 1.
        fine_tune_epochs: Maximum number of epochs for phase 2; a value <= 0
            skips fine-tuning.

    Returns:
        (model, history, ft_history): the in-memory model and the Keras
        `History` objects of both phases. `ft_history` is None when
        fine-tuning was skipped.

    Raises:
        ValueError: If either split contains no images, or if the training and
            validation folders do not yield the same class-to-index mapping.
        RuntimeError: If no nested base model is found in the built network.
    """
    train_gen, val_gen = make_data_generators(train_dir, val_dir, img_size, batch_size)
    if train_gen.samples == 0 or val_gen.samples == 0:
        raise ValueError(
            f"No images found (train={train_gen.samples}, val={val_gen.samples}); "
            f"check {train_dir!r} and {val_dir!r}."
        )
    # Labels are positional one-hot vectors, so a differing folder set would
    # either break the loss shape or silently mislabel validation data.
    if train_gen.class_indices != val_gen.class_indices:
        raise ValueError(
            "Train/val class folders differ: "
            f"{train_gen.class_indices} vs {val_gen.class_indices}"
        )

    num_classes = len(train_gen.class_indices)
    print("Num classes:", num_classes, "Class indices:", train_gen.class_indices)
    class_weights = compute_class_weights(train_gen)
    print("Class weights:", class_weights)

    model = build_transfer_model(num_classes, img_size, base_trainable=False)
    ckpt = ModelCheckpoint(out_path, save_best_only=True, monitor='val_accuracy', mode='max')
    es = EarlyStopping(patience=6, restore_best_weights=True, monitor='val_accuracy')
    rlp = ReduceLROnPlateau(patience=3, factor=0.5, monitor='val_loss')

    # Round up so the final partial batch is included. Flooring would skip the
    # tail of the data; with the unshuffled validation iterator that is always
    # the same images, which would never be evaluated.
    steps_per_epoch = (train_gen.samples + batch_size - 1) // batch_size
    val_steps = (val_gen.samples + batch_size - 1) // batch_size

    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=epochs,
        class_weight=class_weights,
        callbacks=[ckpt, es, rlp],
        steps_per_epoch=steps_per_epoch,
        validation_steps=val_steps,
        verbose=1
    )

    if fine_tune_epochs <= 0:
        return model, history, None

    # Locate the nested MobileNetV2 by type rather than by position, so the
    # lookup does not depend on the exact layer order of the builder.
    base_model = next(
        (layer for layer in model.layers if isinstance(layer, tf.keras.Model)), None
    )
    if base_model is None:
        raise RuntimeError("Could not find the nested base model to fine-tune.")

    # Unfreeze the base, then re-freeze everything except its last ~20% of layers.
    base_model.trainable = True
    fine_tune_at = int(len(base_model.layers) * 0.8)
    for layer in base_model.layers[:fine_tune_at]:
        layer.trainable = False
    # Re-compile so the changed `trainable` flags take effect, with a low LR.
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])

    # Keep checkpointing during fine-tuning; otherwise the file at `out_path`
    # only ever holds the phase-1 model. Seeding the threshold with phase 1's
    # best score prevents a worse fine-tuned epoch from overwriting it.
    best_val_acc = max(history.history.get('val_accuracy', []), default=None)
    ft_ckpt = ModelCheckpoint(
        out_path, save_best_only=True, monitor='val_accuracy', mode='max',
        initial_value_threshold=best_val_acc
    )
    # Monitor val_accuracy here too, so early stopping, weight restoration and
    # the checkpoint all agree on what "best" means.
    ft_es = EarlyStopping(patience=4, restore_best_weights=True, monitor='val_accuracy', mode='max')
    ft_rlp = ReduceLROnPlateau(patience=2, monitor='val_loss')

    ft_history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=fine_tune_epochs,
        class_weight=class_weights,
        callbacks=[ft_ckpt, ft_es, ft_rlp],
        steps_per_epoch=steps_per_epoch,
        validation_steps=val_steps,
        verbose=1
    )
    return model, history, ft_history


In [None]:
# Cell C: run training
ripe_model_path = '/content/drive/MyDrive/Tomato_dataset/models/ripe_mobilenet.keras'   # prefer .keras
green_model_path = '/content/drive/MyDrive/Tomato_dataset/models/green_mobilenet.keras'

# Keep the returned model and both History objects instead of discarding them,
# so the trained model can be reused and the training curves inspected in
# later cells.

# Ripe
ripe_model, ripe_history, ripe_ft_history = train_with_transfer(
    ripe_train_dir, ripe_val_dir, ripe_model_path
)

# Green
green_model, green_history, green_ft_history = train_with_transfer(
    green_train_dir, green_val_dir, green_model_path
)
