In [3]:
import os
import sys
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Ensure src directory is in the path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../src")))
from preprocessing import (
    load_train_data,
    encode_labels,
    compute_class_weights,
    save_label_encoder,
)

# ✅ PATH SETUP
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), "../data"))
TRAIN_FOLDER = os.path.join(BASE_DIR, "train")
TRAIN_CSV = os.path.join(BASE_DIR, "train_labels.csv")
LABEL_ENCODER_PATH = os.path.join(BASE_DIR, "label_encoder_classes.json")
MODEL_PATH = os.path.join(BASE_DIR, "soil_classifier_model.h5")

# ✅ LOAD AND PREPROCESS DATA
train_df = load_train_data(TRAIN_CSV, TRAIN_FOLDER)
train_df, le = encode_labels(train_df)
save_label_encoder(le, LABEL_ENCODER_PATH)

# ✅ SPLIT TRAIN/VAL
train_data, val_data = train_test_split(
    train_df, test_size=0.2, stratify=train_df["label"], random_state=42
)
class_weights = compute_class_weights(train_data["label"])

# ✅ IMAGE GENERATORS
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
train_gen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=30,
    zoom_range=0.3,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
)
val_gen = ImageDataGenerator(rescale=1.0 / 255)

train_flow = train_gen.flow_from_dataframe(
    train_data,
    TRAIN_FOLDER,
    x_col="image_id",
    y_col="soil_type",
    target_size=IMG_SIZE,
    class_mode="categorical",
    batch_size=BATCH_SIZE,
)
val_flow = val_gen.flow_from_dataframe(
    val_data,
    TRAIN_FOLDER,
    x_col="image_id",
    y_col="soil_type",
    target_size=IMG_SIZE,
    class_mode="categorical",
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# ✅ IMPROVED MODEL DEFINITION
model = tf.keras.Sequential(
    [
        tf.keras.layers.Conv2D(
            32, (3, 3), padding="same", activation="relu", input_shape=(224, 224, 3)
        ),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Conv2D(64, (3, 3), padding="same", activation="relu"),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Conv2D(128, (3, 3), padding="same", activation="relu"),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation="relu"),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(4, activation="softmax"),
    ]
)


model.compile(
    optimizer="adam",
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
    metrics=["accuracy"],
)


# ✅ CALLBACKS
callbacks = [
    ReduceLROnPlateau(monitor="val_loss", patience=1, factor=0.5, verbose=1),
    # EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)
]

# ✅ TRAIN
history = model.fit(
    train_flow,
    validation_data=val_flow,
    epochs=25,
    callbacks=callbacks,
    class_weight=class_weights,
)

# ✅ SAVE MODEL
model.save(MODEL_PATH)
print(f"Improved model trained and saved at: {MODEL_PATH}")

Found 970 validated image filenames belonging to 4 classes.
Found 244 validated image filenames belonging to 4 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 533ms/step - accuracy: 0.6096 - loss: 1.8938 - val_accuracy: 0.5984 - val_loss: 3.7331 - learning_rate: 0.0010
Epoch 2/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 525ms/step - accuracy: 0.7999 - loss: 0.7037 - val_accuracy: 0.3648 - val_loss: 3.3523 - learning_rate: 0.0010
Epoch 3/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 503ms/step - accuracy: 0.8194 - loss: 0.6180 - val_accuracy: 0.6885 - val_loss: 2.3079 - learning_rate: 0.0010
Epoch 4/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 512ms/step - accuracy: 0.8032 - loss: 0.6941 - val_accuracy: 0.6762 - val_loss: 1.0525 - learning_rate: 0.0010
Epoch 5/25
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 480ms/step - accuracy: 0.8474 - loss: 0.5497
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1



Improved model trained and saved at: /Users/sagnikdey/Downloads/FINAL/data/soil_classifier_model.h5
