In [1]:
import os
import sys
import json
from sklearn.metrics import classification_report
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../src")))
from preprocessing import load_train_data, encode_labels

# ✅ Paths
def _resolve(relative):
    """Return the absolute path of `relative`, anchored at the current working directory."""
    return os.path.abspath(os.path.join(os.getcwd(), relative))


# Input data lives under ../data; generated cards are written under ../docs/cards.
DOCS_DIR = _resolve("../docs/cards")
BASE_DIR = _resolve("../data")
os.makedirs(DOCS_DIR, exist_ok=True)  # created up front so the metrics file can be written later

MODEL_PATH = os.path.join(BASE_DIR, "soil_classifier_model.h5")
TRAIN_FOLDER = os.path.join(BASE_DIR, "train")
TRAIN_CSV = os.path.join(BASE_DIR, "train_labels.csv")

# ✅ Load data and encode labels
# encode_labels returns the frame together with the encoder (`le`) that is used
# later to turn predicted class indices back into label values.
train_df, le = encode_labels(load_train_data(TRAIN_CSV, TRAIN_FOLDER))

# ✅ Split into train/val (use val for evaluation)
# A fixed random_state makes the split repeatable between runs; stratifying on
# "label" keeps the class proportions the same in both parts.
# NOTE(review): presumably "label" is the encoded column added by encode_labels — confirm.
split_kwargs = {
    "test_size": 0.2,
    "stratify": train_df["label"],
    "random_state": 42,
}
train_data, val_data = train_test_split(train_df, **split_kwargs)

# ✅ Prepare validation data generator
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Evaluation images only have their pixel values multiplied by 1/255.
val_gen = ImageDataGenerator(rescale=1.0 / 255)

# shuffle=False: predictions are later paired with val_flow.filenames, so the
# generator must iterate in a fixed order.
flow_options = {
    "x_col": "image_id",
    "y_col": "soil_type",
    "target_size": IMG_SIZE,
    "class_mode": "categorical",
    "batch_size": BATCH_SIZE,
    "shuffle": False,
}
val_flow = val_gen.flow_from_dataframe(val_data, TRAIN_FOLDER, **flow_options)

# ✅ Load trained model
model = load_model(MODEL_PATH)

# ✅ Predict on validation data
pred_probs = model.predict(val_flow, verbose=1)

# Get filenames used in val_flow and keep only the val_data rows for those files.
# NOTE(review): presumably needed because the generator can skip files it cannot
# validate — confirm against the Keras docs.
filenames_in_flow = [os.path.basename(f) for f in val_flow.filenames]
filtered_val_data = val_data[val_data["image_id"].isin(filenames_in_flow)].reset_index(
    drop=True
)

# Row i of pred_probs is scored against row i of filtered_val_data, so both must
# describe the same images in the same order. Fail loudly instead of silently
# computing metrics on misaligned labels.
if filtered_val_data["image_id"].tolist() != filenames_in_flow:
    raise ValueError(
        "Validation rows are not aligned with the generator's file order; "
        "true and predicted labels cannot be compared row by row."
    )
if len(pred_probs) != len(filtered_val_data):
    raise ValueError(
        f"Got {len(pred_probs)} predictions for {len(filtered_val_data)} validation rows."
    )

# Get true and predicted labels
true_labels = filtered_val_data["soil_type"].values
# NOTE(review): decoding with `le` assumes the model's output index order matches
# the encoder's class order — confirm against the training code.
pred_labels = le.inverse_transform(np.argmax(pred_probs, axis=1))


# ✅ Calculate F1 scores per class
f1_report = classification_report(true_labels, pred_labels, output_dict=True)

# Index the per-label rows of the report by lower-cased, stripped label so the
# lookup below does not depend on how the dataset capitalises its class names
# (e.g. "Black Soil" vs "Black soil"). Non-dict entries such as "accuracy" are skipped.
f1_by_class = {
    str(label).strip().lower(): row["f1-score"]
    for label, row in f1_report.items()
    if isinstance(row, dict) and "f1-score" in row
}

f1_scores = {}
for soil_class in ("alluvial soil", "red soil", "black soil", "clay soil"):
    if soil_class not in f1_by_class:
        # Keep the 0.0 fallback, but make the gap visible instead of silently
        # reporting a zero score.
        print(f"⚠️ No F1 score found for class '{soil_class}'; recording 0.0")
    f1_scores[soil_class] = round(f1_by_class.get(soil_class, 0.0), 4)

# ✅ Save metrics to ml-metric.json
# The class names are listed explicitly so the JSON keys keep a fixed order.
reported_classes = ("alluvial soil", "red soil", "black soil", "clay soil")

f1_section = {"_comment": "Here are the class wise f1 scores"}
for class_name in reported_classes:
    f1_section[class_name] = f1_scores[class_name]

ml_metrics = {
    "_comment": "This JSON file containing the ml-metrics",
    "Name": "Sagnik Dey",
    "Kaggle Username": "sagnikd7",
    "Team Name": "NA (Individual)",
    "f1 scores": f1_section,
}

# Serialise first, then write the finished text to DOCS_DIR in a single call.
ml_metric_path = os.path.join(DOCS_DIR, "ml-metric.json")
serialized_metrics = json.dumps(ml_metrics, indent=4)
with open(ml_metric_path, "w") as f:
    f.write(serialized_metrics)

print(f"\n📊 Metrics saved to {ml_metric_path}")

Found 244 validated image filenames belonging to 4 classes.


  self._warn_if_super_not_called()


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 112ms/step

📊 Metrics saved to /Users/sagnikdey/Downloads/FINAL/challenge-1/docs/cards/ml-metric.json
