In [1]:
import os
import sys
import tensorflow as tf
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from collections import Counter
import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and pandas warnings are hidden too.
warnings.filterwarnings("ignore")

# Ensure src directory is in the path so the project-local modules can be imported.
# The membership check keeps the cell idempotent: re-running it does not append
# the same directory to sys.path again.
SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), "../src"))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
from preprocessing import load_label_encoder
from postprocessing import predict_and_prepare_submission, get_image_data_generator

# ✅ PATH SETUP
# Every input and output lives in the "data" directory one level above the
# current working directory.
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), "..", "data"))


def _in_data_dir(name):
    """Return the location of ``name`` inside ``BASE_DIR``."""
    return os.path.join(BASE_DIR, name)


TEST_FOLDER = _in_data_dir("test")  # directory holding the test images
TEST_CSV = _in_data_dir("test_ids.csv")  # CSV listing the test image ids
LABEL_ENCODER_PATH = _in_data_dir("label_encoder_classes.json")
MODEL_PATH = _in_data_dir("soil_classifier_model.h5")
SUBMISSION_PATH = _in_data_dir("submission.csv")  # output file

# ✅ LOAD MODEL & LABEL ENCODER
# Restore the saved Keras model from MODEL_PATH.
model = tf.keras.models.load_model(filepath=MODEL_PATH)
# Restore the label encoder via the project helper; presumably it maps predicted
# class indices back to soil-type names — confirm in preprocessing.load_label_encoder.
le = load_label_encoder(LABEL_ENCODER_PATH)

# ✅ LOAD TEST DATA
# Read the test ids and attach the expected on-disk location of each image.
test_ids = pd.read_csv(TEST_CSV)
test_ids["path"] = test_ids["image_id"].apply(
    lambda image_id: os.path.join(TEST_FOLDER, image_id)
)

# ✅ Accept extensions including .gif and .webp
valid_extensions = [".jpg", ".jpeg", ".png", ".gif", ".webp"]

# Build both row filters as explicit boolean masks. The astype(bool) keeps the
# masks boolean even when the CSV has no rows, so an empty input reaches the
# ValueError below.
file_exists = test_ids["path"].apply(os.path.exists).astype(bool)
has_valid_extension = (
    test_ids["path"]
    .apply(lambda path: os.path.splitext(path)[-1].lower() in valid_extensions)
    .astype(bool)
)
keep_row = file_exists & has_valid_extension

# Rows that fail either filter are left out of the predictions. Report them so
# a shorter-than-expected submission does not go unnoticed. print() is used
# because warnings are globally suppressed in this notebook.
skipped_total = int((~keep_row).sum())
if skipped_total:
    missing_files = int((~file_exists).sum())
    print(
        f"Skipping {skipped_total} of {len(test_ids)} test rows "
        f"(missing file: {missing_files}, "
        f"unsupported extension: {skipped_total - missing_files})."
    )

# Same rows, columns and index labels as filtering on existence and then on
# extension one after the other.
test_df = test_ids[keep_row]

if test_df.empty:
    raise ValueError(
        "No valid test images found! Please check the test folder and image formats."
    )

# ✅ IMAGE DATA GENERATOR
# The project helper builds the generator; only pixel rescaling is requested here.
val_gen = get_image_data_generator(rescale=1.0 / 255)

# Options for iterating over the test images.
flow_options = {
    "x_col": "image_id",  # file names, resolved relative to TEST_FOLDER
    "y_col": None,  # no labels at inference time
    "target_size": (224, 224),  # NOTE(review): presumably the model's input size — confirm
    "class_mode": None,
    "batch_size": 1,
    "shuffle": False,  # keep the rows in test_df order
    "validate_filenames": False,  # Disable strict checking of image formats
}
test_flow = val_gen.flow_from_dataframe(test_df, TEST_FOLDER, **flow_options)

# ✅ PREDICT & SUBMIT
# Hand everything to the project helper. Judging by the captured output it runs
# the predictions and writes the submission file itself — confirm in
# postprocessing.predict_and_prepare_submission.
prediction_inputs = (model, test_flow, test_df, TEST_CSV, le, SUBMISSION_PATH)
submission = predict_and_prepare_submission(*prediction_inputs)

# Show the first rows as a quick sanity check, then the output location.
preview = submission.head()
print(preview)
print(f"Submission file created at: {SUBMISSION_PATH}")



Found 341 non-validated image filenames.
[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
Prediction class distribution: Counter({np.str_('Black Soil'): 123, np.str_('Red soil'): 100, np.str_('Clay soil'): 74, np.str_('Alluvial soil'): 44})
Submission file saved at: /Users/sagnikdey/Downloads/FINAL/challenge-1/data/submission.csv
            image_id      soil_type
0  img_cdf80d6f.jpeg  Alluvial soil
1   img_c0142a80.jpg  Alluvial soil
2   img_91168fb0.jpg  Alluvial soil
3   img_9822190f.jpg     Black Soil
4  img_e5fc436c.jpeg      Clay soil
Submission file created at: /Users/sagnikdey/Downloads/FINAL/challenge-1/data/submission.csv
