In [None]:
# 1. Import required libraries
import pandas as pd                     # For handling CSV files and dataframes
import numpy as np                      # For numerical operations
import os                               # For interacting with the file system
from tensorflow.keras.models import load_model  # To load the pre-trained Keras model
from tensorflow.keras.preprocessing import image  # For image loading and preprocessing
from tqdm import tqdm                   # For progress bars during prediction
from sklearn.metrics import f1_score    # To evaluate predictions if ground-truth is available

In [None]:
# 2. Define constants and paths
# All paths are relative, so they resolve against the notebook's working directory.
MODEL_PATH = "soil_model_mobilenetv2.h5"  # Saved Keras model file, read by load_model() below
TEST_DIR = "../data/soil_competition-2025/test"  # Directory the test image files are read from
TEST_CSV = "../data/soil_competition-2025/test_ids.csv"  # CSV whose "image_id" column lists the test image filenames
IMG_SIZE = (224, 224)  # Size every image is resized to on load (passed as target_size)


In [None]:
# 3. Load the trained model from disk
# load_model() restores the model saved at MODEL_PATH; every prediction below is made with this object.
model = load_model(MODEL_PATH)  # Load the saved Keras model

In [None]:
# 4. Read the list of test images
# The CSV is kept as a DataFrame because the predictions are attached to it
# later; its "image_id" column supplies the filenames to score, in file order.
test_df = pd.read_csv(TEST_CSV)
test_images = test_df.loc[:, "image_id"].to_list()

In [None]:
# 5. Define a helper function to preprocess a single image
def preprocess_image(img_path):
    """Load one image file and return it as a batch of size one.

    The file at ``img_path`` is loaded and resized to ``IMG_SIZE``, converted
    to a numpy array, divided by 255.0 (mapping 8-bit pixel values into
    [0, 1]), and given a leading batch axis.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The scaled image array with one extra leading axis of length 1.
    """
    pil_img = image.load_img(img_path, target_size=IMG_SIZE)
    scaled = image.img_to_array(pil_img) / 255.0
    # Indexing with np.newaxis prepends the batch axis.
    return scaled[np.newaxis, ...]

In [None]:
# 6. Predict soil vs non-soil on each test image
# Images are scored in fixed-size batches: calling model.predict() once per
# image pays Keras' per-call setup cost for every file and, at the default
# verbosity, prints a progress line per call that buries the tqdm bar.
BATCH_SIZE = 32  # Images per forward pass; lower it if memory is tight

predictions = []  # Binary labels (0 or 1), in the same order as test_images
for start in tqdm(range(0, len(test_images), BATCH_SIZE)):  # One tick per batch
    batch_names = test_images[start:start + BATCH_SIZE]
    # preprocess_image() returns one array with a leading axis of length 1 per
    # file, so joining along axis 0 yields a single batch array.
    batch = np.concatenate(
        [preprocess_image(os.path.join(TEST_DIR, name)) for name in batch_names],
        axis=0,
    )
    probs = model.predict(batch, verbose=0)  # verbose=0: tqdm already reports progress
    # The first output value of each row is the predicted probability;
    # above 0.5 becomes label 1 (soil), otherwise 0 (non-soil).
    predictions.extend(int(row[0] > 0.5) for row in probs)

In [None]:
# 7. Attach the predicted labels to the test DataFrame
# One label per row, in the same order the images were scored.
test_df.loc[:, "label"] = predictions

In [None]:
# 8. Save the predictions to CSV for submission
test_df.to_csv("submission.csv", index=False)  # index=False keeps the row index out of the file

# Preview the first rows. A bare last expression is rendered by the notebook as
# a formatted table, which reads better than the plain-text print() output.
test_df.head()