In [None]:
Inference

Team Name: EnviroMinds \
Team Members: Sanjana Sudarsan, Swetha Sriram, Lohithaa K M \
Leaderboard Rank: 30

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
from tqdm import tqdm
from torchvision import transforms, models
import joblib

# --- Input / output locations -------------------------------------------
TEST_DIR = "/kaggle/input/soil-binary/soil_competition-2025/test"
TEST_IDS_CSV = "/kaggle/input/soil-binary/soil_competition-2025/test_ids.csv"
SUBMISSION_CSV = "submission.csv"
MODEL_DIR = "trained_models"

# --- Test image identifiers, kept in the order they appear in the CSV ----
test_df = pd.read_csv(TEST_IDS_CSV)
test_ids = test_df["image_id"].tolist()

# --- Preprocessing --------------------------------------------------------
# The original author notes this pipeline is the same as the training-time
# `normal_tf`: resize to 300x300, convert to tensor, then per-channel normalize.
normal_tf = transforms.Compose(
    [
        transforms.Resize((300, 300)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# --- Feature extractor ----------------------------------------------------
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# EfficientNet-B3 with the default pretrained weights, placed on `device`.
base_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
base_model = base_model.to(device)

# Keep every top-level child except the last one (presumably the classifier
# head -- verify against the torchvision model definition) and switch the
# resulting stack to evaluation mode.
feature_extractor = torch.nn.Sequential(*list(base_model.children())[:-1])
feature_extractor.eval()

def extract_feats(paths, tf):
    """Embed every image in ``paths`` with the module-level ``feature_extractor``.

    Images are processed one at a time, in the order given.

    Args:
        paths: Iterable of image file paths that PIL can open.
        tf: Callable turning a PIL RGB image into a tensor; a leading batch
            axis is added here before the forward pass.

    Returns:
        ``np.ndarray`` holding one flattened feature row per input path.
    """
    feats = []
    # Inference only: a single no_grad scope covers the whole loop instead of
    # being re-entered for every image.
    with torch.no_grad():
        for p in tqdm(paths, desc="Extracting Features"):
            # The context manager releases the underlying file handle as soon
            # as the pixel data has been copied by convert().
            with Image.open(p) as raw:
                img = raw.convert("RGB")
            img_t = tf(img).unsqueeze(0).to(device)
            # Flatten the extractor output to a single row and move it to host memory.
            feat = feature_extractor(img_t).view(1, -1).cpu().numpy()
            feats.append(feat[0])
    return np.array(feats)

def get_paths(ids, dir_path):
    """Return a list with each entry of ``ids`` joined onto ``dir_path``, order preserved."""
    paths = []
    for image_name in ids:
        paths.append(os.path.join(dir_path, image_name))
    return paths

# Load models and scalers
# NOTE(review): joblib.load unpickles arbitrary objects -- MODEL_DIR must only
# contain artifacts produced by a trusted training run.
iso_models = []
ocsvm_models = []
scalers = []

# The ensemble size is inferred from the number of files named "iso_*".
num_models = len([name for name in os.listdir(MODEL_DIR) if name.startswith("iso_")])

# Fail loudly on an empty ensemble: np.mean over an empty list would yield NaN
# scores, every `>=` comparison below would be False, and an all-zero
# submission would be written without any error.
if num_models == 0:
    raise FileNotFoundError(f"No 'iso_*' model files found in {MODEL_DIR!r}")

# Member i of the ensemble is the triple (iso_i, ocsvm_i, scaler_i).
for i in range(num_models):
    iso_models.append(joblib.load(os.path.join(MODEL_DIR, f"iso_{i}.pkl")))
    ocsvm_models.append(joblib.load(os.path.join(MODEL_DIR, f"ocsvm_{i}.pkl")))
    scalers.append(joblib.load(os.path.join(MODEL_DIR, f"scaler_{i}.pkl")))

# Load threshold (a single float stored as text)
with open(os.path.join(MODEL_DIR, "best_thresh.txt"), "r") as f:
    best_thresh = float(f.read().strip())

# Prepare test paths and extract features
test_paths = get_paths(test_ids, TEST_DIR)
X_test = extract_feats(test_paths, normal_tf)

# Predict scores: average each model family across ensemble members.
# The iso_* models score the raw features; each ocsvm_* model scores the
# features after they pass through its paired scaler.
iso_scores = np.mean([m.decision_function(X_test) for m in iso_models], axis=0)
ocsvm_scores = np.mean([m.decision_function(s.transform(X_test)) for m, s in zip(ocsvm_models, scalers)], axis=0)

# Equal-weight blend of the two families; scores at or above the stored
# threshold are labelled 1, everything else 0.
final_scores = (iso_scores + ocsvm_scores) / 2
test_preds = (final_scores >= best_thresh).astype(int)

# Save submission
submission = pd.DataFrame({
    "image_id": test_df['image_id'],
    "soil_type": test_preds
})
submission.to_csv(SUBMISSION_CSV, index=False)
print(f"Saved submission to {SUBMISSION_CSV}")

Extracting Features: 100%|██████████| 967/967 [00:18<00:00, 51.64it/s]


Saved submission to submission.csv
