# In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from PIL import Image
import warnings
import joblib  # for loading models
warnings.filterwarnings('ignore')

# --- Parameters ---
# Fitted artefacts that are loaded with joblib further down.
model_save_path = '../data/oneclass_svm_model.joblib'
scaler_save_path = '../data/scaler.joblib'
pca_save_path = '../data/pca.joblib'

# Test-set inputs: the id listing and the image folder share one root.
data_dir = '../data/soil-classification-part-2'
test_ids_csv = os.path.join(data_dir, 'test_ids.csv')
test_image_dir = os.path.join(data_dir, 'test')

# Size passed to PIL's resize before feature extraction.
image_size = (224, 224)

# Destination of the prediction table.
output_submission_csv = 'submission.csv'

# --- Utility function ---
def load_and_preprocess(img_path):
    """Load one image file and return it as a preprocessed batch of size 1.

    The image is decoded as RGB, resized to the module-level ``image_size``,
    given a leading batch axis, and passed through ``preprocess_input``
    (imported from ``tensorflow.keras.applications.resnet50``).

    Args:
        img_path: Anything ``PIL.Image.open`` accepts (typically a file path).

    Returns:
        The result of ``preprocess_input`` applied to an array of shape
        ``(1, image_size[1], image_size[0], 3)``.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    # The context manager releases the underlying file handle as soon as the
    # pixels have been copied, instead of leaving it to garbage collection.
    with Image.open(img_path) as source:
        img = source.convert('RGB').resize(image_size)
    # Add the batch axis so per-image results can be stacked along axis 0.
    x = np.expand_dims(np.array(img), axis=0)
    return preprocess_input(x)

# ===========================
# 1. Load Pre-trained Models
# ===========================
print("Loading pre-trained models...")
# Deserialise the three saved artefacts in a fixed order: model, scaler, PCA.
ocsvm, scaler, pca = [
    joblib.load(artifact_path)
    for artifact_path in (model_save_path, scaler_save_path, pca_save_path)
]
print("Models loaded successfully.")

# ===========================
# 2. Preprocessing Test Data
# ===========================
print("Loading and preprocessing test data...")
test_ids_df = pd.read_csv(test_ids_csv)

# One path per entry of the 'image_id' column, kept in CSV order.
test_image_paths = list(
    map(
        lambda img_id: os.path.join(test_image_dir, f"{img_id}"),
        test_ids_df['image_id'],
    )
)

# Every call returns a batch of one image; join them along the batch axis.
test_images = np.concatenate(
    list(map(load_and_preprocess, test_image_paths)),
    axis=0,
)
print(f"Loaded and preprocessed {test_images.shape[0]} test images")

# Headless ResNet50 with average pooling, used as a feature extractor.
base_model = ResNet50(
    include_top=False,
    pooling='avg',
    weights='imagenet',
)
test_features = base_model.predict(test_images, verbose=1)
print(f"Extracted test features shape: {test_features.shape}")

# ===========================
# 3. Inference
# ===========================
# Runs the extracted features through the three objects loaded with joblib,
# each stage consuming the previous stage's output:
#   scaler.transform -> pca.transform -> ocsvm.predict
# NOTE(review): presumably these are the StandardScaler, PCA and one-class SVM
# fitted at training time -- confirm against the training notebook.
print("Starting inference...")
test_features_scaled = scaler.transform(test_features)
test_features_pca = pca.transform(test_features_scaled)
# One prediction per test image; the postprocessing step remaps -1 to 0.
test_pred = ocsvm.predict(test_features_pca)

# ===========================
# 4. Postprocessing
# ===========================
print("Postprocessing predictions...")
# Attach the predictions as the 'label' column, rewriting -1 as 0 and leaving
# every other value as predicted.
test_ids_df['label'] = pd.Series(
    test_pred,
    index=test_ids_df.index,
).replace({-1: 0})
test_ids_df.to_csv(output_submission_csv, index=False)
print(f"Saved submission to '{output_submission_csv}'")
