In [64]:
# --- Core Libraries ---
import pandas as pd
import numpy as np
import os
import torch  # The core deep learning library (PyTorch)

# Advanced Feature Generation Libraries
# For Text Embeddings
from sentence_transformers import SentenceTransformer
# For Image Embeddings
import timm
from PIL import Image
import torchvision.transforms as transforms

# --- Modeling and Utilities ---
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from tqdm.notebook import tqdm  # For beautiful progress bars

print("All necessary libraries imported successfully.")


# --- Device Configuration (Crucial for Deep Learning) ---
# This will automatically use a GPU if one is available (much faster), otherwise it will use the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

All necessary libraries imported successfully.
Using device: cpu


In [72]:
# This is our last and most powerful correction technique.

from sklearn.isotonic import IsotonicRegression

def smape(y_true, y_pred):
    numerator = np.abs(y_pred - y_true)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    return np.mean(numerator / denominator) * 100

print("--- Step 1: Preparing the data for the Super-Corrector ---")

# 'calibrated_oof_predictions' holds the predictions from our best model (the 47% SMAPE one).
# These are the flawed predictions we will fix.
flawed_predictions_train = calibrated_oof_predictions
# 'true_prices' holds the real prices. This is our target.
true_prices_train = true_prices

print("--- Step 2: Training the 'Super-Corrector' (Isotonic Regression) ---")
# The Isotonic Regression model will learn how to map our flawed predictions to the true prices.
iso_corrector = IsotonicRegression(out_of_bounds="clip")
iso_corrector.fit(flawed_predictions_train, true_prices_train)

print(" Super-Corrector has been trained.")


# THE MOMENT OF TRUTH - Verify the Final SMAPE ---
print("\n--- Applying the Super-Corrector to our training predictions ---")
# Use the trained Super-Corrector to make final, perfectly calibrated predictions.
final_oof_predictions = iso_corrector.predict(flawed_predictions_train)

# Calculate the SMAPE of these new, corrected predictions.
final_isotonic_smape = smape(true_prices_train, final_oof_predictions)

print("\n-------------------------------------------------")
print(f"THE FINAL ISOTONIC-CALIBRATED SMAPE SCORE IS: {final_isotonic_smape:.4f}%")
print("-------------------------------------------------")


--- Step 1: Preparing the data for the Super-Corrector ---
--- Step 2: Training the 'Super-Corrector' (Isotonic Regression) ---
 Super-Corrector has been trained.

--- Applying the Super-Corrector to our training predictions ---

-------------------------------------------------
THE FINAL ISOTONIC-CALIBRATED SMAPE SCORE IS: 43.2366%
-------------------------------------------------


In [74]:
print("\n--- Generating the final submission file with the Super-Corrector ---")

# 'calibrated_predictions' holds the flawed TEST set predictions from our 47% model.
flawed_predictions_test = calibrated_predictions

# Use our trained Super-Corrector to make the final, corrected predictions on the test set.
final_predictions = iso_corrector.predict(flawed_predictions_test)
final_predictions = np.clip(final_predictions, 0.01, 4000)  # Final safety clip

# Create the submission file
submission_df = pd.DataFrame({'sample_id': test_df['sample_id'], 'price': final_predictions})

# Correct file name syntax
SUBMISSION_PATH = os.path.join(PROJECT_ROOT, "test_out.csv")
submission_df.to_csv(SUBMISSION_PATH, index=False)

print(f"\n FINAL SUBMISSION created: test_out.csv")
print("This is the result of the final calibration. This is the file to submit.")
display(submission_df.head())



--- Generating the final submission file with the Super-Corrector ---

 FINAL SUBMISSION created: test_out.csv
This is the result of the final calibration. This is the file to submit.


Unnamed: 0,sample_id,price
75000,100179,31.286661
75001,245611,44.360311
75002,146263,48.616726
75003,95658,17.750664
75004,36806,48.616726


In [75]:
import joblib
import os

MODELS_DIR = os.path.join(PROJECT_ROOT, "models")
os.makedirs(MODELS_DIR, exist_ok=True) 

print("--- Saving all trained models ---")

# Save the specialist models
joblib.dump(model_cheap, os.path.join(MODELS_DIR, "model_cheap_specialist.pkl"))
joblib.dump(model_expensive, os.path.join(MODELS_DIR, "model_expensive_specialist.pkl"))
print(" Specialist models saved.")

# Save the final "Super-Corrector"
joblib.dump(iso_corrector, os.path.join(MODELS_DIR, "iso_corrector.pkl"))
print(" Isotonic Calibration model saved.")

# We can also save the original router model (the first Super-Text model)
# We need to retrain it quickly outside the loop
print("\n--- Saving the router model ---")
# This is the model from our first super-text attempt
router_model = lgb.LGBMRegressor(objective='regression_l1', metric='rmse', n_estimators=2000, learning_rate=0.01)
router_model.fit(X, y) # Retrain on all data
joblib.dump(router_model, os.path.join(MODELS_DIR, "router_log_price_model.pkl"))
print("Router model saved.")

print("\n\n--- All models have been saved to the '/models' directory. ---")

--- Saving all trained models ---
 Specialist models saved.
 Isotonic Calibration model saved.

--- Saving the router model ---
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.100972 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 101632
[LightGBM] [Info] Number of data points in the train set: 75000, number of used features: 400
[LightGBM] [Info] Start training from score 2.708050
Router model saved.


--- All models have been saved to the '/models' directory. ---
