In [7]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor


In [8]:
def compute_metrics(true_vals, pred_vals):
    """Return RMSE and R² for given predictions.

    Parameters
    ----------
    true_vals : array-like
        Ground-truth target values.
    pred_vals : array-like
        Predicted values, same shape as ``true_vals``.

    Returns
    -------
    tuple
        ``(rmse_val, r2_val)`` — root mean squared error and coefficient of
        determination, in that order.
    """
    # Do not pass ``squared=False``: that keyword was deprecated in
    # scikit-learn 1.4 and removed in 1.6, where it raises TypeError.
    # Taking the root of the per-output MSE and then averaging reproduces
    # what ``squared=False`` returned (identical for a single target).
    per_output_mse = mean_squared_error(
        true_vals, pred_vals, multioutput="raw_values"
    )
    rmse_val = np.sqrt(per_output_mse).mean()
    r2_val = r2_score(true_vals, pred_vals)
    return rmse_val, r2_val

In [9]:
# Recorded evaluation results, one (name, RMSE, R²) row per model.
# Row order and the "rmse"/"r2" key order are preserved because the summary
# table built from this mapping relies on them.
performance = {
    model_name: {"rmse": rmse_score, "r2": r2_value}
    for model_name, rmse_score, r2_value in [
        ("Tabular Model", 88521.20, 0.9376),
        ("Image Model", 549792.75, -1.2756156),
        ("Fusion Model", 489662.99, -0.6775),
    ]
}

In [10]:
# Build a comparison table: one row per model, with the model name moved out
# of the index into a regular column and all columns given display labels.
summary_df = (
    pd.DataFrame.from_dict(performance, orient="index")
    .reset_index()
    .set_axis(["Model Type", "RMSE", "R2"], axis=1)
)

summary_df

Unnamed: 0,Model Type,RMSE,R2
0,Tabular Model,88521.2,0.9376
1,Image Model,549792.75,-1.275616
2,Fusion Model,489662.99,-0.6775


In [12]:
# Load the test CSV and cast the "id" column to int in the same step.
test_csv_path = "/content/drive/MyDrive/satellite-property-valuation/data/raw/test_data.csv"
test_data = pd.read_csv(test_csv_path).astype({"id": int})

test_data.shape


(5404, 20)

In [13]:
# Columns that are dropped before the data is handed to the model.
ignore_cols = ["id", "date"]

# Everything except the ignored columns becomes the test feature matrix.
X_test_features = test_data.drop(labels=ignore_cols, axis="columns")

In [17]:
# Load the full training set; its features define the scaling applied to test.
train_data = pd.read_csv("/content/drive/MyDrive/satellite-property-valuation/data/raw/train_data.csv")

# Drop the identifier, the date, and the target so only model inputs remain.
X_train_features = train_data.drop(columns=["id", "date", "price"])

# Fit the standardizer on training features only, then apply it to the test
# features so both use the same training-derived scaling.
scaler = StandardScaler().fit(X_train_features)
X_test_norm = scaler.transform(X_test_features)


In [18]:
# Gradient-boosted regressor with fixed hyperparameters; random_state is set
# so repeated runs use the same seed.
final_model = XGBRegressor(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

# Log-transform target for stability (predictions are mapped back with expm1).
y_train_log = np.log1p(train_data["price"])

# Train on the *standardized* features. Prediction runs on X_test_norm (the
# scaler's output), so fitting on the raw X_train_features would make the
# learned split thresholds refer to a different scale than the inputs seen at
# prediction time.
X_train_norm = scaler.transform(X_train_features)

final_model.fit(X_train_norm, y_train_log)


In [19]:
# Predict in log-space: the model's target was log1p(price), so its outputs
# are on that scale. X_test_norm is the scaler-transformed test feature matrix.
test_preds_log = final_model.predict(X_test_norm)

# Convert back to original price scale (expm1 is the inverse of log1p).
test_preds_price = np.expm1(test_preds_log)


In [21]:
# Pair each test-set id with its predicted price (original price scale).
submission_df = pd.DataFrame({
    "id": test_data["id"],
    "predicted_price": test_preds_price
})

# Save predictions (index=False keeps the row index out of the CSV).
submission_df.to_csv(
    "/content/drive/MyDrive/satellite-property-valuation/outputs/predictions.csv",
    index=False
)

# Preview the first rows. This must be the last expression in the cell:
# a bare expression in the middle of a cell is evaluated but never displayed.
submission_df.head()

