In [2]:
import pandas as pd
import numpy as np
from custom_xgoost.xgboost_simulated_annealing import CustomXGBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# --- Data loading -----------------------------------------------------------
# Read the Ames CSV and coerce every column to numeric. Values that cannot be
# parsed become NaN; columns that end up entirely NaN are then dropped.
# Columns that are only partially NaN are kept as-is.
# NOTE(review): whether CustomXGBoostRegressor tolerates the remaining NaNs is
# not visible from this cell -- confirm against its implementation.
df = (
    pd.read_csv("data/ames.csv")
    .apply(pd.to_numeric, errors="coerce")
    .dropna(axis=1, how="all")
)

# Target is the sale price; every other surviving column is a feature.
y = df["SalePrice"].to_numpy()
X = df.drop("SalePrice", axis=1).to_numpy()

# --- Train / test split -----------------------------------------------------
# 80/20 split with a fixed seed so the reported numbers are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model ------------------------------------------------------------------
# initial_temp / cooling_rate / max_iter are presumably the simulated-annealing
# schedule of the custom regressor (inferred from the module name) -- verify.
xgb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "initial_temp": 1.0,
    "cooling_rate": 0.95,
    "max_iter": 50,
}
xgb = CustomXGBoostRegressor(**xgb_params)
xgb.fit(X_train, y_train)

# --- Predictions ------------------------------------------------------------
y_train_pred = xgb.predict(X_train)
y_test_pred = xgb.predict(X_test)

# --- Metrics ----------------------------------------------------------------
# MSE is the mean of the squared residuals; RMSE is its square root, which
# puts the error back in the units of SalePrice.
train_mse = np.mean(np.square(y_train - y_train_pred))
test_mse = np.mean(np.square(y_test - y_test_pred))

train_rmse, test_rmse = np.sqrt(train_mse), np.sqrt(test_mse)

train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

# --- Report -----------------------------------------------------------------
# end="\n\n" emits the blank separator line between metric groups.
print("Train MSE:", train_mse)
print("Test MSE:", test_mse, end="\n\n")
print("Train RMSE:", train_rmse)
print("Test RMSE:", test_rmse, end="\n\n")
print("Train R²:", train_r2)
print("Test R²:", test_r2)


Train MSE: 1392578549.992827
Test MSE: 3481120269.8807697

Train RMSE: 37317.26879063937
Test RMSE: 59001.01922747411

Train R²: 0.9895883896573285
Test R²: 0.9750199784686737
