In [None]:
import pandas as pd
import numpy as np
from custom_xgoost.xgboost_genetic_algorithm_split import CustomXGBoost
from sklearn.metrics import r2_score

# Train CustomXGBoost on the house-price training file and report
# MSE / RMSE / R² on both the training data and the separate "unseen" file.
df_train = pd.read_csv("data/kc_house_data.csv")
df_test = pd.read_csv("data/unseen_kc_house_data.csv")

# Coerce every column to numeric: unparseable values become NaN, and columns
# that end up entirely NaN (i.e. fully non-numeric columns) are dropped.
# NOTE(review): columns that are only partially non-numeric keep their NaN
# cells — confirm the data has none or that CustomXGBoost tolerates NaN.
df_train = df_train.apply(pd.to_numeric, errors='coerce').dropna(axis=1, how='all')
df_test = df_test.apply(pd.to_numeric, errors='coerce').dropna(axis=1, how='all')

# The two frames are cleaned independently, so a column may survive in one
# and be dropped in the other. Because `.values` discards column names, any
# such mismatch would either break predict() or silently misalign features.
# Use the features present in BOTH frames, in training-column order, for
# both matrices. When the files share the same columns this is identical to
# dropping only the target column.
feature_cols = [
    col for col in df_train.columns
    if col != "price" and col in df_test.columns
]

y_train = df_train["price"].values
X_train = df_train[feature_cols].values

y_test = df_test["price"].values
X_test = df_test[feature_cols].values

model = CustomXGBoost(n_estimators=10, learning_rate=0.1, max_depth=3, min_samples_split=5)
model.fit(X_train, y_train)

y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Mean squared error and its square root, computed directly with NumPy.
train_mse = np.mean((y_train - y_train_pred) ** 2)
test_mse = np.mean((y_test - y_test_pred) ** 2)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
# Coefficient of determination via scikit-learn.
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print("Train MSE:", train_mse)
print("Test MSE:", test_mse)
print()
print("Train RMSE:", train_rmse)
print("Test RMSE:", test_rmse)
print()
print("Train R²:", train_r2)
print("Test R²:", test_r2)
