In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import numpy as np
import pandas as pd

# Load the processed dataset (first CSV column becomes the index) and discard
# the identifier / provenance / location columns in a single chained step.
data = pd.read_csv(
    "../../processed_data/data.csv",
    index_col=0,
).drop(columns=['id', 'source', 'coordinates', 'latitude', 'longitude'])

# 'price' is the regression target; every remaining column is a feature.
y = data['price']
X = data.drop(columns='price')

# Hold out 20% of the rows for final evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Hyper-parameter search space for XGBRegressor.
param_grid = {
    'n_estimators': list(range(50, 1001, 100)),              # 50, 150, ..., 950
    'learning_rate': [i / 100.0 for i in range(1, 100, 8)],  # 0.01, 0.09, ..., 0.97
    'max_depth': list(range(3, 16)),                         # 3 .. 15
    'min_child_weight': list(range(1, 10, 2)),               # 1, 3, 5, 7, 9
    'gamma': [i / 10.0 for i in range(0, 9)],                # 0.0 .. 0.8
    'subsample': [i / 10.0 for i in range(4, 11)],           # 0.4 .. 1.0
    'colsample_bytree': [0.4, 0.6, 0.8, 0.9, 1.0],
}

# Seed the booster: subsample / colsample_bytree < 1 make training stochastic.
model = XGBRegressor(random_state=42)

# The full Cartesian product of the space above is
# 10 * 13 * 13 * 5 * 9 * 7 * 5 = 2,661,750 candidates, i.e. ~8 million fits
# with cv=3, which an exhaustive GridSearchCV cannot finish in practice.
# Sample a fixed number of candidates from the same space instead; raise
# n_iter for a more thorough (and slower) search.
# The name `grid_search` is kept so that downstream cells keep working.
grid_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=50,
    cv=3,
    scoring='neg_mean_absolute_error',
    random_state=42,  # reproducible candidate sampling
)
grid_search.fit(X_train, y_train)

In [None]:
# Mean absolute error of the best estimator found by the search, measured on
# the held-out test split. scikit-learn's signature is (y_true, y_pred);
# passing the arguments by keyword keeps the call correct even if the metric
# is later swapped for one that is not symmetric.
mean_absolute_error(
    y_true=y_test,
    y_pred=grid_search.best_estimator_.predict(X_test),
)

In [9]:
# Echo the hyper-parameter search space so it is recorded in the cell output.
# NOTE(review): the saved output below lists n_estimators as 50, 125, 200, ...
# (step 75), which does not match the range(50, 1001, 100) used where
# param_grid is defined; re-run this cell to refresh the stale output.
param_grid

{'n_estimators': [50,
  125,
  200,
  275,
  350,
  425,
  500,
  575,
  650,
  725,
  800,
  875,
  950],
 'learning_rate': [0.01,
  0.09,
  0.17,
  0.25,
  0.33,
  0.41,
  0.49,
  0.57,
  0.65,
  0.73,
  0.81,
  0.89,
  0.97],
 'max_depth': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
 'min_child_weight': [1, 3, 5, 7, 9],
 'gamma': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
 'subsample': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
 'colsample_bytree': [0.4, 0.6, 0.8, 0.9, 1.0]}