In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset; 'Price' is the regression target and every other column
# is used as a feature.
data = pd.read_csv('../Data/imputed_data_7.csv')

X = data.drop(columns=['Price']).values
y = data['Price'].values
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

# Search space: 5 * 4 * 3 * 3 * 3 = 540 combinations, each cross-validated on
# 5 folds (2,700 forest fits in total).
params = {
    "n_estimators": [100, 200, 300, 400, 500],
    "max_depth": [None, 5, 10, 20],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "max_features": [None, "sqrt", "log2"]
}

rf = RandomForestRegressor(random_state=13)
# n_jobs=-1 spreads the cross-validation fits over all available cores. The
# scores are unchanged because the estimator's random_state is fixed.
grid_search = GridSearchCV(rf, params, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_

# With the default refit=True, GridSearchCV has already retrained the winning
# configuration on the full training split, so reuse that model instead of
# constructing and fitting an identical forest a second time.
rf_best = grid_search.best_estimator_

# Evaluate on the held-out test split.
y_pred = rf_best.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

mae = mean_absolute_error(y_test, y_pred)

r_squared = r2_score(y_test, y_pred)

print("Best hyperparameters:", best_params)
# Report the hold-out metrics; previously they were computed and discarded.
print("Test MSE:", mse)
print("Test RMSE:", rmse)
print("Test MAE:", mae)
print("Test R-squared:", r_squared)

Best hyperparameters: {'max_depth': 10, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}


In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score, cross_validate, train_test_split

# Load the dataset; 'Price' is the regression target and every other column
# is used as a feature.
data = pd.read_csv('../Data/imputed_data_7.csv')
X = data.drop(columns=['Price']).values
y = data['Price'].values

# Hold out 20% of the rows for a single train/test evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)

# Hyperparameters are hard-coded here rather than searched for in this cell.
best_params = {
    'max_depth': None,
    'max_features': None,
    'min_samples_leaf': 1,
    'min_samples_split': 2,
    'n_estimators': 500
}

rf_best = RandomForestRegressor(**best_params, random_state=13)

# Score all three metrics in one 5-fold pass over the full dataset. Separate
# cross_val_score calls would refit the same forest on the same folds once
# per metric (15 fits); cross_validate needs only 5 and returns the same
# per-fold scores under 'test_<scorer name>' keys. The estimator is cloned
# for each fold, so rf_best itself stays unfitted until the fit() call below.
cv_results = cross_validate(
    rf_best,
    X,
    y,
    cv=5,
    scoring=('neg_mean_squared_error', 'r2', 'neg_mean_absolute_error'),
)

# The "neg_" scorers return negated errors, so flip the sign before use.
cv_scores = cv_results['test_neg_mean_squared_error']
cv_rmse = (-cv_scores) ** 0.5  # per-fold RMSE

cv_scores_r2 = cv_results['test_r2']
cv_r2 = cv_scores_r2.mean()

cv_scores_mae = cv_results['test_neg_mean_absolute_error']
cv_mae = -cv_scores_mae.mean()

# Single hold-out evaluation on the 80/20 split.
rf_best.fit(X_train, y_train)
y_pred = rf_best.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

print("Best hyperparameters:", best_params)
print("Cross-validated RMSE:", cv_rmse.mean())
print("Mean Cross-validated MAE:", cv_mae)
print("Mean R-squared from cross-validation:", cv_r2)
print("MSE:", mse)
# Report the remaining hold-out metrics; previously they were computed and
# discarded.
print("Hold-out RMSE:", rmse)
print("Hold-out MAE:", mae)
print("Hold-out R-squared:", r_squared)

Best hyperparameters: {'max_depth': None, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 500}
Cross-validated RMSE: 34.40951252269133
Mean Cross-validated MAE: 25.788806371203588
Mean R-squared from cross-validation: 0.87539644614997
MSE: 1052.3284739703015
