In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, root_mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

# Baseline experiment: fit a default random forest on the rent dataset and
# report its accuracy on a held-out split.

# The CSV is read via a relative path, i.e. from the notebook's working directory.
df = pd.read_csv('rentprediction_dataset_v5.csv')

# Target is the 'rent' column; every remaining column is used as a feature.
y = df['rent']
X = df.drop(columns=['rent'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Only the random seed is set here; all other hyperparameters are left at their defaults.
rfr = RandomForestRegressor(random_state=42)
rfr.fit(X_train, y_train)
y_pred = rfr.predict(X_test)

# Score the held-out predictions.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)

print(f'Selected Features R-squared value: {r2}')
print(f'Selected Features Mean Squared Error: {mse}')
# This line previously reused the "Mean Squared Error" label for the RMSE value,
# which made the two printed numbers indistinguishable by name.
print(f'Selected Features Root Mean Squared Error: {rmse}')

Selected Features R-squared value: 0.7788159270396624
Selected Features Mean Squared Error: 340247.8664887005
Selected Features Mean Squared Error: 583.3076945221111


In [8]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, root_mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import RandomizedSearchCV


# Tuning experiment: randomized hyperparameter search over a random forest,
# evaluated on the same held-out split as the baseline cell.

# The CSV is read via a relative path, i.e. from the notebook's working directory.
df = pd.read_csv('rentprediction_dataset_v5.csv')

# Target is the 'rent' column; every remaining column is used as a feature.
y = df['rent']
X = df.drop(columns=['rent'])

# Same test_size and random_state as the baseline cell, so the split is identical
# and the two sets of metrics are directly comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

rfr = RandomForestRegressor(random_state=42)

# Candidate values the search samples from.
param_distributions = {
    'n_estimators': np.arange(100, 1001, 100),   # 100, 200, ..., 1000
    'max_features': ['sqrt', 'log2', None],
    'max_depth': np.arange(10, 101, 10),         # 10, 20, ..., 100
    'min_samples_split': np.arange(2, 11),       # 2 .. 10
    'min_samples_leaf': np.arange(1, 11),        # 1 .. 10
    'bootstrap': [True, False]
}

random_search = RandomizedSearchCV(
    estimator=rfr,
    param_distributions=param_distributions,
    n_iter=100,  # Number of parameter settings sampled
    cv=5,        # 5-fold cross-validation
    verbose=2,
    random_state=42,
    n_jobs=-1    # Use all available cores
)

# The search only ever sees the training split; the test split is kept for final scoring.
random_search.fit(X_train, y_train)
y_pred = random_search.predict(X_test)

# Score the held-out predictions.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)

print(f'Selected Features R-squared value: {r2}')
print(f'Selected Features Mean Squared Error: {mse}')
# This line previously reused the "Mean Squared Error" label for the RMSE value,
# which made the two printed numbers indistinguishable by name.
print(f'Selected Features Root Mean Squared Error: {rmse}')

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=10, min_samples_split=7, n_estimators=100; total time=   0.7s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=10, min_samples_split=7, n_estimators=100; total time=   0.7s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=10, min_samples_split=7, n_estimators=100; total time=   0.8s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=10, min_samples_split=7, n_estimators=100; total time=   0.7s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=10, min_samples_split=7, n_estimators=100; total time=   0.7s
[CV] END bootstrap=False, max_depth=70, max_features=log2, min_samples_leaf=8, min_samples_split=2, n_estimators=300; total time=   1.8s
[CV] END bootstrap=False, max_depth=70, max_features=log2, min_samples_leaf=8, min_samples_split=2, n_estimators=30

In [10]:
# Read the selected hyperparameter combination and the matching estimator
# off the search object created in the tuning cell.
best_params, best_model = random_search.best_params_, random_search.best_estimator_

# Show the chosen hyperparameters first, then the estimator's repr.
print("Best Hyperparameters:", best_params)
print("Best Model:", best_model)

Best Hyperparameters: {'n_estimators': 1000, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': 10, 'bootstrap': True}
Best Model: RandomForestRegressor(max_depth=10, max_features=None, min_samples_leaf=2,
                      n_estimators=1000, random_state=42)
