# ADS Capstone Project: Airbnb Pricing Prediction
#### By Renetta Nelson, Michael Nguyen and Jacqueline Urenda

# Modeling

In [13]:
#importing libraries

import pandas as pd
import numpy as np
import random
import xgboost
from xgboost import XGBRegressor


from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score
from tabulate import tabulate

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

import warnings
warnings.filterwarnings('ignore')

In [2]:
%store -r X_train
%store -r X_test
%store -r y_train
%store -r y_test


## Decision Tree Modeling

#### Setting baseline decision tree model

In [3]:
#fitting baseline regressor

DCTR = DecisionTreeRegressor()
DCTR.fit(X_train,y_train)


#### Evaluating performance of baseline model

In [4]:
y_pred = DCTR.predict(X_test)

In [5]:
y_pred_train = DCTR.predict(X_train)

In [6]:
MSE = mean_squared_error(y_test, y_pred)
RMSE = mean_squared_error(y_test, y_pred, squared = False)
R2 = r2_score(y_test, y_pred)
MAPE = mean_absolute_percentage_error(y_test, y_pred)


n = len(y_test)
k = len(X_test.columns)
Adjusted_R2 = 1-(((1-R2)*(n-1))/(n-k-1))

In [7]:
DCTR_Results = [['MSE', 'RMSE', 'R2', 'Adjusted_R2', 'MAPE'], 
         [MSE, RMSE, R2, Adjusted_R2, MAPE]]

print(tabulate(DCTR_Results))

-----------------  ------------------  ------------------  ------------------  ------------------
MSE                RMSE                R2                  Adjusted_R2         MAPE
66934.93773981836  258.71787286505423  0.9620149641003602  0.9620139857349796  0.1828629788219595
-----------------  ------------------  ------------------  ------------------  ------------------


## XGBoost Rgression Model

###

In [10]:
import xgboost
from xgboost import XGBRegressor

In [13]:
XGBR = XGBRegressor()

XGBR.fit(X_train, y_train)

In [14]:
y_pred_XGB = XGBR.predict(X_test)

In [17]:
XGB_MSE = mean_squared_error(y_test, y_pred_XGB)
XGB_RMSE = mean_squared_error(y_test, y_pred_XGB, squared = False)
XGB_R2 = r2_score(y_test, y_pred_XGB)
XGB_MAPE = mean_absolute_percentage_error(y_test, y_pred_XGB)


n = len(y_test)
k = len(X_test.columns)
XGB_Adjusted_R2 = 1-(((1-R2)*(n-1))/(n-k-1))

In [18]:
XGB_Results = [['MSE', 'RMSE', 'R2', 'Adjusted_R2', 'MAPE'], 
         [XGB_MSE, XGB_RMSE,XGB_R2, XGB_Adjusted_R2, XGB_MAPE]]

print(tabulate(XGB_Results))

----------------  -----------------  ------------------  ------------------  ------------------
MSE               RMSE               R2                  Adjusted_R2         MAPE
72983.4207401773  270.1544386830935  0.9585824989085637  0.9620139857349796  0.3090160273277967
----------------  -----------------  ------------------  ------------------  ------------------


## Random Forrest Model

In [3]:
#convert 
sample_indices = np.random.choice(X_train.shape[0], 5000, replace=False)
X_train_sample = X_train.iloc[sample_indices, :]
y_train_sample = y_train.iloc[sample_indices]

#### Fitting baseline model:

In [4]:
#fit baseline model
baseline_rf = RandomForestRegressor(random_state=42)
baseline_rf.fit(X_train_sample, y_train_sample)

In [5]:
#importance
importances = baseline_rf.feature_importances_

In [6]:
#sort by importance(10)
indices = np.argsort(importances)[-10:]
X_train_reduced = X_train.iloc[:, indices]

In [7]:
top_features = X_train.columns[indices]

# Select top features from the sample
X_train_sample_reduced = X_train_sample[top_features]

#### Setting up hyperparemeters

In [8]:
#set up hyperparameters
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
max_features = ['auto', 'sqrt']
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth}

In [9]:
rf = RandomForestRegressor(random_state=42)

In [10]:
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid, n_iter=100, cv=3, verbose=2, random_state=42, n_jobs=-1)

In [14]:
#fit random search
rf_random.fit(X_train_sample_reduced, y_train_sample)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


  warn(
  warn(
  warn(
  warn(


[CV] END .max_depth=10, max_features=auto, n_estimators=1400; total time=  27.5s


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=1000; total time=  32.0s
[CV] END .max_depth=20, max_features=auto, n_estimators=1000; total time=  32.0s


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=1000; total time=  33.2s
[CV] END ..max_depth=50, max_features=sqrt, n_estimators=800; total time=  15.6s
[CV] END ..max_depth=50, max_features=sqrt, n_estimators=800; total time=  15.3s


  warn(


[CV] END .max_depth=10, max_features=auto, n_estimators=1400; total time=  32.8s


  warn(


[CV] END ..max_depth=50, max_features=sqrt, n_estimators=800; total time=  13.4s


  warn(


[CV] END .max_depth=10, max_features=auto, n_estimators=1400; total time=  33.3s


  warn(


[CV] END .max_depth=60, max_features=auto, n_estimators=1000; total time=  39.6s


  warn(


[CV] END .max_depth=60, max_features=auto, n_estimators=2000; total time= 1.3min


  warn(


[CV] END .max_depth=60, max_features=auto, n_estimators=2000; total time= 1.3min
[CV] END .max_depth=60, max_features=auto, n_estimators=2000; total time= 1.3min
[CV] END .max_depth=60, max_features=auto, n_estimators=1000; total time=  40.7s
[CV] END ..max_depth=90, max_features=sqrt, n_estimators=600; total time=  10.5s
[CV] END ..max_depth=90, max_features=sqrt, n_estimators=600; total time=  10.5s
[CV] END ..max_depth=90, max_features=sqrt, n_estimators=600; total time=  10.5s
[CV] END max_depth=None, max_features=sqrt, n_estimators=800; total time=  13.5s


  warn(


[CV] END max_depth=None, max_features=sqrt, n_estimators=800; total time=  13.5s


  warn(


[CV] END .max_depth=60, max_features=auto, n_estimators=1000; total time=  38.8s


  warn(


[CV] END max_depth=None, max_features=sqrt, n_estimators=800; total time=  13.8s


  warn(


[CV] END .max_depth=10, max_features=auto, n_estimators=2000; total time=  43.2s


  warn(


[CV] END .max_depth=50, max_features=auto, n_estimators=1400; total time=  51.3s


  warn(


[CV] END .max_depth=50, max_features=auto, n_estimators=1400; total time=  51.4s


  warn(


[CV] END .max_depth=50, max_features=auto, n_estimators=1400; total time=  51.7s


  warn(


[CV] END ..max_depth=80, max_features=auto, n_estimators=800; total time=  29.0s


  warn(


[CV] END ..max_depth=80, max_features=auto, n_estimators=800; total time=  29.1s


  warn(


[CV] END .max_depth=10, max_features=auto, n_estimators=2000; total time=  43.3s


  warn(


[CV] END .max_depth=10, max_features=auto, n_estimators=2000; total time=  43.0s


  warn(


[CV] END ..max_depth=80, max_features=auto, n_estimators=800; total time=  29.2s


  warn(


[CV] END .max_depth=30, max_features=auto, n_estimators=1200; total time=  44.1s


  warn(


[CV] END .max_depth=30, max_features=auto, n_estimators=1200; total time=  45.1s


  warn(


[CV] END .max_depth=30, max_features=auto, n_estimators=1200; total time=  45.3s
[CV] END .max_depth=60, max_features=sqrt, n_estimators=1000; total time=  16.3s
[CV] END max_depth=110, max_features=auto, n_estimators=1400; total time=  52.2s
[CV] END .max_depth=60, max_features=sqrt, n_estimators=1000; total time=  16.3s


  warn(


[CV] END max_depth=110, max_features=auto, n_estimators=1400; total time=  52.0s


  warn(


[CV] END .max_depth=60, max_features=sqrt, n_estimators=1000; total time=  16.7s


  warn(


[CV] END max_depth=110, max_features=auto, n_estimators=1400; total time=  52.3s
[CV] END ..max_depth=60, max_features=sqrt, n_estimators=800; total time=  13.1s
[CV] END ..max_depth=60, max_features=sqrt, n_estimators=800; total time=  13.1s
[CV] END ..max_depth=60, max_features=sqrt, n_estimators=800; total time=  13.4s
[CV] END max_depth=None, max_features=sqrt, n_estimators=200; total time=   3.3s
[CV] END max_depth=None, max_features=sqrt, n_estimators=200; total time=   3.6s
[CV] END max_depth=None, max_features=sqrt, n_estimators=200; total time=   3.5s


  warn(


[CV] END .max_depth=90, max_features=auto, n_estimators=1800; total time= 1.1min


  warn(


[CV] END .max_depth=90, max_features=auto, n_estimators=1800; total time= 1.1min


  warn(


[CV] END .max_depth=90, max_features=auto, n_estimators=1800; total time= 1.1min
[CV] END .max_depth=10, max_features=sqrt, n_estimators=1200; total time=  12.2s
[CV] END .max_depth=10, max_features=sqrt, n_estimators=1200; total time=  14.5s
[CV] END max_depth=100, max_features=auto, n_estimators=1200; total time=  48.6s
[CV] END .max_depth=10, max_features=sqrt, n_estimators=1200; total time=  14.3s
[CV] END max_depth=100, max_features=auto, n_estimators=1200; total time=  48.8s
[CV] END max_depth=100, max_features=auto, n_estimators=1200; total time=  48.9s


  warn(


[CV] END ..max_depth=80, max_features=sqrt, n_estimators=600; total time=   9.8s


  warn(


[CV] END ..max_depth=80, max_features=sqrt, n_estimators=600; total time=  10.0s
[CV] END ..max_depth=80, max_features=sqrt, n_estimators=600; total time=  10.0s


  warn(
  warn(


[CV] END .max_depth=70, max_features=auto, n_estimators=1200; total time=  49.8s


  warn(


[CV] END .max_depth=70, max_features=auto, n_estimators=1200; total time=  50.7s


  warn(


[CV] END .max_depth=70, max_features=auto, n_estimators=1200; total time=  51.0s
[CV] END .max_depth=70, max_features=auto, n_estimators=1600; total time= 1.1min
[CV] END .max_depth=10, max_features=sqrt, n_estimators=2000; total time=  19.9s
[CV] END .max_depth=10, max_features=sqrt, n_estimators=2000; total time=  19.8s
[CV] END .max_depth=10, max_features=sqrt, n_estimators=2000; total time=  20.1s
[CV] END max_depth=None, max_features=sqrt, n_estimators=1200; total time=  19.6s
[CV] END .max_depth=70, max_features=auto, n_estimators=1600; total time=  58.1s
[CV] END ..max_depth=20, max_features=sqrt, n_estimators=200; total time=   3.1s
[CV] END .max_depth=70, max_features=auto, n_estimators=1600; total time=  58.5s
[CV] END max_depth=None, max_features=sqrt, n_estimators=1200; total time=  19.7s
[CV] END ..max_depth=20, max_features=sqrt, n_estimators=200; total time=   3.2s
[CV] END ..max_depth=20, max_features=sqrt, n_estimators=200; total time=   3.2s
[CV] END max_depth=None, m

  warn(


[CV] END ..max_depth=10, max_features=sqrt, n_estimators=200; total time=   2.2s
[CV] END ..max_depth=10, max_features=sqrt, n_estimators=200; total time=   2.2s


  warn(
  warn(


[CV] END ..max_depth=10, max_features=sqrt, n_estimators=200; total time=   2.2s
[CV] END .max_depth=110, max_features=auto, n_estimators=400; total time=  14.4s
[CV] END .max_depth=110, max_features=auto, n_estimators=400; total time=  14.3s
[CV] END .max_depth=110, max_features=auto, n_estimators=400; total time=  14.6s
[CV] END max_depth=None, max_features=sqrt, n_estimators=1600; total time=  25.6s
[CV] END ..max_depth=80, max_features=sqrt, n_estimators=800; total time=  13.0s
[CV] END ..max_depth=80, max_features=sqrt, n_estimators=800; total time=  13.5s
[CV] END max_depth=None, max_features=sqrt, n_estimators=1600; total time=  26.6s
[CV] END max_depth=None, max_features=sqrt, n_estimators=1600; total time=  26.6s
[CV] END ..max_depth=80, max_features=sqrt, n_estimators=800; total time=  13.6s


  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1400; total time=  31.3s


  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1400; total time=  31.5s


  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1400; total time=  31.4s


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=1200; total time=  40.9s


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=1800; total time= 1.2min


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=1800; total time= 1.1min


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=1800; total time= 1.1min


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=1200; total time=  49.1s


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=1200; total time=  49.6s
[CV] END .max_depth=100, max_features=auto, n_estimators=600; total time=  27.7s
[CV] END .max_depth=100, max_features=auto, n_estimators=600; total time=  27.6s
[CV] END .max_depth=10, max_features=sqrt, n_estimators=1800; total time=  20.0s


  warn(


[CV] END .max_depth=10, max_features=sqrt, n_estimators=1800; total time=  20.0s


  warn(


[CV] END .max_depth=10, max_features=sqrt, n_estimators=1800; total time=  20.0s


  warn(


[CV] END .max_depth=100, max_features=auto, n_estimators=600; total time=  23.8s
[CV] END .max_depth=100, max_features=sqrt, n_estimators=600; total time=   9.7s
[CV] END .max_depth=100, max_features=sqrt, n_estimators=600; total time=   9.8s
[CV] END .max_depth=100, max_features=sqrt, n_estimators=600; total time=   9.6s
[CV] END .max_depth=30, max_features=sqrt, n_estimators=1200; total time=  20.2s
[CV] END .max_depth=30, max_features=sqrt, n_estimators=1200; total time=  19.9s
[CV] END .max_depth=40, max_features=auto, n_estimators=2000; total time= 1.2min
[CV] END .max_depth=40, max_features=auto, n_estimators=2000; total time= 1.2min
[CV] END .max_depth=40, max_features=auto, n_estimators=2000; total time= 1.3min
[CV] END .max_depth=30, max_features=sqrt, n_estimators=1200; total time=  20.2s
[CV] END .max_depth=40, max_features=sqrt, n_estimators=2000; total time=  33.4s
[CV] END .max_depth=40, max_features=sqrt, n_estimators=2000; total time=  33.1s
[CV] END .max_depth=40, max_

  warn(


[CV] END .max_depth=90, max_features=sqrt, n_estimators=1200; total time=  19.8s


  warn(


[CV] END .max_depth=90, max_features=sqrt, n_estimators=1200; total time=  20.2s


  warn(


[CV] END .max_depth=90, max_features=sqrt, n_estimators=1200; total time=  20.2s
[CV] END ..max_depth=60, max_features=sqrt, n_estimators=600; total time=  12.2s
[CV] END ..max_depth=60, max_features=sqrt, n_estimators=600; total time=  10.2s
[CV] END ..max_depth=60, max_features=sqrt, n_estimators=600; total time=   9.8s


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=1600; total time= 1.0min


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=1600; total time= 1.0min


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=400; total time=  14.8s
[CV] END max_depth=None, max_features=auto, n_estimators=400; total time=  14.9s
[CV] END max_depth=None, max_features=auto, n_estimators=400; total time=  14.8s
[CV] END max_depth=None, max_features=auto, n_estimators=1600; total time= 1.0min
[CV] END .max_depth=10, max_features=sqrt, n_estimators=1400; total time=  14.3s
[CV] END .max_depth=10, max_features=sqrt, n_estimators=1400; total time=  14.5s
[CV] END .max_depth=10, max_features=sqrt, n_estimators=1400; total time=  14.4s


  warn(


[CV] END .max_depth=80, max_features=sqrt, n_estimators=1400; total time=  23.0s


  warn(


[CV] END .max_depth=80, max_features=sqrt, n_estimators=1400; total time=  23.4s


  warn(


[CV] END .max_depth=80, max_features=sqrt, n_estimators=1400; total time=  23.0s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1400; total time=  50.1s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1400; total time=  49.9s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1400; total time=  50.0s
[CV] END .max_depth=90, max_features=auto, n_estimators=1200; total time=  42.9s
[CV] END .max_depth=20, max_features=sqrt, n_estimators=1800; total time=  27.4s
[CV] END .max_depth=20, max_features=sqrt, n_estimators=1800; total time=  27.5s
[CV] END .max_depth=90, max_features=auto, n_estimators=1200; total time=  42.7s
[CV] END .max_depth=90, max_features=auto, n_estimators=1200; total time=  43.1s
[CV] END .max_depth=20, max_features=sqrt, n_estimators=1800; total time=  27.8s


  warn(


[CV] END ..max_depth=40, max_features=auto, n_estimators=200; total time=   7.2s


  warn(


[CV] END .max_depth=70, max_features=sqrt, n_estimators=2000; total time=  31.9s


  warn(


[CV] END .max_depth=70, max_features=sqrt, n_estimators=2000; total time=  32.0s


  warn(


[CV] END ..max_depth=40, max_features=auto, n_estimators=200; total time=   7.2s


  warn(


[CV] END ..max_depth=40, max_features=auto, n_estimators=200; total time=   7.4s


  warn(


[CV] END .max_depth=70, max_features=sqrt, n_estimators=2000; total time=  32.0s


  warn(


[CV] END max_depth=110, max_features=auto, n_estimators=1000; total time=  35.6s


  warn(


[CV] END max_depth=110, max_features=auto, n_estimators=1000; total time=  35.6s


  warn(


[CV] END max_depth=110, max_features=auto, n_estimators=1000; total time=  35.9s


  warn(


[CV] END max_depth=100, max_features=auto, n_estimators=1400; total time=  49.5s


  warn(


[CV] END max_depth=100, max_features=auto, n_estimators=1400; total time=  49.2s


  warn(


[CV] END max_depth=100, max_features=auto, n_estimators=1400; total time=  49.6s
[CV] END max_depth=110, max_features=auto, n_estimators=1600; total time=  56.4s
[CV] END .max_depth=100, max_features=sqrt, n_estimators=800; total time=  12.7s
[CV] END .max_depth=100, max_features=sqrt, n_estimators=800; total time=  12.9s


  warn(


[CV] END max_depth=110, max_features=auto, n_estimators=1600; total time=  56.7s


  warn(


[CV] END .max_depth=100, max_features=sqrt, n_estimators=800; total time=  12.8s


  warn(


[CV] END max_depth=110, max_features=auto, n_estimators=1600; total time=  57.2s


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=1000; total time=  35.4s


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=1000; total time=  35.6s


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=1000; total time=  35.9s
[CV] END .max_depth=60, max_features=sqrt, n_estimators=1200; total time=  18.8s
[CV] END .max_depth=60, max_features=sqrt, n_estimators=1200; total time=  18.9s
[CV] END .max_depth=60, max_features=sqrt, n_estimators=1200; total time=  19.3s
[CV] END max_depth=110, max_features=auto, n_estimators=2000; total time= 1.2min
[CV] END max_depth=110, max_features=auto, n_estimators=2000; total time= 1.2min
[CV] END max_depth=110, max_features=auto, n_estimators=2000; total time= 1.2min
[CV] END ..max_depth=40, max_features=sqrt, n_estimators=800; total time=  12.8s
[CV] END max_depth=110, max_features=sqrt, n_estimators=2000; total time=  31.9s
[CV] END max_depth=110, max_features=sqrt, n_estimators=2000; total time=  31.8s
[CV] END ..max_depth=40, max_features=sqrt, n_estimators=800; total time=  12.8s
[CV] END max_depth=110, max_features=sqrt, n_estimators=2000; total time=  32.0s
[CV] END ..max_depth=40, ma

  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1200; total time=  19.4s


  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1200; total time=  19.5s


  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1200; total time=  19.5s


  warn(


[CV] END ..max_depth=70, max_features=auto, n_estimators=200; total time=   7.0s


  warn(


[CV] END ..max_depth=70, max_features=auto, n_estimators=200; total time=   7.1s


  warn(


[CV] END ..max_depth=70, max_features=auto, n_estimators=200; total time=   8.4s


  warn(


[CV] END .max_depth=70, max_features=auto, n_estimators=1400; total time=  57.0s


  warn(


[CV] END .max_depth=70, max_features=auto, n_estimators=1400; total time=  56.9s


  warn(


[CV] END .max_depth=70, max_features=auto, n_estimators=1400; total time=  58.4s


  warn(


[CV] END ..max_depth=60, max_features=auto, n_estimators=400; total time=  14.5s


  warn(


[CV] END ..max_depth=60, max_features=auto, n_estimators=400; total time=  14.3s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1600; total time= 1.1min


  warn(


[CV] END ..max_depth=60, max_features=auto, n_estimators=400; total time=  14.4s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1600; total time=  59.0s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1600; total time=  59.6s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1800; total time= 1.1min


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1800; total time= 1.1min


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1800; total time= 1.1min
[CV] END .max_depth=60, max_features=auto, n_estimators=1800; total time= 1.1min


  warn(
  warn(


[CV] END .max_depth=50, max_features=auto, n_estimators=1000; total time=  39.1s


  warn(


[CV] END .max_depth=50, max_features=auto, n_estimators=1000; total time=  39.3s
[CV] END .max_depth=60, max_features=auto, n_estimators=1800; total time= 1.2min
[CV] END .max_depth=60, max_features=sqrt, n_estimators=1600; total time=  26.2s
[CV] END .max_depth=60, max_features=auto, n_estimators=1800; total time= 1.2min
[CV] END .max_depth=50, max_features=auto, n_estimators=1000; total time=  37.4s
[CV] END .max_depth=60, max_features=sqrt, n_estimators=1600; total time=  26.7s
[CV] END max_depth=100, max_features=sqrt, n_estimators=1000; total time=  16.0s
[CV] END .max_depth=60, max_features=sqrt, n_estimators=1600; total time=  26.1s
[CV] END max_depth=100, max_features=sqrt, n_estimators=1000; total time=  16.2s
[CV] END max_depth=100, max_features=sqrt, n_estimators=1000; total time=  16.2s
[CV] END ..max_depth=60, max_features=sqrt, n_estimators=400; total time=   6.4s
[CV] END .max_depth=90, max_features=sqrt, n_estimators=1600; total time=  26.0s
[CV] END ..max_depth=60, max

  warn(


[CV] END ..max_depth=60, max_features=sqrt, n_estimators=400; total time=   6.6s


  warn(


[CV] END .max_depth=90, max_features=sqrt, n_estimators=1600; total time=  26.1s


  warn(


[CV] END .max_depth=90, max_features=sqrt, n_estimators=1600; total time=  26.3s


  warn(


[CV] END .max_depth=70, max_features=auto, n_estimators=1000; total time=  36.4s


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=2000; total time= 1.3min


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=2000; total time= 1.3min
[CV] END .max_depth=20, max_features=auto, n_estimators=2000; total time= 1.3min
[CV] END .max_depth=70, max_features=auto, n_estimators=1000; total time=  42.9s
[CV] END .max_depth=40, max_features=sqrt, n_estimators=1200; total time=  19.2s


  warn(


[CV] END .max_depth=40, max_features=sqrt, n_estimators=1200; total time=  19.2s


  warn(


[CV] END .max_depth=40, max_features=sqrt, n_estimators=1200; total time=  19.7s


  warn(


[CV] END .max_depth=70, max_features=auto, n_estimators=1000; total time=  37.0s


  warn(


[CV] END ..max_depth=50, max_features=auto, n_estimators=600; total time=  22.1s


  warn(


[CV] END ..max_depth=50, max_features=auto, n_estimators=600; total time=  22.2s


  warn(


[CV] END ..max_depth=50, max_features=auto, n_estimators=600; total time=  22.0s
[CV] END .max_depth=10, max_features=auto, n_estimators=1200; total time=  25.4s
[CV] END .max_depth=10, max_features=auto, n_estimators=1200; total time=  25.4s
[CV] END .max_depth=10, max_features=auto, n_estimators=1200; total time=  25.4s


  warn(


[CV] END .max_depth=30, max_features=sqrt, n_estimators=1400; total time=  22.4s


  warn(


[CV] END .max_depth=30, max_features=sqrt, n_estimators=1400; total time=  22.6s


  warn(


[CV] END .max_depth=30, max_features=sqrt, n_estimators=1400; total time=  22.5s
[CV] END .max_depth=90, max_features=auto, n_estimators=1000; total time=  35.8s
[CV] END .max_depth=90, max_features=auto, n_estimators=1000; total time=  36.0s
[CV] END .max_depth=90, max_features=auto, n_estimators=1000; total time=  36.2s
[CV] END .max_depth=80, max_features=sqrt, n_estimators=2000; total time=  32.2s
[CV] END .max_depth=80, max_features=sqrt, n_estimators=2000; total time=  32.7s
[CV] END .max_depth=80, max_features=sqrt, n_estimators=2000; total time=  34.0s


  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1600; total time=  28.1s


  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1600; total time=  28.7s


  warn(


[CV] END .max_depth=50, max_features=sqrt, n_estimators=1600; total time=  31.7s
[CV] END .max_depth=110, max_features=sqrt, n_estimators=600; total time=   9.8s
[CV] END .max_depth=40, max_features=auto, n_estimators=1200; total time=  49.1s
[CV] END .max_depth=110, max_features=sqrt, n_estimators=600; total time=  10.1s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1200; total time=  48.6s


  warn(


[CV] END .max_depth=40, max_features=auto, n_estimators=1200; total time=  48.4s
[CV] END .max_depth=110, max_features=sqrt, n_estimators=600; total time=  10.3s


  warn(
  warn(


[CV] END ..max_depth=80, max_features=auto, n_estimators=600; total time=  21.7s


  warn(


[CV] END ..max_depth=80, max_features=auto, n_estimators=600; total time=  21.4s


  warn(


[CV] END max_depth=110, max_features=auto, n_estimators=1800; total time= 1.1min
[CV] END max_depth=110, max_features=auto, n_estimators=1800; total time= 1.1min
[CV] END ..max_depth=20, max_features=sqrt, n_estimators=400; total time=   6.3s
[CV] END ..max_depth=80, max_features=auto, n_estimators=600; total time=  22.5s
[CV] END max_depth=110, max_features=auto, n_estimators=1800; total time= 1.1min
[CV] END ..max_depth=20, max_features=sqrt, n_estimators=400; total time=   6.6s
[CV] END ..max_depth=20, max_features=sqrt, n_estimators=400; total time=   6.7s
[CV] END ..max_depth=10, max_features=sqrt, n_estimators=600; total time=   6.1s
[CV] END ..max_depth=10, max_features=sqrt, n_estimators=600; total time=   6.2s
[CV] END ..max_depth=10, max_features=sqrt, n_estimators=600; total time=   6.2s


  warn(


[CV] END .max_depth=20, max_features=sqrt, n_estimators=1200; total time=  18.6s


  warn(


[CV] END .max_depth=20, max_features=sqrt, n_estimators=1200; total time=  18.7s
[CV] END .max_depth=20, max_features=sqrt, n_estimators=1200; total time=  18.8s


  warn(
  warn(


[CV] END ..max_depth=30, max_features=auto, n_estimators=600; total time=  21.6s


  warn(


[CV] END ..max_depth=30, max_features=auto, n_estimators=600; total time=  21.2s


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=1800; total time= 1.0min


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=1800; total time= 1.0min


  warn(


[CV] END .max_depth=20, max_features=auto, n_estimators=1800; total time= 1.1min


  warn(


[CV] END ..max_depth=30, max_features=auto, n_estimators=600; total time=  26.5s


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=800; total time=  39.4s


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=800; total time=  40.3s


  warn(


[CV] END max_depth=None, max_features=auto, n_estimators=800; total time=  37.9s


  warn(


[CV] END ..max_depth=90, max_features=auto, n_estimators=400; total time=  15.3s


  warn(


[CV] END ..max_depth=90, max_features=auto, n_estimators=400; total time=  15.0s


  warn(


[CV] END .max_depth=80, max_features=auto, n_estimators=1800; total time= 1.2min
[CV] END ..max_depth=90, max_features=auto, n_estimators=400; total time=  15.1s
[CV] END ..max_depth=30, max_features=sqrt, n_estimators=400; total time=   6.7s
[CV] END .max_depth=80, max_features=auto, n_estimators=1800; total time= 1.1min
[CV] END ..max_depth=30, max_features=sqrt, n_estimators=400; total time=   6.9s
[CV] END ..max_depth=30, max_features=sqrt, n_estimators=400; total time=   6.9s
[CV] END .max_depth=80, max_features=auto, n_estimators=1800; total time= 1.1min
[CV] END .max_depth=100, max_features=sqrt, n_estimators=200; total time=   3.4s
[CV] END .max_depth=100, max_features=sqrt, n_estimators=200; total time=   3.4s
[CV] END .max_depth=100, max_features=sqrt, n_estimators=200; total time=   3.4s
[CV] END .max_depth=70, max_features=sqrt, n_estimators=1600; total time=  27.3s
[CV] END .max_depth=70, max_features=sqrt, n_estimators=1600; total time=  27.0s
[CV] END .max_depth=70, max_

  warn(


[CV] END ..max_depth=30, max_features=auto, n_estimators=400; total time=  14.9s


  warn(


[CV] END .max_depth=40, max_features=sqrt, n_estimators=1400; total time=  23.4s


  warn(


[CV] END .max_depth=40, max_features=sqrt, n_estimators=1400; total time=  23.7s


  warn(


[CV] END .max_depth=40, max_features=sqrt, n_estimators=1400; total time=  23.6s


  warn(


[CV] END ..max_depth=30, max_features=auto, n_estimators=400; total time=  15.2s


  warn(


[CV] END ..max_depth=30, max_features=auto, n_estimators=400; total time=  15.2s


  warn(


[CV] END ..max_depth=90, max_features=auto, n_estimators=600; total time=  22.8s


  warn(


[CV] END ..max_depth=90, max_features=auto, n_estimators=600; total time=  22.5s


  warn(


[CV] END ..max_depth=90, max_features=auto, n_estimators=600; total time=  22.9s
[CV] END .max_depth=80, max_features=auto, n_estimators=1400; total time=  46.9s
[CV] END .max_depth=80, max_features=auto, n_estimators=1400; total time=  43.4s
[CV] END .max_depth=80, max_features=auto, n_estimators=1400; total time=  41.8s


In [15]:
best_params = rf_random.best_params_
print("Best parameters: ", best_params)

Best parameters:  {'n_estimators': 400, 'max_features': 'sqrt', 'max_depth': 60}


In [16]:
rf_best = RandomForestRegressor(n_estimators=best_params['n_estimators'], max_depth=best_params['max_depth'], max_features=best_params['max_features'], random_state=42)
rf_best.fit(X_train_reduced, y_train)

In [17]:
# Reduce test set
X_test_reduced = X_test[top_features]

# Predict on test set
y_pred_rf = rf_best.predict(X_test_reduced)

In [18]:
#SHAP and interpret
importances = rf_best.feature_importances_
feature_importance_df = pd.DataFrame({"Feature": top_features, "Importance": importances})
feature_importance_df = feature_importance_df.sort_values(by="Importance", ascending=False)
print(feature_importance_df)

                  Feature  Importance
5              listing_id    0.393552
8  host_neighbourhood_cat    0.244225
9          minimum_nights    0.192621
6            accommodates    0.052072
0     review_scores_value    0.035999
2       number_of_reviews    0.026642
4                bedrooms    0.017599
7                bathroom    0.016849
3                    beds    0.016261
1            average_temp    0.004182


In [20]:
rf_MSE = mean_squared_error(y_test, y_pred_rf)
rf_RMSE = mean_squared_error(y_test, y_pred_rf, squared = False)
rf_R2 = r2_score(y_test, y_pred_rf)
rf_MAPE = mean_absolute_percentage_error(y_test, y_pred_rf)


n = len(y_test)
k = len(X_test_reduced.columns)
rf_Adjusted_R2 = 1-(((1-rf_R2)*(n-1))/(n-k-1))

In [22]:
RF_Results = [['MSE', 'RMSE', 'R2', 'Adjusted_R2', 'MAPE'], 
         [rf_MSE, rf_RMSE,rf_R2, rf_Adjusted_R2, rf_MAPE]]

print(tabulate(RF_Results))

-----------------  -----------------  ------------------  ------------------  -------------------
MSE                RMSE               R2                  Adjusted_R2         MAPE
77696.99503085097  278.7418071098252  0.9559075836148075  0.9559071293535326  0.16821962791261985
-----------------  -----------------  ------------------  ------------------  -------------------
