In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import os
import seaborn as sns

In [None]:
# Read the listings table from 'airbnb.csv' in the current working directory.
path = os.getcwd()
csv_file = os.path.join(path, 'airbnb.csv')
airbnb = pd.read_csv(csv_file)

In [None]:
# Percentage of missing values per column, rounded to 4 decimals.
airbnb.isna().sum().mul(100).div(len(airbnb)).round(4)

In [None]:
# One-hot encode the categorical columns in one pass. Each source column is
# dropped and replaced by "<prefix>_<category>" indicator columns, appended
# in the order listed here.
dummy_prefixes = {
    "room_type": "room_type_is",
    "property_type": "property_type_is",
    "bed_type": "bed_type_is",
    "host_response_time": "host_response_time_is",
    "host_since_year": "host_since_year_is",
    "city": "city_is",
}
airbnb = pd.get_dummies(
    airbnb,
    columns=list(dummy_prefixes.keys()),
    prefix=list(dummy_prefixes.values()),
)

In [None]:
from sklearn.preprocessing import LabelEncoder
# Integer-encode the location columns into new "<column>_le" columns.
# The same encoder object is refit for every column, so after this cell it
# only remembers the classes of the last one ('country').
# 'city' is not label-encoded here: it was one-hot encoded in the previous cell.
le = LabelEncoder()
for location_col in ('neighbourhood_cleansed', 'state', 'country'):
    airbnb[location_col + '_le'] = le.fit_transform(airbnb[location_col])

In [None]:
# Split the '/'-separated anniversary string into three new columns.
# NOTE(review): assumes every value has exactly three '/'-separated parts - confirm.
anniversary_parts = airbnb['host_since_anniversary'].str.split('/', expand=True)
airbnb[['Year', 'Month', 'Day']] = anniversary_parts

In [None]:
# Re-check the percentage of missing values per column now that the encoded
# and date-part columns have been added (rounded to 4 decimals).
airbnb.isna().sum().mul(100).div(len(airbnb)).round(4)

In [None]:
# Columns used as model inputs, grouped by origin.
feature_columns = [
    # raw numeric listing / host / review columns
    'zipcode', 'accommodates', 'bathrooms', 'bedrooms', 'beds',
    'guests_included', 'extra_people', 'minimum_nights',
    'host_response_rate', 'number_of_reviews',
    'review_scores_rating', 'review_scores_accuracy',
    'review_scores_cleanliness', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_location',
    'review_scores_value',
    # room_type indicators
    'room_type_is_Entire home/apt', 'room_type_is_Private room',
    'room_type_is_Shared room',
    # property_type indicators
    'property_type_is_Apartment', 'property_type_is_Bed & Breakfast',
    'property_type_is_Boat', 'property_type_is_Cabin',
    'property_type_is_Camper/RV', 'property_type_is_Chalet',
    'property_type_is_Dorm', 'property_type_is_Earth House',
    'property_type_is_House', 'property_type_is_Hut',
    'property_type_is_Loft', 'property_type_is_Other',
    'property_type_is_Treehouse', 'property_type_is_Villa',
    'property_type_is_Yurt',
    # bed_type indicators
    'bed_type_is_Airbed', 'bed_type_is_Couch', 'bed_type_is_Futon',
    'bed_type_is_Pull-out Sofa', 'bed_type_is_Real Bed',
    # host_response_time indicators
    'host_response_time_is_a few days or more',
    'host_response_time_is_within a day',
    'host_response_time_is_within a few hours',
    'host_response_time_is_within an hour',
    # host_since_year indicators
    'host_since_year_is_2008', 'host_since_year_is_2009',
    'host_since_year_is_2010', 'host_since_year_is_2011',
    'host_since_year_is_2012', 'host_since_year_is_2013',
    'host_since_year_is_2014', 'host_since_year_is_2015',
    # city indicators
    'city_is_Amsterdam', 'city_is_Badhoevedorp', 'city_is_Diemen',
    'city_is_Duivendrecht', 'city_is_Halfweg', 'city_is_Landsmeer',
    # label-encoded neighbourhood
    'neighbourhood_cleansed_le',
]
X = airbnb[feature_columns]
y = airbnb['price']

# 95 % / 5 % train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.95,
    test_size=0.05,
    random_state=0,
)

# Models are trained on log(price); y_test stays on the original price scale.
y_train = np.log(y_train)

In [None]:
from sklearn import preprocessing
# Standardise the features. The scaling statistics are learned on the
# training split only and then reused for the test split.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
from sklearn import neighbors
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error 
from math import sqrt
from numpy import arange
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [None]:
# 5-fold grid search over odd neighbour counts k = 1, 3, ..., 19.
params = {'n_neighbors': np.arange(1, 21, 2)}
knn = neighbors.KNeighborsRegressor()
knn_model = GridSearchCV(estimator=knn, param_grid=params, cv=5).fit(X_train_scaled, y_train)
knn_model.best_params_

In [None]:
from numpy import arange
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
# The KNN search was fit on log(price), so exponentiate its predictions to
# get back to the price scale.
y_pred = knn_model.predict(X=X_test_scaled)
y_pred_f = np.exp(y_pred)

In [None]:
from sklearn.linear_model import LinearRegression
# Plain linear regression on the standardised features; target is log(price).
MLR = LinearRegression()
MLR.fit(X=X_train_scaled, y=y_train)

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
# Evaluate the linear model on the held-out split. Predictions are on the
# log(price) scale, so exponentiate before comparing with y_test.
y_pred = MLR.predict(X_test_scaled)
y_pred_f = np.exp(y_pred)
mlr_rmse = np.sqrt(mean_squared_error(y_test, y_pred_f))
mlr_r2 = r2_score(y_test, y_pred_f)
# A notebook cell only renders its final expression; previously the RMSE was
# computed and silently discarded. Show both metrics together instead.
{'rmse': mlr_rmse, 'r2': mlr_r2}

In [None]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV

In [None]:
from numpy import arange
# Candidate penalties: 200 values log-spaced (base 2) from 2**-100 to 2**1.
alphas = list(np.logspace(start=-100, stop=1, num=200, base=2))
Lasso_model = LassoCV(
    alphas=alphas,
    cv=5,
    random_state=0,
    max_iter=2000,
)
Lasso_model.fit(X_train_scaled, y_train)

In [None]:
# Display the penalty strength stored on the fitted LassoCV model.
Lasso_model.alpha_

In [None]:
# Feature names, in the same order as the columns of the design matrix X.
# Derived from X rather than re-typed so the names can never drift out of
# sync with the columns the models were actually trained on (coef_ and
# feature_importances_ are indexed positionally against this list).
feature = list(X.columns)

In [None]:
# "Important" Lasso features: coefficient magnitude of at least 0.05.
lasso_coefs = Lasso_model.coef_
lasso_strong_idx = [k for k, c in enumerate(lasso_coefs) if abs(c) >= 0.05]
lst1_X = [feature[k] for k in lasso_strong_idx]
lst1_imp = [lasso_coefs[k] for k in lasso_strong_idx]

In [None]:
# "Not important" Lasso features: coefficient magnitude strictly below 0.005.
lasso_coefs = Lasso_model.coef_
lasso_weak_idx = [k for k, c in enumerate(lasso_coefs) if abs(c) < 0.005]
lst2_X = [feature[k] for k in lasso_weak_idx]
lst2_imp = [lasso_coefs[k] for k in lasso_weak_idx]
        

In [None]:
# "Other" Lasso features: coefficient magnitude in [0.005, 0.05).
lasso_coefs = Lasso_model.coef_
lasso_mid_idx = [k for k, c in enumerate(lasso_coefs) if 0.005 <= abs(c) < 0.05]
lst3_X = [feature[k] for k in lasso_mid_idx]
lst3_imp = [lasso_coefs[k] for k in lasso_mid_idx]

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "important" Lasso bucket (lst1_X / lst1_imp),
# saved as 'Lasso Important'.
font_size = 100
plt.style.use('ggplot')
plt.figure(figsize=(50, 50))
plt.barh(y=lst1_X, width=lst1_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Lasso Regression Important Features", fontsize=font_size)
plt.savefig('Lasso Important', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "not important" Lasso bucket (lst2_X / lst2_imp),
# saved as 'LASSO Not'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 150))
plt.barh(y=lst2_X, width=lst2_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Lasso Regression Not Important Features", fontsize=font_size)
plt.savefig('LASSO Not', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "other" Lasso bucket (lst3_X / lst3_imp),
# saved as 'LASSO Other'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 150))
plt.barh(y=lst3_X, width=lst3_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Lasso Regression Other Features", fontsize=font_size)
plt.savefig('LASSO Other', bbox_inches='tight')
plt.show()

In [None]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV

In [None]:
# Candidate penalties: 500 values log-spaced (base 2) from 2**-10 to 2**20,
# evaluated with 5-fold cross-validation.
alpha = np.logspace(start=-10, stop=20, num=500, base=2)
Ridge_model = RidgeCV(alphas=alpha, cv=5)
Ridge_model = Ridge_model.fit(X_train_scaled, y_train)

In [None]:
# Display the penalty strength stored on the fitted RidgeCV model.
Ridge_model.alpha_

In [None]:
# Bucket the Ridge coefficients by magnitude, mirroring the Lasso buckets.
ridge_coefs = Ridge_model.coef_

# "Important": |coef| >= 0.05
ridge_strong_idx = [k for k, c in enumerate(ridge_coefs) if abs(c) >= 0.05]
lst4_X = [feature[k] for k in ridge_strong_idx]
lst4_imp = [ridge_coefs[k] for k in ridge_strong_idx]

# "Not important": |coef| < 0.005
ridge_weak_idx = [k for k, c in enumerate(ridge_coefs) if abs(c) < 0.005]
lst5_X = [feature[k] for k in ridge_weak_idx]
lst5_imp = [ridge_coefs[k] for k in ridge_weak_idx]

# "Other": 0.005 <= |coef| < 0.05
ridge_mid_idx = [k for k, c in enumerate(ridge_coefs) if 0.005 <= abs(c) < 0.05]
lst6_X = [feature[k] for k in ridge_mid_idx]
lst6_imp = [ridge_coefs[k] for k in ridge_mid_idx]

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "important" Ridge bucket (lst4_X / lst4_imp),
# saved as 'Ridge Important'.
font_size = 100
plt.style.use('ggplot')
plt.figure(figsize=(75, 50))
plt.barh(y=lst4_X, width=lst4_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Ridge Regression Important Features", fontsize=font_size)
plt.savefig('Ridge Important', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "not important" Ridge bucket (lst5_X / lst5_imp),
# saved as 'Ridge Not'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 150))
plt.barh(y=lst5_X, width=lst5_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Ridge Regression Not Important Features", fontsize=font_size)
plt.savefig('Ridge Not', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "other" Ridge bucket (lst6_X / lst6_imp),
# saved as 'Ridge Other'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 180))
plt.barh(y=lst6_X, width=lst6_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Ridge Regression Other Features", fontsize=font_size)
plt.savefig('Ridge Other', bbox_inches='tight')
plt.show()

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Randomised hyper-parameter search for the random forest:
# 100 sampled configurations, 5-fold CV each, all cores.
rf = RandomForestRegressor(random_state=42)
random_grid = {
    'max_depth': [2, 5, 10, 20, 30, 40, 50],
    'n_estimators': [50, 100, 300, 500, 750, 1000, 1200, 2000],
    "min_samples_split": [2, 5, 7, 10, 12, 15],
}
RFRegModel = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
RFRegModel.fit(X_train_scaled, y_train)
RFRegModel.best_params_

In [None]:
# Final random forest with hard-coded hyper-parameters. This rebinds
# RFRegModel, replacing the RandomizedSearchCV object of the previous cell.
# random_state is fixed (same seed as the `rf` estimator used in the search)
# so that the metrics and feature importances below are reproducible across
# kernel restarts; without it every run trains a different forest.
RFRegModel = RandomForestRegressor(
    n_estimators=1000,
    min_samples_split=7,
    max_depth=20,
    random_state=42,
)
RFRegModel.fit(X_train_scaled, y_train)

In [None]:
from numpy import arange
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
# The forest was fit on log(price), so exponentiate its predictions to get
# back to the price scale.
y_pred = RFRegModel.predict(X=X_test_scaled)
y_pred_f = np.exp(y_pred)

In [None]:
# Keep a reference to the fitted random forest's per-feature importances.
importances = RFRegModel.feature_importances_

In [None]:
# Bucket the random-forest feature importances.
#
# feature_importances_ is a computed property on scikit-learn forests, so it
# is read once here instead of several times per loop iteration.
# Impurity-based importances are never negative, so the former
# negative-threshold branches were unreachable and have been dropped; the
# resulting buckets are unchanged.
rf_importances = RFRegModel.feature_importances_

# "Important": importance >= 0.05
rf_strong_idx = [k for k, v in enumerate(rf_importances) if v >= 0.05]
lst7_X = [feature[k] for k in rf_strong_idx]
lst7_imp = [rf_importances[k] for k in rf_strong_idx]

# "Not important": importance < 0.002
rf_weak_idx = [k for k, v in enumerate(rf_importances) if v < 0.002]
lst8_X = [feature[k] for k in rf_weak_idx]
lst8_imp = [rf_importances[k] for k in rf_weak_idx]

# "Other": 0.002 <= importance < 0.05
rf_mid_idx = [k for k, v in enumerate(rf_importances) if 0.002 <= v < 0.05]
lst9_X = [feature[k] for k in rf_mid_idx]
lst9_imp = [rf_importances[k] for k in rf_mid_idx]

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "important" random-forest bucket
# (lst7_X / lst7_imp), saved as 'Random Forest Important'.
font_size = 100
plt.style.use('ggplot')
plt.figure(figsize=(50, 50))
plt.barh(y=lst7_X, width=lst7_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Random Forest Important Features", fontsize=font_size)
plt.savefig('Random Forest Important', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "not important" random-forest bucket
# (lst8_X / lst8_imp).
plt.style.use('ggplot')
plt.figure(figsize=(100,150))
plt.barh(lst8_X, lst8_imp, color='blue')
plt.xlabel("importance",fontsize=150)
plt.ylabel("features",fontsize=150)
plt.xticks(fontsize=150)
plt.yticks(fontsize=150,rotation = 45, horizontalalignment = 'right')
# Title spelling fixed (was "Radndom Forset ...") to match the other
# random-forest figures.
plt.title("Random Forest Not Important Features",fontsize=150)
# NOTE(review): the output file name is deliberately left misspelled so that
# anything already referencing the saved figure keeps working; rename to
# 'Random Forest Not' once consumers are confirmed.
plt.savefig('Radndom Forset Not',bbox_inches = 'tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "other" random-forest bucket
# (lst9_X / lst9_imp), saved as 'Random Forest Other'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 150))
plt.barh(y=lst9_X, width=lst9_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Random Forest Other Features", fontsize=font_size)
plt.savefig('Random Forest Other', bbox_inches='tight')
plt.show()

In [None]:
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

In [None]:
# Randomised hyper-parameter search for gradient boosting:
# 100 sampled configurations, 5-fold CV each, all cores.
gb = GradientBoostingRegressor(random_state=42)
random_grid = {
    "learning_rate": [0.0001, 0.001, 0.01, 0.1, 0.15, 0.2, 0.25, 0.3],
    'max_depth': [2, 5, 10, 11, 12, 13, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
    # 'auto' is no longer accepted by scikit-learn's gradient boosting
    # (deprecated in 1.1, removed in 1.3) and makes every candidate that
    # samples it fail. For the regressor it meant "use all features", which
    # 1.0 expresses explicitly. The list length is kept at three so the
    # seeded sampling draws the same candidates as before.
    'max_features': [1.0, 'sqrt', None],
    'min_samples_leaf': [1, 2, 4, 6],
    "min_samples_split": [2, 5, 10, 12, 15, 17, 20, 22],
    'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000],
}
reg = RandomizedSearchCV(
    estimator=gb,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

reg.fit(X_train_scaled, y_train)
print(reg.best_score_)
print(reg.best_params_)

In [None]:
# Fit the `gb` estimator itself (constructed with only random_state=42 set)
# on the scaled training data with the log-price target.
# NOTE(review): nothing later in this notebook appears to read `gb` - confirm
# whether this baseline fit is still needed.
gb.fit(X_train_scaled, y_train)

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
y_pred =GBoosting .predict(X_test_scaled)
y_pred_f = np.exp(y_pred)
np.sqrt(mean_squared_error(y_pred_f, y_test))

In [None]:
GBoosting = GradientBoostingRegressor(n_estimators = 1800, min_samples_split = 17, min_samples_leaf = 4, max_depth = 5, learning_rate = 0.01)
GBoosting.fit(X_train_scaled, y_train)

In [None]:
# Bucket the gradient-boosting feature importances.
#
# feature_importances_ is a computed property, so it is read once here
# instead of several times per loop iteration.
# The "other" bucket now uses a strict upper bound (< 0.1): previously a
# feature with importance exactly 0.1 was placed in BOTH the "important"
# and the "other" bucket. This matches the half-open ranges used for the
# Lasso, Ridge and random-forest buckets.
gb_importances = GBoosting.feature_importances_

# "Important": importance >= 0.1
gb_strong_idx = [k for k, v in enumerate(gb_importances) if v >= 0.1]
lst10_X = [feature[k] for k in gb_strong_idx]
lst10_imp = [gb_importances[k] for k in gb_strong_idx]

# "Not important": importance < 0.002
gb_weak_idx = [k for k, v in enumerate(gb_importances) if v < 0.002]
lst11_X = [feature[k] for k in gb_weak_idx]
lst11_imp = [gb_importances[k] for k in gb_weak_idx]

# "Other": 0.002 <= importance < 0.1
gb_mid_idx = [k for k, v in enumerate(gb_importances) if 0.002 <= v < 0.1]
lst12_X = [feature[k] for k in gb_mid_idx]
lst12_imp = [gb_importances[k] for k in gb_mid_idx]

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "important" gradient-boosting bucket
# (lst10_X / lst10_imp), saved as 'Gradient Boosting Important'.
font_size = 100
plt.style.use('ggplot')
plt.figure(figsize=(50, 50))
plt.barh(y=lst10_X, width=lst10_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Gradient Boosting Important Features", fontsize=font_size)
plt.savefig('Gradient Boosting Important', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "not important" gradient-boosting bucket
# (lst11_X / lst11_imp), saved as 'Gradient Boosting Not'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 180))
plt.barh(y=lst11_X, width=lst11_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Gradient Boosting Not Important Features", fontsize=font_size)
plt.savefig('Gradient Boosting Not', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "other" gradient-boosting bucket
# (lst12_X / lst12_imp), saved as 'Gradient Boosting Other'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 180))
plt.barh(y=lst12_X, width=lst12_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("Gradient Boosting  Other Features", fontsize=font_size)
plt.savefig('Gradient Boosting Other', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Overview chart: gradient-boosting importance of every feature, labelled
# with the column names of X, saved as 'Gradient Boosting'.
importances = GBoosting.feature_importances_
plt.style.use('ggplot')
plt.figure(figsize=(12, 15))
plt.barh(y=list(X.columns), width=list(importances), color='blue')
plt.xlabel("importance")
plt.ylabel("features")
plt.title("Gradient Boosting")
plt.savefig('Gradient Boosting')
plt.show()

In [None]:
from sklearn.model_selection import cross_val_score
from xgboost import XGBRegressor
from numpy import absolute

In [None]:
# Randomised hyper-parameter search for XGBoost:
# 100 sampled configurations, 5-fold CV each, all cores.
xg = XGBRegressor()
random_grid = {
    "learning_rate": [0.0001, 0.001, 0.005, 0.01, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.2, 0.3],
    'max_depth': [2, 5, 7, 10, 11, 12, 13, 15, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
    'n_estimators': [100, 150, 175, 200, 250, 300, 400, 500, 600],
}
reg1 = RandomizedSearchCV(
    estimator=xg,
    param_distributions=random_grid,
    n_iter=100,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
reg1.fit(X_train_scaled, y_train)
print(reg1.best_score_)
print(reg1.best_params_)

In [None]:
# Final XGBoost model with hard-coded hyper-parameters, fit on the scaled
# training data with the log-price target.
XGBoosting = XGBRegressor(
    n_estimators=176,
    learning_rate=0.145,
    max_depth=5,
    colsample_bylevel=0.9,
)
XGBoosting.fit(X_train_scaled, y_train)

In [None]:
# Keep a reference to the fitted XGBoost model's per-feature importances
# (rebinds the `importances` name used for earlier models).
importances = XGBoosting.feature_importances_

In [None]:
# Bucket the XGBoost feature importances.
#
# feature_importances_ is a computed property, so it is read once here
# instead of several times per loop iteration.
# The "other" bucket now uses a strict upper bound (< 0.07): previously a
# feature with importance exactly 0.07 was placed in BOTH the "important"
# and the "other" bucket. This matches the half-open ranges used for the
# Lasso, Ridge and random-forest buckets.
xgb_importances = XGBoosting.feature_importances_

# "Important": importance >= 0.07
xgb_strong_idx = [k for k, v in enumerate(xgb_importances) if v >= 0.07]
lst13_X = [feature[k] for k in xgb_strong_idx]
lst13_imp = [xgb_importances[k] for k in xgb_strong_idx]

# "Not important": importance < 0.005
xgb_weak_idx = [k for k, v in enumerate(xgb_importances) if v < 0.005]
lst14_X = [feature[k] for k in xgb_weak_idx]
lst14_imp = [xgb_importances[k] for k in xgb_weak_idx]

# "Other": 0.005 <= importance < 0.07
xgb_mid_idx = [k for k, v in enumerate(xgb_importances) if 0.005 <= v < 0.07]
lst15_X = [feature[k] for k in xgb_mid_idx]
lst15_imp = [xgb_importances[k] for k in xgb_mid_idx]

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "important" XGBoost bucket
# (lst13_X / lst13_imp), saved as 'XGBoosting Important'.
font_size = 100
plt.style.use('ggplot')
plt.figure(figsize=(50, 30))
plt.barh(y=lst13_X, width=lst13_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("XGBoosting Important Features", fontsize=font_size)
plt.savefig('XGBoosting Important', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "not important" XGBoost bucket
# (lst14_X / lst14_imp), saved as 'XGBoosting Boosting Not'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 150))
plt.barh(y=lst14_X, width=lst14_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("XGBoosting Not Important Features", fontsize=font_size)
plt.savefig('XGBoosting Boosting Not', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Horizontal bar chart of the "other" XGBoost bucket
# (lst15_X / lst15_imp), saved as 'XGBoosting Other'.
font_size = 150
plt.style.use('ggplot')
plt.figure(figsize=(100, 180))
plt.barh(y=lst15_X, width=lst15_imp, color='blue')
plt.xlabel("importance", fontsize=font_size)
plt.ylabel("features", fontsize=font_size)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size, rotation=45, horizontalalignment='right')
plt.title("XGBoosting Other Features", fontsize=font_size)
plt.savefig('XGBoosting Other', bbox_inches='tight')
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, precision_score, roc_auc_score

# Compare all fitted models on the held-out split.
# `methods` and `rows` are parallel: rows[k] is the display name of methods[k].
methods = [knn_model, MLR, Lasso_model, Ridge_model, RFRegModel, GBoosting, XGBoosting]
rows = ['KNN', 'MLR', 'LASSO reg', 'Ridge reg', 'random forest', 'gradient boosting', 'XGBoosting']
columns = ['rmse', 'r2']
results = pd.DataFrame(0.0, columns=columns, index=rows)

for model_name, method in zip(rows, methods):
    # Every model predicts log(price); exponentiate to score on the price scale.
    y_pred = method.predict(X_test_scaled)
    y_pred_f = np.exp(y_pred)
    results.loc[model_name, 'rmse'] = np.sqrt(mean_squared_error(y_pred_f, y_test))
    results.loc[model_name, 'r2'] = r2_score(y_test, y_pred_f)

In [None]:
# Display the per-model RMSE / R^2 table rounded to 4 decimals.
results.round(4)