In [1]:
import pandas as pd
import numpy as np

from numpy import mean
from numpy import std

from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
#from sklearn.svm import SVR


from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.model_selection import RepeatedKFold

from matplotlib import pyplot


In [2]:
# Load the basic ordinal-encoded dataset used for the non-linear models.
# The frame must contain a 'SalePrice' column: the next cell uses it as the
# regression target and treats every other column as a feature.
ordinal = pd.read_csv('../Data/BaseOrdinal.csv')

In [3]:
# Target vector: the sale price column as a plain array.
y = ordinal.SalePrice.values

# Feature matrix: every remaining column, as a plain array.
X = ordinal.drop(columns=['SalePrice']).values

# Standardise each feature column (zero mean, unit variance).
# NOTE(review): the scaler is fitted on *all* rows before cross-validation, so
# the held-out folds contribute to the scaling statistics. Confirm this is
# acceptable, or move the scaler into a per-fold pipeline.
scaler = StandardScaler()
X = scaler.fit(X).transform(X)

In [4]:
# list of other models to evaluate

def get_models(random_state=1):
    """Build the candidate regressors to compare.

    Parameters
    ----------
    random_state : int or None, default=1
        Seed forwarded to every estimator so that repeated runs of the
        notebook (Restart & Run All) print the same scores. Pass ``None``
        to get the previous, unseeded behaviour.

    Returns
    -------
    dict
        Maps a short display name to an unfitted estimator instance.
    """
    return {
        # NOTE(review): with bootstrap=False and the library-default
        # max_features, each tree is presumably trained on the full sample
        # with all features, so the trees are expected to be nearly
        # identical -- confirm that this configuration is intentional.
        'rf no bs': RandomForestRegressor(n_estimators=100,
                                          criterion='squared_error',
                                          bootstrap=False,
                                          random_state=random_state),
        'rf w bs': RandomForestRegressor(n_estimators=100,
                                         criterion='squared_error',
                                         bootstrap=True,
                                         random_state=random_state),
        'gradient boost': GradientBoostingRegressor(random_state=random_state),
        'Xtreme GB': XGBRegressor(random_state=random_state),
        # 'svm': SVR(),  # disabled; the SVR import is commented out as well
    }

In [5]:
# Use cross-validation to evaluate model performance

def evaluate_model(model, X, y):
    """Cross-validate ``model`` on ``(X, y)`` and return the raw score dict.

    The evaluation uses 10-fold cross-validation repeated 3 times with a
    fixed splitter seed, scoring each fold with R^2 and negated mean
    absolute error. All available CPU cores are used (``n_jobs=-1``).

    Returns
    -------
    dict
        The mapping produced by ``cross_validate``; the per-fold metrics are
        under ``'test_r2'`` and ``'test_neg_mean_absolute_error'``.
    """
    metrics = ['r2', 'neg_mean_absolute_error']

    # Fixed random_state so every model is scored on the same splits.
    splitter = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

    return cross_validate(model, X, y, scoring=metrics, cv=splitter, n_jobs=-1)

In [6]:
# Instantiate the candidate regressors once; the evaluation loop below
# iterates over this name -> estimator dict.
models = get_models()

In [7]:
# Accumulators are created ONCE, before the loop, so that after the loop they
# hold one entry per model (previously they were reset on every iteration and
# only the last model's scores survived).
# NOTE: despite its name, `results_neg_mse` stores the per-fold *negated mean
# absolute error*; the name is kept so any later cell that reads it still works.
results_r2, results_neg_mse, names = list(), list(), list()

for name, model in models.items():

    # evaluate the model
    scores = evaluate_model(model, X, y)

    r2_scores = scores['test_r2']
    neg_mae_scores = scores['test_neg_mean_absolute_error']

    # store the per-fold results for this model
    results_r2.append(r2_scores)
    results_neg_mse.append(neg_mae_scores)
    names.append(name)

    # summarize the performance along the way: mean R^2 (mean MAE).
    # Computed from this model's scores only, not from the accumulators.
    print('>%s %.3f (%.3f)' % (name, mean(r2_scores), mean(np.abs(neg_mae_scores))))

>rf no bs 0.787 (21883.556)
>rf w bs 0.890 (14913.072)
>gradient boost 0.903 (13738.377)
>Xtreme GB 0.894 (14644.156)
