In [36]:
from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler, scale
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor ,GradientBoostingRegressor
from lightgbm import LGBMRegressor
from sklearn import decomposition, ensemble
import tensorflow 
import xgboost
import pandas as pd
import numpy as np
from xgboost import XGBRegressor

In [2]:
df = pd.read_csv("insurance.csv")
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


Transforming Categorical Variables

In [3]:
encoder = LabelEncoder()
df["sex"] = encoder.fit_transform(df["sex"])#Famele--> 0   Male--> 1
df["smoker"] = encoder.fit_transform(df["smoker"])# Yes--> 1   No--> 0
df["region"] = encoder.fit_transform(df["region"])# Northeast--> 0   Nortwest--> 1  Southeast--> 2   Southwest--> 3


In [4]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
age,1338.0,39.207025,14.04996,18.0,27.0,39.0,51.0,64.0
sex,1338.0,0.505232,0.50016,0.0,0.0,1.0,1.0,1.0
bmi,1338.0,30.663397,6.098187,15.96,26.29625,30.4,34.69375,53.13
children,1338.0,1.094918,1.205493,0.0,0.0,1.0,2.0,5.0
smoker,1338.0,0.204783,0.403694,0.0,0.0,0.0,0.0,1.0
region,1338.0,1.515695,1.104885,0.0,1.0,2.0,2.0,3.0
charges,1338.0,13270.422265,12110.011237,1121.8739,4740.28715,9382.033,16639.912515,63770.42801


In [5]:
x_train, x_test, y_train, y_test = model_selection.train_test_split(df[["age","sex","bmi","children","smoker","region"]],
                                                                    df["charges"],test_size=0.2,random_state=42)

Min-Max Scaler

In [6]:
scaler = MinMaxScaler((-1,1))
x_minmax_train = scaler.fit_transform(x_train)
x_minmax_test = scaler.fit_transform(x_test)

Standard Scaler

In [7]:
s_scaler = StandardScaler()
x_stand_train = s_scaler.fit_transform(x_train)
x_stand_test = s_scaler.fit_transform(x_test)

PCA

In [8]:
pca = PCA()
x_pcamm_train = pca.fit_transform(scale(x_minmax_train))
x_pcamm_test = pca.fit_transform(scale(x_minmax_test))

x_pcasta_train = pca.fit_transform(scale(x_stand_train))
x_pcasta_test = pca.fit_transform(scale(x_stand_test))

In [9]:
np.cumsum(np.round(pca.explained_variance_ratio_, decimals=4)*100)# We see that all variable variances can be explained.

array([ 22.35,  41.21,  58.18,  73.72,  87.77, 100.01])

Linear Regression

In [48]:
lin = linear_model.LinearRegression()
lin_model_minmax = lin.fit(x_pcamm_train,y_train)
lin_model_stan = lin.fit(x_pcasta_train,y_train)

In [51]:
accuracy = model_selection.cross_val_score(lin_model_minmax, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("Linear Regression Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = lin_model_minmax.predict(x_pcamm_test)
print("Linear Regression RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

Linear Regression Min-Max Accuracy: 0.70 (+/- 0.36)
Linear Regression Average Error Per Unit:  18537.824703378436


In [52]:
accuracy = model_selection.cross_val_score(lin_model_stan, x_pcasta_test, y_test, scoring= "r2",cv = 10)
print("Linear Regression Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = lin_model_minmax.predict(x_pcasta_test)
print("Linear Regression RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

Linear Regression Standard Accuracy: 0.70 (+/- 0.36)
Linear Regression Average Error Per Unit:  18537.824703378432


Ridge Regression

In [35]:
ridge = linear_model.Ridge(alpha=0.1)
ridge_model_minmax = ridge.fit(x_pcamm_train,y_train)
ridge_model_stan = ridge.fit(x_pcasta_train,y_train)

In [53]:
accuracy = model_selection.cross_val_score(ridge_model_minmax, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("Ridge Regression Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = ridge_model_minmax.predict(x_pcamm_test)
print("Ridge Regression RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

Ridge Regression Min-Max Accuracy: 0.70 (+/- 0.36)
Ridge Regression Average Error Per Unit:  18537.11498040787


In [37]:
accuracy = model_selection.cross_val_score(ridge_model_stan, x_pcasta_test, y_test, scoring= "r2",cv = 10)
print("Linear Regression Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))

Linear Regression Standard Accuracy: 0.70 (+/- 0.36)


In [56]:
lambdas = 10**np.linspace(10,-2,100)*0.5
ridge_cv = linear_model.RidgeCV(alphas= lambdas, scoring= "neg_mean_squared_error")
ridge_cv.fit(x_pcamm_train,y_train)
ridge_cv.alpha_

2.320794416806386

In [57]:
ridge_tuned = linear_model.Ridge(alpha= ridge_cv.alpha_)
ridge_tuned_minmax = ridge_tuned.fit(x_pcamm_train,y_train)

In [58]:
accuracy = model_selection.cross_val_score(ridge_tuned_minmax, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("Ridge Regression Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = ridge_tuned_minmax.predict(x_pcamm_test)
print("Ridge Regression RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

Ridge Regression Min-Max Accuracy: 0.71 (+/- 0.35)
Ridge Regression Average Error Per Unit:  18521.392591592008


Lasso Regression

In [59]:
las = linear_model.Lasso(alpha=0.1)
las_model_minmax = ridge.fit(x_pcamm_train,y_train)


In [60]:
accuracy = model_selection.cross_val_score(las_model_minmax, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("Lasso Regression Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = las_model_minmax.predict(x_pcamm_test)
print("Lasso Regression RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

Lasso Regression Min-Max Accuracy: 0.70 (+/- 0.36)
Lasso Regression Average Error Per Unit:  18537.11498040785


In [67]:
lambdas = 10**np.linspace(10,-2,100)*0.5
lasso_cv = linear_model.LassoCV(alphas= None, cv=10, max_iter=1000)
lasso_cv.fit(x_pcamm_train,y_train)
lasso_cv.alpha_

7.097229129571942

In [68]:
las_tuned = linear_model.Lasso(alpha= lasso_cv.alpha_)
las_tuned_minmax = las_tuned.fit(x_pcamm_train,y_train)

In [69]:
accuracy = model_selection.cross_val_score(las_tuned_minmax, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("Lasso Regression Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = las_tuned_minmax.predict(x_pcamm_test)
print("Lasso Regression RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

Lasso Regression Min-Max Accuracy: 0.70 (+/- 0.36)
Lasso Regression Average Error Per Unit:  18529.876369799207


ElasticNet

In [11]:
elas = linear_model.ElasticNet()
elas_model = elas.fit(x_pcamm_train,y_train)

In [40]:
accuracy = model_selection.cross_val_score(elas_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("ElasticNet Regression Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = elas_model.predict(x_pcamm_test)
print("ElasticNet Regression RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

ElasticNet Regression Min-Max Accuracy: 0.65 (+/- 0.20)
ElasticNet Regression RMSE:  16140.012411712582


In [28]:
lambdas = 10**np.linspace(10,-2,100)*0.5
elas_cv = linear_model.ElasticNetCV(alphas= lambdas, cv=10, max_iter=1000)
elas_cv.fit(x_pcamm_train,y_train)
elas_cv.alpha_

0.008737642000038414

In [29]:
elas_tuned = linear_model.Lasso(alpha= elas_cv.alpha_)
elas_tuned_model = elas_tuned.fit(x_pcamm_train,y_train)

In [37]:
accuracy = model_selection.cross_val_score(elas_tuned_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("ElasticNet Regression Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = elas_tuned_model.predict(x_pcamm_test)
print("ElasticNet Regression RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

ElasticNet Regression Min-Max Accuracy: 0.70 (+/- 0.36)
ElasticNet Regression RMSE:  18537.814909960907


K-Nearest Neighbors

In [42]:
knn = KNeighborsRegressor()
knn_model = knn.fit(x_pcamm_train,y_train)

In [44]:
accuracy = model_selection.cross_val_score(knn_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("KNN Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = knn_model.predict(x_pcamm_test)
print("KNN RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

KNN Min-Max Accuracy: 0.74 (+/- 0.37)
KNN RMSE:  17482.580124606247


In [47]:
knn_params = {'n_neighbors': np.arange(1,30,1),
              'weights':["uniform","distance"]}
knn_cv_model = GridSearchCV(knn,knn_params,cv=10,n_jobs=-1,verbose=2)
knn_cv_model.fit(x_pcamm_train,y_train)

Fitting 10 folds for each of 58 candidates, totalling 580 fits


In [48]:
knn_cv_model.best_params_

{'n_neighbors': 15, 'weights': 'distance'}

In [49]:
knn_tuned = KNeighborsRegressor(n_neighbors=15,weights= "distance")
knn_tuned_model = knn_tuned.fit(x_pcamm_train,y_train)

In [50]:
accuracy = model_selection.cross_val_score(knn_tuned_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("KNN Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = knn_tuned_model.predict(x_pcamm_test)
print("KNN RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

KNN Min-Max Accuracy: 0.73 (+/- 0.28)
KNN RMSE:  16880.34032496241


Support Vector Machine

In [86]:
svr = SVR(kernel='rbf')
svr_model = svr.fit(x_pcamm_train,y_train)

In [92]:
accuracy = model_selection.cross_val_score(svr_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("SVR Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = svr_model.predict(x_pcamm_test)
print("SVR RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) ) #"R2" value is negative? (Not likely)

SVR Min-Max Accuracy: -0.15 (+/- 0.21)
SVR RMSE:  12915.4002612367


In [82]:
svr_params ={"C": np.arange(1,10),
             "kernel" : ["rbf","linear","poly"],
             "gamma" :["scale","auto"],
             "tol" :[0.0001,0.001,0.01,0.1,0.00001],
             "epsilon": [0.1,0.01,0.5,0.2,0.8]}
svr_cv_model = GridSearchCV(svr,svr_params,cv=10,n_jobs=-1,verbose=2)
svr_cv_model.fit(x_pcamm_train,y_train)

Fitting 10 folds for each of 1350 candidates, totalling 13500 fits


In [83]:
svr_cv_model.best_params_

{'C': 9, 'epsilon': 0.8, 'gamma': 'scale', 'kernel': 'linear', 'tol': 0.1}

In [90]:
svr_tuned = SVR(C= 9, epsilon= 0.8, kernel= "linear",tol= 0.1)
svr_tuned_model = svr_tuned.fit(x_pcamm_train,y_train)

In [91]:
accuracy = model_selection.cross_val_score(svr_tuned_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("SVR Min-Max Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = svr_tuned_model.predict(x_pcamm_test)
print("SVR RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) ) #It's terrible.

SVR Min-Max Accuracy: 0.04 (+/- 0.19)
SVR RMSE:  15018.801095483293


Artificial Neural Networks

In [93]:
mlp = MLPRegressor()
mlp_model = mlp.fit(x_pcasta_train,y_train)



In [95]:
accuracy = model_selection.cross_val_score(mlp_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("MLP Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = mlp_model.predict(x_pcamm_test)
print("MLP RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) ) #Since the "R2" value is negative, we will not use MLP.



MLP Standard Accuracy: -1.20 (+/- 0.58)
MLP RMSE:  17757.63911934687




Classification and Regression Trees (CART)

In [96]:
cart = DecisionTreeRegressor()
cart_model = cart.fit(x_pcamm_train,y_train)

In [97]:
accuracy = model_selection.cross_val_score(cart_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("CART Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = cart_model.predict(x_pcamm_test)
print("CART  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

CART Standard Accuracy: 0.59 (+/- 0.40)
CART  RMSE:  19381.949898655545


In [98]:
cart_params = {"max_depth": list(range(1,20)),
               "min_samples_split":list(range(2,10)),
               "min_samples_leaf": list(range(1,10)),
               "max_leaf_nodes": list(range(2,20)),}
cart_cv_model = GridSearchCV(cart,cart_params,cv=5,n_jobs= -1, verbose= 2)
cart_cv_model.fit(x_pcamm_train,y_train)

Fitting 5 folds for each of 24624 candidates, totalling 123120 fits


In [99]:
cart_cv_model.best_params_

{'max_depth': 5,
 'max_leaf_nodes': 19,
 'min_samples_leaf': 6,
 'min_samples_split': 2}

In [102]:
cart_tuned = DecisionTreeRegressor(max_depth=5, max_leaf_nodes=19, min_samples_leaf=6, min_samples_split=2)
cart_tuned_model = cart_tuned.fit(x_pcamm_train,y_train)

In [103]:
accuracy = model_selection.cross_val_score(cart_tuned_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("CART Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = cart_tuned_model.predict(x_pcamm_test)
print("CART  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

CART Standard Accuracy: 0.63 (+/- 0.40)
CART  RMSE:  17104.50104545045


Bagging

In [10]:
bag = BaggingRegressor()
bag_model = bag.fit(x_pcamm_train,y_train)

In [11]:
accuracy = model_selection.cross_val_score(bag_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("Bagging Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = bag_model.predict(x_pcamm_test)
print("Bagging  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

Bagging Standard Accuracy: 0.70 (+/- 0.29)
Bagging  RMSE:  16725.393644876174


In [17]:
bag_params = {"n_estimators": [1000,1500,2000]}
bag_cv_model = GridSearchCV(bag,bag_params,n_jobs=-1,verbose=2,cv=10)
bag_cv_model.fit(x_pcamm_train,y_train)

Fitting 10 folds for each of 3 candidates, totalling 30 fits


In [18]:
bag_cv_model.best_params_

{'n_estimators': 1500}

In [21]:
bag_tuned = BaggingRegressor(n_estimators=1000)
bag_tuned_model = bag_tuned.fit(x_pcamm_train,y_train)

In [22]:
accuracy = model_selection.cross_val_score(bag_tuned_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("Bagging Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = bag_tuned_model.predict(x_pcamm_test)
print("Bagging  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

Bagging Standard Accuracy: 0.72 (+/- 0.29)
Bagging  RMSE:  17225.29739508329


Random Forest

In [23]:
rf = RandomForestRegressor()
rf_model = rf.fit(x_pcamm_train,y_train)

In [24]:
accuracy = model_selection.cross_val_score(rf_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("RandomForest Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = rf_model.predict(x_pcamm_test)
print("RandomForest  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

RandomForest Standard Accuracy: 0.72 (+/- 0.29)
RandomForest  RMSE:  17409.546616364554


In [25]:
rf_params = {"n_estimators": [100,200,500,1000],
             "max_features": ["sqrt","log2",1.0,]}
rf_cv_model = GridSearchCV(rf,rf_params,cv=10,n_jobs=-1,verbose=2)
rf_cv_model.fit(x_pcamm_train,y_train)

Fitting 10 folds for each of 12 candidates, totalling 120 fits


In [26]:
rf_cv_model.best_params_

{'max_features': 1.0, 'n_estimators': 1000}

In [27]:
rf_tuned = RandomForestRegressor(n_estimators=1000)
rf_tuned_model = rf_tuned.fit(x_pcamm_train,y_train)

In [28]:
accuracy = model_selection.cross_val_score(rf_tuned_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("RandomForest Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = rf_tuned_model.predict(x_pcamm_test)
print("RandomForest  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

RandomForest Standard Accuracy: 0.72 (+/- 0.30)
RandomForest  RMSE:  17211.748176782865


Gradient Boosting Machines (GBM)

In [29]:
gbm = GradientBoostingRegressor()
gbm_model = gbm.fit(x_pcamm_train,y_train)

In [30]:
accuracy = model_selection.cross_val_score(gbm_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("GBM Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = gbm_model.predict(x_pcamm_test)
print("GBM  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

GBM Standard Accuracy: 0.70 (+/- 0.28)
GBM  RMSE:  16867.52702923112


In [32]:
gbm_params = {"learning_rate": [0.1,0.01,0.2,0.5,0.8,0.9,1],
              "n_estimators": [100,200,500,1000],
              "subsample": [1.0,0.5,0.4,0.2,0.1,0.01]}
gbm_cv_model = GridSearchCV(gbm,gbm_params,cv=5,n_jobs=-1,verbose=2)
gbm_cv_model.fit(x_pcamm_train,y_train)

Fitting 5 folds for each of 168 candidates, totalling 840 fits


In [33]:
gbm_cv_model.best_params_

{'learning_rate': 0.01, 'n_estimators': 1000, 'subsample': 0.5}

In [34]:
gbm_tuned = GradientBoostingRegressor(learning_rate=0.01,n_estimators=1000,subsample=0.5)
gbm_tuned_model = gbm_tuned.fit(x_pcamm_train,y_train)

In [35]:
accuracy = model_selection.cross_val_score(gbm_tuned_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("GBM Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = gbm_tuned_model.predict(x_pcamm_test)
print("GBM  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

GBM Standard Accuracy: 0.72 (+/- 0.29)
GBM  RMSE:  16524.76862657099


XGBoost

In [38]:
xgb = XGBRegressor()
xgb_model = xgb.fit(x_pcamm_train,y_train)

In [40]:
accuracy = model_selection.cross_val_score(xgb_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("XGB Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = xgb_model.predict(x_pcamm_test)
print("XGB  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

XGB Standard Accuracy: 0.67 (+/- 0.37)
XGB  RMSE:  18117.14486421366


In [41]:
xgb_params = {"colsample_bytree":[0.4,0.5,0.6,0.9,1],
              "n_estimators" : [100,200,500,1000],
              "learning_rate": [0.1,0.001,0.5,0.8,1]}
xgb_cv_model= GridSearchCV(xgb,xgb_params,cv=5,n_jobs=-1,verbose=2)
xgb_cv_model.fit(x_pcamm_train,y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


In [42]:
xgb_cv_model.best_params_

{'colsample_bytree': 0.9, 'learning_rate': 0.1, 'n_estimators': 100}

In [43]:
xgb_tuned = XGBRegressor(colsample_bytree=0.9,learning_rate=0.1,n_estimators=100)
xgb_tuned_model = xgb_tuned.fit(x_pcamm_train,y_train)

In [44]:
accuracy = model_selection.cross_val_score(xgb_tuned_model, x_pcamm_test, y_test, scoring= "r2",cv = 10)
print("XGB Standard Accuracy: %0.2f (+/- %0.2f)" % (accuracy.mean(), accuracy.std() * 2))
y_pred = xgb_tuned_model.predict(x_pcamm_test)
print("XGB  RMSE: ", np.sqrt(mean_squared_error(y_test,y_pred)) )

XGB Standard Accuracy: 0.71 (+/- 0.25)
XGB  RMSE:  16538.269212432697
