In [52]:
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

In [53]:
data = pd.read_csv("Numeric_df.csv", index_col=False)
data

Unnamed: 0,fuelType,rating,renterTripsTaken,reviewCount,location.city,location.latitude,location.longitude,location.state,owner.id,rate.daily,vehicle.make,vehicle.model,vehicle.type,vehicle.year
0,1,5.00,13,12,722,47.449107,-122.308841,43,12847615,135,48,288,2,2019
1,1,5.00,2,1,790,35.111060,-106.276551,31,15621242,190,48,288,2,2018
2,3,4.92,28,24,6,35.127163,-106.566681,31,10199256,35,49,314,0,2012
3,2,5.00,21,20,6,35.149726,-106.711425,31,9365496,75,13,291,0,2018
4,2,5.00,3,1,6,35.208659,-106.601008,31,3553565,47,9,380,0,2010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5334,2,5.00,2,2,493,21.435401,-158.019400,11,8836511,50,37,360,2,2014
5335,2,5.00,32,27,353,21.292950,-157.836856,11,9794111,33,8,103,0,2017
5336,3,5.00,17,16,2,21.375507,-157.914919,11,2754690,49,26,198,0,2010
5337,2,4.94,18,17,384,21.378719,-157.727816,11,11313508,35,52,484,0,2013


In [54]:
X = data.drop(columns=["rate.daily"])
y = data["rate.daily"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [55]:
def modeler(modeltype, params = None):
    model = modeltype(**(params if params else {}))
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"{modeltype}: MSE: {mse}")
    return mse

In [56]:
mse_rf = modeler(RandomForestRegressor)
mse_lr = modeler(LinearRegression)
mse_xgb = modeler(XGBRegressor)
mse_svr = modeler(SVR)
mse_ridge = modeler(Ridge)
mse_lasso = modeler(Lasso)
mse_elasticnet = modeler(ElasticNet)

<class 'sklearn.ensemble._forest.RandomForestRegressor'>: MSE: 3114.0101831460674
<class 'sklearn.linear_model._base.LinearRegression'>: MSE: 7031.099413835456
<class 'xgboost.sklearn.XGBRegressor'>: MSE: 2516.6777465603973
<class 'sklearn.svm._classes.SVR'>: MSE: 7747.683016513504
<class 'sklearn.linear_model._ridge.Ridge'>: MSE: 7030.089012148179
<class 'sklearn.linear_model._coordinate_descent.Lasso'>: MSE: 6988.718712666199
<class 'sklearn.linear_model._coordinate_descent.ElasticNet'>: MSE: 7020.2757591711415


In [57]:
rf_params = {'n_estimators': 100, 'max_depth': 10}
lr_params = {'fit_intercept': True}
xgb_params = {'n_estimators': 100, 'learning_rate': 0.1}
svr_params = {'kernel': 'rbf', 'C': 1.0}
ridge_params = {'alpha': 1.0}
lasso_params = {'alpha': 0.1}
elasticnet_params = {'alpha': 0.1, 'l1_ratio': 0.5}

mse_rf = modeler(RandomForestRegressor, rf_params)
mse_lr = modeler(LinearRegression, lr_params)
mse_xgb = modeler(XGBRegressor, xgb_params)
mse_svr = modeler(SVR, svr_params)
mse_ridge = modeler(Ridge, ridge_params)
mse_lasso = modeler(Lasso, lasso_params)
mse_elasticnet = modeler(ElasticNet, elasticnet_params)


<class 'sklearn.ensemble._forest.RandomForestRegressor'>: MSE: 4132.902113823692
<class 'sklearn.linear_model._base.LinearRegression'>: MSE: 7031.099413835456
<class 'xgboost.sklearn.XGBRegressor'>: MSE: 2579.867257132113
<class 'sklearn.svm._classes.SVR'>: MSE: 7747.683016513504
<class 'sklearn.linear_model._ridge.Ridge'>: MSE: 7030.089012148179
<class 'sklearn.linear_model._coordinate_descent.Lasso'>: MSE: 7022.101666992839
<class 'sklearn.linear_model._coordinate_descent.ElasticNet'>: MSE: 6980.092732056473
