In [66]:

import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt


# Load the mtcars CSV from a public gist (needs network access when the cell runs).
MTCARS_URL = 'https://gist.githubusercontent.com/ZeccaLehn/4e06d2575eb9589dbe8c365d61cb056c/raw/64f1660f38ef523b2a1a13be77b002b98665cdfe/mtcars.csv'
data = pd.read_csv(MTCARS_URL)

# Predictor columns fed to every model in this notebook; 'mpg' is the regression target.
FEATURE_COLUMNS = ['cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']
TARGET_COLUMN = 'mpg'

# Work on plain NumPy arrays: X is the feature matrix, y the target vector.
X = data[FEATURE_COLUMNS].values
y = data[TARGET_COLUMN].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=40)


# LINEAR REGRESSION
lr = LinearRegression()
lr.fit(X_train, y_train)

# The reported error is np.sqrt(MSE), i.e. the root mean squared error (RMSE),
# so the printed label says "Root Mean Square Error" rather than "Mean Square Error".
pred_train_lr = lr.predict(X_train)
rmse_train_lr = np.sqrt(mean_squared_error(y_train, pred_train_lr))
print('Root Mean Square Error - Train: ' + str(rmse_train_lr))
print('R^2 - Train: ' + str(r2_score(y_train, pred_train_lr)))

# Same two metrics on the held-out rows, to compare against the training fit.
pred_test_lr = lr.predict(X_test)
rmse_test_lr = np.sqrt(mean_squared_error(y_test, pred_test_lr))
print('Root Mean Square Error - Test: ' + str(rmse_test_lr))
print('R^2 - Test: ' + str(r2_score(y_test, pred_test_lr)))

# The mean squared error (MSE) of an estimator (of a procedure for estimating an unobserved quantity) measures the average of the squares of the errors, that is, the average squared difference between the estimated values and the actual values.
# The MSE is a measure of the quality of an estimator: it is always non-negative, and values closer to zero are better.
# Note: the error values printed by this notebook are np.sqrt(MSE), i.e. the root mean squared error (RMSE), which is expressed in the same units as the target (mpg).
# The coefficient of determination, denoted R^2 or "R squared", is the proportion of the variance in the dependent variable that is predictable from the independent variable(s).
# The R^2 value measures how much of the variance is explained by the model. It normally ranges from 0 to 1; a value of 1 means the independent variables explain all of the variation in the dependent variable. On held-out data it can be much lower, and even negative when the model predicts worse than always guessing the mean.

Mean Square Error - Train: 1.256730769339527
R^2 - Train: 0.9576216186338592
Mean Square Error - Test: 5.317381182951843
R^2 - Test: 0.07636056837621896


In [67]:

# RIDGE REGRESSION
# L2-regularised linear model; alpha=0.01 is a very light penalty.
rr = Ridge(alpha=0.01)
rr.fit(X_train, y_train)

# The reported error is np.sqrt(MSE), i.e. the root mean squared error (RMSE),
# so the printed label says "Root Mean Square Error" rather than "Mean Square Error".
pred_train_rr = rr.predict(X_train)
rmse_train_rr = np.sqrt(mean_squared_error(y_train, pred_train_rr))
print('Root Mean Square Error - Train: ' + str(rmse_train_rr))
print('R^2 - Train: ' + str(r2_score(y_train, pred_train_rr)))

# Same two metrics on the held-out rows, to compare against the training fit.
pred_test_rr = rr.predict(X_test)
rmse_test_rr = np.sqrt(mean_squared_error(y_test, pred_test_rr))
print('Root Mean Square Error - Test: ' + str(rmse_test_rr))
print('R^2 - Test: ' + str(r2_score(y_test, pred_test_rr)))


Mean Square Error - Train: 1.2568136975810997
R^2 - Train: 0.9576160255813377
Mean Square Error - Test: 5.276860048291041
R^2 - Test: 0.0903841301560453


In [68]:

# LASSO REGRESSION
# L1-regularised linear model; alpha=0.01 is the penalty strength.
model_lasso = Lasso(alpha=0.01)
model_lasso.fit(X_train, y_train)

# The reported error is np.sqrt(MSE), i.e. the root mean squared error (RMSE),
# so the printed label says "Root Mean Square Error" rather than "Mean Square Error".
pred_train_lasso = model_lasso.predict(X_train)
rmse_train_lasso = np.sqrt(mean_squared_error(y_train, pred_train_lasso))
print('Root Mean Square Error - Train: ' + str(rmse_train_lasso))
print('R^2 - Train: ' + str(r2_score(y_train, pred_train_lasso)))

# Same two metrics on the held-out rows, to compare against the training fit.
pred_test_lasso = model_lasso.predict(X_test)
rmse_test_lasso = np.sqrt(mean_squared_error(y_test, pred_test_lasso))
print('Root Mean Square Error - Test: ' + str(rmse_test_lasso))
print('R^2 - Test: ' + str(r2_score(y_test, pred_test_lasso)))


Mean Square Error - Train: 1.261726838462164
R^2 - Train: 0.9572840026871487
Mean Square Error - Test: 5.0753179454666695
R^2 - Test: 0.15854017700268797


In [69]:

# ELASTIC NET REGRESSION
# Linear model with a combined L1/L2 penalty; only alpha is set here, so the
# L1/L2 mix is left at the library default.
model_enet = ElasticNet(alpha=0.01)
model_enet.fit(X_train, y_train)

# The reported error is np.sqrt(MSE), i.e. the root mean squared error (RMSE),
# so the printed label says "Root Mean Square Error" rather than "Mean Square Error".
pred_train_enet = model_enet.predict(X_train)
rmse_train_enet = np.sqrt(mean_squared_error(y_train, pred_train_enet))
print('Root Mean Square Error - Train: ' + str(rmse_train_enet))
print('R^2 - Train: ' + str(r2_score(y_train, pred_train_enet)))

# Same two metrics on the held-out rows, to compare against the training fit.
pred_test_enet = model_enet.predict(X_test)
rmse_test_enet = np.sqrt(mean_squared_error(y_test, pred_test_enet))
print('Root Mean Square Error - Test: ' + str(rmse_test_enet))
print('R^2 - Test: ' + str(r2_score(y_test, pred_test_enet)))


Mean Square Error - Train: 1.269167847953475
R^2 - Train: 0.9567786834861433
Mean Square Error - Test: 4.876381651444122
R^2 - Test: 0.2232124548612836


In [None]:

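# Final Conclusion: none of the models listed above accurately predicts the dependent variable (mpg) from the given independent variables. At least, not on the mtcars dataset: every model fits the training rows well (R^2 of about 0.96) but scores an R^2 of only about 0.08 to 0.22 on the held-out rows, which suggests overfitting.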
    