In [1]:
import pandas as pd
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

data = pd.read_csv('auto-mpg.csv') 

y = data[['mpg']]
X = data.drop(['mpg', 'car name', 'origin'], axis=1)

# Perform test train split
X_train , X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12)

In [3]:
#transform the data
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Build a ridge, lasso and regular linear regression model  
# Note that in scikit-learn, the regularization parameter is denoted by alpha (and not lambda)
ridge = Ridge(alpha=0.5)
ridge.fit(x_train_scaled, y_train)

lasso = Lasso(alpha=0.5)
lasso.fit(x_train_scaled, y_train)

lin = LinearRegression()
lin.fit(x_train_scaled, y_train)

LinearRegression()

In [5]:
# Generate predictors for both training and test sets

y_pred_ridge_train = ridge.predict(x_train_scaled)
y_pred_ridge_test = ridge.predict(X_test_scaled)

y_pred_lasso_train = lasso.predict(x_train_scaled)
y_pred_lasso_test = lasso.predict(X_test_scaled)

y_pred_lin_train = lin.predict(x_train_scaled)
y_pred_lin_test = lin.predict(X_test_scaled)

In [9]:
# MSE for train and test sets for each of the three models

print("Mean Squared Error Ridge Train", mean_squared_error(y_train, y_pred_ridge_train))
print("Mean Squared Error Ridge Test", mean_squared_error(y_test, y_pred_ridge_test))
print("\n")

print("Mean Squared Error Lasso Train", mean_squared_error(y_train, y_pred_lasso_train))
print("Mean Squared Error Lasso Test", mean_squared_error(y_test, y_pred_lasso_test))
print("\n")

print("Mean Squared Error Linear Train", mean_squared_error(y_train, y_pred_lin_train))
print("Mean Squared Error Linear Test", mean_squared_error(y_test, y_pred_lin_test))

Mean Squared Error Ridge Train 9.79807951552983
Mean Squared Error Ridge Test 17.523692433834455


Mean Squared Error Lasso Train 16.24445079708179
Mean Squared Error Lasso Test 30.03463631503097


Mean Squared Error Linear Train 9.700888480581275
Mean Squared Error Linear Test 16.748025313964717


In [10]:
# Let's see how including ridge and lasso changed our parameter estimates.
print("Ridge Parameter Coefficients", ridge.coef_)
print("Lasso Parameter Coefficients", lasso.coef_)
print("Linear Model Parameter Coefficients", lin.coef_)

Ridge Parameter Coefficients [[ -2.06904445  -2.88593443  -1.81801505 -15.23785349  -1.45594148
    8.1440177 ]]
Lasso Parameter Coefficients [-9.09743525 -0.         -0.         -4.02703963  0.          3.92348219]
Linear Model Parameter Coefficients [[ -1.33790698  -1.05300843  -0.08661412 -19.26724989  -0.37043697
    8.56051229]]


Regularized Polynomial Regression vs. Polynomial Regression

In [12]:
# Data preparation
poly = PolynomialFeatures(degree=6)
X_train_poly =  poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Standardze the data
X_train_transformed = scaler.fit_transform(X_train_poly)
X_test_transformed = scaler.transform(X_test_poly)

# Fit Models
ridge.fit(X_train_transformed, y_train)
lasso.fit(X_train_transformed, y_train)
lin.fit(X_train_transformed, y_train)

# Generate predictions
y_predict_ridge_train = ridge.predict(X_train_transformed)
y_pred_ridge_test = ridge.predict(X_test_transformed)

y_pred_lasso_train = lasso.predict(X_train_transformed)
y_pred_lasso_test = lasso.predict(X_test_transformed)

y_pred_lin_train = lin.predict(X_train_transformed)
y_pred_lin_test = lin.predict(X_test_transformed)


# Display Results
print('Train Error Polynomial Ridge Model', mean_squared_error(y_train, y_pred_ridge_train))
print('Test Error Polynomial Ridge Model', mean_squared_error(y_test, y_pred_ridge_test))
print('\n')
print('Train Error Polynomial Lasso Model', mean_squared_error(y_train, y_pred_lasso_train))
print('Test Error Polynomial Lasso Model', mean_squared_error(y_test, y_pred_lasso_test))
print('\n')
print('Train Error Unpenalized Polynomial Model', mean_squared_error(y_train, y_pred_lin_train))
print('Test Error Unpenalized Polynomial Model', mean_squared_error(y_test, y_pred_lin_test))

Train Error Polynomial Ridge Model 9.79807951552983
Test Error Polynomial Ridge Model 10.705099905649627


Train Error Polynomial Lasso Model 16.42963282609318
Test Error Polynomial Lasso Model 30.384937999587358


Train Error Unpenalized Polynomial Model 2.6722252011967364e-18
Test Error Unpenalized Polynomial Model 184300.8258927713


# Conclusion

- The model that performed best is the combination of ridge and polynomial. It is able to balance between bias and variance. 
- The unpenalized model is overfitting ie the variance is very high