In [1]:
import pandas as pd
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

# Load the auto-mpg dataset (CSV path is relative to the notebook's working directory).
data = pd.read_csv('auto-mpg.csv')

# Features: every column except the target ('mpg') and the 'car name' / 'origin' columns.
X = data.drop(columns=['mpg', 'car name', 'origin'])
# Target: 'mpg', kept as a single-column DataFrame (double brackets).
y = data[['mpg']]

# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=12
)

In [3]:
# Rescale the features with a MinMaxScaler.
# The scaler is fitted on the training split only, and the fitted scaler is then
# applied to both splits, so the test split never influences the learned min/max.
# NOTE: later cells reference the lowercase name `x_train_scaled`, so it is kept as-is.
scaler = MinMaxScaler()
scaler.fit(X_train)
x_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Fit three regressors on the scaled training data: ridge, lasso, and an
# unregularized linear regression used as the comparison point.
# scikit-learn names the regularization strength `alpha` (often written as lambda elsewhere).
ridge = Ridge(alpha=0.5).fit(x_train_scaled, y_train)
lasso = Lasso(alpha=0.5).fit(x_train_scaled, y_train)

# Left as a bare trailing expression so the cell still displays the fitted estimator.
lin = LinearRegression()
lin.fit(x_train_scaled, y_train)

LinearRegression()

In [5]:
# Generate predictions for both the training and test sets with each fitted model.
# Each pair is (train predictions, test predictions), evaluated in that order.

y_pred_ridge_train, y_pred_ridge_test = (
    ridge.predict(x_train_scaled),
    ridge.predict(X_test_scaled),
)

y_pred_lasso_train, y_pred_lasso_test = (
    lasso.predict(x_train_scaled),
    lasso.predict(X_test_scaled),
)

y_pred_lin_train, y_pred_lin_test = (
    lin.predict(x_train_scaled),
    lin.predict(X_test_scaled),
)

In [9]:
# Report the mean squared error on the train and test splits for each of the
# three models. A large gap between train and test MSE points to overfitting.

# Ridge
mse_ridge_train = mean_squared_error(y_train, y_pred_ridge_train)
mse_ridge_test = mean_squared_error(y_test, y_pred_ridge_test)
print("Mean Squared Error Ridge Train", mse_ridge_train)
print("Mean Squared Error Ridge Test", mse_ridge_test)
print("\n")

# Lasso
mse_lasso_train = mean_squared_error(y_train, y_pred_lasso_train)
mse_lasso_test = mean_squared_error(y_test, y_pred_lasso_test)
print("Mean Squared Error Lasso Train", mse_lasso_train)
print("Mean Squared Error Lasso Test", mse_lasso_test)
print("\n")

# Plain linear regression (no regularization)
mse_lin_train = mean_squared_error(y_train, y_pred_lin_train)
mse_lin_test = mean_squared_error(y_test, y_pred_lin_test)
print("Mean Squared Error Linear Train", mse_lin_train)
print("Mean Squared Error Linear Test", mse_lin_test)

Mean Squared Error Ridge Train 9.79807951552983
Mean Squared Error Ridge Test 17.523692433834455


Mean Squared Error Lasso Train 16.24445079708179
Mean Squared Error Lasso Test 30.03463631503097


Mean Squared Error Linear Train 9.700888480581275
Mean Squared Error Linear Test 16.748025313964717


In [None]:
# Let's see how ridge and lasso regularization changed our parameter estimates compared with plain linear regression.

