In [1]:
# Importing libraries
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the diabetes dataset shipped with scikit-learn and unpack it into the
# feature matrix (X) and the regression target (y).
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

In [3]:
# Peek at the raw feature matrix (the printed array is abbreviated with "...").
diabetes.data

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [4]:
# Shape of the feature matrix X as (rows, columns)
X.shape

(442, 10)

In [5]:
# Hold out 30% of the rows as a test split; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Sanity check: number of rows and columns in the training split
X_train.shape

(309, 10)

In [7]:
# Standardizing the features with StandardScaler.
# The scaler is fitted on the training split only and then applied to both
# splits, so nothing from the test split is used while fitting.
# NOTE: X_train / X_test are overwritten here; re-running only this cell
# rescales data that has already been scaled.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Baseline: plain linear regression (no penalty term) fitted on the training split.
linear_model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split.
# CAVEAT (from the original author): this is only an example to help
# understand LASSO regression, not the proper workflow for a real project.
y_pred_linear = linear_model.predict(X_test)

# Mean squared error of the baseline predictions on the test split.
mse_linear = mean_squared_error(y_test, y_pred_linear)
print(f'Mean Squared Error (Linear Regression): {mse_linear}')

Mean Squared Error (Linear Regression): 2821.7509810013107


In [9]:
# LASSO regression with a light penalty (alpha=0.1), fitted on the training split.
# NOTE(review): scikit-learn documents random_state as used only when
# selection='random'; with the default selection it presumably has no
# effect here -- confirm before relying on it.
lasso_model = Lasso(alpha=0.1, random_state=42).fit(X_train, y_train)

# Predict on the held-out split.
# CAVEAT (from the original author): this is only an example to help
# understand LASSO regression, not the proper workflow for a real project.
y_pred_lasso = lasso_model.predict(X_test)

# Mean squared error of the LASSO predictions on the test split.
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
print(f'Mean Squared Error (LASSO Regression): {mse_lasso}')

Mean Squared Error (LASSO Regression): 2817.0876377470295


In [13]:
# LASSO regression with a heavier penalty (alpha=1), fitted on the training split.
# NOTE(review): scikit-learn documents random_state as used only when
# selection='random'; with the default selection it presumably has no
# effect here -- confirm before relying on it.
lasso_model_heavy = Lasso(alpha=1, random_state=42).fit(X_train, y_train)

# Predict on the held-out split.
# CAVEAT (from the original author): this is only an example to help
# understand LASSO regression, not the proper workflow for a real project.
y_pred_lasso_heavy = lasso_model_heavy.predict(X_test)

# Mean squared error of the heavy-penalty LASSO predictions on the test split.
mse_lasso_heavy = mean_squared_error(y_test, y_pred_lasso_heavy)
print(f'Mean Squared Error (LASSO Regression - Heavy Penalty): {mse_lasso_heavy}')

Mean Squared Error (LASSO Regression - Heavy Penalty): 2784.334385980127


In [19]:
# Side-by-side comparison of the fitted coefficients: one row per feature,
# one column per model (plain linear, LASSO alpha=0.1, LASSO alpha=1).
coef_linear = pd.DataFrame(
    {'Linear Regression Coefficients': linear_model.coef_},
    index=diabetes.feature_names,
)
coef_lasso = pd.DataFrame(
    {'LASSO Regression Coefficients': lasso_model.coef_},
    index=diabetes.feature_names,
)
coef_lasso_heavy = pd.DataFrame(
    {'LASSO Heavy (Heavy) Coefficients': lasso_model_heavy.coef_},
    index=diabetes.feature_names,
)

# The three frames share the same feature index, so they line up column-wise.
coef_comparison = pd.concat(
    [coef_linear, coef_lasso, coef_lasso_heavy],
    axis='columns',
)

coef_comparison

Unnamed: 0,Linear Regression Coefficients,LASSO Regression Coefficients,LASSO Heavy (Heavy) Coefficients
age,1.352467,1.364918,0.482876
sex,-12.454269,-12.215587,-10.236559
bmi,26.210046,26.451219,26.900556
bp,18.614433,18.409299,17.340156
s1,-43.260394,-30.541312,-7.592472
s2,24.255629,14.5572,-0.0
s3,5.738626,0.0,-10.590085
s4,13.963427,11.744861,4.286988
s5,31.575215,26.794414,19.082278
s6,1.983394,2.060551,1.877525
