# Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load data

In [2]:
df = pd.read_csv('Data.csv')
df.head()

Unnamed: 0,Density,BodyFat,Age,Weight,Height,Neck,Chest,Abdomen,Hip,Thigh,Knee,Ankle,Biceps,Forearm,Wrist
0,1.0708,12.3,23,154.25,67.75,36.2,93.1,85.2,94.5,59.0,37.3,21.9,32.0,27.4,17.1
1,1.0853,6.1,22,173.25,72.25,38.5,93.6,83.0,98.7,58.7,37.3,23.4,30.5,28.9,18.2
2,1.0414,25.3,22,154.0,66.25,34.0,95.8,87.9,99.2,59.6,38.9,24.0,28.8,25.2,16.6
3,1.0751,10.4,26,184.75,72.25,37.4,101.8,86.4,101.2,60.1,37.3,22.8,32.4,29.4,18.2
4,1.034,28.7,24,184.25,71.25,34.4,97.3,100.0,101.9,63.2,42.2,24.0,32.2,27.7,17.7


#  Lasso vs Ridge (40 points)

In [3]:
target_col = 'Weight'
X = df.drop(target_col, axis=1)
y = df[target_col]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#### Lasso

In [5]:
lasso = Lasso()

In [6]:
alphas_lasso = np.logspace(-3, 3, 100)
lasso_params = {'alpha': alphas_lasso}

In [7]:
grid_lasso = GridSearchCV(lasso, lasso_params, cv=5)
grid_lasso.fit(X_train, y_train)
best_alpha_lasso = grid_lasso.best_params_['alpha']
print("Best alpha: {0:.2f}".format(best_alpha_lasso))

Best alpha: 7.56


In [8]:
lasso_best = Lasso(alpha = best_alpha_lasso)
lasso_best.fit(X_train, y_train)

Lasso(alpha=7.56463327554629)

In [9]:
y_train_pred_lasso = lasso_best.predict(X_train)
y_test_pred_lasso = lasso_best.predict(X_test)

train_mse_lasso = np.round(mean_squared_error(y_train, y_train_pred_lasso), decimals = 4)
test_mse_lasso = np.round(mean_squared_error(y_test, y_test_pred_lasso), decimals = 4)

print('Train set MSE of Lasso:', train_mse_lasso)
print('Test set MSE of Lasso:', test_mse_lasso)

Train set MSE of Lasso: 42.8316
Test set MSE of Lasso: 60.5623


In [10]:
lasso_r2 = r2_score(y_test, y_test_pred_lasso)
print('R2 score for Lasso: {0:.4f}'.format(lasso_r2))

R2 score for Lasso: 0.9409


#### Ridge

In [11]:
ridge = Ridge()

In [12]:
alphas_ridge = np.logspace(-3, 3, 100)
ridge_params = {'alpha': alphas_ridge}

In [13]:
grid_ridge = GridSearchCV(ridge, ridge_params, cv=5)
grid_ridge.fit(X_train, y_train)
best_alpha_ridge = grid_ridge.best_params_['alpha']
print("Best alpha: {0:.2f}".format(best_alpha_ridge))

Best alpha: 869.75


In [14]:
ridge_best = Ridge(alpha = best_alpha_ridge)
ridge_best.fit(X_train, y_train)

Ridge(alpha=869.7490026177834)

In [15]:
y_train_pred_ridge = ridge_best.predict(X_train)
y_test_pred_ridge = ridge_best.predict(X_test)

train_mse_ridge = np.round(mean_squared_error(y_train, y_train_pred_ridge), decimals = 4)
test_mse_ridge = np.round(mean_squared_error(y_test, y_test_pred_ridge), decimals = 4)

print('Train set MSE of Ridge:', train_mse_ridge)
print('Test set MSE of Ridge:', test_mse_ridge)

Train set MSE of Ridge: 27.1519
Test set MSE of Ridge: 47.2027


In [16]:
ridge_r2 = r2_score(y_test, y_test_pred_ridge)
print('R2 score for Ridge: {0:.4f}'.format(ridge_r2))

R2 score for Ridge: 0.9540


According to the results, the Lasso model has lower accuracy compared to the Ridge model; although the results have been very close to each other, a small difference is observed.
The Ridge model has a lower mean squared error (MSE) and a higher R-squared (R2) compared to the Lasso model.