<a href="https://colab.research.google.com/github/Taofeeq97/Ridge-and-lasso-regression/blob/main/Ridge_and_lasso_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [None]:
# Fetch the California housing dataset through scikit-learn's dataset loader
califonia_ds = fetch_california_housing()

In [None]:
# Feature matrix as a DataFrame, one column per dataset feature name
califonia_data = pd.DataFrame(califonia_ds.data, columns=califonia_ds.feature_names)
# Target kept as its own single-column DataFrame named 'MedHouseValue'
target = pd.DataFrame({'MedHouseValue': califonia_ds.target})
# Both frames share the same default index, so an index join places the
# target column to the right of the features
df = califonia_data.join(target)

In [None]:
# Preview the first rows of the combined features + target frame
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseValue
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [None]:
# Open the frame in an interactive Google Sheet.
# The google.colab package can only be imported inside a Colab runtime, so the
# import is guarded: in any other Jupyter environment the cell is skipped
# instead of aborting a "Restart & Run All".
try:
    from google.colab import sheets
except ImportError:
    sheets = None
    sheet = None
    print("google.colab is not available; skipping the interactive sheet.")
else:
    sheet = sheets.InteractiveSheet(df=df)

https://docs.google.com/spreadsheets/d/1rSfEbL4rPgyMXM2BjALBH3LIZs8BO1_kedjTDlvMFqA#gid=0


In [None]:
# Separate the predictors (X) from the response column (y)
X = df.drop(columns=['MedHouseValue'])
y = df.loc[:, 'MedHouseValue']

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split_parts

# Linear Regression

In [None]:
# Baseline: plain linear regression trained on every feature
model = LinearRegression()
model.fit(X=x_train, y=y_train)

In [None]:
# Score the baseline on the held-out split using R^2
y_pred = model.predict(X=x_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R2 Score:", r2)

R2 Score: 0.5757877060324508


# Ridge and Lasso Regression

In [None]:
from sklearn.linear_model import Ridge, Lasso

# Apply ridge regression.
# The regularisation strength is held in a single variable so the printed
# label cannot drift from the value actually used: the previous label said
# "0.1 alpha" while the model was fitted with alpha=1.0.
ridge_alpha = 1.0
ridge_model = Ridge(alpha=ridge_alpha)
ridge_model.fit(x_train, y_train)
y_pred2 = ridge_model.predict(x_test)
r2_ridge = r2_score(y_test, y_pred2)
print(f"R2 Score with {ridge_alpha} alpha :", r2_ridge)

R2 Score with 0.1 alpha : 0.5758549611440126


In [None]:
# Ridge regression refitted with alpha=0.5, scored on the held-out split
ridge_model = Ridge(alpha=0.5).fit(x_train, y_train)
y_pred2 = ridge_model.predict(X=x_test)
r2_ridge = r2_score(y_true=y_test, y_pred=y_pred2)
print("R2 Score with 0.5 alpha :", r2_ridge)

R2 Score with 0.5 alpha : 0.5758213996714421


In [None]:
# Ridge regression refitted with alpha=0.2, scored on the held-out split
ridge_model = Ridge(alpha=0.2).fit(x_train, y_train)
y_pred2 = ridge_model.predict(X=x_test)
r2_ridge = r2_score(y_true=y_test, y_pred=y_pred2)
print("R2 Score with 0.2 alpha :", r2_ridge)

R2 Score with 0.2 alpha : 0.5758011993877803


# Lasso Regression

In [None]:
# Lasso regression with alpha=0.1 on the full feature set
lasso_model = Lasso(alpha=0.1).fit(x_train, y_train)
y_pred3 = lasso_model.predict(X=x_test)
r2_lasso = r2_score(y_true=y_test, y_pred=y_pred3)
print("R2 Score with 0.1 alpha :", r2_lasso)

R2 Score with 0.1 alpha : 0.5318167610318159


In [None]:
# Show the first five rows again as a reminder of the available feature columns
df.head(5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseValue
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [None]:
# Positions of the features whose Lasso coefficient is exactly zero
is_zero_coef = lasso_model.coef_ == 0
bad_slope_features = np.flatnonzero(is_zero_coef)
print('features with bad slope', X.columns[bad_slope_features].tolist())

features with bad slope ['AveRooms', 'AveBedrms', 'AveOccup', 'Latitude', 'Longitude']


In [None]:
# Drop the zero-coefficient columns from both the train and the test split
zeroed_columns = X.columns[bad_slope_features]
x_train_filtered = x_train.drop(columns=zeroed_columns)
x_test_filtered = x_test.drop(columns=zeroed_columns)

In [None]:
# Display the filtered test features to confirm which columns remain
x_test_filtered

Unnamed: 0,MedInc,HouseAge,Population
20046,1.6812,25.0,1392.0
3024,2.5313,30.0,1565.0
15663,3.4801,52.0,1310.0
20484,5.7376,17.0,1705.0
9814,3.7250,34.0,1063.0
...,...,...,...
15362,4.6050,16.0,1351.0
16623,2.7266,28.0,1650.0
18086,9.2298,25.0,1585.0
2144,2.7850,36.0,1227.0


In [None]:
# Linear regression trained on the filtered feature set
model = LinearRegression().fit(x_train_filtered, y_train)
y_pred = model.predict(X=x_test_filtered)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R2 Score:", r2)

# Lasso (alpha=0.1) trained on the same filtered feature set
lasso_model = Lasso(alpha=0.1).fit(x_train_filtered, y_train)
y_pred3 = lasso_model.predict(X=x_test_filtered)
r2_lasso = r2_score(y_true=y_test, y_pred=y_pred3)
print("R2 Score with 0.1 alpha :", r2_lasso)


R2 Score: 0.4953076598494679
R2 Score with 0.1 alpha : 0.49435970597882606


In [None]:
# Identify features whose Lasso coefficient is close to zero.
# Two fixes compared with the previous version of this cell:
#  * `lasso_model` was last refitted on the filtered frame (x_train_filtered),
#    so its coef_ positions do not line up with X.columns. A dedicated model
#    fitted on the full training frame keeps coefficient positions and
#    X.columns aligned, which the X.columns[...] lookups below rely on.
#  * "Close to zero" is a magnitude test, so the absolute value is compared;
#    `coef_ <= 0.05` would also flag large negative coefficients.
full_lasso = Lasso(alpha=0.1).fit(x_train, y_train)
zero_coeff_features = np.where(np.abs(full_lasso.coef_) <= 0.05)[0]
print('features with bad slope', list(X.columns[zero_coeff_features]))

features with bad slope ['HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [None]:
# Build the second filtered train/test pair by removing the near-zero columns
near_zero_columns = X.columns[zero_coeff_features]
x_train_filtered2 = x_train.drop(columns=near_zero_columns)
x_test_filtered2 = x_test.drop(columns=near_zero_columns)

In [None]:
# Fit linear and Lasso regression on the second filtered set and compare R^2
model = LinearRegression().fit(x_train_filtered2, y_train)
y_pred = model.predict(X=x_test_filtered2)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R2 Score:", r2)

lasso_model = Lasso(alpha=0.1).fit(x_train_filtered2, y_train)
y_pred3 = lasso_model.predict(X=x_test_filtered2)
r2_lasso = r2_score(y_true=y_test, y_pred=y_pred3)
print("R2 Score with 0.1 alpha :", r2_lasso)

R2 Score: 0.45885918903846656
R2 Score with 0.1 alpha : 0.457868773280494
