In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV, ElasticNetCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
# Load the student-performance CSV and preview its first five rows.
DATA_PATH = "Student_Performance.csv"
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [3]:
# Handling categorical data: encode 'No' as 0 and every other value as 1.
# An already-encoded 0 is also mapped to 0, so re-running this cell leaves the
# column unchanged (the bare `x == 'No'` test would turn every value into 1 on
# a second run, because the integer 0 is not equal to the string 'No').
df['Extracurricular Activities'] = df['Extracurricular Activities'].apply(
    lambda x: 0 if x in (0, 'No') else 1
)
df.head(10)

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,1,9,1,91.0
1,4,82,0,4,2,65.0
2,8,51,1,7,2,45.0
3,5,52,1,5,2,36.0
4,7,75,0,8,5,66.0
5,3,78,0,9,6,61.0
6,7,73,1,5,6,63.0
7,8,45,1,4,6,42.0
8,5,77,0,8,2,61.0
9,4,89,0,4,0,69.0


In [4]:
# Separate the predictors (x) from the regression target (y).
target_column = 'Performance Index'
y = df[target_column]
x = df.drop(columns=[target_column])

In [5]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=50,
)

In [6]:
# Ordinary least-squares baseline.
# LinearRegression is already imported in the notebook's first cell, so it is
# not re-imported here.
model = LinearRegression()

In [7]:
# Fit the baseline on the training split (the fitted estimator is the cell output).
model.fit(X=x_train, y=y_train)

In [8]:
# Score the linear baseline on the held-out split and print its R^2.
y_pred = model.predict(x_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(r2)

0.9889385156066716


In [9]:
# Hyper-parameter grids shared by the cross-validated models below.
# alphas: 100 log-spaced regularization strengths from 10^-6 to 10^2.
alphas = np.logspace(start=-6, stop=2, num=100)
# l1_ratios: 100 evenly spaced L1/L2 mixing ratios from 0.01 to 1.
l1_ratios = np.linspace(start=0.01, stop=1, num=100)

In [22]:
# LassoCV tries every value in `alphas` with 5-fold cross-validation and returns a
# model refit with the best alpha on the data passed to fit (here, the training split).
lasso_cv = LassoCV(alphas=alphas, cv=5, random_state=50)
reg_lasso = lasso_cv.fit(x_train, y_train)

In [23]:
# R^2 of the tuned Lasso model on the held-out test split.
reg_lasso.score(X=x_test, y=y_test)

0.9889388400301822

In [24]:
# Regularization strength that LassoCV selected from `alphas`.
reg_lasso.alpha_

0.00031992671377973844

In [26]:
# RidgeCV searches the same `alphas` grid with 5-fold cross-validation on the training split.
ridge_cv = RidgeCV(alphas=alphas, cv=5)
reg_ridge = ridge_cv.fit(x_train, y_train)

In [20]:
# R^2 of the tuned Ridge model on the held-out test split.
reg_ridge.score(X=x_test, y=y_test)

0.988939582623175

In [21]:
# Regularization strength that RidgeCV selected from `alphas`.
reg_ridge.alpha_

12.915496650148853

In [16]:
# Elastic Net blends the L1 (Lasso) and L2 (Ridge) penalties; l1_ratio sets the mix:
#   l1_ratio = 0      -> purely L2 (Ridge)
#   l1_ratio = 1      -> purely L1 (Lasso)
#   0 < l1_ratio < 1  -> a combination of both
# Here cross-validation considers every mixing ratio in `l1_ratios` together with
# every regularization strength in `alphas`.
# NOTE: `model` now refers to this ElasticNetCV estimator, replacing the earlier
# LinearRegression bound to the same name.
model = ElasticNetCV(
    l1_ratio=l1_ratios,
    alphas=alphas,
    cv=5,
)
model.fit(x_train, y_train)

In [17]:
# Score the tuned Elastic Net on the held-out split and print its R^2.
y_pred = model.predict(X=x_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(r2)

0.9889394603249383


In [27]:
# Same R^2 as above, obtained through the estimator's own score method.
model.score(X=x_test, y=y_test)

0.9889394603249383

In [29]:
# Regularization strength that ElasticNetCV selected from `alphas`.
model.alpha_

0.0014174741629268063

In [30]:
# L1/L2 mixing ratio that ElasticNetCV selected from `l1_ratios`.
model.l1_ratio_

0.01