### Linear regression

In [60]:
import pandas as pd
# Read the student-performance CSV; the path is relative to the working directory.
df = pd.read_csv("student_performance.csv")
# Preview the first five rows (5 is also head()'s default).
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [61]:
# Count missing values per column before modelling.
df.isna().sum()

Hours Studied                       0
Previous Scores                     0
Extracurricular Activities          0
Sleep Hours                         0
Sample Question Papers Practiced    0
Performance Index                   0
dtype: int64

In [62]:
# Numeric predictor columns; 'Extracurricular Activities' (Yes/No) is not included.
features = [
    'Hours Studied',
    'Previous Scores',
    'Sleep Hours',
    'Sample Question Papers Practiced',
]
X = df[features]             # feature matrix
y = df['Performance Index']  # regression target


In [63]:
# Sanity check: number of target values.
y.shape


(10000,)

### Train/test split

In [64]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [65]:
# Sanity check: (rows, feature columns) of the feature matrix.
X.shape

(10000, 4)

In [66]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [67]:
# Training features: with test_size=0.25 this should hold 75% of the rows.
X_train.shape

(7500, 4)

In [68]:
# Training targets: row count must match X_train.
y_train.shape

(7500,)

In [69]:
# Test targets: the remaining 25% of the rows.
y_test.shape

(2500,)

In [70]:
# Fraction of rows that ended up in the training split (0.75 for test_size=0.25).
len(X_train) / len(X)

0.75

### Train the model

In [71]:
# Fit a plain linear regression on the four numeric training features.
model = LinearRegression()
model.fit(X_train,y_train)

In [72]:
# Predict on the held-out rows and preview the first five predictions.
y_pred = model.predict(X_test)
y_pred[:5]

array([55.01662478, 22.31952103, 47.61205591, 30.99898063, 43.33297823])

In [73]:
# First five true targets, for comparison with the predictions shown above.
y_test.head()

6252    51.0
4684    20.0
1731    46.0
4742    28.0
4521    41.0
Name: Performance Index, dtype: float64

### Model evaluation


In [74]:
from sklearn.metrics import mean_squared_error,r2_score

# Mean squared error of the linear model's predictions on the test split.
mse = mean_squared_error(y_test,y_pred)
mse

np.float64(4.152326573933453)

In [75]:
# Coefficient of determination (R^2) of the predictions on the test split.
r2_score(y_test,y_pred)

0.9887299686226686

In [76]:
# The estimator's own score() on the test split; the value matches r2_score in the previous cell.
model.score(X_test,y_test)

0.9887299686226686

### Polynomial regression

In [77]:
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
# Toy example: a degree-2 expansion of a single feature x yields columns [1, x, x^2].
poly = PolynomialFeatures(degree=2)
poly.fit_transform(np.arange(1, 5).reshape(-1, 1))  # column vector [[1], [2], [3], [4]]

array([[ 1.,  1.,  1.],
       [ 1.,  2.,  4.],
       [ 1.,  3.,  9.],
       [ 1.,  4., 16.]])

In [78]:
# Fit the degree-2 expansion on the training features only, then apply the
# already-fitted transformer to the test features.
poly = PolynomialFeatures(degree = 2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)


In [79]:
# First five training rows before the polynomial expansion.
X_train.head()

Unnamed: 0,Hours Studied,Previous Scores,Sleep Hours,Sample Question Papers Practiced
4901,5,49,5,5
4375,7,88,4,9
6698,3,94,7,1
9805,9,54,5,9
1101,4,56,8,6


In [80]:
# The same five rows after expansion: 15 columns (constant 1, the 4 original features, 10 degree-2 products).
X_train_poly[:5]

array([[1.000e+00, 5.000e+00, 4.900e+01, 5.000e+00, 5.000e+00, 2.500e+01,
        2.450e+02, 2.500e+01, 2.500e+01, 2.401e+03, 2.450e+02, 2.450e+02,
        2.500e+01, 2.500e+01, 2.500e+01],
       [1.000e+00, 7.000e+00, 8.800e+01, 4.000e+00, 9.000e+00, 4.900e+01,
        6.160e+02, 2.800e+01, 6.300e+01, 7.744e+03, 3.520e+02, 7.920e+02,
        1.600e+01, 3.600e+01, 8.100e+01],
       [1.000e+00, 3.000e+00, 9.400e+01, 7.000e+00, 1.000e+00, 9.000e+00,
        2.820e+02, 2.100e+01, 3.000e+00, 8.836e+03, 6.580e+02, 9.400e+01,
        4.900e+01, 7.000e+00, 1.000e+00],
       [1.000e+00, 9.000e+00, 5.400e+01, 5.000e+00, 9.000e+00, 8.100e+01,
        4.860e+02, 4.500e+01, 8.100e+01, 2.916e+03, 2.700e+02, 4.860e+02,
        2.500e+01, 4.500e+01, 8.100e+01],
       [1.000e+00, 4.000e+00, 5.600e+01, 8.000e+00, 6.000e+00, 1.600e+01,
        2.240e+02, 3.200e+01, 2.400e+01, 3.136e+03, 4.480e+02, 3.360e+02,
        6.400e+01, 4.800e+01, 3.600e+01]])

In [81]:
model.fit(X_train_poly,y_train)

In [82]:
y_pred = model.predict(X_test_poly)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse,r2

(np.float64(4.156711995061494), 0.9887180659380324)

### L1 and L2 regularization (Lasso and Ridge)


In [83]:
from sklearn.linear_model import Lasso,Ridge

In [84]:
# L1-regularised regression on the original (non-polynomial) features.
# NOTE(review): the features are not standardised, so alpha=1.0 penalises each
# coefficient on its raw scale; consider scaling before comparing penalties.
lm = Lasso(alpha=1.0).fit(X_train, y_train)  # fit() returns the estimator itself
lm.score(X_test, y_test)

0.9869176464437092

In [85]:
# L2-regularised regression on the original (non-polynomial) features.
# NOTE(review): the features are not standardised, so alpha=1.0 penalises each
# coefficient on its raw scale; consider scaling before comparing penalties.
rm = Ridge(alpha=1.0).fit(X_train, y_train)  # fit() returns the estimator itself
rm.score(X_test, y_test)

0.9887299756355274