In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
x,y = load_diabetes(return_X_y=True)

In [3]:
x.shape

(442, 10)

In [4]:
y.shape

(442,)

In [5]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [6]:
print(x_train.shape)
print(x_test.shape)

(353, 10)
(89, 10)


## Multiple Linear Regression using scikit-learn

In [7]:
from sklearn.linear_model import LinearRegression

In [8]:
reg = LinearRegression()

In [9]:
reg.fit(x_train,y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [10]:
y_pred = reg.predict(x_test)

In [11]:
r2_score_sklearn_model = r2_score(y_test,y_pred)
print(r2_score_sklearn_model)

0.4526027629719197


In [12]:
reg.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [13]:
reg.intercept_

np.float64(151.34560453985995)

## Multiple Linear Regression using Maths and Python

In [14]:
class Multiple_linear_regression_1:
    """Ordinary least squares multiple linear regression written with NumPy.

    After ``fit`` the learned parameters are available as:

    * ``intercept_`` -- the bias term (scalar).
    * ``coef_``      -- one weight per feature column, in column order.

    Both attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self,X_train,y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        X_train : 2-D array, one row per sample and one column per feature.
        y_train : array of targets, one entry per row of ``X_train``.

        Returns
        -------
        None. The result is stored on ``self.intercept_`` and ``self.coef_``.
        """
        # Prepend a column of ones so the first fitted parameter is the intercept.
        X_design = np.insert(X_train,0,1,axis=1)

        # Calculate the coefficients by solving the least-squares problem
        # directly. This yields the same solution as the normal equation
        # (X^T X)^-1 X^T y when X^T X is invertible, but avoids forming and
        # inverting X^T X explicitly, so it stays numerically stable and still
        # returns a finite (minimum-norm) solution when features are collinear.
        betas, *_ = np.linalg.lstsq(X_design, y_train, rcond=None)
        self.intercept_ = betas[0]
        self.coef_ = betas[1:]

    def predict(self,X_test):
        """Return predictions ``X_test @ coef_ + intercept_`` for each row of ``X_test``."""
        y_pred = np.dot(X_test,self.coef_) + self.intercept_
        return y_pred

In [15]:
lr = Multiple_linear_regression_1()

In [16]:
lr.fit(x_train,y_train)

In [17]:
x_train.shape

(353, 10)

In [18]:
np.insert(x_train,0,1,axis=1).shape

(353, 11)

In [19]:
y_pred_1 = lr.predict(x_test)

In [20]:
# Score the from-scratch model on its own predictions (y_pred_1); y_pred holds
# the scikit-learn predictions and would only reproduce the sklearn score.
r2_score_python_model = r2_score(y_test,y_pred_1)
print(r2_score_python_model)

0.4526027629719197


In [21]:
lr.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [22]:
lr.intercept_

np.float64(151.34560453986)

## Compare the R² scores of both models

In [23]:
# Put the two R² scores side by side, one row per model.
df = pd.DataFrame(
    dict(
        model=['python_model', 'sklearn_model'],
        accuracy=[r2_score_python_model, r2_score_sklearn_model],
    )
)

In [24]:
df.style.background_gradient(sns.color_palette("YlOrBr", as_cmap=True))

Unnamed: 0,model,accuracy
0,python_model,0.452603
1,sklearn_model,0.452603


In [25]:
!pip freeze -> requirment.txt