In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

In [2]:
X, y = load_diabetes(return_X_y=True)

In [None]:
X

In [3]:
X.shape

(442, 10)

In [None]:
y

In [4]:
y.shape

(442,)

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [7]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()

In [9]:
X_train

array([[-0.00188202, -0.04464164, -0.06979687, ..., -0.03949338,
        -0.06291295,  0.04034337],
       [-0.00914709, -0.04464164,  0.01103904, ..., -0.03949338,
         0.01703713, -0.0052198 ],
       [ 0.02354575,  0.05068012, -0.02021751, ..., -0.03949338,
        -0.09643322, -0.01764613],
       ...,
       [ 0.06350368,  0.05068012, -0.00405033, ..., -0.00259226,
         0.08449528, -0.01764613],
       [-0.05273755,  0.05068012, -0.01806189, ...,  0.1081111 ,
         0.03605579, -0.04249877],
       [ 0.00175052,  0.05068012,  0.05954058, ...,  0.1081111 ,
         0.06898221,  0.12732762]])

In [10]:
lr.fit(X_train, y_train)

LinearRegression()

In [11]:
y_pred = lr.predict(X_test)

In [12]:
from sklearn.metrics import r2_score

In [13]:
print('R2 score',r2_score(y_test, y_pred))

R2 score 0.4399387660024645


In [14]:
lr.coef_

array([  -9.16088483, -205.46225988,  516.68462383,  340.62734108,
       -895.54360867,  561.21453306,  153.88478595,  126.73431596,
        861.12139955,   52.41982836])

In [15]:
lr.intercept_

151.88334520854633

In [16]:
X_train

array([[-0.00188202, -0.04464164, -0.06979687, ..., -0.03949338,
        -0.06291295,  0.04034337],
       [-0.00914709, -0.04464164,  0.01103904, ..., -0.03949338,
         0.01703713, -0.0052198 ],
       [ 0.02354575,  0.05068012, -0.02021751, ..., -0.03949338,
        -0.09643322, -0.01764613],
       ...,
       [ 0.06350368,  0.05068012, -0.00405033, ..., -0.00259226,
         0.08449528, -0.01764613],
       [-0.05273755,  0.05068012, -0.01806189, ...,  0.1081111 ,
         0.03605579, -0.04249877],
       [ 0.00175052,  0.05068012,  0.05954058, ...,  0.1081111 ,
         0.06898221,  0.12732762]])

# Making our own Multiple Linear Regression

In [17]:
class MeraLR:
    
    def __init__(self):
        self.coef_ = None
        self.intercept_ = None
    
    def fit(self, X_train, y_train):
        X_train = np.insert(X_train,0 ,1, axis=1)
        print(X_train.shape)
        print(X_train)
        betas = np.linalg.inv(np.dot(X_train.T, X_train)).dot(X_train.T).dot(y_train)
        self.intercept_ = betas[0]
        self.coef_ = betas[1:]
        
    def predict(self, X_test):
        y_pred = np.dot(X_test, self.coef_) + self.intercept_
        return y_pred

In [18]:
y_train.shape

(353,)

In [19]:
X_train.shape

(353, 10)

In [20]:
reg = MeraLR()

In [21]:
reg.fit(X_train, y_train)

(353, 11)
[[ 1.         -0.00188202 -0.04464164 ... -0.03949338 -0.06291295
   0.04034337]
 [ 1.         -0.00914709 -0.04464164 ... -0.03949338  0.01703713
  -0.0052198 ]
 [ 1.          0.02354575  0.05068012 ... -0.03949338 -0.09643322
  -0.01764613]
 ...
 [ 1.          0.06350368  0.05068012 ... -0.00259226  0.08449528
  -0.01764613]
 [ 1.         -0.05273755  0.05068012 ...  0.1081111   0.03605579
  -0.04249877]
 [ 1.          0.00175052  0.05068012 ...  0.1081111   0.06898221
   0.12732762]]


In [22]:
pred = reg.predict(X_test)

In [23]:
print('R2 score',r2_score(y_test, pred))

R2 score 0.43993876600246484


In [24]:
reg.coef_

array([  -9.16088483, -205.46225988,  516.68462383,  340.62734108,
       -895.54360867,  561.21453306,  153.88478595,  126.73431596,
        861.12139955,   52.41982836])

In [25]:
reg.intercept_

151.88334520854627

In [26]:
import pandas as pd

In [27]:
df = pd.read_csv('placement.csv')

In [28]:
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [29]:
X = df.iloc[:,0:1]
y = df.iloc[:,-1]

In [30]:
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2, random_state=2)

In [31]:
X_train1.head()

Unnamed: 0,cgpa
137,7.14
163,8.93
111,5.42
123,5.1
109,7.77


In [32]:
y_train1.shape

(160,)

In [33]:
re = MeraLR()

In [None]:
X_train1.shape

In [None]:
X_train1.head()

In [34]:
re.fit(X_train1.to_numpy(), y_train1)

(160, 2)
[[1.   7.14]
 [1.   8.93]
 [1.   5.42]
 [1.   5.1 ]
 [1.   7.77]
 [1.   6.76]
 [1.   6.89]
 [1.   6.68]
 [1.   7.91]
 [1.   7.89]
 [1.   8.71]
 [1.   7.95]
 [1.   6.61]
 [1.   6.26]
 [1.   6.53]
 [1.   6.42]
 [1.   5.11]
 [1.   6.09]
 [1.   6.93]
 [1.   7.04]
 [1.   5.94]
 [1.   6.05]
 [1.   5.83]
 [1.   5.95]
 [1.   9.31]
 [1.   5.58]
 [1.   7.88]
 [1.   6.13]
 [1.   7.76]
 [1.   4.85]
 [1.   6.19]
 [1.   8.6 ]
 [1.   6.07]
 [1.   7.18]
 [1.   5.12]
 [1.   7.39]
 [1.   8.25]
 [1.   8.28]
 [1.   7.13]
 [1.   7.35]
 [1.   5.66]
 [1.   5.99]
 [1.   8.01]
 [1.   7.14]
 [1.   6.34]
 [1.   6.89]
 [1.   5.42]
 [1.   6.47]
 [1.   7.69]
 [1.   7.4 ]
 [1.   7.28]
 [1.   5.95]
 [1.   7.38]
 [1.   6.93]
 [1.   8.99]
 [1.   7.36]
 [1.   7.08]
 [1.   5.38]
 [1.   7.56]
 [1.   8.22]
 [1.   5.84]
 [1.   6.78]
 [1.   7.19]
 [1.   7.28]
 [1.   6.79]
 [1.   6.12]
 [1.   6.85]
 [1.   8.2 ]
 [1.   6.84]
 [1.   7.37]
 [1.   6.22]
 [1.   6.61]
 [1.   5.23]
 [1.   7.21]
 [1.   6.85]
 [1.   6.19]
 [1

In [37]:
pred = re.predict(X_test1.to_numpy())

In [39]:
print('R2 score',r2_score(y_test1, pred))

R2 score 0.7807301475103857


In [40]:
print(re.coef_)

[0.55795197]


In [41]:
print(re.intercept_)

-0.8961119222429297
