In [1]:
# importing libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes

# **SLR (sklearn)**

In [2]:
df=pd.read_csv('Salary_dataset.csv')

In [3]:
df.shape

(30, 2)

In [4]:
df.head()

Unnamed: 0,YearsExperience,Salary
0,1.2,39344
1,1.4,46206
2,1.6,37732
3,2.1,43526
4,2.3,39892


In [5]:
# input and output
# Features: the first column, selected with a list so the result stays a
# one-column DataFrame (2-D) rather than a Series.
X = df.iloc[:, [0]]
# Target: the last column, as a Series.
Y = df.iloc[:, -1]

In [6]:
X

Unnamed: 0,YearsExperience
0,1.2
1,1.4
2,1.6
3,2.1
4,2.3
5,3.0
6,3.1
7,3.3
8,3.3
9,3.8


In [7]:
# train test split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [8]:
X_test

Unnamed: 0,YearsExperience
1,1.4
0,1.2
14,4.6
9,3.8
21,7.2
19,6.1


In [9]:
# Using Sklearn's SLR
from sklearn.linear_model import LinearRegression

# Create the estimator, then fit it on the training split.
lr = LinearRegression()
lr.fit(X=X_train, y=Y_train)

In [10]:
X_test

Unnamed: 0,YearsExperience
1,1.4
0,1.2
14,4.6
9,3.8
21,7.2
19,6.1


In [11]:
Y_test

1     46206
0     39344
14    61112
9     57190
21    98274
19    93941
Name: Salary, dtype: int64

In [12]:
# prediction
# Select the row with a list so it stays a one-row DataFrame: the model was
# fitted on a DataFrame, and passing a bare ndarray drops the column name and
# makes scikit-learn warn about missing feature names.
lr.predict(X_test.iloc[[3]])



array([59801.64062805])

In [13]:
# value of m
lr.coef_ 

array([9569.58688543])

In [14]:
# value of b
lr.intercept_

23437.210463405063

In [15]:
# Select the row with a list so it stays a one-row DataFrame that still carries
# the column name the model was fitted with (a bare ndarray triggers a
# feature-name warning).
lr.predict(X_test.iloc[[0]])



array([36834.63210301])

# **SLR (mine)**

In [16]:
# Creating my own SLR Class
class MySLR:
    """Simple (single-feature) linear regression fitted by ordinary least squares.

    The fitted line is ``y = m * x + b``. Both ``m`` and ``b`` are ``None``
    until :meth:`fit` has been called.
    """

    def __init__(self):
        # slope (m) and intercept (b); populated by fit()
        self.m = None
        self.b = None

    def fit(self, X_train, y_train):
        """Estimate slope and intercept with the closed-form OLS solution.

            m = sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))^2)
            b = mean(y) - m * mean(x)

        Parameters
        ----------
        X_train : array-like
            Feature values. Anything ``np.asarray`` accepts (list, ndarray,
            pandas Series, one-column DataFrame); it is flattened to 1-D.
        y_train : array-like
            Target values, same number of elements as ``X_train``.

        Raises
        ------
        ValueError
            If the inputs are empty, have different lengths, or every feature
            value is identical (the slope is undefined in that case).
        """
        # Convert to plain positional arrays first: indexing a pandas object
        # with an integer is label-based and breaks on a shuffled index.
        x = np.asarray(X_train, dtype=float).ravel()
        y = np.asarray(y_train, dtype=float).ravel()

        if x.size == 0:
            raise ValueError("cannot fit on empty data")
        if x.size != y.size:
            raise ValueError(
                "X_train and y_train must have the same number of samples "
                f"(got {x.size} and {y.size})"
            )

        # The means are computed once instead of once per sample.
        x_mean = x.mean()
        y_mean = y.mean()
        x_dev = x - x_mean

        num = np.dot(x_dev, y - y_mean)  # sum((x - mean(x)) * (y - mean(y)))
        den = np.dot(x_dev, x_dev)       # sum((x - mean(x))^2)
        if den == 0:
            raise ValueError("all X_train values are identical; slope is undefined")

        self.m = num / den
        self.b = y_mean - (self.m * x_mean)  # mean(y) - m * mean(x)

        # Echo the fitted parameters; the notebook cell that calls fit() shows
        # these two lines as its output.
        print(self.m)
        print(self.b)

    def predict(self, X_test):
        """Return ``m * X_test + b`` for a scalar or array-like ``X_test``.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if self.m is None or self.b is None:
            raise RuntimeError("MySLR instance is not fitted yet; call fit() first")

        # Echo the input (existing behaviour, kept so cell output is unchanged).
        print(X_test)

        return self.m * X_test + self.b  # Y = mX + b

In [17]:
lr1=MySLR()

In [18]:
# input and output
# First column as the feature and second column as the target, each
# converted to a 1-D NumPy array.
X1 = df.iloc[:, 0].to_numpy()
Y1 = df.iloc[:, 1].to_numpy()

In [19]:
X1

array([ 1.2,  1.4,  1.6,  2.1,  2.3,  3. ,  3.1,  3.3,  3.3,  3.8,  4. ,
        4.1,  4.1,  4.2,  4.6,  5. ,  5.2,  5.4,  6. ,  6.1,  6.9,  7.2,
        8. ,  8.3,  8.8,  9.1,  9.6,  9.7, 10.4, 10.6])

In [20]:
# train test split
# train_test_split is already imported by the earlier train/test split cell,
# so it is not imported again here.
X_train1, X_test1, Y_train1, Y_test1 = train_test_split(
    X1, Y1, test_size=0.2, random_state=2
)

In [21]:
# m or coef_ value , b or intercept_ value
lr1.fit(X_train1,Y_train1)

9569.586885432866
23437.21046340505


In [22]:
X_train1[0]

8.3

In [23]:
X_test1

array([1.4, 1.2, 4.6, 3.8, 7.2, 6.1])

In [24]:
lr1.predict(X_test1[0])

1.4


36834.632103011056

# **MLR (mine)**

In [25]:
# Creating my own MLR Class
class MyMLR:
    """Multiple linear regression fitted by ordinary least squares.

    The model is ``y = X @ m + b``. ``m`` (coefficient vector) and ``b``
    (intercept) are ``None`` until :meth:`fit` has been called.
    """

    def __init__(self):
        # coefficients (m) and intercept (b); populated by fit()
        self.m = None
        self.b = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients.

        In matrix form ``Y_pred = X * beta`` with loss ``E = e^T e`` where
        ``e = Y - Y_pred``. Setting ``dE/d(beta) = 0`` gives the normal
        equation ``beta = (X^T X)^-1 X^T Y``. That system is solved here with
        ``np.linalg.lstsq`` instead of forming the inverse explicitly, which
        is numerically safer and still returns a (minimum-norm) solution when
        the features are collinear and ``X^T X`` is singular.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X_train`` is not two-dimensional.
        """
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)
        if features.ndim != 2:
            raise ValueError(
                "X_train must be 2-D (n_samples, n_features); "
                f"got {features.ndim} dimension(s)"
            )

        # Prepend a column of ones so that beta_0 acts as the intercept.
        design = np.column_stack((np.ones(features.shape[0]), features))

        # Least-squares solution of design @ betas = targets.
        betas, *_ = np.linalg.lstsq(design, targets, rcond=None)
        self.b = betas[0]   # first entry is the intercept beta_0
        self.m = betas[1:]  # remaining entries are the feature coefficients

    def predict(self, X_test):
        """Return ``X_test @ m + b``.

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        """
        if self.m is None or self.b is None:
            raise RuntimeError("MyMLR instance is not fitted yet; call fit() first")
        y_pred = np.dot(X_test, self.m) + self.b  # Y = mX + b
        return y_pred

In [26]:
X2,Y2 = load_diabetes(return_X_y=True) # loading diabetes data

In [27]:
Y2

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [28]:
# train_test_split is already imported by the first train/test split cell,
# so it is not imported again here.
X_train2, X_test2, Y_train2, Y_test2 = train_test_split(
    X2, Y2, test_size=0.2, random_state=2
)

In [29]:
lr2=MyMLR()

In [30]:
lr2.fit(X_train2,Y_train2)

In [31]:
y_pred = lr2.predict(X_test2)

In [32]:
from sklearn.metrics import r2_score

In [33]:
r2_score(Y_test2,y_pred)

0.43993386615689756

In [34]:
lr2.b

151.8833100525417

In [35]:
lr2.m

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

# **MLR (sklearn)**

In [36]:
# NOTE: this refits the `lr` estimator created in the SLR (sklearn) section,
# now on the diabetes training split; the coefficients it learned from the
# salary data are replaced from this cell onward.
lr.fit(X_train2,Y_train2)

In [37]:
y_pred1 = lr.predict(X_test2)

In [38]:
r2_score(Y_test2,y_pred1)

0.4399338661568968

In [39]:
lr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [40]:
lr.intercept_

151.88331005254167