## Welcome to this Notebook


In this notebook, we will be creating multi linear regression model from scratch and then use Iris dataset to test and train the model.

In [None]:
# importing necessary library
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [13]:
# defining a linear regression class
class Linear_Regression:
    def __init__(self, X, Y): # it takes X and Y meaning whole dataset and store it 
        self.X = np.vstack((X.values.T,np.ones(X.shape[0]))).T # adding an extra column so that we can multiply as this column is for theata zero.
        self.Y = Y.reshape(-1,1) # Reshaping Y to the 2D array which have a single column
        self.theta = np.random.random((1,self.X.shape[1])).T # Initializing values for theta's randomly for first time
    def hypothesis(self): # creating a function which will return hypothesis 
        h = np.dot(self.X,self.theta.reshape(-1,1))
        #print("h: ",h.shape)
        return h
    
    def cost_function(self): # function to calculate cost of the model
        return np.mean((self.Y-self.hypothesis())**2)
    
    def der_cost_function(self): #function for derivative of cost function as it is used in updating the weights
        dot_prod=np.dot((self.Y-self.hypothesis()).T,self.X)
        #print(dot_prod)
        return -2*(np.mean(dot_prod,axis=1))
    
    def gradient_descent_function(self,lr=0.0001): # function for calculating gradient descent 
        der = self.der_cost_function()
        return self.theta-lr*der
        
    def train_function(self,no_of_it_train,no_of_it_print_cost,lr=0.0001): # Function to train the model
        count=1 # for printing cost
        for i in range(1,no_of_it_train):
            gradient_val = self.gradient_descent_function(lr) # finding new weights
            self.theta = gradient_val # updating the weights
            if(count == no_of_it_print_cost): 
                print("Cost: ", self.cost_function()) # printing cost value after some number of iterations
                count=1
            count+=1
    def Predict(self,X): # It is used for prediction
        predictions= np.dot(X,self.theta)
        return predictions
    def get_weights(self): # Function for getting the weights
        return self.theta
    def printer(self): # Function to print X, Y, theta's
        print("X: ",self.X)
        print("Y: ",self.Y)
        print("theta: ",self.theta)
        

In [14]:
df=pd.read_csv("Iris.csv") # using Iris data 
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [15]:
# splitting the dataset and converting non-numarical column to numarical using LabelEncoder()
x=df.drop('species',axis=1)
y=df['species']
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [16]:
# Initializing our model
lr=Linear_Regression(x,y)
lr.printer() #checking if everthing is good

X:  [[5.1 3.5 1.4 0.2 1. ]
 [4.9 3.  1.4 0.2 1. ]
 [4.7 3.2 1.3 0.2 1. ]
 [4.6 3.1 1.5 0.2 1. ]
 [5.  3.6 1.4 0.2 1. ]
 [5.4 3.9 1.7 0.4 1. ]
 [4.6 3.4 1.4 0.3 1. ]
 [5.  3.4 1.5 0.2 1. ]
 [4.4 2.9 1.4 0.2 1. ]
 [4.9 3.1 1.5 0.1 1. ]
 [5.4 3.7 1.5 0.2 1. ]
 [4.8 3.4 1.6 0.2 1. ]
 [4.8 3.  1.4 0.1 1. ]
 [4.3 3.  1.1 0.1 1. ]
 [5.8 4.  1.2 0.2 1. ]
 [5.7 4.4 1.5 0.4 1. ]
 [5.4 3.9 1.3 0.4 1. ]
 [5.1 3.5 1.4 0.3 1. ]
 [5.7 3.8 1.7 0.3 1. ]
 [5.1 3.8 1.5 0.3 1. ]
 [5.4 3.4 1.7 0.2 1. ]
 [5.1 3.7 1.5 0.4 1. ]
 [4.6 3.6 1.  0.2 1. ]
 [5.1 3.3 1.7 0.5 1. ]
 [4.8 3.4 1.9 0.2 1. ]
 [5.  3.  1.6 0.2 1. ]
 [5.  3.4 1.6 0.4 1. ]
 [5.2 3.5 1.5 0.2 1. ]
 [5.2 3.4 1.4 0.2 1. ]
 [4.7 3.2 1.6 0.2 1. ]
 [4.8 3.1 1.6 0.2 1. ]
 [5.4 3.4 1.5 0.4 1. ]
 [5.2 4.1 1.5 0.1 1. ]
 [5.5 4.2 1.4 0.2 1. ]
 [4.9 3.1 1.5 0.1 1. ]
 [5.  3.2 1.2 0.2 1. ]
 [5.5 3.5 1.3 0.2 1. ]
 [4.9 3.1 1.5 0.1 1. ]
 [4.4 3.  1.3 0.2 1. ]
 [5.1 3.4 1.5 0.2 1. ]
 [5.  3.5 1.3 0.3 1. ]
 [4.5 2.3 1.3 0.3 1. ]
 [4.4 3.2 1.3 0.2 1. ]
 [5.  3

In [17]:
lr.train_function(10000,100) # training our model for 10000 epochs and print cost after every 100th epoch

Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.8622919160315748
Cost:  0.862

In [296]:
# testing our model 
X_test=[5.9,3.0,5.1,1.8,1] # test example
X_test=np.array(X_test) 
lr.test(X_test)

array([1.20599469])