## Welcome to this Notebook


In this notebook, we will be creating multi linear regression model from scratch and then use Iris dataset to test and train the model.

In [31]:
# importing necessary library
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [39]:
# defining a linear regression class
class Linear_Regression:
    def __init__(self, X, Y): # it takes X and Y meaning whole dataset and store it
        self.X = np.vstack((X.values.T,np.ones(X.shape[0]))).T # adding an extra column so that we can multiply as this column is for theata zero.
        self.Y = Y.reshape(-1,1) # Reshaping Y to the 2D array which have a single column
        self.theta = np.random.random((1,self.X.shape[1])).T # Initializing values for theta's randomly for first time
    def hypothesis(self): # creating a function which will return hypothesis
        h = np.dot(self.X,self.theta)
        #print("h: ",h)
        return h

    def cost_function(self): # function to calculate cost of the model
        return np.mean((self.Y-self.hypothesis())**2)

    def der_cost_function(self): #function for derivative of cost function as it is used in updating the weights
        dot_prod=np.dot(self.X.T,(self.hypothesis()-self.Y))
        x = len(self.Y)
        var = dot_prod / x
       # print(dot_prod)
        return (var)

    def gradient_descent_function(self,lr=0.0001): # function for calculating gradient descent
        der = self.der_cost_function()
       # print("der:",der)
        return self.theta-lr*der

    def train_function(self,no_of_it_train,no_of_it_print_cost,lr=0.0001): # Function to train the model
        count=1 # for printing cost
        for i in range(1,no_of_it_train):
            gradient_val = self.gradient_descent_function(lr) # finding new weights
            #print(gradient_val)
            self.theta = gradient_val # updating the weights
            if(count == no_of_it_print_cost):
                print("Cost: ", self.cost_function()) # printing cost value after some number of iterations
                count=1
            count+=1
    def Predict(self,X): # It is used for prediction
        predictions= np.dot(X,self.theta)
        return predictions
    def get_weights(self): # Function for getting the weights
        return self.theta
    def printer(self): # Function to print X, Y, theta's
        print("X: ",self.X)
        print("Y: ",self.Y)
        print("theta: ",self.theta)


In [40]:
df=pd.read_csv("iris.csv") # using Iris data
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [41]:
# splitting the dataset and converting non-numarical column to numarical using LabelEncoder()
x=df.drop('species',axis=1)
y=df['species']
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

In [42]:
# Initializing our model
lr=Linear_Regression(X_train, y_train)
lr.printer() #checking if everthing is good

X:  [[4.6 3.6 1.  0.2 1. ]
 [5.7 4.4 1.5 0.4 1. ]
 [6.7 3.1 4.4 1.4 1. ]
 [4.8 3.4 1.6 0.2 1. ]
 [4.4 3.2 1.3 0.2 1. ]
 [6.3 2.5 5.  1.9 1. ]
 [6.4 3.2 4.5 1.5 1. ]
 [5.2 3.5 1.5 0.2 1. ]
 [5.  3.6 1.4 0.2 1. ]
 [5.2 4.1 1.5 0.1 1. ]
 [5.8 2.7 5.1 1.9 1. ]
 [6.  3.4 4.5 1.6 1. ]
 [6.7 3.1 4.7 1.5 1. ]
 [5.4 3.9 1.3 0.4 1. ]
 [5.4 3.7 1.5 0.2 1. ]
 [5.5 2.4 3.7 1.  1. ]
 [6.3 2.8 5.1 1.5 1. ]
 [6.4 3.1 5.5 1.8 1. ]
 [6.6 3.  4.4 1.4 1. ]
 [7.2 3.6 6.1 2.5 1. ]
 [5.7 2.9 4.2 1.3 1. ]
 [7.6 3.  6.6 2.1 1. ]
 [5.6 3.  4.5 1.5 1. ]
 [5.1 3.5 1.4 0.2 1. ]
 [7.7 2.8 6.7 2.  1. ]
 [5.8 2.7 4.1 1.  1. ]
 [5.2 3.4 1.4 0.2 1. ]
 [5.  3.5 1.3 0.3 1. ]
 [5.1 3.8 1.9 0.4 1. ]
 [5.  2.  3.5 1.  1. ]
 [6.3 2.7 4.9 1.8 1. ]
 [4.8 3.4 1.9 0.2 1. ]
 [5.  3.  1.6 0.2 1. ]
 [5.1 3.3 1.7 0.5 1. ]
 [5.6 2.7 4.2 1.3 1. ]
 [5.1 3.4 1.5 0.2 1. ]
 [5.7 3.  4.2 1.2 1. ]
 [7.7 3.8 6.7 2.2 1. ]
 [4.6 3.2 1.4 0.2 1. ]
 [6.2 2.9 4.3 1.3 1. ]
 [5.7 2.5 5.  2.  1. ]
 [5.5 4.2 1.4 0.2 1. ]
 [6.  3.  4.8 1.8 1. ]
 [5.8 2

In [43]:
lr.train_function(1000,50) # training our model for 10000 epochs and print cost after every 100th epoch

Cost:  3.843377381029866
Cost:  2.163312016985393
Cost:  1.2457696745336686
Cost:  0.7442975347782171
Cost:  0.4698614507729848
Cost:  0.31931905793949794
Cost:  0.23639281842989
Cost:  0.1903758857337662
Cost:  0.16451346322204197
Cost:  0.14966336710724792
Cost:  0.14083706242827873
Cost:  0.13531236101235153
Cost:  0.13160365962333234
Cost:  0.12890007463223527
Cost:  0.12675887336066274
Cost:  0.12493820201799197
Cost:  0.12330581753277826
Cost:  0.12178928911387717
Cost:  0.12034882679120651
Cost:  0.11896245583520072


In [44]:
# testing our model
X_test = np.vstack((X_test.values.T,np.ones(X_test.shape[0]))).T #adding an extra column so that we can multiply as this column is for theata zero.
model_predictions = lr.Predict(X_test)
#model_predictions

In [47]:
#Calculating accuracy
accuracy = r2_score(y_test, model_predictions)
print(f"Model Accuracy: {accuracy}")

Model Accuracy: 0.8292045191508238


As you can see that our model accuracy is around 0.82 which means that our model is almost predicting accuratly but you can increase it but hyperparameter tuning.