# Implementing Linear Regression for a 2-Dimensional Dataset using Calculus / Symbolic Math

In [25]:
#In Linear Regression, we have to find the best-fit line.
#The equation of a line is y=mx+b, where m is the slope and b is the Y-intercept.
#There are infinitely many possible lines, so we need the specific values of m and b that single out the best-fit line.
#The best-fit line is the one with the minimum Mean Squared Error (MSE).
#MSE=(Sum of Squared Terms)/(Number of terms)
#The number of terms cannot change for a given dataset, so minimum MSE implies minimum Sum of Squared Terms.
#Assuming the best-fit line is y=mx+b, the expression for the Sum of Squared Terms is Sigma(mx+b-y)^2.
#We then differentiate the Sum of Squared Terms with respect to m and b and set both derivatives equal to 0,
#because at a maximum or minimum of a function g(l,m), dg/dl=0 and dg/dm=0.
#Solving those two equations gives the values of m and b, which are returned as the answer.

In [34]:
#To run this code, install NumPy and SymPy

## Storing Dependent and Independent variable values in Numpy Array

In [27]:
import numpy as np

# Toy dataset: x holds the independent variable, y the dependent variable.
x, y = np.array([1, 2, 3]), np.array([3, 2, 4])
print("x=", x)
print("y=", y)

x= [1 2 3]
y= [3 2 4]


## Generating Expression of Sum of Squared Terms

In [28]:
import sympy

# Symbols for the unknown slope (m) and Y-intercept (b).
m, b = sympy.symbols('m b')

# Add up the squared residual (m*x + b - y)**2 of every sample, starting from 0.
Sum_of_Squared_Terms = sum(
    (m * x[i] + b - y[i]) ** 2 for i in range(len(x))
)
print(Sum_of_Squared_Terms)

(b + m - 3)**2 + (b + 2*m - 2)**2 + (b + 3*m - 4)**2


In [29]:
# SSE is only a shorter name for the same symbolic expression.
SSE = Sum_of_Squared_Terms
print(SSE)

(b + m - 3)**2 + (b + 2*m - 2)**2 + (b + 3*m - 4)**2


## Differentiating SSE wrt Slope and Y_Intercept

In [30]:
# Partial derivatives of the SSE with respect to the slope (m) and the
# Y-intercept (b), computed symbolically.
dSSE_dm, dSSE_db = sympy.diff(SSE, m), sympy.diff(SSE, b)
print("d(SSE)/dm=", dSSE_dm)
print("d(SSE)/db=", dSSE_db)

d(SSE)/dm= 12*b + 28*m - 38
d(SSE)/db= 6*b + 12*m - 18


## Solving the Obtained Equations to get the value of Slope and Y_Intercept

In [31]:
# Each derivative expression is treated by sympy.solve as "expression = 0",
# so solving both together gives the stationary point of the SSE.
stationarity_equations = [dSSE_dm, dSSE_db]
unknowns = [m, b]
Answer = sympy.solve(stationarity_equations, unknowns)
print(Answer)

{b: 2, m: 1/2}


# Generalizing the Results

In [32]:
def Linear_Regression(Input,Output):
    """Fit the least-squares line y = m*x + b symbolically with SymPy.

    Builds the Sum of Squared Terms over the paired samples, differentiates it
    with respect to m and b, and solves the two derivatives equated to 0.

    Parameters
    ----------
    Input : sequence of independent-variable values (e.g. a NumPy array).
    Output : sequence of dependent-variable values, same length as Input.

    Returns
    -------
    Whatever sympy.solve returns for the symbols m and b (for the notebook's
    sample data this is {b: 2, m: 1/2}), or -1 after printing
    "Missing Values" when Input and Output differ in length.
    """
    #Validate first so that no symbolic work is done on mismatched data
    if(len(Input)!=len(Output)):
        print("Missing Values")
        return -1

    m,b=sympy.symbols('m b')

    #Sum of Squared Terms initialized as 0
    SSE=0

    #Generating the expression of Sum of Squared Terms from the function's
    #own arguments, so the fit never depends on notebook-level variables
    for x_i,y_i in zip(Input,Output):
        SSE+=(m*x_i+b-y_i)**2

    #Differentiating the generated expression wrt m and b
    dSSE_dm=sympy.diff(SSE,m)
    dSSE_db=sympy.diff(SSE,b)

    #Solving for m and b by equating the equations after differentiating to 0
    #As for Maxima or Minima for a function g(l,m), dg/dl=0 and dg/dm=0
    return sympy.solve([dSSE_dm,dSSE_db],[m,b])

In [33]:
# Run the generalized function on the sample data and show the fitted m and b.
fitted_line = Linear_Regression(x, y)
print(fitted_line)

{b: 2, m: 1/2}
