# Linear Regression

Linear regression attempts to model the relationship between two variables by fitting a linear equation to observed data.
One variable is considered to be an explanatory variable, and the other is considered to be a dependent variable.
For example, a modeler might want to relate the weights of individuals to their heights using a linear regression model.

In [9]:
# Display the simple-regression illustration hosted in the mlxtend user guide.
# NOTE(review): `Image` is not imported in the visible cells -- presumably
# IPython.display.Image; confirm it is imported before this cell runs.
Image(url= "http://rasbt.github.io/mlxtend/user_guide/regressor/LinearRegression_files/simple_regression.png", width=10000, height=10000)

A linear regression line has an equation of the form Y = a + bX, 
where X is the explanatory variable and Y is the dependent variable.
The slope of the line is b, and a is the intercept (the value of Y when X = 0).

### Importing Libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn import linear_model
from numpy.linalg import inv,det,multi_dot,norm
from sklearn.metrics import confusion_matrix
from itertools import combinations
import warnings
warnings.filterwarnings('ignore')

### Creating A Class Linear

In [5]:
class Linear:
    """Least-squares linear regression with a cross-validated learning rate.

    The fitted hyperplane is stored in two dictionaries keyed by model index
    (only key ``0`` is written by ``Linear_Train``):

    * ``weights[k]`` -- column vector of shape ``(n_features, 1)``
    * ``bias[k]``    -- scalar intercept

    ``cost`` holds the per-iteration training cost of the last gradient
    descent run (empty when the closed-form branch was used).
    """

    def __init__(self):
        # x / y are never written by the methods below; kept for callers that
        # may read or set them.
        self.x = None
        self.y = None
        self.weights = None
        self.bias = None
        self.cost = []

    def _predict(self, x):
        """Return predictions of shape ``(n_samples, 1)`` for a 2-D array ``x``.

        Uses the most recently stored hyperplane (highest key).

        Raises:
            RuntimeError: if the model has not been trained yet.
        """
        if not self.weights or not self.bias:
            raise RuntimeError(
                "model has not been trained; call Linear_Train first"
            )
        key = len(self.bias) - 1
        return np.dot(x, self.weights[key]) + self.bias[key]

    def _mse(self, x, y):
        """Return the mean squared error of the model on ``(x, y)``."""
        x = np.asarray(x)
        y = np.asarray(y).reshape(x.shape[0], 1)
        residual = self._predict(x) - y
        return (1 / len(x)) * np.sum(np.square(residual))

    def Error(self, x, y):
        """Return the mean squared error on ``(x, y)``.

        Args:
            x: 2-D array-like of shape ``(n_samples, n_features)``.
            y: array-like with ``n_samples`` target values.

        Raises:
            RuntimeError: if the model has not been trained yet.
        """
        return self._mse(x, y)

    def kfold_linear(self, x, y):
        """Pick a learning rate by 5-fold cross-validation.

        Each candidate in ``10**-6 .. 10**2`` is trained on the training part
        of every fold and scored by MSE on the held-out part; the candidate
        with the lowest mean score is returned.

        Args:
            x: 2-D array-like of features.
            y: array-like of targets, one per row of ``x``.

        Returns:
            The learning rate with the lowest mean validation error.
        """
        kf = KFold(n_splits=5)
        x = np.asarray(x)
        y = np.asarray(y)

        alpha_range = [10**i for i in range(-6, 3)]
        mean_errors = []
        for alpha in alpha_range:
            fold_errors = []
            for train_index, test_index in kf.split(x, y):
                # Fit on the training fold only; fitting on the full data
                # would leak the validation rows into the model.
                self.Linear_Train(
                    x[train_index], y[train_index], alpha, 2000, 10**+100
                )
                fold_errors.append(self.Error(x[test_index], y[test_index]))
            mean_errors.append(sum(fold_errors) / len(fold_errors))
            print(f"Learning rate: {alpha} mean error is : ---{sum(fold_errors)/len(fold_errors)} ")

        # A diverged run yields NaN/inf; np.argmin would pick a NaN entry, so
        # treat every non-finite score as the worst possible one.
        scores = np.asarray(mean_errors, dtype=float)
        scores[~np.isfinite(scores)] = np.inf
        optimal_alpha = alpha_range[int(np.argmin(scores))]
        print(" ")
        print("optimal Learning rate is-->{}".format(optimal_alpha))
        return optimal_alpha

    def Linear_Train(self, x, y, alpha, itr, eps):
        """Fit the hyperplane and store it in ``weights[0]`` / ``bias[0]``.

        With at least as many samples as features, batch gradient descent is
        run from zero weights. The update uses the summed (not averaged)
        gradient, so a workable ``alpha`` shrinks as the sample count grows.
        With fewer samples than features, the minimum-norm least-squares
        solution is computed in closed form instead.

        Args:
            x: 2-D array-like of shape ``(n_samples, n_features)``.
            y: array-like with ``n_samples`` target values.
            alpha: gradient descent step size (unused in the closed form).
            itr: maximum number of gradient descent iterations.
            eps: divergence cap -- iteration stops once the cost exceeds it.
        """
        x = np.asarray(x)
        r, c = x.shape
        y = np.asarray(y).reshape(r, 1)

        self.weights = {}
        self.bias = {}

        if r >= c:
            # Gradient descent
            w = np.zeros((c, 1))
            w0 = 0
            cost_list = []
            for _ in range(itr):
                err = np.dot(x, w) + w0 - y
                w = w - alpha * np.dot(x.T, err)
                w0 = w0 - alpha * np.sum(err)
                cost = 0.5 * (1 / r) * np.sum(np.square(err))
                cost_list.append(cost)
                # Stop once the run has blown up; a NaN cost never compares
                # greater than eps, so it is checked explicitly.
                if not np.isfinite(cost) or cost > eps:
                    break
            self.weights[0] = w
            self.bias[0] = w0
            self.cost = cost_list
        else:
            # Under-determined system: minimum-norm solution on the design
            # matrix augmented with a column of ones for the intercept.
            design = np.hstack((np.ones((r, 1)), x))
            solution = np.dot(np.linalg.pinv(design), y)
            self.weights[0] = solution[1:, :]
            self.bias[0] = solution[0, 0]
            self.cost = []

    def Linear_Test(self, x, y):
        """Print and return the mean squared error on ``(x, y)``.

        Args:
            x: 2-D array-like of shape ``(n_samples, n_features)``.
            y: array-like with ``n_samples`` target values.

        Returns:
            The mean squared error.

        Raises:
            RuntimeError: if the model has not been trained yet.
        """
        MSE = self._mse(x, y)
        print("")
        print('Mean Square Error is-->{}'.format(MSE))
        return MSE

    def plot_decison_boundary(self, x_test, y_test):
        """Plot the fitted hyperplane against the test points.

        One feature gives a 2-D line plot, two features give a 3-D surface
        plot; nothing is drawn for more than two features.

        Args:
            x_test: 2-D array-like (DataFrame or ndarray) of features.
            y_test: array-like of targets, one per row of ``x_test``.
        """
        features = np.asarray(x_test)
        r, c = features.shape
        targets = np.asarray(y_test).reshape(r, 1)

        if c < 2:
            # 2D hyperplane plotting
            x = features[:, 0].reshape(r, 1)
            plt.figure(figsize=(8, 6))
            for key in self.bias:
                y_cal = self.bias[key] + x @ self.weights[key]
                print(y_cal.shape)
                print("-----------------------Plotting Hyperplane---------------------------------")
                plt.plot(x, y_cal)

            plt.scatter(x, targets, color='y', label="Actual data")
            plt.xlabel('x_1', fontsize=10)
            plt.ylabel('x_2', fontsize=10)
            plt.legend(loc='best')
            plt.show()

        elif c == 2:
            # 3D hyperplane plotting
            fig = plt.figure(figsize=(8, 6))
            ax = fig.add_subplot(111, projection='3d')
            x11 = features[:, 0].reshape(r, 1)
            x21 = features[:, 1].reshape(r, 1)

            # Evaluate the plane on a regular grid spanning the data so that
            # plot_surface receives proper 2-D coordinate arrays.
            grid1, grid2 = np.meshgrid(
                np.linspace(x11.min(), x11.max(), 10),
                np.linspace(x21.min(), x21.max(), 10),
            )
            for key in self.bias:
                coef = np.ravel(self.weights[key])
                plane = self.bias[key] + grid1 * coef[0] + grid2 * coef[1]
                ax.plot_surface(grid1, grid2, plane, alpha=0.5)

            print(x11.shape, x21.shape, targets.shape)
            ax.scatter3D(x11, x21, targets, color='y', label="Actual data")
            ax.set_xlabel('X1', fontsize=10)
            ax.set_ylabel('X2', fontsize=10)
            ax.set_zlabel('X3', fontsize=10)
            plt.legend(loc='best')
            plt.show()

        # More than two features cannot be plotted; nothing to do.

    # Call this function if the target is the last column of the data.
    def test_model_std(self, data):
        """Min-max scale ``data`` and run the full train/test pipeline.

        Args:
            data: DataFrame whose last column is the target and whose other
                columns are the features.

        Returns:
            The trained model returned by ``test_model_xy``.
        """
        # Min-max normalisation; constant columns have a zero range, so
        # divide those by 1 to map them to 0 instead of NaN.
        span = (data.max() - data.min()).replace(0, 1)
        data = (data - data.min()) / span
        x = data.iloc[:, :-1]
        y = data.iloc[:, -1]
        return self.test_model_xy(x, y)

    # Call this function if the target column is at a different location.
    def test_model_xy(self, x, y):
        """Split, tune, train, evaluate and plot a fresh ``Linear`` model.

        Holds out 30% of the rows for testing, selects the learning rate by
        cross-validation on the remainder, then trains, reports the test MSE
        and plots the result. The instance this is called on is not modified.

        Args:
            x: 2-D features (DataFrame or ndarray).
            y: array-like of targets, one per row of ``x``.

        Returns:
            The trained ``Linear`` model.
        """
        r, c = x.shape
        y = np.asarray(y).reshape(r, 1)
        X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
        model = Linear()
        print("Train points Shape")
        print(np.shape(X_train))
        print(" ")
        alpha1 = model.kfold_linear(X_train, y_train)
        model.Linear_Train(X_train, y_train, alpha1, 2000, 10**+100)
        print(" ")
        print('Parameters for hyperplane are-->')
        print(model.weights)
        print(" ")
        print("bais ")
        print(model.bias)
        model.Linear_Test(X_test, y_test)
        model.plot_decison_boundary(X_test, y_test)
        return model
