<h3>Linear Regression</h3>
<h4>Packages Used</h4>
<ul>
    <li>numpy</li>
    <li>matplotlib</li>
</ul>
<h3>Import necessary packages</h3>

In [11]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

<h3>ReadData(data,separator): Helper function to read data</h3>
<h4> Assumes data is of the form X[0], X[1], ..., X[n], Y</h4>
<h5>Where X[i] is a feature and Y is the label</h5>

In [12]:
def ReadData(data,separator):
    """Load a delimited numeric table and split it into features and labels.

    Every row is expected to hold the feature values followed by the label
    in the last column.

    Parameters
    ----------
    data : str or file-like
        Anything accepted by ``np.genfromtxt`` as its first argument.
    separator : str
        Column delimiter forwarded to ``np.genfromtxt``.

    Returns
    -------
    X : ndarray, shape (m, n)
        All columns except the last one.
    Y : ndarray, shape (m, 1)
        The last column, reshaped to a column vector.
    m : int
        Number of rows read.
    bias : ndarray, shape (1, 1)
        Zero-initialised intercept.
    theta : ndarray, shape (n, 1)
        Zero-initialised weights, one per feature column.
    """
    XY = np.genfromtxt(data, delimiter=separator)
    m = XY.shape[0]
    Y = XY[:, -1].reshape(m, 1)
    X = XY[:, 0:-1]
    # The intercept is a single value shared by every sample, so its shape
    # must not depend on the number of features; (1, 1) broadcasts against
    # the (m, 1) product np.dot(X, theta) for any feature count.
    bias = np.zeros((1, 1))
    theta = np.zeros((X.shape[1], 1))
    return X, Y, m, bias, theta

<h3>Normalize(data): Helper function to Normalize data</h3>

In [13]:
def Normalize(data):
    """Standardise each column of ``data`` to zero mean and unit deviation.

    Parameters
    ----------
    data : ndarray
        Values to normalise; statistics are taken along axis 0.

    Returns
    -------
    data : ndarray
        A new array holding ``(data - Mu) / Sigma``; the input is not modified.
    Mu : ndarray
        Per-column mean of the input.
    Sigma : ndarray
        Per-column standard deviation of the input, with zeros replaced by 1
        so that the same ``(x - Mu) / Sigma`` transform can be applied to new
        inputs without dividing by zero.
    """
    # Statistics must come from the argument, not from a notebook-level
    # variable that merely happens to hold the same array.
    Mu = np.mean(data, axis=0)
    Sigma = np.std(data, axis=0)
    # A constant column has zero deviation; dividing by 1 instead maps it to
    # all zeros rather than NaN.
    Sigma = np.where(Sigma == 0, 1.0, Sigma)
    data = (data - Mu) / Sigma
    return data, Mu, Sigma

<h3>GradDescent_CostCalc(iter1,X,theta,bias,Y,learningratebym,costweight): Function to run Gradient Descent and return the per-iteration costs, the final bias, and the final theta</h3>

In [14]:
def GradDescent_CostCalc(iter1,X,theta,bias,Y,learningratebym,costweight):
    """Run batch gradient descent for a linear model and record the cost.

    Parameters
    ----------
    iter1 : int
        Number of gradient-descent iterations to perform.
    X : ndarray
        Feature matrix; multiplied with ``theta`` via ``np.dot``.
    theta : ndarray
        Initial weights.
    bias : ndarray or scalar
        Initial intercept, added to ``np.dot(X, theta)``.
    Y : ndarray
        Target values, subtracted from the predictions.
    learningratebym : float
        Step size applied to the summed gradients (the caller is expected to
        have already divided the learning rate by the sample count).
    costweight : float
        Factor applied to the summed squared error to obtain the cost.

    Returns
    -------
    costs : list of float
        Cost recorded at each iteration, computed from the parameters in
        effect *before* that iteration's update.
    bias : ndarray
        Intercept after the final update.
    theta : ndarray
        Weights after the final update.
    """
    costs = []
    for _ in range(iter1):
        H = np.dot(X, theta) + bias
        diff = H - Y
        # np.sum(..., axis=0) reduces over samples exactly like the builtin
        # sum() would, but inside NumPy instead of a Python-level loop.
        J = costweight * np.sum(np.square(diff), axis=0)
        costs.append(J.item(0))
        theta = theta - learningratebym * np.dot(diff.T, X).T
        bias = bias - learningratebym * np.sum(diff, axis=0)
    return costs, bias, theta

<h3>CostCalc(X,theta,bias,Y,costweight): Function to calculate cost</h3>

In [15]:
def CostCalc(X,theta,bias,Y,costweight):
    """Return the weighted sum of squared errors of a linear model.

    Parameters
    ----------
    X : ndarray
        Feature matrix; multiplied with ``theta`` via ``np.dot``.
    theta : ndarray or scalar
        Model weights.
    bias : ndarray or scalar
        Intercept added to ``np.dot(X, theta)``.
    Y : ndarray
        Target values.
    costweight : float
        Factor applied to the summed squared error.

    Returns
    -------
    J : ndarray or scalar
        ``costweight`` times the squared error summed over the first axis
        (shape ``(1,)`` when the predictions are an ``(m, 1)`` column).
    """
    H = np.dot(X, theta) + bias
    diff = H - Y
    # Reduce over samples (axis 0) inside NumPy; the builtin sum() produces
    # the same shape but iterates row by row in Python.
    J = costweight * np.sum(np.square(diff), axis=0)
    return J

<h3>PlotData(Original_X,Normalized_X,Y,trainedtheta,trainedbias,costs,fignumber=1): Helper function to plot the data, the predicted target, and the costs</h3>

In [16]:
def PlotData(Original_X,Normalized_X,Y,trainedtheta,trainedbias,costs,fignumber=1):
    """Plot the data with the fitted line, and the cost per iteration.

    Parameters
    ----------
    Original_X : ndarray
        Feature values used for the horizontal axis of the top panel.
    Normalized_X : ndarray
        Feature values fed to the model to compute the predictions.
    Y : ndarray
        Labels, drawn as red '+' markers.
    trainedtheta, trainedbias : ndarray
        Model parameters; predictions are
        ``np.dot(Normalized_X, trainedtheta) + trainedbias``.
    costs : sequence of float
        One cost value per iteration, drawn in the bottom panel.
    fignumber : int, optional
        Matplotlib figure number to draw into.

    Notes
    -----
    Switches the global Matplotlib style to 'ggplot'. Does not call
    ``plt.show()``; the caller decides when to render.
    """
    plt.style.use('ggplot')
    plt.figure(fignumber)

    # Top panel: raw data and the model's predictions.
    plt.subplot(211)
    plt.plot(Original_X, Y, 'r+')
    plt.ylabel('Label')
    plt.xlabel('Feature')
    predictions = np.dot(Normalized_X, trainedtheta) + trainedbias
    plt.plot(Original_X, predictions, 'b-')

    # Bottom panel: cost history. The x-axis is derived from the costs
    # themselves so the function does not rely on a notebook-level
    # iteration counter and the two sequences always match in length.
    plt.subplot(212)
    plt.ylabel('Cost')
    plt.xlabel('Iteration')
    plt.plot(range(len(costs)), costs, '-')

<h3>ContourPlot(X,Y,costweight,plotspace_x = -10,plotspace_y = 20): Helper function to make the contour and surface plots of the cost</h3>

In [17]:
def ContourPlot(X,Y,costweight,plotspace_x = -10,plotspace_y = 20):
    """Draw contour and surface plots of the cost over a (bias, theta) grid.

    The cost is evaluated with ``CostCalc`` at every combination of 100 bias
    values and 100 theta values, both evenly spaced between ``plotspace_x``
    and ``plotspace_y``. Each theta is passed as a single scalar, so this is
    meant for a model with one weight.

    Parameters
    ----------
    X : ndarray
        Feature values forwarded to ``CostCalc``.
    Y : ndarray
        Labels forwarded to ``CostCalc``.
    costweight : float
        Cost scaling factor forwarded to ``CostCalc``.
    plotspace_x : float, optional
        Lower end of the range used for both bias and theta.
    plotspace_y : float, optional
        Upper end of the range used for both bias and theta.

    Notes
    -----
    Draws the contour plot into figure 2 and the surface plot into figure 3.
    Does not call ``plt.show()``.
    """
    bias_vals = np.linspace(plotspace_x, plotspace_y, 100)
    theta_vals = np.linspace(plotspace_x, plotspace_y, 100)

    # Rows follow theta and columns follow bias, matching the layout that
    # np.meshgrid(bias_vals, theta_vals) produces below.
    costs = np.zeros((len(theta_vals), len(bias_vals)))
    for i, bias_val in enumerate(bias_vals):
        for j, theta_val in enumerate(theta_vals):
            cost = CostCalc(X, theta_val, bias_val, Y, costweight)
            # CostCalc yields a one-element array here; extract the scalar
            # explicitly instead of relying on implicit array-to-scalar
            # conversion during item assignment.
            costs[j, i] = np.asarray(cost).item()

    # Separate names for the grids so the X / Y data arguments are not
    # overwritten.
    bias_grid, theta_grid = np.meshgrid(bias_vals, theta_vals)

    plt.figure(2)
    CS = plt.contour(bias_grid, theta_grid, costs, np.logspace(-2, 3, 20))
    plt.clabel(CS, inline=1, fontsize=10)
    plt.title('Costs Contour Plot')
    plt.ylabel(r'$\theta_1$')
    plt.xlabel(r'$\theta_0$')

    fig = plt.figure(3)
    # add_subplot(..., projection='3d') replaces fig.gca(projection='3d'),
    # which newer Matplotlib releases no longer accept.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(bias_grid, theta_grid, costs, rstride=1, cstride=1,
                    cmap='PuBu_r', linewidth=0, antialiased=False)
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')
    ax.set_zlabel('Costs Surface Plot')

<h2>Main Code below</h2>

In [None]:
# End-to-end run: load the data, normalise the features, fit the model with
# gradient descent, plot the results and predict for one new input.

# Read the comma-separated file '1DDS.txt' into features, labels, row count
# and zero-initialised parameters.
X,Y,m,bias,theta = ReadData('1DDS.txt',',')
# Keep the un-normalised features for plotting on the original scale.
Original_X = X
# Mu and Sigma are kept so a new input can be scaled the same way below.
X,Mu,Sigma = Normalize(X)
# Hyper-parameters.
learningrate = 0.01
iter1 = 1500
# Pre-divide by the sample count so the training loop works with summed
# (rather than averaged) gradients and squared errors.
learningratebym= learningrate/m
costweight = 1/(2*m)
costs,trainedbias,trainedtheta = GradDescent_CostCalc(iter1,X,theta,bias,Y,
                                                      learningratebym,costweight)
PlotData(Original_X,X,Y,trainedtheta,trainedbias,costs)
ContourPlot(X,Y,costweight,plotspace_x = -10,plotspace_y = 20)
# Predict for a single raw feature value; it is normalised with the training
# Mu / Sigma before being fed to the trained model.
actual_input = np.array([7.5]).reshape(1,1)
normalized_input=(actual_input-Mu)/Sigma
# NOTE(review): both input and prediction are multiplied by 10000 for display;
# presumably the dataset stores population and profit in units of 10,000 —
# confirm against the data source.
print('For population = {}, we predict a profit of {}'.format(
             actual_input.item(0) * 10000,(trainedbias + np.dot(normalized_input,trainedtheta)).item(0) * 10000))
plt.show()