In [9]:
import csv
import numpy as np
import scipy as sp
import matplotlib
from matplotlib import pyplot as plt

In [10]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
#############
## DATA IO ##
#############


def get_data(filepath):

    # Opens the file handler for the dataset file. Using variable 'f' we can access and manipulate our file anywhere in our code
    # after the next code line.

    f = open(filepath, 'r')

    # Predictors Collection (or your input variable) (which in this case is just the duration of eruption)

    x1 = []
    x2 = []
    x3 = []
    x4 = []


    # Output Response (or your output variable) (which in this case is the duration after which next eruption will occur.)

    y = []

    # Initializing a reader generator using reader method from csv module. A reader generator takes each line from the file
    # and converts it into list of columns.

    reader = csv.reader(f)

    # Using for loop, we are able to read one row at a time.

    # Iris-setosa
    # Iris-versicolor
    # Iris-virginica

    for row in reader:
        x1.append(float(row[0]))
        x2.append(float(row[1]))
        x3.append(float(row[2]))
        x4.append(float(row[3]))
        #print(row[3])
        if(row[4] == 'Iris-setosa'):
            y.append(1)
        else:
            y.append(0)

    # Close the file once we have succesffuly stored all data into our X and Y variables.

    f.close()

    return [[
        np.array(x1),
        np.array(x2),
        np.array(x3),
        np.array(x4)
        ], np.array(y)]


In [11]:
import math
def sigmoid(value):
    return (1/(1+math.exp(-value)))
    

In [12]:
#####################
## RSS Calculation ##
#####################

def Cost(x, y, betas):
    rss = 0
    for i in range(x[0].shape[0]):
        predicted_value = (betas[0] + (betas[1] * x[0][i]) + (betas[2] * x[1][i]) + (betas[3] * x[2][i]) + (betas[4] * x[3][i]))
        actual_value = y[i]
        y_ = (sigmoid(predicted_value))
        rss = rss + ((-y*math.log(y_))-((1-y)*math.log(1-y_)))
    return (rss/x[0].shape[0])

In [13]:
def compute_gradient(betas, x,index):
    val = (betas[0]) + (betas[1]*x[0][index]) + (betas[2]*x[1][index]) + (betas[3]*x[2][index]) + (betas[4]*x[3][index])
    val = sigmoid(val)
    return val

In [14]:
def gradientDescentAlgorithm(x, y, learning_rate):
    
    print ("Training Linear Regression Model using Gradient Descent")
    
    maximum_iterations = 10000
    
    # This flag lets the program know wether the gradient descent algorithm has reached it's converged state which means wether 
    # the algorithm was able to find the local minima (where the slope of RSS wrt your parameters beta_0 and beta_1 is zero)
    converge_status = False
    
    # num_rows stores the number of datapoints in the current dataset provided for training.
    num_rows = x[0].shape[0]

    # Initial Value of parameters 
    betas = [0,0,0,0,0]
    
    # Initial Error or RSS(beta_0,beta_1) based on the initial parameter values
    #error = RSS(x, y, beta_0, beta_1)
    error = Cost(x, y, betas)
    print('Initial Value (Cost Function)=', error);
    
    # Iterate Loop
    num_iter = 0
    while not converge_status:
        # for each training sample, compute the gradient (d/d_beta j(beta))
        gradient_0 = 1.0/num_rows * sum([(compute_gradient(betas,x,i) - y[i]) for i in range(num_rows)]) 
        gradient_1 = 1.0/num_rows * sum([(compute_gradient(betas,x,i) - y[i])*x[0][i] for i in range(num_rows)])
        gradient_2 = 1.0/num_rows * sum([(compute_gradient(betas,x,i) - y[i])*x[1][i] for i in range(num_rows)]) 
        gradient_3 = 1.0/num_rows * sum([(compute_gradient(betas,x,i) - y[i])*x[2][i] for i in range(num_rows)])
        gradient_4 = 1.0/num_rows * sum([(compute_gradient(betas,x,i) - y[i])*x[3][i] for i in range(num_rows)]) 


        
        
        # Computation of new parameters according to the current gradient.
        temp0 = betas[0] - learning_rate * gradient_0
        temp1 = betas[1] - learning_rate * gradient_1
        temp2 = betas[2] - learning_rate * gradient_2
        temp3 = betas[3] - learning_rate * gradient_3
        temp4 = betas[4] - learning_rate * gradient_4

    
        # Simultaneous Update of Parameters Beta_0 and Beta_1.
        betas[0] = temp0
        betas[1] = temp1
        betas[2] = temp2
        betas[3] = temp3
        betas[4] = temp4


        
        current_error = Cost(x, y, betas)
        
        if num_iter % 1000 == 0:
            print ('Current Value of RSS (Cost Function) based on updated values= ', type (current_error))
            
        error = current_error   # update error 
        num_iter = num_iter + 1  # update iter
    
        if num_iter == maximum_iterations:
            print ("Training Interrupted as Maximum number of iterations were crossed.\n\n")
            converge_status = True
    return [betas[0], betas[1],betas[2],betas[3],betas[4]]

In [15]:
# Method to predict response variable Y (in this case interval before the next erruption) for new values of X (in this case
# duration of eruption) using the estimated coefficientsself.
# This method can predict Response variable (Y) for single as well as multiple values of X. If only a single numerical Value
# input variable (X) which in this case is Duration is passed. It will return the prediction for only that single numerical
# value. If a collection of different values for input variable (list) is passed, it will return a list of predictions
# for each input value.
# "if" statement on line number 72 takes care of understanding if the input value is singular or a list.


def predict(coef,X):
    beta_0 = coef[0]
    beta_1 = coef[1]
    beta_2 = coef[2]
    beta_3 = coef[3]
    beta_4 = coef[4]

    
    #print(X[0])
    
    fy = []
    
    for x in X:
        fy.append(beta_0 + (beta_1 * x[0])+ (beta_2 * x[1]) + (beta_3 * x[2]) + (beta_4 * x[3]) )
    return fy
    
    return Y

In [16]:
X,Y = get_data("../Dataset/iris.csv")

################################################
## Model Training (or coefficient estimation) ##
################################################
# Using our gradient descent function we estimate coefficients of our regression line. The gradient descent function returns a list of 
# coefficients

coefficients = gradientDescentAlgorithm(X,Y,0.000000001)

########################
## Making Predictions ##
########################

# Using our predict function and the coefficients given by our slr function we can now predict the time it will take
# for the next eruption.
print("total parameters: ",coefficients.count)


print ("prediction",predict(coefficients,[[4.9,3.0,1.4,0.2],[4.9,3.0,1.4,0.2]]))

Training Linear Regression Model using Gradient Descent
Initial Value (Cost Function)= [ 0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718  0.69314718
  0.69314718  0.69314718  0.69314718 