# CSE 6240 - Homework 2 (Part 1)
## submitted by Nilaksh Das
---

In [20]:
import csv
import math
import numpy as np
from scipy.optimize import fmin_bfgs
from matplotlib import pyplot as plt

In [21]:
%matplotlib inline

---

In [22]:
def sigmoid(z):
    return 1.0 / (1.0 + (math.e ** (-1.0 * z)))

In [23]:
def h_theta(theta, X):
    theta = theta.reshape(len(theta), 1)
    
    return sigmoid(X.dot(theta))

In [37]:
def J_theta(theta, X, y, lambda_reg = 0.0):
    m = float(X.shape[0])
    
    cost = (-1.0 * y) * np.log(h_theta(theta, X)) - ((1.0 - y) * np.log(1.0 - h_theta(theta, X)))
    cost += (float(lambda_reg) / (2.0 * m)) * np.sum(theta[1:])
    
    for i in range(cost.size):
        if np.isnan(cost[i][0]):
            cost[i][0] = 0
        elif (cost[i][0] == np.inf):
            cost[i][0] = 10000
    
    cost = np.sum(cost) / float(m)
        
    print('cost', cost)
    
    return cost

In [25]:
def grad_J_theta(theta, X, y, lambda_reg = 0.0):
    m = float(X.shape[0])
    
    gradient = (1.0 / m) * np.sum((h_theta(theta, X) - y) * X, axis=0)
    gradient[1:] += (float(lambda_reg) / m) * theta[1:]
    
    return gradient

In [26]:
def train(X, y, regularization_parameter = 0.0):
    m, n = X.shape
    
    X_ = np.concatenate((np.ones((m, 1), dtype=float), X), axis=1)
    y_ = y.reshape((m, 1))
    
    n += 1
    
    theta = np.random.rand(n, 1)

    return fmin_bfgs(J_theta, theta, fprime=grad_J_theta, args=(X_, y_, regularization_parameter))

In [27]:
def predict(theta, X):
    m, n = X.shape
    
    X_ = np.concatenate((np.ones((m, 1), dtype=float), X), axis=1)
    
    return (h_theta(theta, X_) > 0.5).astype(int)

---

In [28]:
def load_dataset(filepath = 'data/ex2data1.txt'):
    X = []
    y = []
    
    with open(filepath, 'r') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',')
        
        for row in csvreader:
            X.append([float(row[0]), float(row[1])])
            y.append([int(row[2])])
            
    return np.array(X), np.array(y).reshape((len(y), 1))

In [29]:
X, y = load_dataset('data/ex2data1.txt')

In [38]:
theta = train(X, y, regularization_parameter=1)

100
cost 4000.0
100
cost 6000.0
100
cost 112.653160387
100
cost 6.82311282428
100
cost 510.880019883
100
cost 6.46205053567
100
cost 7.4572288779
100
cost 1.06544708783
100
cost 218.054856786
100
cost 15.0852199117
100
cost 0.705297237744
100
cost 2.04236906465
100
cost 0.704620393403
100
cost 0.703306763031
100
cost 0.699077563726
100
cost 0.689820739988
100
cost 0.673794703874
100
cost 0.645478043041
100
cost 0.595548521475
100
cost 0.510484002863
100
cost 0.381547209325
100
cost 0.287362133396
100
cost 0.236251297758
100
cost 0.214061351907
100
cost 0.20754577837
100
cost 0.206340845447
100
cost 0.206162390329
100
cost 0.206030632198
100
cost 0.205820254851
100
cost 0.205621233875
100
cost 0.205545293551
100
cost 0.205529259909
100
cost 0.205528932361
100
cost 0.205529152439
100
cost 0.205528947658
100
cost 0.205528934076
100
cost 0.205528932566
100
cost 0.205528932386
100
cost 0.205528932364
100
cost 0.205528932361
100
cost 0.205528932361
100
cost 0.205528932361
100
cost 0.20552893

  from ipykernel import kernelapp as app


In [32]:
p = predict(theta, X)

print ('Training Accuracy: ' , ((float(y[np.where(p == y)].size) / float(y.size)) * 100.0))

Training Accuracy:  89.0
