In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def load_datasets(filename, data_columns, target_column):
    """Read a CSV file and split it into features and target.

    Parameters
    ----------
    filename : anything accepted by ``pd.read_csv`` (path or file-like object).
    data_columns : column label(s) selected as the feature part.
    target_column : column label selected as the target part.

    Returns
    -------
    tuple
        ``(features, target)`` taken from the loaded frame.
    """
    frame = pd.read_csv(filename)
    features = frame[data_columns]
    target = frame[target_column]
    return features, target

In [None]:
def plot_data(A, b, test = False):
    """Scatter-plot the first two columns of ``A``, split by the labels in ``b``.

    Rows whose label equals 1 are drawn with 'x' markers, rows whose label
    equals 0 with '+' markers.  ``A`` must support ``.iloc`` indexing.

    Parameters
    ----------
    A : table of points; column 0 is used as x, column 1 as y.
    b : labels, compared against 1 and 0.
    test : when truthy, use the yellow/blue palette instead of green/red.
    """
    if test:
        positive_colour, negative_colour = 'yellow', 'blue'
    else:
        positive_colour, negative_colour = 'green', 'red'

    # Drawing order matters for overlap: positives first, then negatives.
    groups = ((1, 'x', positive_colour), (0, '+', negative_colour))
    for label, marker, colour in groups:
        rows = np.where(b == label)[0]
        plt.scatter(A.iloc[rows, 0], A.iloc[rows, 1], marker=marker, c=colour)

In [None]:
def decision_boundary(X, y, mean, std, theta):
    """Plot the standardised data together with the linear decision boundary.

    The boundary is the line ``theta[0] + theta[1]*x1 + theta[2]*x2 = 0``
    drawn in the standardised feature space ``(X - mean) / std``.

    Parameters
    ----------
    X : raw feature table (must support ``.iloc``, as required by ``plot_data``).
    y : labels passed through to ``plot_data``.
    mean, std : values used to standardise ``X``.
    theta : parameters; indices 0, 1 and 2 are read (bias, first and second
        feature weight).
    """
    X_norm = (X - mean) / std

    # Column 0 of the prepared matrix is the bias column, so column 1 is the
    # first feature.  Both ends of the x-range must come from that same
    # feature (the previous code took the maximum from the second feature).
    x1 = prepare_X(X_norm)[:, 1]
    p_x = np.array([x1.min() - 0.1, x1.max() + 0.1])

    # Solve theta0 + theta1*x1 + theta2*x2 = 0 for x2.
    p_y = (-1/theta[2]) * (theta[1] * p_x + theta[0])

    plot_data(X_norm, y)
    plt.plot(p_x, p_y, label = "Decision_Boundary")

In [None]:
def normalize(X):
    """Standardise each column of ``X`` to zero mean and unit spread.

    Parameters
    ----------
    X : array-like or DataFrame; statistics are computed along axis 0.

    Returns
    -------
    tuple
        ``(X_new, mean, std)`` where ``X_new = (X - mean) / std``.  For a
        constant column (zero standard deviation) the returned ``std`` entry
        is 1 so that the column maps to 0 instead of NaN, and so that the
        returned ``std`` can be reused safely to scale new data.
    """
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)

    # Adding the boolean mask bumps exactly the zero entries up to 1 and
    # leaves every other entry unchanged; works for arrays and Series alike.
    std = std + (std == 0)

    X_new = (X - mean) / std
    return X_new, mean, std

In [None]:
def prepare_X(X):
    """Return ``X`` with a leading column of ones (the bias term) prepended.

    The number of rows is taken from ``X.shape[0]``; the result is a NumPy
    array built with ``np.column_stack``.
    """
    n_rows = X.shape[0]
    bias_column = np.ones((n_rows, 1))
    return np.column_stack((bias_column, X))

In [None]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    ``exp`` is only ever applied to ``-|z|`` (a non-positive number), so very
    negative inputs no longer overflow to ``inf`` and emit a RuntimeWarning.

    Parameters
    ----------
    z : scalar or array-like of real numbers.

    Returns
    -------
    Values in ``[0, 1]`` with the same shape as ``z``.
    """
    decay = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + e^-z)
    # z <  0:  e^z / (1 + e^z)   (algebraically the same value)
    numerator = np.where(z >= 0, 1.0, decay)
    g = numerator / (1.0 + decay)
    return g

In [None]:
def h(X, theta):
    """Hypothesis of the model: the sigmoid of the product ``X . theta``."""
    return sigmoid(np.dot(X, theta))

In [None]:
def cost_function(X, y, theta):
    """Mean cross-entropy (log-loss) of the hypothesis ``h(X, theta)``.

    Parameters
    ----------
    X : design matrix; ``X.shape[0]`` is the number of samples.
    y : labels, combined element-wise with the hypothesis output.
    theta : model parameters.

    Returns
    -------
    The scalar cost, or ``None`` when ``X`` has no rows.
    """
    m = X.shape[0]
    if m == 0:
        return None

    # Keep the probabilities strictly inside (0, 1).  A saturated prediction
    # of exactly 0 or 1 would otherwise give log(0) = -inf and, multiplied by
    # a zero label term, a NaN cost.
    eps = 1e-15
    h_X = np.clip(h(X, theta), eps, 1 - eps)

    J = -np.sum(y * np.log(h_X) + (1 - y) * np.log(1 - h_X)) / m
    return J

In [None]:
def derivative_theta(X, y, theta):
    """Gradient of the cost with respect to ``theta``.

    Computes ``X.T . (h(X, theta) - y) / m`` where ``m`` is the number of
    rows of ``X``.  Returns ``None`` when ``X`` has no rows.
    """
    n_samples = X.shape[0]
    if n_samples == 0:
        return None

    residual = h(X, theta) - y
    gradient = np.dot(X.T, residual) / n_samples
    return gradient

In [None]:
def gradient_descent(X, y, theta, alpha, epsilon, num_iters, print_J = True):
    """Minimise the cost with batch gradient descent.

    Parameters
    ----------
    X, y : training data passed to ``cost_function`` / ``derivative_theta``.
    theta : starting parameters (not modified in place; a new value is
        rebound on every step).
    alpha : learning rate.
    epsilon : stop as soon as the absolute change of the cost between two
        consecutive steps drops below this value.
    num_iters : maximum number of update steps.
    print_J : when truthy, print the initial cost and then the cost on every
        1000th step.

    Returns
    -------
    tuple
        ``(theta, J_history)``.  ``J_history[0]`` is the cost before any
        update, followed by one entry per performed step.  For an empty
        training set (``cost_function`` returns ``None``) the input ``theta``
        and an empty history are returned.
    """
    J = cost_function(X, y, theta)
    if J is None:
        # No samples: there is nothing to optimise and no gradient to follow.
        return theta, []

    J_history = [J]
    if print_J:
        print(J)

    for i in range(num_iters):
        theta = theta - alpha * derivative_theta(X, y, theta)

        J = cost_function(X, y, theta)
        J_history.append(J)

        if print_J and i % 1000 == 0:
            print(J)

        # Converged: the last step barely changed the cost.
        if abs(J - J_history[-2]) < epsilon:
            break

    return theta, J_history

In [None]:
def predict(X, mean, std, theta):
    """Evaluate the hypothesis for raw (unscaled) inputs.

    The inputs are first standardised with the supplied ``mean`` and ``std``
    (previously these arguments were accepted but ignored, so raw values were
    fed to a model whose parameters were fitted on standardised features).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features) in the original units.
    mean, std : per-feature statistics, in the same column order as ``X``.
    theta : fitted parameters, bias first.

    Returns
    -------
    The output of ``h`` for every row of ``X``.
    """
    # Work on plain arrays so the scaling is positional: ``mean``/``std`` may
    # be pandas Series while ``X`` is a bare ndarray.
    X_scaled = (np.asarray(X, dtype=float) - np.asarray(mean)) / np.asarray(std)
    predictions = h(prepare_X(X_scaled), theta)
    return predictions

In [None]:
# Feature columns and target column to pull out of the CSV file.
data_columns = ["exam1", "exam2"]
target_column = "submitted"
# Path is relative to the notebook's working directory.
X, y = load_datasets('data/sats.csv', data_columns, target_column)
# Quick sanity check of the loaded shapes.
print('Training set: X={}, y={}'.format(X.shape, y.shape))

In [None]:
# Scatter plot of the raw (unscaled) features, split by label.
plot_data(X, y)

In [None]:
# Standardise the features; mean and std are kept for scaling data later on.
X_new, mean, std = normalize(X)

In [None]:
# Prepend the column of ones. NOTE: this rebinds X_new, so re-running only
# this cell would add a second column of ones.
X_new = prepare_X(X_new)
# Column vector of labels, one row per sample.
y_new = y.values.reshape((X.shape[0], 1))

In [None]:
# Start from all-zero parameters: one row per column of the design matrix.
theta = np.zeros((X_new.shape[1], 1))

In [None]:
# Cost at the all-zero theta: every prediction is sigmoid(0) = 0.5, so this
# should come out as ln(2) ~= 0.693.
cost_function(X_new, y_new, theta)

In [None]:
# Learning rate for gradient descent.
alpha = 0.01

In [None]:
# Train: epsilon = 1e-7 (convergence tolerance on the cost change),
# at most 1,000,000 iterations, with progress printing enabled.
new_theta, Js = gradient_descent(X_new, y_new, theta, alpha , 1e-7, 1000000, True)

In [None]:
# Fitted parameters and the number of recorded cost values.
print(new_theta, len(Js))

In [None]:
# Js holds one cost value per gradient-descent step (index 0 is the cost
# before the first update), so the x axis counts single iterations.
plt.plot(Js)
plt.ylabel('cost')
plt.xlabel('iterations')
plt.title("Learning rate =" + str(alpha))
plt.show()

In [None]:
# Cost on the training data with the fitted parameters.
cost_function(X_new, y_new, new_theta)

In [None]:
# Pass the raw X: decision_boundary applies mean/std itself before plotting.
decision_boundary(X, y, mean, std, new_theta)

In [None]:
# Two sample inputs in raw units, columns ordered as data_columns
# (exam1, exam2).
T = np.array([[40, 40], [60, 65]])
# Model output h(x) for each sample row.
print(predict(T, mean, std, new_theta))