# 1. Logistic Regression

In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt

# Read the first data set from ./data relative to the current working directory.
# The file has no header row, so the three column names are supplied here.
data_file_1 = Path(os.path.abspath("")) / "data" / "ex2data1.txt"
data_set_1 = pd.read_csv(
    data_file_1,
    header=None,
    names=["Score_1", "Score_2", "Admitted"],
)

# x holds the two score columns; y keeps its column axis, so it has shape (m, 1).
x = np.array(data_set_1[["Score_1", "Score_2"]])
y = np.array(data_set_1[["Admitted"]])
m, n = x.shape
print(m, n)
print(x)
print(y)

## 1.1 Visualizing the data

In [None]:
# Split the records on the label in column 2 using boolean masks. This replaces
# growing two arrays row by row with np.append, which copies the whole array on
# every call.
records_1 = np.array(data_set_1, dtype=float)
is_admitted = records_1[:, 2] == 1
admitted_list = records_1[is_admitted]
non_admitted_list = records_1[~is_admitted]

# Scatter plot of the two scores, coloured by label.
plt.plot(admitted_list[:, 0], admitted_list[:, 1], 'go', label="Admitted")
plt.plot(non_admitted_list[:, 0], non_admitted_list[:, 1], 'ro', label="Non-admitted")
plt.xlabel("Score_1")
plt.ylabel("Score_2")
plt.legend(loc="upper right")
plt.show()


## 1.2 Implementation
### 1.2.1 Sigmoid function

In [None]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), applied element-wise to `z`."""
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)

# Plot the sigmoid at 50 evenly spaced points on [-10, 10].
sigmoid_x = np.linspace(-10, 10, num=50)
sigmoid_y = sigmoid(sigmoid_x)
plt.plot(sigmoid_x, sigmoid_y)
plt.ylabel("sigmoid(z)")
plt.xlabel("z")
plt.show()

### 1.2.2 Cost function and gradient

In [None]:
# Design matrix: a leading column of ones followed by the columns of x.
intercept_column = np.ones((m, 1), dtype=x.dtype)
X = np.hstack((intercept_column, x))

# Initial parameters: all zeros, one per column of X, as a column vector.
init_theta = np.zeros(n + 1).reshape(-1, 1)

def cost(theta, X, y):
    """Mean logistic-regression (cross-entropy) cost of the parameters `theta`.

    Args:
        theta: parameter vector, shape (n,) or (n, 1).
        X: design matrix, shape (m, n).
        y: 0/1 labels, shape (m,) or (m, 1).

    Returns:
        The cost as a Python float.
    """
    z = X @ theta
    m = X.shape[0]
    # log(sigmoid(z)) == -logaddexp(0, -z) and log(1 - sigmoid(z)) == -logaddexp(0, z).
    # Writing the logs this way keeps them finite when the sigmoid saturates to
    # exactly 0 or 1; the direct form evaluates log(0) = -inf there, and the
    # 0 * -inf products turn the whole cost into nan.
    j = (y.T @ np.logaddexp(0, -z) + (1 - y).T @ np.logaddexp(0, z)) / m
    return j.item()

def gradient(theta, X, y):
    """Gradient of the logistic-regression cost with respect to `theta`.

    Computes (1 / m) * X^T (sigmoid(X theta) - y), where m is the number of rows of X.
    """
    sample_count = X.shape[0]
    residual = sigmoid(X @ theta) - y
    return 1 / sample_count * X.T @ residual

# Cost and gradient at theta = [[0], [0], [0]].
print(cost(init_theta, X, y))
print(gradient(init_theta, X, y))

# Cost and gradient at theta = [[-24], [0.2], [0.2]].
# A plain ndarray column vector is used rather than np.matrix, which NumPy no
# longer recommends; it is built once and reused for both calls.
test_theta = np.array([[-24], [0.2], [0.2]])
print(cost(test_theta, X, y))
print(gradient(test_theta, X, y))

### 1.2.3 Learning parameters using library

In [None]:
# Use the TNC algorithm to find the theta that minimizes the cost.
# scipy.optimize.minimize documents x0 as an array of shape (n,), so the (n, 1)
# initial guess is flattened; y is flattened as well so that it lines up with the
# 1-D theta the optimizer passes to cost() and gradient().
res = opt.minimize(fun=cost, x0=init_theta.flatten(), args=(X, y.flatten()), method='TNC', jac=gradient)
learned_theta = res.x
print(res)
print(learned_theta)

# Decision boundary: theta0 + theta1 * x1 + theta2 * x2 = 0, solved for x2 over
# the observed range of the first score.
res_x1 = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)
res_x2 = (0 - learned_theta[0] - learned_theta[1] * res_x1) / learned_theta[2]
print(res_x1)
print(res_x2)

plt.plot(admitted_list[:, 0], admitted_list[:, 1], 'go', label="Admitted")
plt.plot(non_admitted_list[:, 0], non_admitted_list[:, 1], 'ro', label="Non-admitted")
# Label the line so it appears in the legend alongside the two classes.
plt.plot(res_x1, res_x2, "-", label="Decision boundary")
plt.xlabel("Score_1")
plt.ylabel("Score_2")
plt.legend(loc="upper right")
plt.show()

### 1.2.4 Evaluating logistic regression

In [None]:
def hypothesis(theta, x):
    """Return sigmoid(theta^T x) for a single example `x`, as a Python scalar."""
    z = theta.T @ x
    return sigmoid(z).item()

# Model output for the column vector [1, 45, 85]: the leading 1 matches the
# column of ones in X, followed by the two scores.
example_student = np.array([[1], [45], [85]])
example_probability = hypothesis(learned_theta, example_student)
print("probability: {}".format(example_probability))

def predict(theta, X):
    """Predict 0/1 labels for every row of `X` by thresholding sigmoid(X theta) at 0.5.

    Args:
        theta: parameter vector, shape (n,) or (n, 1).
        X: design matrix, shape (m, n).

    Returns:
        A 1-D integer array with one prediction per row of X.
    """
    p = sigmoid(X @ theta)
    # Threshold all probabilities in one vectorized comparison. Flattening first
    # keeps the result 1-D even when a column-vector theta makes p (m, 1).
    return (np.asarray(p).ravel() >= 0.5).astype(int)

# Fraction of training examples whose predicted label equals the true label.
predict_res = predict(learned_theta, X)
predict_accuracy = np.mean(predict_res == y[:, 0])
print("accuracy on training data: {}".format(predict_accuracy))

# 2. Regularized logistic regression
## 2.1 Visualizing the data

In [None]:
# Read the second data set from ./data relative to the current working directory.
# os.path.abspath already returns an absolute path, so no further .absolute() is needed.
data_file_2 = Path(os.path.abspath("")) / "data" / "ex2data2.txt"
data_set_2 = pd.read_csv(data_file_2, header=None, names=["Test_1", "Test_2", "Accepted"])

# NOTE: x, y, m and n are rebound here and now refer to the second data set.
x = np.array(data_set_2.iloc[:, 0:2])
y = np.array(data_set_2.iloc[:, 2:])
m, n = x.shape

# Split the records on the label in column 2 using boolean masks; unlike a
# row-wise comprehension this keeps a (0, 3) shape if a class has no rows.
records_2 = np.array(data_set_2)
accepted_list = records_2[records_2[:, 2] == 1]
rejected_list = records_2[records_2[:, 2] == 0]

print(accepted_list)
print(rejected_list)

plt.plot(accepted_list[:, 0], accepted_list[:, 1], 'go', label="Accepted")
plt.plot(rejected_list[:, 0], rejected_list[:, 1], 'ro', label="Rejected")
plt.xlabel("Test_1")
plt.ylabel("Test_2")
plt.legend(loc="upper right")
plt.show()

## 2.2 Feature mapping

In [None]:
# Map (x1, x2) to all polynomial terms of x1 and x2 up to `degree`, with a
# leading column of ones. For the default degree of 6 the columns are
# (1, x1, x2, x1^2, x1 x2, x2^2, x1^3, x1^2 x2, x1 x2^2, x2^3, ..., x1 x2^5, x2^6).
def mapFeature(x, degree=6):
    """Expand a two-feature matrix into polynomial features.

    Args:
        x: 2-D array whose first two columns are x1 and x2.
        degree: highest total power to generate (default 6, the value that was
            previously hard-coded).

    Returns:
        A 2-D array with a column of ones, then the columns of `x`, then for each
        power p = 2..degree the terms x1^p, x1^(p-1) x2, ..., x2^p in that order.
    """
    # -1 lets NumPy infer the row count, giving column vectors of shape (m, 1).
    x1 = x[:, 0].reshape((-1, 1))
    x2 = x[:, 1].reshape((-1, 1))
    # Collect every column first and stack once, instead of re-allocating the
    # whole matrix with np.append for each new term.
    columns = [np.ones_like(x1), x]
    for power in range(2, degree + 1):
        for x2_power in range(power + 1):
            columns.append(x1 ** (power - x2_power) * x2 ** x2_power)
    return np.hstack(columns)

# Replace the design matrix with the polynomial expansion of x, then show its
# shape followed by its contents.
X = mapFeature(x)
print(X.shape, X, sep="\n")

## 2.3 Cost function and gradient

In [None]:
def regularized_cost(theta, lamda, X, y):
    """Logistic cost plus the penalty lamda / (2m) * sum(theta[1:] ** 2).

    theta[0] is excluded from the penalty; m is the number of rows of X.
    """
    sample_count = X.shape[0]
    # Copy of theta with its first entry replaced by 0, so that entry adds nothing.
    penalised_theta = np.insert(theta[1:], 0, 0, axis=0)
    squared_norm = (penalised_theta.T @ penalised_theta).item()
    return cost(theta, X, y) + lamda / (2 * sample_count) * squared_norm

def regularized_gradient(theta, lamda, X, y):
    """Gradient of the regularized cost: the logistic gradient plus (lamda / m) * theta.

    The first entry of theta is replaced by 0 in the added term, so theta[0]
    receives no penalty gradient; m is the number of rows of X.
    """
    sample_count = X.shape[0]
    penalised_theta = np.insert(theta[1:], 0, 0, axis=0)
    penalty_gradient = lamda / sample_count * penalised_theta
    return gradient(theta, X, y) + penalty_gradient

lamda = 1
init_theta = np.zeros((X.shape[1], 1))

# Check with an all-zeros theta and lambda = 1.
print(regularized_cost(init_theta, lamda, X, y))
print(regularized_gradient(init_theta, lamda, X, y))

# Check with an all-ones theta and lambda = 10.
ones_theta = np.ones((X.shape[1], 1))
print(regularized_cost(ones_theta, 10, X, y))
print(regularized_gradient(ones_theta, 10, X, y))


### 2.3.1 Learning parameters

In [None]:
# Fit the regularized model with TNC.
# scipy.optimize.minimize documents x0 as an array of shape (n,), so the (n, 1)
# initial guess is flattened; y is flattened as well so that it lines up with the
# 1-D theta the optimizer passes to the cost and gradient functions.
res = opt.minimize(fun=regularized_cost, x0=init_theta.flatten(), args=(lamda, X, y.flatten()), method='TNC', jac=regularized_gradient)
learned_theta = res.x

print(res)
print(learned_theta)

# Fraction of training examples whose predicted label equals the true label.
predict_res = predict(learned_theta, X)
predict_accuracy = np.mean(predict_res == y[:, 0])
print("accuracy on training data: {}".format(predict_accuracy))

## 2.4 Plotting the decision boundary

In [None]:
# A single 20x20-inch figure, and the regularization strengths to compare.
fig = plt.figure(figsize=(20, 20))
lamda_list = [0, 1, 10, 100]

def power_func(x1, x2, max_power):
    """Sum every monomial x1^a * x2^b with 1 <= a + b <= max_power, element-wise.

    The terms are added with no coefficients. `x1` must be an array (its `.shape`
    sets the shape of the result); the result is a float array.
    """
    total = np.zeros(x1.shape)
    for degree in range(1, max_power + 1):
        # Within one degree, walk from x1^degree down to x2^degree.
        for x2_exp in range(degree + 1):
            total += x1 ** (degree - x2_exp) * x2 ** x2_exp
    return total


# Build a 100x100 grid over the observed range of both features and feature-map
# it once; the grid is the same for every lambda.
grid_x1 = np.linspace(x[:, 0].min(), x[:, 0].max(), 100)
grid_x2 = np.linspace(x[:, 1].min(), x[:, 1].max(), 100)
grid_X1, grid_X2 = np.meshgrid(grid_x1, grid_x2)
grid_features = mapFeature(np.column_stack((grid_X1.ravel(), grid_X2.ravel())))

for i, lamda in enumerate(lamda_list):
    # x0 is flattened because scipy.optimize.minimize expects shape (n,).
    res = opt.minimize(fun=regularized_cost, x0=init_theta.flatten(), args=(lamda, X, y.flatten()), method='TNC', jac=regularized_gradient)
    learned_theta = res.x
    # theta^T * mapped features at every grid point. The decision boundary is
    # where this is 0 (sigmoid = 0.5). It is stored under its own name: writing
    # the grid into `y` would replace the training labels and break the fit on
    # the next iteration.
    boundary_z = (grid_features @ learned_theta).reshape(grid_X1.shape)
    ax = fig.add_subplot(2, 2, i + 1)
    ax.set_title("lambda = {}".format(lamda))
    # Show the training points so the boundary can be judged against the data.
    ax.plot(accepted_list[:, 0], accepted_list[:, 1], 'go', label="Accepted")
    ax.plot(rejected_list[:, 0], rejected_list[:, 1], 'ro', label="Rejected")
    # Draw only the zero level set, i.e. the decision boundary itself.
    ax.contour(grid_X1, grid_X2, boundary_z, levels=[0])
    ax.set_xlabel("Test_1")
    ax.set_ylabel("Test_2")
    ax.legend(loc="upper right")

plt.show()