<a href="https://colab.research.google.com/github/AllyHyeseongKim/CAU11934_MachineLarning/blob/master/assignment/07/assignment07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment07: Logistic regression for a binary classification with a regularization

## 1. Load the input data (text file)

### Mount the google drive

In [0]:
# Mount Google Drive inside the Colab VM so the data file can be read from it.
from google.colab import drive

# After mounting, the Drive contents are reachable below '/content/gdrive'.
drive.mount('/content/gdrive')

In [0]:
cd gdrive/My Drive/Colab Notebooks/Machine Learning/assignment06

In [0]:
ls

### Load the Data

Load a set of the data $\{ (x^{(i)}, y^{(i)}, l^{(i)}) \}$ from the given `text file` (`'data-nonlinear.txt'`) for training. \\
Each row $\{ (x^{(i)}, y^{(i)}, l^{(i)}) \}$ of the data consists of a 2-dimensional point $(x, y)$ with its label $l$, $\text{where $x, y \in \mathbb{R}$ and $l \in \{0, 1\}$}$. \\
Plot the set of points $\{ (x^{(i)}, y^{(i)}) \}$ that are loaded from `'data-nonlinear.txt'` file. \\

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# Read the comma-separated training file; every row is (x, y, label).
data = np.genfromtxt("data-nonlinear.txt", delimiter=',', dtype=np.float64)

# Split the columns into coordinates and class labels.
x_train, y_train, label = data[:, 0], data[:, 1], data[:, 2]

# Number of training samples.
m = len(x_train)

# Coordinates of the points grouped by class.
pointX0, pointY0 = x_train[label == 0], y_train[label == 0]
pointX1, pointY1 = x_train[label == 1], y_train[label == 1]

# Scatter plot: label 0 in blue, label 1 in red, with equal axis scaling.
plt.figure(figsize=(8, 8))
for points_x, points_y, colour in ((pointX0, pointY0, 'b'), (pointX1, pointY1, 'r')):
    plt.scatter(points_x, points_y, c=colour)
plt.tight_layout()
plt.gca().set_aspect('equal', adjustable='box')
plt.show()

## 2. Generate the Logistic Regression Model

### Generate the `logistic regression`

Define the following `logistic regression`.

\begin{equation*}
\hat{h} = \sigma(z) \\
z = g(x, y; \theta), \quad\text{where $g$ is a high dimensional function and $\theta \in \mathbb{R}^k$} \\
\theta = (\theta_0, \theta_1, ..., \theta_{k-1}) \\
g(x, y; \theta) = \theta_0f_0(x, y) + \theta_1f_1(x, y) + ... + \theta_{k-1}f_{k-1}(x, y) \\
\sigma(z) = \frac{1}{1 + exp(-z)} \\
\sigma'(z) = \sigma(z)(1 - \sigma(z)) \\
\end{equation*}

Define the `dimension` $k$ of $\theta$, where $k \leq 16$.

\begin{equation*}
k  = 15
\end{equation*}

In [0]:
# Dimension of theta: one offset (theta_0) plus k - 1 weights, matching the
# 15 basis functions f_0 ... f_14 returned by f_k.
k = 15

Define the `function` $f_k(x, y)$:
\begin{equation*}
f_0(x, y) = -1.5 \\
f_1(x, y) = (x-0.25)^2, 
f_2(x, y) = (y-0.25)^2, \\
f_3(x, y) = x^4, 
f_4(x, y) = y^4, \\ 
f_5(x, y) = x^6, 
f_6(x, y) = y^6, 
f_7(x, y) = x^8, \\ 
f_8(x, y) = y^8, 
f_9(x, y) = x^{10},
f_{10}(x, y) = y^{10}, \\
f_{11}(x, y) = x^{12}, 
f_{12}(x, y) = y^{12}, \\
f_{13}(x, y) = xy, 
f_{14}(x, y) = x^2y^2
\end{equation*}

In [0]:
def f_k(x, y):
    """Evaluate the 15 basis functions f_0 ... f_14 at the point (x, y).

    The result is a list ordered as
    [-1.5, (x-0.25)^2, (y-0.25)^2, x^4, y^4, x^6, y^6, x^8, y^8,
     x^10, y^10, x^12, y^12, x*y, x^2*y^2].
    The arithmetic is element-wise, so x and y may be scalars or arrays
    (the constant first entry stays a plain float).
    """
    # Constant term followed by the two shifted quadratics.
    features = [-1.5, (x - 0.25) ** 2, (y - 0.25) ** 2]
    # Even powers 4..12, interleaved as x^p, y^p.
    for power in (4, 6, 8, 10, 12):
        features.append(x ** power)
        features.append(y ** power)
    # Mixed terms.
    features.append(x * y)
    features.append((x ** 2) * (y ** 2))
    return features

In [0]:
# Feature table for the training set: f[i][j] = f_j(x_train[i], y_train[i]).
f = [f_k(x_train[row], y_train[row]) for row in range(m)]

Define the `function` $g(x, y; \theta)$:
\begin{equation}
g(x, y; \theta) = \theta_0f_0(x, y) + \theta_1f_1(x, y) + ... + \theta_{k-1}f_{k-1}(x, y)
\end{equation}

In [0]:
def g(weight, offset):
    """Evaluate g(x, y; theta) for every training sample.

    offset is theta_0 and weight[j] is theta_{j+1}; the features come from the
    precomputed global table f. Returns a list with one value per sample.
    """
    scores = []
    for sample in range(m):
        features = f[sample]
        # Start with the offset term, then add the k - 1 weighted features in order.
        total = offset * features[0]
        for col in range(1, k):
            total = total + weight[col - 1] * features[col]
        scores.append(total)
    return scores

Define the following `sigmoid function`.

\begin{equation*}
\hat{h} = \sigma(z), \text{ where }
\sigma(z) = \frac{1}{1 + exp(-z)}, \\
z = g(x, y; \theta) = \theta_0f_0(x, y) + \theta_1f_1(x, y) + ... + \theta_{k-1}f_{k-1}(x, y) \\
\end{equation*}

In [0]:
def logistic_regression(weight, offset):
    """Return sigma(g(x, y; theta)) for every training sample as a list.

    weight and offset are passed straight to g(); the sigmoid
    1 / (1 + exp(-z)) is applied to each of its m outputs.
    """
    scores = g(weight, offset)
    return [1 / (1 + np.exp(-scores[sample])) for sample in range(m)]

## 3. Generate the `Cost Function` with `Gradient Descent` method

### Generate the `objective function`

Define the following `objective function`.

\begin{equation*}
J(\theta) = \frac{1}{m}\sum_{i = 1}^m(-l^{(i)}log(\sigma(g(x^{(i)}, y^{(i)}; \theta))) - (1 - l^{(i)})log(1 - \sigma(g(x^{(i)}, y^{(i)}; \theta))))
\end{equation*}

In [0]:
def objective_function(y_logistic_regression):
    """Mean cross-entropy between the predictions and the training labels.

    y_logistic_regression holds sigma(g(...)) for each of the m training
    samples. Each probability is clipped into [eps, 1 - eps] before the
    logarithm, so a saturated prediction (exactly 0 or 1) gives a large
    finite loss instead of inf or NaN. Values already inside that interval
    are unchanged.
    """
    eps = 1e-15
    error = []
    for i in range(m):
        h = np.clip(y_logistic_regression[i], eps, 1 - eps)
        error.append((-label[i]) * np.log(h) - (1 - label[i]) * np.log(1 - h))
    return sum(error) / m

### Generate the `gradient descent`

Define the following `derivative`. Because $g$ is linear in $\theta$, only the $k$-th term survives:
\begin{equation}
\frac{\partial g(x^{(i)}, y^{(i)}; \theta^{(t)})}{\partial \theta_k} = \frac{\partial \theta_0f_0(x, y)}{\partial \theta_k} + \frac{\partial \theta_1f_1(x, y)}{\partial \theta_k} + ... + \frac{\partial \theta_{k-1}f_{k-1}(x, y)}{\partial \theta_k} = f_k(x^{(i)}, y^{(i)})
\end{equation}

In [0]:
def derivation_g(n_data, k):
    """Partial derivative of g with respect to theta_k at training sample n_data.

    g is linear in theta, so the derivative is the k-th feature of that
    sample, read from the precomputed global table f.
    """
    sample_features = f[n_data]
    return sample_features[k]

Define the `learning rate`.

\begin{equation*}
\alpha  = 0.0009
\end{equation*}

In [0]:
# Step size alpha applied to every parameter update in gradient_descent.
learning_rate = 0.0009

Define the following `gradient descent`.

\begin{equation*}
\theta_k^{(t+1)} := \theta_k^{(t)} - \alpha\frac{1}{m}\sum_{i = 1}^m(\sigma(g(x^{(i)}, y^{(i)}; \theta)) - l^{(i)})\frac{\partial g(x^{(i)}, y^{(i)}; \theta^{(t)})}{\partial \theta_k}, \quad\text{for all $k$} \\
\end{equation*}

In [0]:
def gradient_descent(weight, offset, y_logistic_regression):
    """Perform one gradient-descent step on all parameters.

    weight holds theta_1 ... theta_{k-1}, offset is theta_0 and
    y_logistic_regression contains the current predictions for the m training
    samples. Returns the tuple (updated weights as a list, updated offset).
    """
    # Prediction error sigma(g) - label for every sample.
    residuals = [y_logistic_regression[n] - label[n] for n in range(m)]

    # Offset update: mean of residual * dg/dtheta_0.
    offset_terms = [residuals[n] * derivation_g(n, 0) for n in range(m)]
    offsetPrime = offset - learning_rate * sum(offset_terms) / m

    # Weight updates: the same rule applied to theta_1 ... theta_{k-1}.
    weightPrime = []
    for col in range(1, k):
        weight_terms = [residuals[n] * derivation_g(n, col) for n in range(m)]
        weightPrime.append(weight[col - 1] - learning_rate * sum(weight_terms) / m)
    return weightPrime, offsetPrime

## 4. `Train` the input data

Define the initial `weight`$(\theta_1^{(0)}, \theta_2^{(0)}, ..., \theta_{k-1}^{(0)})$ and `offset`$(\theta_0^{(0)})$:

\begin{equation*}
\theta_0^{(0)} = 1, \theta_1^{(0)} = 2, \theta_2^{(0)} = 3, \theta_3^{(0)} = 2, \\
\theta_4^{(0)} = \theta_5^{(0)} = \theta_6^{(0)} = ... = \theta_{13}^{(0)} = 1, \theta_{14}^{(0)} = 2.
\end{equation*}

In [0]:
# Parameter histories, one entry per gradient-descent iteration:
# offset[t] is theta_0 and weight[t] is the list (theta_1, ..., theta_{k-1}).
offset = [1]
# Exactly k - 1 = 14 initial weights, one per basis function f_1 ... f_14.
# g() and gradient_descent() only ever read indices 0 .. k-2 of this list.
weight = [[2, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2]]

In [0]:
# Iteration indices at which the cost / the full parameter vector stopped changing.
cost_convergence, theta_convergence = [], []

`Train` the `input data` with the `logistic regression` function above with the `gradient descent`. \\
Find optimal parameters $\theta$ using the `training data`.

In [0]:
# Number of gradient-descent iterations.
epoch = m * 200

# Make the cell safe to re-run: keep only the initial parameters and drop any
# history left over from a previous execution.
del weight[1:]
del offset[1:]
cost_convergence.clear()
theta_convergence.clear()

# sigma[t] holds the predictions and cost[t] the objective value at iteration t.
sigma = [logistic_regression(weight[0], offset[0])]
cost = [objective_function(sigma[0])]

i = 0
while i < epoch:
    i = i + 1

    # One gradient step, evaluated once and unpacked into both histories.
    next_weight, next_offset = gradient_descent(weight[i - 1], offset[i - 1], sigma[i - 1])
    weight.append(next_weight)
    offset.append(next_offset)

    # Reuse the fresh predictions for the cost instead of recomputing them.
    sigma.append(logistic_regression(next_weight, next_offset))
    cost.append(objective_function(sigma[i]))

    if cost[i] == cost[i - 1]:
        cost_convergence.append(i)
        # The parameters count as converged only when the offset and every one
        # of the k - 1 weights are unchanged; record the iteration once.
        if offset[i] == offset[i - 1] and all(
            weight[i][col] == weight[i - 1][col] for col in range(k - 1)
        ):
            theta_convergence.append(i)

# Sentinel: if convergence was never detected, fall back to the last iteration.
theta_convergence.append(epoch)
cost_convergence.append(epoch)

## 5. Compute the `training accuracy`

Compute the `final training accuracy` in `number (%)`.
\begin{equation}
accuracy\ (\%) = \frac{\text{number of correct predictions}}{\text{total number of predictions}} \times 100
\end{equation}

In [0]:
# Index of the first iteration whose cost equals the smallest recorded cost.
index_minimum = cost.index(min(cost))

In [0]:
# Training accuracy (%) at every recorded iteration, including the last one.
# The objective trains sigma towards 1 for label 1, so a sample is predicted
# as label 1 when sigma >= 0.5 and as label 0 otherwise.
is_label_one = label == 1
accuracy = []
for probabilities in sigma:
    predicted_one = np.asarray(probabilities) >= 0.5
    correct_predictions = int(np.count_nonzero(predicted_one == is_label_one))
    accuracy.append((correct_predictions / m) * 100)

# First iteration that reaches the best training accuracy.
index_accuracy_maximum = accuracy.index(max(accuracy))

In [0]:
# print(sigma[0])
# print(sigma[1])
# print(sigma[2])
# print(sigma[index_minimum-1])
# print(label)

In [0]:
# print(accuracy)

## 6. Visualize the `Classifier`

Generate the `Classifier`.

In [0]:
# print(offset[index_minimum])
# print(weight[index_minimum])

In [0]:
def classifier(x, y):
    """Evaluate sigma(g(x, y; theta)) with the parameters of the best-accuracy iteration.

    The parameters are read from the global histories offset and weight at
    index index_accuracy_maximum. x and y are forwarded to f_k, whose
    arithmetic is element-wise, so they may be scalars or arrays.
    """
    features = f_k(x, y)
    best_offset = offset[index_accuracy_maximum]
    best_weight = weight[index_accuracy_maximum]
    # Offset term first, then the k - 1 weighted features in order.
    score = best_offset * features[0]
    for col in range(1, k):
        score = score + best_weight[col - 1] * features[col]
    return 1 / (1 + np.exp(-score))

Visualize the obtained `classifier`, where the `boundary` of the `classifier` is defined by $\{(x, y) | \sigma(g(x, y ; \theta)) = 0.5\} = \{(x, y) | g(x, y ; \theta) = 0\}$.

In [0]:
plt.figure(figsize=(8, 8))

# Evaluate the classifier on a regular grid that covers the training points.
x = np.arange(-1, 1.25, 0.01)
y = np.arange(-1, 1.25, 0.01)
X, Y = np.meshgrid(x, y)
z = classifier(X, Y)

# The decision boundary is the sigma = 0.5 level set, drawn in green and labelled.
CS = plt.contour(X, Y, z, levels=[1/2], colors='green')
CS.clabel()

# Training points on top: label 0 in blue, label 1 in red.
for points_x, points_y, colour in ((pointX0, pointY0, 'b'), (pointX1, pointY1, 'r')):
    plt.scatter(points_x, points_y, c=colour)
plt.show()

## 7. **Results**

### 1. **Plot the training data**

Plot the `training data points` $(x, y)$ with their `labels` $l$ (in `blue` color for `label 0` and `red` color for `label 1`).

In [0]:
# Training points: label 0 in blue, label 1 in red, with equal axis scaling.
plt.figure(figsize=(8, 8))
for points_x, points_y, colour in ((pointX0, pointY0, 'b'), (pointX1, pointY1, 'r')):
    plt.scatter(points_x, points_y, c=colour)
plt.tight_layout()
plt.gca().set_aspect('equal', adjustable='box')
plt.show()

### 2. **Write down the high dimensional function $g(x, y; \theta)$**

Write down the `equation` for the `non-linear function` $g(x, y; \theta)$ used for the `classifier` in `LaTeX` format.

\begin{equation*}
g(x, y; \theta) = \theta_0f_0(x, y) + \theta_1f_1(x, y) + ... + \theta_{k-1}f_{k-1}(x, y), \\
\text{where }
f_0(x, y) = - 1.5 \\
f_1(x, y) = (x-0.25)^2, 
f_2(x, y) = (y-0.25)^2, \\
f_3(x, y) = x^4, 
f_4(x, y) = y^4, \\ 
f_5(x, y) = x^6, 
f_6(x, y) = y^6, 
f_7(x, y) = x^8, \\ 
f_8(x, y) = y^8, 
f_9(x, y) = x^{10}, 
f_{10}(x, y) = y^{10}, \\
f_{11}(x, y) = x^{12}, 
f_{12}(x, y) = y^{12}, \\
f_{13}(x, y) = xy, 
f_{14}(x, y) = x^2y^2
\end{equation*}

### 3. **Plot the training error**

Plot the `training error` $J(\theta)$ at `every iteration` of `gradient descent` until `convergence` (in `blue` color).

In [0]:
# Training error J(theta) up to the first iteration where the cost stopped changing.
plt.figure(figsize=(8, 8))
last_iteration = cost_convergence[0]
x_out = np.arange(0, last_iteration)
plt.xlabel('t (iteration)')
plt.plot(x_out, cost[:last_iteration], color='blue')
plt.show()

### 4. **Plot the training accuracy**

Plot the `training accuracy` at `every iteration` of `gradient descent` until `convergence` (in `red` color).

In [0]:
# Training accuracy up to 20 iterations past the best one. The window is
# clamped to the recorded history so x and y always have the same length.
plt.figure(figsize=(8, 8))
n_shown = min(index_accuracy_maximum + 20, len(accuracy))
x_out = np.arange(0, n_shown)
plt.xlabel('t (iteration)')
plt.ylabel('training accuracy (%)')

plt.plot(x_out, accuracy[:n_shown], color = 'red')

plt.show()

### 5. **Write down the final training accuracy**

Present the `final training accuracy` in `number (%)` at `convergence`.

In [0]:
# Report the highest training accuracy over the recorded iterations, in percent.
# NOTE(review): this is the maximum accuracy, which need not be the value at the
# final iteration / at convergence - confirm which one the assignment asks for.
print(accuracy[index_accuracy_maximum], '(%)')

### 6. **Plot the optimal classifier superimposed on the training data**

Plot the `boundary` of the `optimal classifier` at `convergence` (in `green` color). \\
The `boundary` of the `classifier` is defined by $\{ (x, y) \mid \sigma(g(x, y ; \theta)) = 0.5 \} = \{ (x, y) \mid g(x, y ; \theta) = 0 \}$. \\
Plot the `training data points` $(x, y)$ with their `labels` $l$ superimposed on the illustration of the `classifier` using `contour` function in `python3`(in `blue` color for `label 0` and `red` color for `label 1`).

In [0]:
plt.figure(figsize=(8, 8))

# Evaluate the classifier on a regular grid that covers the training points.
x = np.arange(-1, 1.25, 0.01)
y = np.arange(-1, 1.25, 0.01)
X, Y = np.meshgrid(x, y)
z = classifier(X, Y)

# The decision boundary is the sigma = 0.5 level set, drawn in green.
CS = plt.contour(X, Y, z, levels=[1/2], colors='green')

# Training points on top: label 0 in blue, label 1 in red.
for points_x, points_y, colour in ((pointX0, pointY0, 'b'), (pointX1, pointY1, 'r')):
    plt.scatter(points_x, points_y, c=colour)
plt.show()