# 1. A simple MATLAB function

In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm

# warm-up: build the 5x5 integer identity matrix and display it
A = np.identity(5, dtype=int)
print(A)

# 2. Linear regression with one variable
## 2.1 Plotting the data

In [None]:
# read the first data set from ./data relative to the current working directory
# (os.path.abspath('') already yields an absolute path)
data_file_1 = Path(os.path.abspath('')) / "data" / "ex1data1.txt"
data_set_1 = pd.read_csv(
    data_file_1,
    header=None,
    names=["Population", "Profit"],
)
# for a quick look at the frame, print data_set_1.head() or data_set_1.describe()

# scatter plot straight from the dataframe (pandas draws with matplotlib by default)
data_set_1.plot(kind='scatter', x='Population', y='Profit')
plt.show()

# split the frame into a feature column and a target column, both as m x 1 matrices
x = np.matrix(data_set_1[["Population"]])
y = np.matrix(data_set_1[["Profit"]])
print(x)
print(y)

## 2.2 Gradient Descent
### 2.2.1 Update Equations
### 2.2.2 Implementation

In [None]:
# gradient descent settings
alpha = 0.01
iterations = 1500

# design matrix: the feature column with a column of ones inserted in front
m = len(x)  # number of training examples
X = np.insert(x, 0, 1, axis=1)
print(X)

# start the search from theta = 0 (2 x 1 column)
theta = np.matrix([[0.0], [0.0]])
print(theta)

# notebook display of the three shapes
X.shape, y.shape, theta.shape


### 2.2.3 Computing the cost function

In [None]:
# compute cost
def compute_cost(X, y, theta):
    m = len(y)
    sum = 0
    for i in range(0, m):
        diff = np.matrix(X[i]) * theta - y[i]
        sum += diff.item() ** 2
    return 1 / (2 * m) * sum

# cost at the current theta, then at theta = [[-1],[2]]
for candidate in (theta, np.matrix([[-1], [2]])):
    print(compute_cost(X, y, candidate))

# simpler vectorized method
def vectorized_compute_cost(X, y, theta):
    m = len(y)
    h = X * theta
    diff = h - y
    res = 1 / (2 * m) * np.sum(np.square(diff)) 
    return res

# cost at the current theta, then at theta = [[-1],[2]]
for candidate in (theta, np.matrix([[-1], [2]])):
    print(vectorized_compute_cost(X, y, candidate))

### 2.2.4 Gradient descent

In [None]:
# vectorized batch gradient descent
def gradientDescent(X, y, theta, alpha, iterations):
    """Fit linear-regression parameters with batch gradient descent.

    Every iteration applies the update

        theta := theta - alpha / m * X^T (X theta - y)

    which is the gradient step for the cost
    J(theta) = 1 / (2m) * sum((X theta - y) ** 2). The factor is 1 / m, not
    1 / (2m): differentiating the square cancels the 2.

    Args:
        X: 2-D design matrix with one row per training example
            (``np.matrix`` or 2-D ``ndarray``).
        y: column of targets with one row per training example.
        theta: column of initial parameters, one row per column of ``X``.
            It is not modified.
        alpha: learning rate.
        iterations: number of update steps to run.

    Returns:
        A tuple ``(new_theta, costs)``: the parameters after the last step and
        a list holding the cost after each step (``len(costs) == iterations``).
    """
    m = len(X)
    # astype() returns a float copy: the caller's theta stays untouched and an
    # integer-valued starting point can still be updated in place
    new_theta = theta.astype(float)
    costs = []
    for _ in range(iterations):
        # "@" is a matrix product for np.matrix and for 2-D ndarray alike
        gradient = X.T @ (X @ new_theta - y) / m
        new_theta -= alpha * gradient
        # cost after the step, same formula as the cost functions in this file
        residual = X @ new_theta - y
        costs.append(1 / (2 * m) * np.sum(np.square(residual)))
    return new_theta, costs

learned_theta, costs = gradientDescent(X, y, theta, alpha, iterations)

print(learned_theta)
# cost after each of the last five iterations, including the final one
print(costs[-5:])

# predict values for population sizes of 35,000 and 70,000
# (passed as 3.5 and 7, i.e. the feature is expressed in units of 10,000)
prediction1 = (np.matrix([1, 3.5]) * learned_theta).item()
prediction2 = (np.matrix([1, 7]) * learned_theta).item()
print("prediction 1: population 3.5, profit {}".format(prediction1))
print("prediction 2: population 7, profit {}".format(prediction2))

# visualize result: evaluate the fitted line on 100 evenly spaced populations
x_vector = np.linspace(data_set_1.Population.min(), data_set_1.Population.max(), 100)
# 2 x 100 matrix: a row of ones (intercept term) on top of the row of populations
x_constructed = np.insert(np.matrix(x_vector), 0, 1, axis=0)
y_constructed = learned_theta.T * x_constructed
y_vector = np.array(y_constructed).flatten()
print(x_vector)
print(y_vector)

# top panel: training data with the fitted line; bottom panel: cost per iteration
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8,16))
ax1.plot(x, y, "o", label="training data")
ax1.plot(x_vector, y_vector, "-", label="prediction")
ax1.legend()
ax1.set_title("Predicted Profit")
ax1.set_xlabel("Population")
ax1.set_ylabel("Profit")
ax2.plot(range(1, iterations + 1), costs, "-")
ax2.set_title("Cost")
ax2.set_xlabel("Iterations")
ax2.set_ylabel("Cost")
plt.show()

## 2.3 Debugging
## 2.4 Visualizing J(theta)

In [None]:
theta1_vector = np.linspace(-10, 10, 100)
theta2_vector = np.linspace(-1, 4, 100)
cost_vector = np.zeros((len(theta2_vector), len(theta1_vector)))

# create Z grid for the coordinate system: rows follow theta2 and columns
# follow theta1, which is the layout np.meshgrid produces below
for i, theta2_value in enumerate(theta2_vector):
    for j, theta1_value in enumerate(theta1_vector):
        cost_vector[i, j] = vectorized_compute_cost(X, y, np.matrix([[theta1_value], [theta2_value]]))

# transfer X, Y vector to grid for the coordinate system
theta1_vector_mesh, theta2_vector_mesh = np.meshgrid(theta1_vector, theta2_vector)

print(theta1_vector_mesh)
print(theta2_vector_mesh)
print(cost_vector)

# 3D surface figure
fig1 = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib,
# so the 3D axes are created with add_subplot instead
ax1 = fig1.add_subplot(111, projection="3d")
ax1.set_xlabel("Theta1")
ax1.set_ylabel("Theta2")
ax1.set_zlabel("Cost")
surf = ax1.plot_surface(theta1_vector_mesh, theta2_vector_mesh, cost_vector, cmap=cm.coolwarm, linewidth=0, antialiased=False)
cbar1 = fig1.colorbar(surf)
cbar1.set_label("Cost")

# contour figure
fig2, ax2 = plt.subplots()
contour = ax2.contour(theta1_vector_mesh, theta2_vector_mesh, cost_vector, levels=100, cmap=cm.coolwarm)
ax2.set_xlabel("Theta1")
ax2.set_ylabel("Theta2")
cbar2 = fig2.colorbar(contour)
cbar2.set_label("Cost")
plt.show()


# 3. Linear regression with multiple variables


In [None]:
# read the second data set from ./data relative to the current working directory
# (os.path.abspath('') already yields an absolute path)
data_file_2 = Path(os.path.abspath('')) / "data" / "ex1data2.txt"
data_set_2 = pd.read_csv(
    data_file_2,
    header=None,
    names=["Size", "Bedrooms", "Price"],
)
print(data_set_2.head())

# two feature columns and one target column, both as matrices
x = np.matrix(data_set_2[["Size", "Bedrooms"]])
y = np.matrix(data_set_2[["Price"]])
print(x)
print(y)

## 3.1 Feature Normalization

In [None]:
# feature normalization (standardization)
def featureNormalize(x):
    """Standardize every column of ``x`` to zero mean and unit standard deviation.

    The mean and the population standard deviation (``np.std`` default,
    ``ddof=0``) are taken along axis 0, i.e. per column.

    A column whose values are all equal has a standard deviation of 0; its
    divisor is replaced by 1 so the column is mapped to zeros instead of
    producing 0 / 0 = NaN. The returned ``sigma`` carries that replacement, so
    reusing it to scale new inputs stays consistent.

    Args:
        x: 2-D array or matrix with one row per example and one column per feature.

    Returns:
        A tuple ``(normalized_x, mu, sigma)``: the standardized data, the
        per-column means and the per-column divisors that were used.
    """
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)
    # (sigma == 0) is 1 exactly where the deviation is zero, so this turns a
    # zero divisor into 1 and leaves every other entry unchanged
    sigma = sigma + (sigma == 0)
    # element-wise operations, broadcast over the rows of x
    normalized_x = (x - mu) / sigma
    return normalized_x, mu, sigma

normalized_x, mu, sigma = featureNormalize(x)
# show the normalized features together with the statistics used to scale them
for item in (normalized_x, mu, sigma):
    print(item)

# design matrix: the normalized features with a column of ones inserted in front
m = len(normalized_x)  # number of training examples
X = np.insert(normalized_x, 0, 1, axis=1)
print(X)

## 3.2 Gradient Descent

In [None]:
# gradient descent settings for the three-parameter model
alpha = 0.1
iterations = 400
theta = np.matrix(np.zeros((3, 1)))

learned_theta, costs = gradientDescent(X, y, theta, alpha, iterations)
print(learned_theta)
# cost after each of the last five iterations, including the final one
print(costs[-5:])

# show whether the cost converges
fig, ax = plt.subplots()
ax.plot(range(1, iterations + 1), costs, "-")
ax.set_title("Cost")
ax.set_xlabel("Iterations")
ax.set_ylabel("Cost")
plt.show()

# scale new inputs with previously computed statistics
def normalize_test_x(x, mu, sigma):
    """Return ``(x - mu) / sigma``, computed element-wise.

    ``mu`` and ``sigma`` are whatever offsets and divisors the caller passes;
    in this file they are the values returned by ``featureNormalize``.
    """
    centered = x - mu
    return centered / sigma

# the query [1650, 3] must be scaled with the training statistics before predicting
raw_query = np.matrix([1650, 3])
normalized_test_x = normalize_test_x(raw_query, mu, sigma)
# put a 1 in front of the scaled features
test_x = np.insert(normalized_test_x, 0, 1, axis=1)
multi_prediction = (test_x * learned_theta).item()

print(test_x)
print(multi_prediction)