In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math, copy

In [None]:
def zscore_normalize_features(X):
    """Z-score normalize ``X`` column by column.

    The mean and standard deviation are taken along axis 0, so for a 2-D
    array of shape (m, n) every one of the n columns is rescaled
    independently to zero mean and unit (population) standard deviation.

    Args:
        X: array-like of numeric values; statistics are computed along axis 0.

    Returns:
        A tuple ``(X_norm, mu, sigma)``:
            X_norm: ``(X - mu) / sigma``.
            mu:     per-column mean.
            sigma:  per-column divisor that was actually used. It equals the
                    standard deviation, except that a zero standard deviation
                    is replaced by 1.0, so ``(X - mu) / sigma`` always
                    reproduces ``X_norm``.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)

    # A constant column has sigma == 0 and dividing by it gives NaN (0 / 0).
    # Dividing by 1 instead maps such a column to all zeros.
    sigma = np.where(sigma == 0, 1.0, sigma)

    X_norm = (X - mu) / sigma

    return (X_norm, mu, sigma)


In [None]:
def compute_model_output(x, w, b):
    """Evaluate the linear model ``w . x[i] + b`` for every row of ``x``.

    Args:
        x: array whose first axis indexes the samples.
        w: weights, combined with each row through ``np.dot``.
        b: bias added to every prediction.

    Returns:
        A float array of length ``x.shape[0]`` holding one prediction per row.
    """
    predictions = np.zeros(x.shape[0])
    for idx, row in enumerate(x):
        predictions[idx] = np.dot(w, row) + b
    return predictions


In [None]:
def compute_cost(x, y, w, b):
    """Half mean-squared-error cost of the linear model ``w . x[i] + b``.

    Computes ``(1 / (2 * m)) * sum_i (w . x[i] + b - y[i]) ** 2`` where ``m``
    is the number of rows in ``x``.

    Args:
        x: array whose first axis indexes the m samples.
        y: targets, indexed ``y[0] .. y[m - 1]``.
        w: weights, combined with each row through ``np.dot``.
        b: bias.

    Returns:
        The cost. Its shape follows NumPy broadcasting of the inputs, so it
        is a scalar when ``w . x[i]`` and ``b`` are scalars.
    """
    n_samples = x.shape[0]
    # Builtin sum() starts from 0 and adds left to right, one squared
    # residual per sample.
    squared_error_total = sum(
        (np.dot(w, row) + b - y[idx]) ** 2 for idx, row in enumerate(x)
    )
    return (1 / (2 * n_samples)) * squared_error_total


In [None]:
def compute_gradient(x, y, w, b):
    """Gradient of the half-MSE cost of ``w . x[i] + b`` w.r.t. ``w`` and ``b``.

    Args:
        x: 2-D array of shape (m, n): m samples, n features.
        y: targets, indexed ``y[0] .. y[m - 1]``.
        w: weights, combined with each row through ``np.dot``.
        b: bias.

    Returns:
        A tuple ``(dj_dw, dj_db)``:
            dj_dw: array of shape (n,), the mean of ``residual_i * x[i]``.
            dj_db: the mean residual. Its shape follows NumPy broadcasting of
                   the inputs (a scalar when the residual is a scalar).
    """
    n_samples, n_features = x.shape
    grad_w = np.zeros((n_features,))
    grad_b = 0

    for idx, row in enumerate(x):
        residual = np.dot(w, row) + b - y[idx]
        grad_w += residual * row
        grad_b += residual

    return grad_w / n_samples, grad_b / n_samples


In [None]:
def gradient_descent(X, y, alpha=1e-2, num_iters=10000, compute_cost=compute_cost, compute_gradient=compute_gradient):
    """Fit the linear model ``f(x) = w . x + b`` with batch gradient descent.

    Both parameters start at zero and are updated ``num_iters`` times with
    ``w -= alpha * dj_dw`` and ``b -= alpha * dj_db``.

    Args:
        X: 2-D array of shape (m, n): m samples, n features.
        y: targets, one per row of ``X``.
        alpha: learning rate (step size).
        num_iters: number of update steps to run.
        compute_cost: callable ``(X, y, w, b) -> cost``.
        compute_gradient: callable ``(X, y, w, b) -> (dj_dw, dj_db)``.

    Returns:
        A tuple ``(w, b, J_history, p_history)``:
            w: fitted weights, array of shape (n,).
            b: fitted bias (scalar).
            J_history: cost after every update step.
            p_history: ``(w, b)`` after each of the first 100,000 update
                steps; capped so that long runs do not exhaust memory.
    """
    max_saved_params = 100_000

    J_history = []
    p_history = []

    # One weight per feature, all starting at zero. Seeding ``w`` with the
    # integer ``X.shape[1]`` would make the first gradient an elementwise
    # product instead of a dot product and, for n > 1, would silently turn
    # ``b`` into an (n,) vector.
    w = np.zeros(X.shape[1])
    b = 0.

    # Report progress about ten times over the run; never use a step of 0.
    report_every = max(1, math.ceil(num_iters / 10))

    for i in range(num_iters):
        dj_dw, dj_db = compute_gradient(X, y, w, b)

        # ``w`` is rebound to a new array on every step, so the references
        # stored in ``p_history`` are never modified afterwards.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        J_history.append(compute_cost(X, y, w, b))
        if i < max_saved_params:
            p_history.append((w, b))

        if i % report_every == 0:
            # Cost, db and b are scalars here; only dw and w are indexed.
            print(f"Iteration: {i:8d} | Cost: {J_history[-1]:10.4f} | dw: {dj_dw[0]:10.3f} | db: {dj_db:10.3f} | w: {w[0]:8.3f}, | b: {b:8.3f}")

    print(f"\n(w,b) found by gradient descent: ({w}, {b})")

    return w, b, J_history, p_history


In [None]:
# Quadratic targets y = 1 + x**2 sampled at the integers 0..19.
x = np.arange(20)
y = x**2 + 1
# Single raw feature as a column: shape (20, 1).
X = x[:, np.newaxis]


In [None]:
# Fit the model with gradient_descent's default alpha and num_iters.
# The call also prints progress lines and the final (w, b).
w_final, b_final, J_history, p_history = gradient_descent(X, y)


In [None]:
# Bare expression: the notebook displays this tuple as the cell's output.
w_final, b_final

In [None]:
# Targets (red crosses) against the model's predictions X @ w + b.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(x, y, marker='x', c='r', label="Actual Value")
ax.plot(x, X @ w_final + b_final, label="Predicted Value")
ax.set(title="no feature engineering", xlabel="X", ylabel="y")
ax.legend()
plt.show()


In [None]:
# Same quadratic targets as before: y = 1 + x**2 at the integers 0..19.
x = np.arange(20)
y = x**2 + 1

# Engineered feature: x squared, as a single column of shape (20, 1).
X = (x**2).reshape(-1, 1)


In [None]:
# Fit with alpha=1e-5 instead of the function default of 1e-2.
# NOTE(review): presumably the smaller step is needed because the feature is
# not normalized here — confirm.
w_final, b_final, J_history, p_history = gradient_descent(X, y, alpha=1e-5)


In [None]:
# Targets (red crosses) against the predictions of the x**2 model.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(x, y, marker='x', c='r', label="Actual Value")
ax.plot(x, X @ w_final + b_final, label="Predicted Value")
ax.set_title("Added x**2 feature")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
plt.show()


In [None]:
# Target data: y = x**2 at the integers 0..19 (no constant offset this time).
x = np.arange(20)
y = x**2

# Engineered features: one column each for x, x**2 and x**3 -> shape (20, 3).
X = np.column_stack([x, x**2, x**3])


In [None]:
# Z-score normalize every feature column, keeping the statistics used.
# NOTE: X is overwritten with its normalized version, so re-running this cell
# recomputes X_mu / X_sigma from already-normalized data.
X, X_mu, X_sigma  = zscore_normalize_features(X)


In [None]:
# Fit with gradient_descent's default alpha and num_iters.
w_final, b_final, J_history, p_history = gradient_descent(X=X, y=y)
# Reshape the weights into a single column.
w_final = w_final.reshape((-1, 1))


In [None]:
# Targets (red crosses) against the predictions of the x, x**2, x**3 model.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(x, y, marker='x', c='r', label="Actual Value")
ax.plot(x, X @ w_final + b_final, label="Predicted Value")
ax.set(title="Added x, x**2, x**3 features", xlabel="x", ylabel="y")
ax.legend()
plt.show()


In [None]:
# Non-polynomial target: y = cos(x / 2) at the integers 0..19.
x = np.arange(20)
y = np.cos(x / 2)

# Polynomial features x**1 .. x**13, one column per power -> shape (20, 13).
X = np.column_stack([x**power for power in range(1, 14)])


In [None]:
# Z-score normalize every feature column, keeping the statistics used.
# NOTE: X is overwritten with its normalized version, so re-running this cell
# recomputes X_mu / X_sigma from already-normalized data.
X, X_mu, X_sigma  = zscore_normalize_features(X)


In [None]:
# Fit with a larger step (alpha=0.1) and one million iterations, which is
# 100x gradient_descent's default iteration count.
w_final, b_final, J_history, p_history = gradient_descent(
    X, y, alpha=1e-1, num_iters=1_000_000
)
# Reshape the weights into a single column.
w_final = w_final.reshape((-1, 1))


In [None]:
# Cosine targets (red crosses) against the polynomial model's predictions.
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(x, y, marker='x', c='r', label="Actual Value")
ax.plot(x, X @ w_final + b_final, label="Predicted Value")
ax.set(title="feature engineered", xlabel="X", ylabel="y")
# NOTE(review): the legend is disabled in this cell although both artists
# carry labels — confirm whether that is intended.
# ax.legend()
plt.show()
