In [19]:
import numpy as np
import matplotlib.pyplot as plt
import copy, math
%matplotlib inline
np.set_printoptions(precision=3)

In [10]:
def load_data(path="./ex1data1.txt"):
    """Load a two-column, comma-separated dataset.

    Args:
        path: Location of the text file to read. Defaults to the
            ``ex1data1.txt`` file this notebook was written against.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the first column kept two-dimensional
        with shape ``(m, 1)`` and ``y`` is the second column as a
        one-dimensional array of shape ``(m,)``.
    """
    # ndmin=2 keeps a single-row file two-dimensional, so the column slicing
    # below works for any number of rows.
    data = np.loadtxt(path, delimiter=",", ndmin=2)
    X = data[:, :1]  # slice rather than index so X stays a column matrix
    y = data[:, 1]
    return X, y

X_train, y_train = load_data()

# Quick sanity check of what was loaded: shapes, container types, the first
# five rows, the value range (peak-to-peak) of each array, and finally the
# first five features laid out as a single row.
print(
    f"X_train.shape: {X_train.shape}",
    f"y_train.shape: {y_train.shape}",
    f"X type: {type(X_train)} \nX: \n{X_train[:5]}",
    f"y type: {type(y_train)} \nY: \n{y_train[:5]}",
    f"Peak to peak of X: {np.ptp(X_train, axis=0)}",
    f"Peak to peak of Y: {np.ptp(y_train, axis=0)}",
    X_train[:5].T,
    sep="\n",
)

X_train.shape: (97, 1)
y_train.shape: (97,)
X type: <class 'numpy.ndarray'> 
X: 
[[6.11 ]
 [5.528]
 [8.519]
 [7.003]
 [5.86 ]]
y type: <class 'numpy.ndarray'> 
Y: 
[17.592  9.13  13.662 11.854  6.823]
Peak to peak of X: [17.176]
Peak to peak of Y: 26.8277
[[6.11  5.528 8.519 7.003 5.86 ]]


In [81]:
def calculate_cost(X, y, w, b):
    """Return the halved mean squared error of the linear model ``X @ w + b``.

    Args:
        X: Feature matrix; its first dimension is the number of examples.
        y: Target values, one per example.
        w: Weight vector, one entry per feature.
        b: Scalar bias.

    Returns:
        The sum of squared residuals divided by twice the number of examples.
    """
    n_examples = X.shape[0]
    residuals = X @ w + b - y
    return np.square(residuals).sum() / (2 * n_examples)

def calculate_gradients(X, y, w, b):
    """Compute the gradients of the squared-error cost for ``X @ w + b``.

    Args:
        X: Two-dimensional feature matrix of shape ``(m, n)``.
        y: Target values, one per example.
        w: Weight vector, one entry per feature.
        b: Scalar bias.

    Returns:
        tuple: ``(dj_dw, dj_db)``, the gradient with respect to the weights
        (one entry per feature) and the gradient with respect to the bias.
    """
    n_examples, _ = X.shape  # unpacking requires X to be two-dimensional
    scale = 1 / n_examples
    residuals = X @ w + b - y

    # X.T @ residuals accumulates x_i * residual_i over all examples for
    # every feature at once.
    grad_w = scale * (X.T @ residuals)
    grad_b = scale * residuals.sum()
    return (grad_w, grad_b)

#     dj_dw = np.zeros((n,))
#     dj_db = 0
#     for i in range(m):
#         err_i = ((X[i] @ w) + b) - y[i]
#         dj_db = dj_db + err_i
#         for j in range(n):
#             dj_dw[j] = dj_dw[j] + err_i * X[i, j]
#     dj_dw = dj_dw / m
#     dj_db = dj_db / m
#     return dj_dw, dj_db

def gradients_descent(X_train, y_train, w, b, calculate_cost,
                      calculate_gradients, alpha=1e-5, num_iters=1000):
    """Run batch gradient descent and record the optimisation history.

    Args:
        X_train: Training features, passed through to the two callbacks.
        y_train: Training targets, passed through to the two callbacks.
        w: Initial weights. Copied first, so the caller's object is untouched.
        b: Initial bias.
        calculate_cost: Callable ``(X, y, w, b) -> cost``.
        calculate_gradients: Callable ``(X, y, w, b) -> (dj_dw, dj_db)``.
        alpha: Learning rate.
        num_iters: Number of update steps to perform.

    Returns:
        tuple: ``(w, b, hist)`` where ``hist`` is a dict with the parallel
        lists ``'params'``, ``'cost'``, ``'iter'`` and ``'grads'`` captured at
        each save interval.
    """
    w = copy.deepcopy(w)
    hist = {'params': [], 'cost': [], 'iter': [], 'grads': []}

    # Keep at most ~10,000 history entries and print ~10 progress lines.
    save_interval = max(1, math.ceil(num_iters / 10000))
    print_interval = max(1, math.ceil(num_iters / 10))

    for i in range(num_iters):
        dj_dw, dj_db = calculate_gradients(X_train, y_train, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        should_save = i % save_interval == 0
        should_print = i % print_interval == 0

        # Evaluate the cost whenever it is needed, so the progress line never
        # reports a stale value left over from an earlier save step.
        if should_save or should_print:
            cost = calculate_cost(X_train, y_train, w, b)

        # Save cost J, w, b and the gradients at each save interval for graphing.
        if should_save:
            hist['cost'].append(cost)
            hist['params'].append([w, b])
            hist['grads'].append([dj_dw, dj_db])
            hist['iter'].append(i)

        if should_print:
            print(f"Iteration: {i:4} , Cost : {float(cost):8.2f}")
    return (w, b, hist)

def run_batch_gradient_descent(X_train, y_train, alpha=1e-7, num_iters=1000):
    """Fit a linear model with batch gradient descent starting from zeros.

    Args:
        X_train: Two-dimensional feature matrix of shape ``(m, n)``.
        y_train: Target values, one per example.
        alpha: Learning rate forwarded to ``gradients_descent``.
        num_iters: Number of update steps forwarded to ``gradients_descent``.

    Returns:
        tuple: ``(w_out, b_out, hist)`` exactly as returned by
        ``gradients_descent``.
    """
    _, n = X_train.shape
    initial_w = np.zeros((n,))  # one weight per feature
    initial_b = 0.0
    w_out, b_out, hist = gradients_descent(X_train, y_train, initial_w, initial_b,
                                           calculate_cost, calculate_gradients,
                                           alpha, num_iters)
    print(f"Final parameters are: w = {w_out} , b = {b_out}")
    return (w_out, b_out, hist)
    


In [58]:
w_out, b_out, hist = run_batch_gradient_descent(X_train, y_train, alpha=1e-2, num_iters=1500)

# Predictions for the entire training set, computed from the fitted
# parameters returned by the call above in a single vectorised step.
m = X_train.shape[0]
predicted = X_train @ w_out + b_out
assert predicted.shape == (m,)

# Plot the linear fit
plt.plot(X_train, predicted, c="b")

# Create a scatter plot of the data.
plt.scatter(X_train, y_train, marker='x', c='r')

# Set the title
plt.title("Profits vs. Population per city")
# Set the y-axis label
plt.ylabel('Profit in $10,000')
# Set the x-axis label
plt.xlabel('Population of City in 10,000s')
plt.show()

Iteration:    0 , Cost :     6.74
Iteration:  150 , Cost :     5.31
Iteration:  300 , Cost :     4.96
Iteration:  450 , Cost :     4.76
Iteration:  600 , Cost :     4.64
Iteration:  750 , Cost :     4.57
Iteration:  900 , Cost :     4.53
Iteration: 1050 , Cost :     4.51
Iteration: 1200 , Cost :     4.50
Iteration: 1350 , Cost :     4.49
Final parameters are: w = [1.166] , b + -3.63029143940436


In [83]:
# Test-set predictions.
# The input feature is population in units of 10,000 people and the prediction
# is profit in units of $10,000, so x = 3.5 stands for 35,000 people and
# x = 7.0 for 70,000 people.
# The dot product with a one-element feature vector yields a scalar, which
# avoids formatting a size-1 array with '%.2f' (implicit array-to-scalar
# conversion is deprecated in recent NumPy releases).
predict1 = float(np.array([3.5]) @ w_out + b_out)
print('For population = 35,000, we predict a profit of $%.2f' % (predict1*10000))

predict2 = float(np.array([7.0]) @ w_out + b_out)
print('For population = 70,000, we predict a profit of $%.2f' % (predict2*10000))

For population = 35,000, we predict a profit of $4519.77
For population = 70,000, we predict a profit of $45342.45


In [82]:
def test_cases_cost(targetfunction):
    # Case 1
    x = np.array([[2, 4, 6, 8]]).T
    y = np.array([7, 11, 15, 19]).T
    initial_w = [2]
    initial_b = 3.0
    cost = targetfunction(x, y, initial_w, initial_b)
    assert cost == 0, f"Case 1: Cost must be 0 for a perfect prediction but got {cost}"
    
    # Case 2
    x = np.array([[2, 4, 6, 8]]).T
    y = np.array([7, 11, 15, 19]).T
    initial_w = [2.0]
    initial_b = 1.0
    cost = targetfunction(x, y, initial_w, initial_b)
    assert cost == 2, f"Case 2: Cost must be 2 for a perfect prediction but got {cost}"
    
     # Case 3
    x = np.array([[1.5, 2.5, 3.5, 4.5, 1.5]]).T
    y = np.array([4, 7, 10, 13, 5]).T
    initial_w = [1]
    initial_b = 0.0
    cost = targetfunction(x, y, initial_w, initial_b)
    assert np.isclose(cost, 15.325), f"Case 3: Cost must be 15.325 for a perfect prediction but got {cost}"
    
    # Case 4
    initial_b = 1.0
    cost = targetfunction(x, y, initial_w, initial_b)
    assert np.isclose(cost, 10.725), f"Case 4: Cost must be 10.725 but got {cost}"
    
    # Case 5
    y = y - 2
    initial_b = 1.0
    cost = targetfunction(x, y, initial_w, initial_b)
    assert  np.isclose(cost, 4.525), f"Case 5: Cost must be 4.525 but got {cost}"
    
    print("\033[92mAll tests passed!")

def test_cases_gradient(targetfunction):
    print("Using X with shape (4, 1)")
    # Case 1
    x = np.array([[2, 4, 6, 8]]).T
    y = np.array([4.5, 8.5, 12.5, 16.5]).T
    initial_w = [2.]
    initial_b = 0.5
    dj_dw, dj_db = targetfunction(x, y, initial_w, initial_b)
    assert dj_db == 0.0, f"Case 1: dj_db is wrong: {dj_db} != 0.0"
    assert np.allclose(dj_dw, 0), f"Case 1: dj_dw is wrong: {dj_dw} != [[0.0]]"
    
    # Case 2 
    x = np.array([[2, 4, 6, 8]]).T
    y = np.array([4, 7, 10, 13]).T + 2
    initial_w = [1.5]
    initial_b = 1
    dj_dw, dj_db = targetfunction(x, y, initial_w, initial_b)
    assert dj_db == -2, f"Case 1: dj_db is wrong: {dj_db} != -2"
    assert np.allclose(dj_dw, -10.0), f"Case 1: dj_dw is wrong: {dj_dw} != -10.0"   
    
    print("\033[92mAll tests passed!")
    
# Run the self-checks against the vectorised cost and gradient functions
# defined earlier in this notebook; each check fails with an AssertionError
# on the first case that does not match.
test_cases_cost(calculate_cost)
test_cases_gradient(calculate_gradients)

[92mAll tests passed!
Using X with shape (4, 1)
[92mAll tests passed!
