In [None]:
import pandas as pd
import math
import copy
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np

# Simple linear regression

In [None]:
# Two-point training set: x is the house size, y is the price.
x_train = np.array([1.0,2.0])   #(size in 1000 square feet)
y_train = np.array([300.0,500.0])   #(price in 1000s of dollars)
# Draw the line joining the two points, then overlay the points themselves in red.
plt.plot(x_train,y_train);
plt.scatter(x_train,y_train,color='red')

## Computing cost function

In [None]:
def compute_cost(x,y,w,b):
    """Squared-error cost of the line f(x) = w*x + b over a 1-D data set.

    Args:
        x: array of inputs; its first dimension gives the example count m.
        y: array of targets, indexed in step with x.
        w: slope of the model.
        b: intercept of the model.

    Returns:
        The sum of squared residuals scaled by 1/(2*m).
    """
    n_examples = x.shape[0]
    # Squared residual of every example, accumulated in index order.
    squared_residual_sum = sum(
        ((w * x[idx] + b) - y[idx]) ** 2 for idx in range(n_examples)
    )
    return (1 / (2 * n_examples)) * squared_residual_sum

In [None]:
# Cost of the candidate line w=340, b=0 on the training points.
compute_cost(x_train,y_train,340,0)

In [None]:
# Re-create the two-point training set (same values as the first data cell).
x_train = np.array([1.0, 2.0])
y_train = np.array([300.,500.])

In [None]:
def compute_gradient(x,y,w,b):
    """Gradient of the squared-error cost for the line f(x) = w*x + b.

    Args:
        x: array of inputs; its first dimension gives the example count m.
        y: array of targets, indexed in step with x.
        w: current slope.
        b: current intercept.

    Returns:
        (dj_dw, dj_db): the residual-weighted sums scaled by 1/m.
    """
    n_examples = x.shape[0]
    grad_w = 0
    grad_b = 0

    for idx in range(n_examples):
        residual = (w * x[idx] + b) - y[idx]
        grad_w += residual * x[idx]
        grad_b += residual

    scale = 1 / n_examples
    return scale * grad_w, scale * grad_b

In [None]:
def compute_gradient(x, y, w, b):
    """Return the cost gradient (dj_dw, dj_db) for the model f(x) = w*x + b.

    The per-example error is accumulated over all x.shape[0] examples and
    each sum is then divided by the number of examples.
    """
    example_count = x.shape[0]
    slope_grad = 0
    intercept_grad = 0

    for k in range(example_count):
        error_k = w * x[k] + b - y[k]
        intercept_grad += error_k
        slope_grad += error_k * x[k]

    return slope_grad / example_count, intercept_grad / example_count

In [None]:
def gradient_descent(x,y,w_in,b_in,alpha,num_iter,compute_gradient):
    """Fit (w, b) by batch gradient descent.

    Args:
        x, y: training inputs and targets; passed through unchanged to
            compute_gradient, so any container that function accepts works.
        w_in, b_in: starting values of the two parameters.
        alpha: learning rate (step size).
        num_iter: number of update steps to take.
        compute_gradient: callable (x, y, w, b) -> (dj_dw, dj_db).

    Returns:
        (w, b) after num_iter updates; the inputs unchanged when num_iter is 0.
    """
    w, b = w_in, b_in

    for _ in range(num_iter):
        # Both gradients are evaluated at the current (w, b) before either
        # parameter is changed, so the update is simultaneous.
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
    return w, b
        

In [None]:
# Start from w = b = 0 and take 10000 steps with learning rate 1.0e-2.
w_init = 0
b_init = 0
iteration = 10000
tmp_alpha = 1.0e-2
# Uses the gradient_descent defined just above, which returns only (w, b).
w_final, b_final = gradient_descent(x_train,y_train,w_init,b_init,tmp_alpha,iteration,compute_gradient)

In [None]:
# Evaluate the fitted line; inputs are in units of 1000 sqft, matching x_train.
print(f"1000 sqft house prediction {w_final*1.0 + b_final:0.1f} Thousand dollars")
print(f"1200 sqft house prediction {w_final*1.2 + b_final:0.1f} Thousand dollars")
print(f"2000 sqft house prediction {w_final*2.0 + b_final:0.1f} Thousand dollars")

# Simple linear regression with cost history

In [None]:
def compute_gradient(x,y,w,b):
    """Compute (dj_dw, dj_db) for the single-feature model f(x) = w*x + b.

    Args:
        x: array of inputs; x.shape[0] is the number of examples.
        y: array of targets aligned with x.
        w, b: current slope and intercept.

    Returns:
        Tuple (dj_dw, dj_db), each the example-wise sum divided by the
        number of examples.
    """
    indices = range(x.shape[0])
    # Prediction error for every example, in index order.
    residuals = [w * x[i] + b - y[i] for i in indices]

    weighted_sum = sum(res * x[i] for i, res in zip(indices, residuals))
    plain_sum = sum(residuals)
    return weighted_sum / len(indices), plain_sum / len(indices)

In [None]:
def gradient_descent(x,y,w_init,b_init,alpha,num_iter,cost_function,compute_gradient,max_history=100000):
    """Fit (w, b) by batch gradient descent, recording and printing progress.

    Args:
        x, y: training inputs and targets; passed through unchanged to
            cost_function and compute_gradient.
        w_init, b_init: starting values of the two parameters.
        alpha: learning rate (step size).
        num_iter: number of update steps to take.
        cost_function: callable (x, y, w, b) -> cost.
        compute_gradient: callable (x, y, w, b) -> (dj_dw, dj_db).
        max_history: only the first max_history iterations are stored in the
            returned histories, which bounds memory on long runs.

    Returns:
        (w, b, J_history, p_history): final parameters, the recorded costs,
        and the recorded [w, b] pairs.
    """
    w = w_init
    b = b_init
    J_history = []
    p_history = []
    # Print about ten progress lines per run (every iteration if num_iter < 10).
    log_every = math.ceil(num_iter / 10)

    for i in range(num_iter):
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history
        report = i % log_every == 0
        # Evaluate the cost at the *current* parameters whenever it is needed,
        # so the progress line stays correct after the history cap is reached
        # (reading J_history[-1] there would print a stale value).
        if record or report:
            cost = cost_function(x, y, w, b)
        if record:
            J_history.append(cost)
            p_history.append([w,b])
        if report:
            print(f"Iteration {i:4}: Cost {cost:0.2e} ",
                    f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                    f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, J_history, p_history  # final w, b plus cost and parameter histories for graphing

In [None]:
# initialize parameters
w_init = 0
b_init = 0
# some gradient descent settings
iterations = 10000
tmp_alpha = 0.001
# training data: x holds 0..9 as floats, y holds the ten matching targets
x_train = np.arange(10, dtype=float)
y_train = np.array([10,19 ,28,37,50,63,70 ,85,90,100])
# run gradient descent (this variant also returns the cost and parameter histories)
w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train, w_init, b_init, tmp_alpha, 
                                                    iterations, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

In [None]:
plt.scatter(x=x_train,y = y_train)
# A straight line is fully determined by two points, so the fitted model is
# evaluated only at x = 0 and x = 10 and those endpoints are joined.
x_final = np.array([0.0, 10.0])
y_final = w_final * x_final + b_final
plt.plot(x_final,y_final,'r--');

In [None]:
# Predict the target for a single input using the fitted line.
x_value = 3
y_value = w_final*x_value+b_final
print(y_value)

In [None]:
# Plotting the result: training points, fitted line (red dashes), and the
# single prediction computed above (black dot).
plt.scatter(x=x_train,y = y_train)
plt.plot(x_final,y_final,'r--');
plt.scatter(x = x_value,y = y_value, color='black')

# Vectorization

In [None]:
# Here we have more than one feature 
# x = size (sqft), no. of bedrooms, no. of floors, age; y = price
# Each row of X_train is one example with four feature columns.
np.set_printoptions(precision=2) 
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

In [None]:
# data is stored in numpy array/matrix
# Show the shape, Python type and contents of the feature matrix and targets.
print(f"X Shape: {X_train.shape}, X Type:{type(X_train)})")
print(X_train)
print(f"y Shape: {y_train.shape}, y Type:{type(y_train)})")
print(y_train)

#### Cost function

In [None]:
def compute_cost(X,y,w,b):
    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        f_wb =np.dot(w,X[i])+b
        cost +=(f_wb-y[i])**2
    cost = cost/(2*m)
    return cost

In [None]:
# Sample test case for cost function
# Evaluate compute_cost at a fixed (w, b); w has one weight per column of X_train.
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
cost = compute_cost(X_train,y_train,w_init,b_init)
print(f'Cost of optimal w : {cost}')

#### Gradient descent with multiple variables

In [None]:
def compute_gradient(X,y,w,b):
    """Gradient of the squared-error cost for the linear model f(x) = w . x + b.

    Args:
        X: 2-D array of shape (m, n), one example per row.
        y: array of m targets.
        w: weight vector with n entries.
        b: scalar bias.

    Returns:
        (dj_db, dj_dw): the bias gradient first, then the length-n weight
        gradient. Note that this order is the reverse of the single-feature
        versions defined earlier in the notebook.
    """
    n_examples, n_features = X.shape
    grad_w = np.zeros((n_features,))
    grad_b = 0.

    for row in range(n_examples):
        residual = (np.dot(X[row], w) + b) - y[row]
        # Add this example's contribution to every weight component at once.
        grad_w += residual * X[row]
        grad_b += residual

    return grad_b / n_examples, grad_w / n_examples

In [None]:
#Compute and display gradient 
# compute_gradient returns the bias gradient first, then the weight gradient.
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

In [None]:
def gradient_descent(X,y,w_in, b_in, cost_function, gradient_function, alpha,num_iters, max_history=100000):
    """Fit (w, b) by batch gradient descent, recording and printing the cost.

    Args:
        X, y: training inputs and targets; passed through unchanged to
            cost_function and gradient_function.
        w_in: initial weights; a deep copy is taken before the first update.
        b_in: initial bias.
        cost_function: callable (X, y, w, b) -> cost.
        gradient_function: callable (X, y, w, b) -> (dj_db, dj_dw).
        alpha: learning rate (step size).
        num_iters: number of update steps to take.
        max_history: only the first max_history iterations are stored in
            J_history, which bounds memory on long runs.

    Returns:
        (w, b, J_history): final parameters and the recorded costs.
    """
    w = copy.deepcopy(w_in)  # work on a copy of the initial weights
    b = b_in
    J_history = []
    # Print about ten progress lines per run (every iteration if num_iters < 10).
    log_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        dj_db, dj_dw = gradient_function(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history
        report = i % log_every == 0
        # Evaluate the cost at the *current* parameters whenever it is needed,
        # so the progress line stays correct after the history cap is reached
        # (reading J_history[-1] there would print a stale value).
        if record or report:
            cost = cost_function(X, y, w, b)
        if record:
            J_history.append(cost)
        if report:
            print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")

    return w, b, J_history  # final w, b and the cost history for graphing
    
        

In [None]:
# initialize parameters
# Zero weights with the same shape and dtype as the sample w_init defined earlier.
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent 
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradient, 
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# Compare the fitted model's prediction with the target for every training row.
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

# Feature Scaling

In [None]:
def load_house_data(path="./houses.txt"):
    """Load the housing data set from a comma-separated text file.

    The first line of the file is skipped as a header. The first four
    columns are returned as features and the fifth column as the target.

    Args:
        path: location of the data file; defaults to "./houses.txt".

    Returns:
        (X, y): X is a 2-D array with four columns, y is a 1-D array with
        one entry per row of X.
    """
    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single data row, so the column slicing below always works.
    data = np.loadtxt(path, delimiter=',', skiprows=1, ndmin=2)
    X = data[:,:4]
    y = data[:,4]
    return X, y

In [None]:
# Replace the small hand-written data set with the one loaded from file.
X_train, y_train = load_house_data()
print(X_train,y_train)


In [None]:
def zscore_normalize_features(X):
    """Z-score normalize every column of X.

    Args:
        X: array whose columns are features (statistics are taken along axis 0).

    Returns:
        (X_norm, mu, sigma): the normalized data, the per-column mean, and the
        per-column scale that was divided by. The scale is the standard
        deviation, except that a zero deviation (constant column) is replaced
        by 1 so the column normalizes to zeros instead of NaN.
    """
    mu = np.mean(X,axis=0)
    sigma = np.std(X,axis=0)
    # The substituted value is also what gets returned, so a caller that
    # normalizes new samples with (x - mu) / sigma applies the same transform.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X_norm = (X-mu)/sigma
    return (X_norm, mu,sigma)

In [None]:
# normalize the original features
X_norm, X_mu, X_sigma = zscore_normalize_features(X_train)
print(f"X_mu = {X_mu}, \nX_sigma = {X_sigma}")
# np.ptp gives max - min per column, shown before and after normalization.
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train,axis=0)}")   
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm,axis=0)}")


In [None]:
# Show the normalized features alongside the (unchanged) targets.
print(X_norm,y_train)

In [None]:
# Retrain on the z-score normalized features, which allows a much larger
# learning rate than the run on raw features.
# One zero weight per feature column, sized from the data actually being fitted
# rather than from whatever w_init an earlier cell left behind.
w_init = np.zeros_like(X_norm[0])
b_init = 0
alpha = 1.0e-1
nums_iter = 1000
w_final, b_final,J_hist = gradient_descent(X_norm,y_train,w_init,b_init,compute_cost,compute_gradient,alpha,nums_iter)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_norm.shape
for i in range(m):
    # The model was fitted on normalized inputs, so predictions must be made
    # from X_norm as well; feeding it raw X_train rows gives meaningless values.
    print(f"prediction: {np.dot(X_norm[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

In [None]:
X_features = ['size(sqft)','bedrooms','floors','age']

# Model output for every normalized training example.
m = X_norm.shape[0]
yp = np.zeros(m)
for row_idx, features in enumerate(X_norm):
    yp[row_idx] = np.dot(features, w_final) + b_final

# One panel per feature: targets and predictions are both plotted against the
# raw (un-normalized) feature values, with a shared price axis.
fig,ax=plt.subplots(1,4,figsize=(12, 3),sharey=True)
for panel, feature_name, raw_column in zip(ax, X_features, X_train.T):
    panel.scatter(raw_column, y_train, label='target')
    panel.set_xlabel(feature_name)
    panel.scatter(raw_column, yp, label = 'predict')
ax[0].set_ylabel("Price")
ax[0].legend()
fig.suptitle("target versus prediction using z-score normalized model")
plt.show()
    

In [None]:
# First, normalize our example with the mean and sigma computed from the training set.
x_house = np.array([1200, 3, 1, 40])
x_house_norm = (x_house - X_mu) / X_sigma
print(x_house_norm)
# Then predict from the normalized features; the result is scaled by 1000 for display in dollars.
x_house_predict = np.dot(x_house_norm, w_final) + b_final
print(f" predicted price of a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old = ${x_house_predict*1000:0.0f}")