# Feature scaling

                      *** Before Normalization ***

In [1]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Training inputs: 3 examples (rows) by 4 features (columns), integer-valued.
X_train = np.array(
    [
        [1, 5, 1, 45],
        [2, 3, 2, 40],
        [3, 2, 1, 35],
    ]
)
# Training targets: one value per row of X_train.
y_train = np.array([1000, 2000, 3000])

In [3]:
# Show the dimensions of the inputs followed by their contents, then the same
# for the targets (shape line and array are separated by a newline).
print(f"X Shape: {X_train.shape}", X_train, sep="\n")
print(f"y Shape: {y_train.shape}", y_train, sep="\n")

X Shape: (3, 4)
[[ 1  5  1 45]
 [ 2  3  2 40]
 [ 3  2  1 35]]
y Shape: (3,)
[1000 2000 3000]


In [4]:
def cost(x, y, w, b):
    """Halved mean squared error of the linear model ``np.dot(x[i], w) + b``.

    Args:
        x: array of examples; ``x.shape[0]`` is the number of examples and
            each ``x[i]`` is dotted with ``w``.
        y: targets, indexed per example.
        w: model weights.
        b: model bias.

    Returns:
        Sum of squared residuals over all examples divided by ``2 * m``,
        where ``m = x.shape[0]``.
    """
    n_examples = x.shape[0]
    total = 0
    for k in range(n_examples):
        # Residual of the prediction for example k against its target.
        residual = (np.dot(x[k], w) + b) - y[k]
        total = total + residual ** 2
    return total / (2 * n_examples)

In [5]:
def gradient(x, y, w, b):
    """Gradient of the halved mean-squared-error cost with respect to w and b.

    Args:
        x: 2-D array; its shape is unpacked as (examples, features).
        y: targets, indexed per example.
        w: model weights.
        b: model bias.

    Returns:
        (dw, db): ``dw`` is a length-``n`` float array holding the average of
        ``residual * x[i, j]`` over the examples for each feature ``j``;
        ``db`` is the average residual.
    """
    n_examples, n_features = x.shape

    grad_w = np.zeros(n_features)
    grad_b = 0

    for k in range(n_examples):
        # Signed error of the current model on example k.
        residual = (np.dot(x[k], w) + b) - y[k]

        for col in range(n_features):
            grad_w[col] += residual * x[k, col]
        grad_b += residual

    return grad_w / n_examples, grad_b / n_examples
    
    

In [6]:
def gradient_descent(x, y, w, b, iterations, alpha):
    """Run batch gradient descent for the linear model, printing the cost each step.

    Args:
        x: 2-D array of examples by features, passed to `gradient` and `cost`.
        y: targets, one per example.
        w: initial weights.
        b: initial bias.
        iterations: number of update steps to perform.
        alpha: learning rate that scales each gradient step.

    Returns:
        (w, b, c): the final weights, the final bias, and the cost evaluated at
        those parameters. With ``iterations <= 0`` the inputs are returned
        unchanged together with their cost.
    """
    # Evaluate the cost up front so `c` is always bound; without this,
    # iterations == 0 raised UnboundLocalError at the return statement.
    c = cost(x, y, w, b)

    for i in range(iterations):

        dw, db = gradient(x, y, w, b)

        # Rebind to new values rather than updating in place, so the arrays
        # passed in by the caller are not modified.
        w = w - alpha * dw
        b = b - alpha * db

        c = cost(x, y, w, b)
        print(f"Iteration {i:4d}: Cost {c:8.2f}   ")

    return w, b, c

In [7]:
# initialize parameters
init_w = np.zeros_like(X_train[0])
init_b = 0
# some gradient descent settings
iterations = 25000
alpha = 0.001
# run gradient descent 
w_final, b_final,c = gradient_descent(X_train, y_train, init_w, init_b, iterations,  alpha)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

Iteration    0: Cost 1246899.44   
Iteration    1: Cost 807853.31   
Iteration    2: Cost 629762.83   
Iteration    3: Cost 556863.52   
Iteration    4: Cost 526369.22   
Iteration    5: Cost 512971.85   
Iteration    6: Cost 506470.10   
Iteration    7: Cost 502752.00   
Iteration    8: Cost 500160.04   
Iteration    9: Cost 498026.11   
Iteration   10: Cost 496080.88   
Iteration   11: Cost 494215.77   
Iteration   12: Cost 492387.01   
Iteration   13: Cost 490576.92   
Iteration   14: Cost 488778.38   
Iteration   15: Cost 486988.50   
Iteration   16: Cost 485206.09   
Iteration   17: Cost 483430.66   
Iteration   18: Cost 481662.01   
Iteration   19: Cost 479900.02   
Iteration   20: Cost 478144.66   
Iteration   21: Cost 476395.87   
Iteration   22: Cost 474653.63   
Iteration   23: Cost 472917.91   
Iteration   24: Cost 471188.69   
Iteration   25: Cost 469465.94   
Iteration   26: Cost 467749.64   
Iteration   27: Cost 466039.76   
Iteration   28: Cost 464336.28   
Iteration   2

Iteration 10727: Cost    99.11   
Iteration 10728: Cost    99.07   
Iteration 10729: Cost    99.02   
Iteration 10730: Cost    98.98   
Iteration 10731: Cost    98.93   
Iteration 10732: Cost    98.89   
Iteration 10733: Cost    98.84   
Iteration 10734: Cost    98.79   
Iteration 10735: Cost    98.75   
Iteration 10736: Cost    98.70   
Iteration 10737: Cost    98.66   
Iteration 10738: Cost    98.61   
Iteration 10739: Cost    98.56   
Iteration 10740: Cost    98.52   
Iteration 10741: Cost    98.47   
Iteration 10742: Cost    98.43   
Iteration 10743: Cost    98.38   
Iteration 10744: Cost    98.34   
Iteration 10745: Cost    98.29   
Iteration 10746: Cost    98.25   
Iteration 10747: Cost    98.20   
Iteration 10748: Cost    98.15   
Iteration 10749: Cost    98.11   
Iteration 10750: Cost    98.06   
Iteration 10751: Cost    98.02   
Iteration 10752: Cost    97.97   
Iteration 10753: Cost    97.93   
Iteration 10754: Cost    97.88   
Iteration 10755: Cost    97.84   
Iteration 1075

Iteration 20272: Cost     1.18   
Iteration 20273: Cost     1.18   
Iteration 20274: Cost     1.18   
Iteration 20275: Cost     1.18   
Iteration 20276: Cost     1.18   
Iteration 20277: Cost     1.18   
Iteration 20278: Cost     1.18   
Iteration 20279: Cost     1.18   
Iteration 20280: Cost     1.18   
Iteration 20281: Cost     1.18   
Iteration 20282: Cost     1.18   
Iteration 20283: Cost     1.18   
Iteration 20284: Cost     1.18   
Iteration 20285: Cost     1.18   
Iteration 20286: Cost     1.18   
Iteration 20287: Cost     1.18   
Iteration 20288: Cost     1.18   
Iteration 20289: Cost     1.18   
Iteration 20290: Cost     1.17   
Iteration 20291: Cost     1.17   
Iteration 20292: Cost     1.17   
Iteration 20293: Cost     1.17   
Iteration 20294: Cost     1.17   
Iteration 20295: Cost     1.17   
Iteration 20296: Cost     1.17   
Iteration 20297: Cost     1.17   
Iteration 20298: Cost     1.17   
Iteration 20299: Cost     1.17   
Iteration 20300: Cost     1.17   
Iteration 2030

In [8]:
# Sanity check: prediction of the fitted model for the first training example
# next to that example's target.
print(f"prediction: {np.dot(X_train[0], w_final) + b_final:0.2f}, target value: {y_train[0]}")

prediction: 999.74, target value: 1000


In [9]:
def predict(x, w, b):
    """Linear model output: the dot product of ``x`` and ``w`` plus ``b``.

    Args:
        x: feature values.
        w: model weights.
        b: model bias.

    Returns:
        ``np.dot(x, w) + b``.
    """
    return np.dot(x, w) + b

In [10]:
# Example to predict for; these are the same values as the first row of X_train.
x_house = np.array([1, 5, 1, 45])

In [11]:
# predict is declared as predict(x, w, b): features first, then weights.
# The previous call passed (w_final, x_house, b_final); for two 1-D vectors the
# dot product is symmetric so the number was the same, but the swapped order
# would give wrong results or fail for a 2-D batch of examples.
res = predict(x_house, w_final, b_final)

In [12]:
# Show the prediction computed for x_house.
print(res)

999.737272794573


In [13]:
# Predict every training example with the fitted parameters, keeping the
# results in yp and printing each one as it is computed.
# (No plotting is done in this cell.)
m = len(X_train)
yp = np.zeros(m)
for i, x_row in enumerate(X_train):
    yp[i] = np.dot(x_row, w_final) + b_final
    print(yp[i])

999.737272794573
2000.708577898882
2999.5280405228423


# z-score normalization

                           *** After Normalization ***

In [14]:
def zscore_normalize_features(X):
    """Z-score normalize each column of X.

    Args:
        X: array of examples (rows) by features (columns); statistics are
            computed along axis 0.

    Returns:
        (X_norm, mu, sigma):
            X_norm: ``(X - mu) / sigma``, computed column-wise.
            mu: mean of each column.
            sigma: standard deviation of each column, with exact zeros
                replaced by 1.0 so it can be reused to scale new examples.
    """
    # find the mean of each column/feature
    mu = np.mean(X, axis=0)
    # find the standard deviation of each column/feature
    sigma = np.std(X, axis=0)
    # A constant column has sigma == 0, which made (X - mu) / sigma evaluate
    # 0/0 and fill the column with NaN. Use a divisor of 1 for such columns so
    # they normalize to 0; all other columns are unaffected.
    sigma = np.where(sigma == 0, 1.0, sigma)
    # element-wise, subtract mu for that column from each example, divide by std for that column
    X_norm = (X - mu) / sigma

    return (X_norm, mu, sigma)
 

In [15]:
# Normalize the training inputs and keep the per-column mean and standard
# deviation; they are reused later to scale a new example before predicting.
X_norm, X_mu, X_sigma = zscore_normalize_features(X_train)
print(f"X_mu = {X_mu}, \nX_sigma = {X_sigma}")

X_mu = [ 2.          3.33333333  1.33333333 40.        ], 
X_sigma = [0.81649658 1.24721913 0.47140452 4.0824829 ]


In [16]:
# Show the normalized training inputs.
print(X_norm)

[[-1.22474487  1.33630621 -0.70710678  1.22474487]
 [ 0.         -0.26726124  1.41421356  0.        ]
 [ 1.22474487 -1.06904497 -0.70710678 -1.22474487]]


In [17]:
def gradient_descent(x, y, w, b, iterations, alpha):
    """Run batch gradient descent for the linear model, printing the cost each step.

    Note: this cell re-declares the `gradient_descent` already defined in an
    earlier cell; from here on this definition is the one that is called.

    Args:
        x: 2-D array of examples by features, passed to `gradient` and `cost`.
        y: targets, one per example.
        w: initial weights.
        b: initial bias.
        iterations: number of update steps to perform.
        alpha: learning rate that scales each gradient step.

    Returns:
        (w, b, c): the final weights, the final bias, and the cost evaluated at
        those parameters. With ``iterations <= 0`` the inputs are returned
        unchanged together with their cost.
    """
    # Evaluate the cost up front so `c` is always bound; without this,
    # iterations == 0 raised UnboundLocalError at the return statement.
    c = cost(x, y, w, b)

    for i in range(iterations):

        dw, db = gradient(x, y, w, b)

        # Rebind to new values rather than updating in place, so the arrays
        # passed in by the caller are not modified.
        w = w - alpha * dw
        b = b - alpha * db

        c = cost(x, y, w, b)
        print(f"Iteration {i:4d}: Cost {c:8.2f}   ")

    return w, b, c

In [18]:
# Parameters and settings for the run on normalized features.
# One float weight per feature column. np.zeros_like(X_train[0]) would inherit
# X_train's integer dtype, which is the wrong type for weights that are fitted
# to float-valued normalized inputs.
init_w = np.zeros(X_train.shape[1])
init_b = 0
iterations = 3300
alpha = 0.003

In [19]:
# Fit on the normalized features. This rebinds w_final, b_final and c, replacing
# the values obtained from the earlier run on the raw features.
w_final, b_final,c = gradient_descent(X_norm, y_train, init_w, init_b, iterations,  alpha)

Iteration    0: Cost 2315449.23   
Iteration    1: Cost 2297741.82   
Iteration    2: Cost 2280208.81   
Iteration    3: Cost 2262847.93   
Iteration    4: Cost 2245656.97   
Iteration    5: Cost 2228633.73   
Iteration    6: Cost 2211776.05   
Iteration    7: Cost 2195081.81   
Iteration    8: Cost 2178548.92   
Iteration    9: Cost 2162175.31   
Iteration   10: Cost 2145958.96   
Iteration   11: Cost 2129897.88   
Iteration   12: Cost 2113990.10   
Iteration   13: Cost 2098233.68   
Iteration   14: Cost 2082626.72   
Iteration   15: Cost 2067167.34   
Iteration   16: Cost 2051853.70   
Iteration   17: Cost 2036683.97   
Iteration   18: Cost 2021656.37   
Iteration   19: Cost 2006769.12   
Iteration   20: Cost 1992020.50   
Iteration   21: Cost 1977408.80   
Iteration   22: Cost 1962932.32   
Iteration   23: Cost 1948589.40   
Iteration   24: Cost 1934378.42   
Iteration   25: Cost 1920297.76   
Iteration   26: Cost 1906345.84   
Iteration   27: Cost 1892521.10   
Iteration   28: Cost

In [20]:
# A new example must be scaled with the training-set mean and standard
# deviation before it is fed to the model fitted on normalized features.
x_house = np.array([1, 5, 1, 45])
x_house_norm = (x_house - X_mu) / X_sigma
# Reuse the predict helper defined earlier (np.dot(x, w) + b).
x_house_predict = predict(x_house_norm, w_final, b_final)
print(f" predicted After Normalization House Price = ${x_house_predict:0.0f}")

 predicted After Normalization House Price = $1000


In [28]:
# Predict every normalized training example with the parameters fitted on
# X_norm, keeping the results in yp and printing each one as it is computed.
# (No plotting is done in this cell.)
m = len(X_norm)
yp = np.zeros(m)
for i, x_row in enumerate(X_norm):
    yp[i] = np.dot(x_row, w_final) + b_final
    print(yp[i])

999.8992501910944
1999.9056427145165
2999.8985053650595
