<p style="float:right;"><i>Created By Maroyi Bisoka on 30/01/2025</i></p>

In [27]:
import math, copy
import numpy as np

<div>
    <h2>Dataset with Additional Features</h2>
    <table>
        <thead>
            <tr>
                <th>Size</th>
                <th>Age of House (years)</th>
                <th>Bedrooms</th>
                <th>Floors</th>
                <th>Price</th>
            </tr>
        </thead>
        <tbody>
            <tr><td>23</td><td>12</td><td>2</td><td>1</td><td>105</td></tr>
            <tr><td>40</td><td>8</td><td>3</td><td>2</td><td>150</td></tr>
            <tr><td>36</td><td>15</td><td>2</td><td>1</td><td>105</td></tr>
            <tr><td>34</td><td>10</td><td>4</td><td>2</td><td>126</td></tr>
            <tr><td>26</td><td>5</td><td>1</td><td>1</td><td>63</td></tr>
            <tr><td>26</td><td>20</td><td>2</td><td>1</td><td>106</td></tr>
            <tr><td>60</td><td>7</td><td>5</td><td>3</td><td>150</td></tr>
            <tr><td>36</td><td>3</td><td>2</td><td>2</td><td>157</td></tr>
            <tr><td>29</td><td>18</td><td>3</td><td>1</td><td>79</td></tr>
            <tr><td>40</td><td>9</td><td>4</td><td>2</td><td>154</td></tr>
        </tbody>
    </table>
</div>

In [29]:
# # Define the dataset
x_train = np.array([
    [23, 12, 2, 1],
    [40, 8, 3, 2],
    [36, 15, 2, 1],
    [34, 10, 4, 2],
    [26, 5, 1, 1],
    [26, 20, 2, 1],
    [60, 7, 5, 3],
    [36, 3, 2, 2],
    [29, 18, 3, 1],
    [40, 9, 4, 2]
])

y_train = np.array([105, 150, 105, 126, 63, 106, 150, 157, 79, 154])


<h1>Feature scaling</h1>
<div>
    <hp>Scaling method used: <strong>z-score normalization</strong> </p>
    <div class="formula" style="font-weight: bold;">
        z = (x - μ) / σ
    </div>
    <div class="explanation">
        <p>Where:</p>
        <ul>
            <li><strong>z</strong> = Z-score (standard score)</li>
            <li><strong>x</strong> = Raw data value</li>
            <li><strong>μ</strong> = Mean of the dataset</li>
            <li><strong>σ</strong> = Standard deviation of the dataset</li>
        </ul>
    </div>
<div class="explanation">
    <p>The Z-score indicates how many standard deviations a data point is from the mean.</p>
</div>
</div>

In [31]:
# Features scaling 
def zscore_normalization(x):
    # mean of each column/feature
    mu = np.mean(x, axis=0) # mu will have shape (n,)
    # standard deviation of each column/feature
    sigma = np.std(x, axis=0) # shape of n with correspond to the number of features
    # normalized values (element-wise operation)
    x_norm = (x - mu) / sigma      
    return x_norm, mu, sigma

In [32]:
x_norm, mu, sigma = zscore_normalization(x_train)
x_norm

array([[-1.18817705,  0.24740596, -0.68599434, -0.90453403],
       [ 0.49507377, -0.51384314,  0.17149859,  0.60302269],
       [ 0.09901475,  0.81834277, -0.68599434, -0.90453403],
       [-0.09901475, -0.13321859,  1.02899151,  0.60302269],
       [-0.89113279, -1.08477996, -1.54348727, -0.90453403],
       [-0.89113279,  1.76990414, -0.68599434, -0.90453403],
       [ 2.47536886, -0.70415541,  1.88648444,  2.11057941],
       [ 0.09901475, -1.4654045 , -0.68599434,  0.60302269],
       [-0.59408853,  1.38927959,  0.17149859, -0.90453403],
       [ 0.49507377, -0.32353086,  1.02899151,  0.60302269]])

In [33]:
from sklearn.preprocessing import scale

In [34]:
# Scaling of sklearn implementation
scale(x_train, axis=0, with_mean=True, with_std=True, copy=True)

array([[-1.18817705,  0.24740596, -0.68599434, -0.90453403],
       [ 0.49507377, -0.51384314,  0.17149859,  0.60302269],
       [ 0.09901475,  0.81834277, -0.68599434, -0.90453403],
       [-0.09901475, -0.13321859,  1.02899151,  0.60302269],
       [-0.89113279, -1.08477996, -1.54348727, -0.90453403],
       [-0.89113279,  1.76990414, -0.68599434, -0.90453403],
       [ 2.47536886, -0.70415541,  1.88648444,  2.11057941],
       [ 0.09901475, -1.4654045 , -0.68599434,  0.60302269],
       [-0.59408853,  1.38927959,  0.17149859, -0.90453403],
       [ 0.49507377, -0.32353086,  1.02899151,  0.60302269]])

In [35]:
def compute_cost(x, y, w, b):
    m = x.shape[0]
    cost = 0.0
    for i in range(m):
        f_wb_i = np.dot(x[i], w) + b
        cost +=  (f_wb_i - y[i])**2
    cost = cost / (2*m)
    return cost

In [36]:
# Compute gradient only one time 
def compute_gradient(x, y, w, b):
    m,n = x.shape # m --> Number of training examples, n --> Number of features
    dj_dw = np.zeros((n,))
    dj_db = 0.0
    for i in range(m):
        f_wb_i = np.dot(x[i], w) + b
        err = f_wb_i - y[i]
        for j in range(n):
           dj_dw[j] += (err * x[i, j]) 
        dj_db += err
    dj_dw /= m
    dj_db /= m
    return dj_dw, dj_db

In [37]:
# Running Gradient descent for a specific number of iterations (epochs)
def gradient_descent(x, y, w_init, b_init, alpha, epochs, cost_function, gradient_function): 
    w = copy.deepcopy(w_init)
    b = b_init
    
    for i in range(epochs):
        # Calculate the gradient and update the parameters using gradient_function
        dj_dw, dj_db = gradient_function(x, y, w , b)

        # Update Parameters w and b
        b = b - alpha * dj_db
        w = w - alpha * dj_dw

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i % math.ceil(epochs/10) == 0:
            print(f"Iteration {i:4}: Cost: {cost_function(x, y, w , b):0.2e} ")
    return w, b

In [38]:
m, n = x_train.shape
w_init = np.zeros(n)
b_init = 0.0
epochs = 1_000
alpha = 1.0e-1

In [39]:
w_final, b_final = gradient_descent(x_norm ,y_train, w_init, b_init, alpha, epochs, compute_cost, compute_gradient)

Iteration    0: Cost: 6.14e+03 
Iteration  100: Cost: 1.70e+02 
Iteration  200: Cost: 1.53e+02 
Iteration  300: Cost: 1.45e+02 
Iteration  400: Cost: 1.41e+02 
Iteration  500: Cost: 1.40e+02 
Iteration  600: Cost: 1.39e+02 
Iteration  700: Cost: 1.38e+02 
Iteration  800: Cost: 1.38e+02 
Iteration  900: Cost: 1.38e+02 


In [40]:
# Final w and b
print(f'w_final {w_final}')
print(f'b_final {b_final}')

w_final [-11.65381685  12.55027221 -15.83896296  55.99161463]
b_final 119.49999999999996


In [41]:
def predict_with_norm_value(w, b, x_test, mu, sigma):
    result = np.zeros(x_train.shape[0])
    for i, x in enumerate(x_test):
        x_test_norm = (x - mu) / sigma
        result[i] = np.dot(x_test_norm, w) + b
    return result

In [42]:
# Predictions of our own implementation
pred = predict_with_norm_value(w_final, b_final, x_train, mu, sigma)
pred

array([ 96.67092775, 138.32948397,  88.83564269, 136.44802583,
        90.07173116, 112.31699498, 170.10995458, 144.58452774,
        90.49655519, 127.13615612])

In [43]:
j = compute_cost(x_train, y_train, w_final, b_final)
print(f'Cost j : {j}')

Cost j : 38311.35540195978


## Use sklearn for testing our own implementation
<p>
    Scikit-learn (sklearn) is an open-source machine learning library for Python that provides implementations of numerous data modeling and machine learning algorithms, and provides consistent Python APIs. It supports a standardized and concise model interface across models.
</p>

In [45]:
from sklearn.linear_model import LinearRegression

In [46]:
lm = LinearRegression()

In [47]:
# Running gradient descent
lm.fit(x_train, y_train)

In [48]:
print('sklearn w_final: ',lm.coef_) # parameter w
print('sklearn b_final:', lm.intercept_) # parameter b

sklearn w_final:  [ -1.19369078   2.47674379 -14.07755172  86.12102093]
sklearn b_final: 36.401530175051874


In [49]:
# Predictions of sklearn implementation
lm.predict(x_train)

array([ 96.63348513, 138.47723589,  88.54573632, 136.51531644,
        89.79275799, 112.86636308, 170.09259395, 144.9458318 ,
        90.25425144, 126.87642796])

In [50]:
# Predictions of our own implementation
pred

array([ 96.67092775, 138.32948397,  88.83564269, 136.44802583,
        90.07173116, 112.31699498, 170.10995458, 144.58452774,
        90.49655519, 127.13615612])