<a href="https://colab.research.google.com/github/Tershire/Python_DL/blob/main/S01/L010_gradient_descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# 2023 OCT 21

In [3]:
import numpy as np
import pandas as pd
import sklearn

### get data

In [4]:
# california housing dataset
# from sklearn.datasets import load_boston
# from sklearn.datasets import fetch_california_housing
# cali = fetch_california_housing()

# print(cali["feature_names"])

# print(cali.DESCR)

# boston housing dataset
boston_df = pd.read_csv("./drive/MyDrive/Colab Notebooks/Python_DL/boston_housing.csv")

boston_df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

display(boston_df)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [5]:
boston_targets = boston_df.iloc[:, -1].values

# gradient descent & linear regression

* define functions

In [6]:
# w0: <1 x 1>
# w: <M x 1>
# X: <M x N>
# y: <1 x N>

def compute_gradient_of_cost(w0, w, X, y):
    N = y.shape[1]

    ones = np.ones((1, N))

    # y_hat = w0*ones + w.T @ X
    y_hat = w0 + w.T @ X
    error = y - y_hat

    partial_w0 = -(2/N)*(ones @ error.T)
    partial_w = -(2/N)*(X @ error.T)
    gradient = np.concatenate((partial_w0, partial_w), axis=0)

    # MSE (mean squared error)
    loss_MSE = np.mean(np.square(error))

    return gradient, loss_MSE

In [7]:
def gradient_descent(X, y, alpha=0.01, max_epochs=1000, verbose=True):
    M = X.shape[0]

    w0 = 0
    w = np.zeros((M, 1))

    for i in range(max_epochs):
        gradient, loss_MSE = compute_gradient_of_cost(w0, w, X, y)
        w0 -= alpha*gradient[0, [0]]
        w -= alpha*gradient[1:, [0]]

        if verbose:
            if i == max_epochs or i%(max_epochs//10) == 0:
                print("Epoch:", i, "-", "loss (MSE):", loss_MSE, "w0:", w0, "w1:", w)

    return w0, w

* pre-process features and run gradient descent

In [8]:
# scale features
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X = scaler.fit_transform(boston_df[["RM", "LSTAT"]]).T
y = boston_targets.reshape(1, -1)
print("X.shape:", X.shape, "y.shape:", y.shape, "\n")

# run gradient descent
w0, w = gradient_descent(X, y, alpha=0.01, max_epochs=1000, verbose=True)

# result
print("\n=RESULT-")
print("w0:", w0)
print("w", w)

X.shape: (2, 506) y.shape: (1, 506) 

Epoch: 0 - loss (MSE): 592.1469169960474 w0: [0.45065613] w1: [[0.252369  ]
 [0.10914761]]
Epoch: 100 - loss (MSE): 76.5663691608906 w0: [15.50153711] w1: [[9.65323012]
 [2.03623996]]
Epoch: 200 - loss (MSE): 66.92841493665178 w0: [16.45516227] w1: [[11.51626124]
 [-0.07718644]]
Epoch: 300 - loss (MSE): 60.60156752548139 w0: [16.52282922] w1: [[12.79231979]
 [-2.23984325]]
Epoch: 400 - loss (MSE): 55.37990609595617 w0: [16.53315873] w1: [[13.92764442]
 [-4.22003672]]
Epoch: 500 - loss (MSE): 51.065069214970315 w0: [16.53828664] w1: [[14.9596517]
 [-6.020131 ]]
Epoch: 600 - loss (MSE): 47.499534683029175 w0: [16.54164232] w1: [[15.89926458]
 [-7.65563   ]]
Epoch: 700 - loss (MSE): 44.55316426595381 w0: [16.54358623] w1: [[16.75496463]
 [-9.14146437]]
Epoch: 800 - loss (MSE): 42.11842255099923 w0: [16.54427297] w1: [[ 17.53437127]
 [-10.49125873]]
Epoch: 900 - loss (MSE): 40.106451124369364 w0: [16.54383202] w1: [[ 18.24440529]
 [-11.71740124]]

=RES

* predict

In [9]:
predictions = w0 + w.T @ X
print(predictions.shape)
# print(predictions)

boston_df["PREDICTED_MDEV"] = predictions.ravel()
display(boston_df)

(1, 506)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV,PREDICTED_MDEV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0,26.298946
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6,24.270017
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7,28.842337
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4,28.551276
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2,28.244935
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4,24.704910
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6,23.202062
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9,27.516496
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0,26.560756


### gradient descent: TensorFlow-Keras
* #node = 1

In [10]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [23]:
model = Sequential([Dense(1, input_shape=(2, ), activation=None, kernel_initializer="zeros", bias_initializer="ones")])

In [24]:
model.compile(optimizer=Adam(learning_rate=0.01), loss="mse", metrics=["mse"])
model.fit(X.T, y.T, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x7e4f9808baf0>

In [25]:
# predict
predictions_keras = model.predict(X.T)

boston_df["PREDICTED_MDEV_KERAS"] = predictions_keras
display(boston_df)



Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV,PREDICTED_MDEV,PREDICTED_MDEV_KERAS
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0,26.298946,28.961348
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6,24.270017,25.487286
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7,28.842337,32.618912
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4,28.551276,32.396362
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2,28.244935,31.583714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4,24.704910,25.999460
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6,23.202062,24.026537
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9,27.516496,30.529972
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0,26.560756,29.076572
