# Gradient Descent 구현해보기
- 보스턴 집값 예측 문제에 Gradient Descent를 적용해보자.

## 데이터 로딩 및 데이터프레임 생성

In [1]:
from sklearn.datasets import load_boston
import pandas as pd
import numpy as np

In [2]:
# Fetch the Boston housing bundle (feature matrix, feature names, target vector).
# NOTE(review): load_boston was removed in scikit-learn 1.2 — this cell presumably
# needs an older scikit-learn release; confirm the pinned version.
boston = load_boston()

# Build the feature table and append the target as the trailing PRICE column
# in a single chained expression.
df = pd.DataFrame(data=boston.data, columns=boston.feature_names).assign(PRICE=boston.target)

# Report (rows, columns), then leave the preview as the cell's display value.
print(df.shape)
df.head()

(506, 14)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


## weight, bias를 update하는 함수 생성
- w1 : RM(방의 개수)의 weight
- w2 : LSTAT(하위계층 비율)의 weight
- bias : Bias
- N : 입력 데이터 건수
- MSE 사용

In [21]:
def get_update_weights_value(bias, w1, w2, rm, lstat, target, learning_rate=0.01) :
    """Compute one gradient-descent step for a two-feature linear model under MSE.

    The model is ``w1 * rm + w2 * lstat + bias``. Each returned update equals
    ``learning_rate`` times the partial derivative of the mean squared error
    with respect to that parameter, so the caller applies a step by
    *subtracting* the update from the current parameter value.

    Args:
        bias: Current bias term.
        w1: Current weight applied to ``rm``.
        w2: Current weight applied to ``lstat``.
        rm: First feature column, one value per sample.
        lstat: Second feature column, one value per sample.
        target: Observed values, one per sample; its length is the sample count.
        learning_rate: Step-size multiplier applied to every gradient.

    Returns:
        Tuple ``(bias_update, w1_update, w2_update, mse_loss)`` where
        ``mse_loss`` is the MSE of the predictions made with the *incoming*
        (pre-update) parameters.
    """
    sample_count = len(target)

    # Residual = observed value minus the model's current prediction.
    residual = target - (w1 * rm + w2 * lstat + bias)

    # Shared factor of every update: -(2/N) from differentiating the MSE,
    # times the learning rate.
    step_scale = -(2/sample_count)*learning_rate

    # The bias behaves like a weight on a constant feature of ones.
    ones_column = np.ones((sample_count,))

    # Same dot-product form for all three parameters, in return order.
    bias_update, w1_update, w2_update = (
        step_scale*(column.T @ residual)
        for column in (ones_column, rm, lstat)
    )

    # Loss for the parameters as they were on entry.
    mse_loss = np.mean(np.square(residual))

    return bias_update, w1_update, w2_update, mse_loss

## Gradient Descent 적용하는 함수 생성
- iter_epochs 수만큼 반복적으로 get_update_weights_value 함수를 수행해 가중치 구하기
- 신경망은 데이터 정규화/표준화 작업을 미리 수행해 주어야 함
- 이를 위해 MinMaxScaler를 이용해 0 ~ 1 값들로 바꿔주자

In [36]:
def gradient_descent(features, target, iter_epochs=1000, verbose=True, learning_rate=0.01) :
    """Fit ``w1 * rm + w2 * lstat + bias`` to ``target`` by batch gradient descent.

    Each epoch calls ``get_update_weights_value`` on the full data set and
    subtracts the returned updates from the current parameters.

    Args:
        features: 2-D array-like whose column 0 is the RM feature and column 1
            is the LSTAT feature. It is converted with ``np.asarray`` so that
            nested lists or DataFrames are accepted as well as ndarrays.
        target: Observed values, one per row of ``features``.
        iter_epochs: Number of full-batch update steps to run.
        verbose: When true, print the parameters and the loss after every epoch.
        learning_rate: Step size passed to ``get_update_weights_value``.
            Defaults to 0.01, the value that was previously hard-coded.

    Returns:
        Tuple ``(w1, w2, bias)`` of 1-element numpy arrays holding the final
        parameters.
    """
    # Positional column slicing below needs an ndarray; asarray is a no-op
    # for inputs that already are one.
    features = np.asarray(features)

    # Parameters are kept as 1-element arrays so they broadcast against the
    # feature columns. Weights start at 0 and the bias at 1 (the original note
    # observes that random initialisation would normally be used).
    w1 = np.zeros((1,))
    w2 = np.zeros((1,))
    bias = np.ones((1,))
    print('최초 w1, w2, bias : ', w1, w2, bias)

    # Split out the two feature columns the update function expects.
    rm = features[:, 0]
    lstat = features[:, 1]

    for i in range(iter_epochs) :
        # The reported loss is computed with the parameters *before* this
        # epoch's update is applied.
        bias_update, w1_update, w2_update, loss = get_update_weights_value(bias, w1, w2, rm, lstat,
                                                                           target, learning_rate)
        w1 = w1 - w1_update
        w2 = w2 - w2_update
        bias = bias - bias_update
        if verbose :
            print('Epoch : ', i+1, '/', iter_epochs, '=>', 'w1 : ',w1, 'w2 : ',w2, 'bias : ', bias, 'loss : ',loss)

    return w1, w2, bias
    

In [37]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the two model inputs (RM, LSTAT) with MinMaxScaler before training.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df[['RM', 'LSTAT']])

# Train for 1000 epochs with per-epoch logging. The column order [RM, LSTAT]
# matches the column indices 0 and 1 that gradient_descent reads.
w1, w2, bias = gradient_descent(scaled_features, df['PRICE'].values, iter_epochs=1000, verbose=True)
# Print a banner followed by the final fitted parameters.
print('#### 최종 w1, w2, bias ####')
print(w1, w2, bias)

최초 w1, w2, bias :  [0.] [0.] [1.]
Epoch :  1 / 1000 => w1 :  [0.24193162] w2 :  [0.10311943] bias :  [1.43065613] loss :  548.0813043478261
Epoch :  2 / 1000 => w1 :  [0.47767212] w2 :  [0.20269304] bias :  [1.84955238] loss :  522.964778344195
Epoch :  3 / 1000 => w1 :  [0.70739021] w2 :  [0.29881838] bias :  [2.25700994] loss :  499.19625820107575
Epoch :  4 / 1000 => w1 :  [0.93124998] w2 :  [0.39159032] bias :  [2.65334123] loss :  476.7031232605375
Epoch :  5 / 1000 => w1 :  [1.14941104] w2 :  [0.48110116] bias :  [3.03885015] loss :  455.41666565492966
Epoch :  6 / 1000 => w1 :  [1.36202867] w2 :  [0.56744065] bias :  [3.41383227] loss :  435.2718794853261
Epoch :  7 / 1000 => w1 :  [1.56925388] w2 :  [0.65069613] bias :  [3.7785751] loss :  416.20726135905875
Epoch :  8 / 1000 => w1 :  [1.77123356] w2 :  [0.73095251] bias :  [4.13335831] loss :  398.1646216743121
Epoch :  9 / 1000 => w1 :  [1.96811059] w2 :  [0.8082924] bias :  [4.47845392] loss :  381.0889060727257
Epoch :  10 

## 계산된 weight와 bias를 이용해 가격 예측

- 예측에 사용하는 feature도 학습 때와 동일하게 0~1 사이로 스케일링된 값을 사용해야 함

In [38]:
# Apply the fitted linear model to the same scaled inputs used for training:
# column 0 (RM) times w1, plus column 1 (LSTAT) times w2, plus the bias.
pred = scaled_features[:, 0]*w1 + scaled_features[:, 1]*w2 + bias
# Store the predictions next to the actual PRICE column for comparison.
df['PREDICTED_PRICE'] = pred
# Preview the first rows as the cell's display value.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,26.322
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,24.281207
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,28.830886
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,28.557652
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,28.22912


## keras 이용해서 보스턴 주택가격 퍼셉트론 구현

In [40]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [44]:
# Single-unit linear layer over the two scaled features: no activation,
# kernel initialised to zeros and bias to ones.
model = Sequential()
model.add(
    Dense(
        units=1,
        input_shape=(2,),
        activation=None,
        kernel_initializer='zeros',
        bias_initializer='ones',
    )
)
# Original author's note: prefer to avoid the Sequential API where possible.
# Optimise MSE with Adam (learning rate 0.01), also reporting MSE as a metric.
model.compile(loss='mse', optimizer=Adam(learning_rate=0.01), metrics=['mse'])
# Train for 1000 epochs; the fit result is left as the cell's display value.
model.fit(x=scaled_features, y=df['PRICE'].values, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<tensorflow.python.keras.callbacks.History at 0x1bb3ffa0ac0>

In [45]:
# Predict prices with the trained Keras model on the same scaled features.
# NOTE(review): presumably one predicted value per row — confirm the output shape.
predicted = model.predict(scaled_features)
# Store the Keras predictions alongside PRICE and the manual-GD predictions.
df['KERAS_PREDICTED_PRICE'] = predicted
# Preview the first rows as the cell's display value.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,PREDICTED_PRICE,KERAS_PREDICTED_PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0,26.322,28.974117
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6,24.281207,25.501675
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7,28.830886,32.630707
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4,28.557652,32.407974
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2,28.22912,31.596006
