# ex_boston_house

## 0. 連接雲端硬碟，起始準備

In [None]:
# Mount Google Drive and do the initial setup

from google.colab import drive
drive.mount('/content/drive')

# Change into the exercise folder, then print the working directory and list its contents
%cd /content/drive/MyDrive/class_AI/ex_boston_housing/
!pwd
!ls

## 1. MLP 模型 - Regression

In [None]:
# Import packages and modules
# NOTE(review): IPython magic; presumably selects TensorFlow 2.x on Colab - confirm it is still needed
%tensorflow_version 2.x
import tensorflow
print(tensorflow.__version__)

import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


## 資料預處理
比較方法一與方法二
- (1) DIY，使用標準化
- (2) 使用 Keras 內建資料集（`boston_housing.load_data()`）；載入後的特徵似乎未經標準化，需再確認

### 資料預處理 (方法一：DIY)

In [None]:
# Read the dataset

# Load the Boston housing dataset from a CSV file in the current working directory
df = pd.read_csv("./boston_housing.csv")

# Preview the first rows and the (rows, columns) shape
print(df.head())
print(df.shape)


In [None]:
# Convert the DataFrame into a NumPy array and display it
dataset = df.to_numpy()
print(dataset)

In [None]:
# Split the array column-wise: the first 13 columns are the features,
# column index 13 is the label
x, y = dataset[:, :13], dataset[:, 13]


In [None]:
# Split into feature data (first 13 columns) and label data (column index 13)
x = dataset[:, 0:13]
y = dataset[:, 13]

# The first 404 rows are the training set; the remaining rows are the test set
# (102 rows when the CSV holds the full 506 samples).
nb_train = 404

# Feature standardization.
# The mean/std come from the training rows ONLY, so no test-set statistics
# leak into training. The expression builds a new array instead of using
# in-place `-=` / `/=`, because `x` is a view of `dataset`: in-place updates
# would overwrite the raw data and fail on read-only arrays.
mean = x[:nb_train].mean(axis=0)
std = x[:nb_train].std(axis=0)
std = np.where(std == 0, 1.0, std)  # guard: a constant feature would divide by zero
x = (x - mean) / std

# Split into training and test sets
x_train, y_train = x[:nb_train], y[:nb_train]
x_test, y_test = x[nb_train:], y[nb_train:]

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)


### 資料預處理 (方法二：直接使用資料集)

In [None]:
# Import from tensorflow.keras, like the other Keras imports in this notebook,
# so the dataset loader and the model come from the same Keras installation.
from tensorflow.keras.datasets import boston_housing

# Load the built-in train/test split. This overwrites any x_train / y_train /
# x_test / y_test produced by the DIY preprocessing cell.
# NOTE(review): no scaling is applied here; the features look unstandardized
# on this path - confirm before comparing results with method 1.
(x_train, y_train), (x_test, y_test) = boston_housing.load_data()
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)


### 定義模型

In [None]:
# Wrapped in a function so that a fresh, untrained model can be created repeatedly
def build_model():
    """Create and compile the regression MLP.

    The input width is read from the global ``x_train`` (its second
    dimension). The network is two 32-unit ReLU layers followed by a single
    linear output unit, compiled with MSE loss, the Adam optimizer and MAE
    as an extra metric.

    Returns:
        The compiled Sequential model.
    """
    n_features = x_train.shape[1]
    layers = [
        Dense(32, input_shape=(n_features,), activation='relu'),
        Dense(32, activation='relu'),
        Dense(1),
    ]
    network = Sequential(layers)

    # Compile the model
    network.compile(loss='mse', optimizer='adam', metrics=['mae'])
    return network

In [None]:
# Cross-validate with the k-fold method
k = 4
nb_val_samples = len(x_train) // k
print('k=', k, '   nb_val_samples=', nb_val_samples)

nb_epochs = 80
mse_scores, mae_scores = [], []
for i in range(k):
    print(f'Processing Fold #{i}')

    # Rows [fold_start, fold_stop) are held out as this fold's validation set
    fold_start = i * nb_val_samples
    fold_stop = fold_start + nb_val_samples
    x_val = x_train[fold_start:fold_stop]
    y_val = y_train[fold_start:fold_stop]

    # Every row outside that window is used for training in this fold
    x_train_p = np.delete(x_train, np.s_[fold_start:fold_stop], axis=0)
    y_train_p = np.delete(y_train, np.s_[fold_start:fold_stop], axis=0)

    # Start from a fresh model so that folds do not share weights
    model = build_model()
    # Train the model
    model.fit(x_train_p, y_train_p, epochs=nb_epochs, batch_size=16, verbose=0)
    # Evaluate on the held-out fold and record both scores
    mse, mae = model.evaluate(x_val, y_val, verbose=0)
    mse_scores.append(mse)
    mae_scores.append(mae)

In [None]:
# Average validation scores over the cross-validation folds
print('MSE_val: ', np.mean(mse_scores))
print('MAE_val: ', np.mean(mae_scores))

# Evaluate on the test data.
# The `model` left over from the cross-validation loop was fitted on only
# part of the training set (the last fold was held out), so train a fresh
# model on the full training set before scoring the test set.
model = build_model()
model.fit(x_train, y_train, epochs=nb_epochs, batch_size=16, verbose=0)

mse, mae = model.evaluate(x_test, y_test, verbose=0)
print('MSE_test: ', mse)
print('MAE_test: ', mae)
