In [None]:
from sklearn.datasets import fetch_california_housing

In [None]:
# Load the California housing dataset bundle.
california = fetch_california_housing()

# The description is a multi-line string, so print it instead of echoing its repr.
print(california["DESCR"])

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [None]:
# Feature matrix and target vector; the target is continuous, so this is a regression task.
x, y = california.data, california.target

* 조건 : Functional API
    1. training set / validation set / test set 분리!
    2. hidden layer 최소 3개 사용! (노드 수 자율)
    3. early stopping 적용!(patience 7)
    4. validation_data 옵션에 validation set 적용!

In [None]:
# Sanity check: the row counts of features and target must agree.
tuple(arr.shape for arr in (x, y))

((20640, 8), (20640,))

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of all rows as the final test set.
train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=0.2, random_state=2023
)

# The validation set is derived from the remaining training rows (20% of them).
train_x, val_x, train_y, val_y = train_test_split(
    train_x, train_y, test_size=0.2, random_state=2023
)

# The eight features are in heterogeneous units (income, ages, counts, degrees of
# latitude/longitude), which makes gradient-based training of a dense network
# harder. Standardize them, fitting the scaler on the training rows only so that
# no validation/test statistics leak into training.
scaler = StandardScaler()
train_x = scaler.fit_transform(train_x)
val_x = scaler.transform(val_x)
test_x = scaler.transform(test_x)

In [None]:
# Shapes of every split, in the order train / validation / test (features, then target).
tuple(
    part.shape
    for part in (train_x, train_y, val_x, val_y, test_x, test_y)
)

((13209, 8), (13209,), (3303, 8), (3303,), (4128, 8), (4128,))

In [None]:
import tensorflow as tf
from tensorflow import keras

## Modeling: Functional API
# 1. Clear the Keras session so that re-running this cell starts from a clean state.
keras.backend.clear_session()

# Seed TensorFlow's global random generator (after the session reset) so that
# weight initialization is repeatable across "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.random.set_seed(2023)

# 2. Chain the layers together: 8 input features -> three hidden layers -> 1 output.
il = keras.layers.Input(shape=(8,))
hl = keras.layers.Dense(512, activation="relu")(il)
hl = keras.layers.Dense(256, activation="relu")(hl)
hl = keras.layers.Dense(128, activation="relu")(hl)
# No activation on the output layer: the model predicts an unbounded real value.
ol = keras.layers.Dense(1)(hl)

# 3. Declare where the model starts and ends.
model = keras.models.Model(il, ol)

# 4. Compile: mean squared error loss for regression, Adam optimizer.
model.compile(loss="mse", optimizer="adam")

# 5. Summary
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 8)]               0         
                                                                 
 dense (Dense)               (None, 512)               4608      
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 1)                 129       
                                                                 
Total params: 168,961
Trainable params: 168,961
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Early stopping
from tensorflow.keras.callbacks import EarlyStopping

es = EarlyStopping(
    monitor="val_loss",         # quantity to watch
    min_delta=0,                # threshold on the change of the monitored quantity
    patience=7,                 # epochs to tolerate without improvement (the exercise requires 7)
    verbose=1,
    restore_best_weights=True,  # restore the weights from the best epoch when stopping
)

In [None]:
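# val_loss / val_accuracy are the loss / accuracy measured on the validation set.
# This model is compiled with a loss only (no accuracy metric), so only val_loss is logged here.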

In [None]:
# Training
# When the validation set has been split off (and preprocessed) beforehand, pass it
# through validation_data=(...) instead of using validation_split.
# Keep the returned History object so the per-epoch losses can be inspected later;
# binding it to a name also keeps the cell from echoing its memory-address repr.
history = model.fit(
    train_x,
    train_y,
    epochs=1000,  # upper bound only; early stopping ends training much sooner
    verbose=1,
    callbacks=[es],
    validation_data=(val_x, val_y),
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 23: early stopping


<keras.callbacks.History at 0x7f0908ca9370>

In [None]:
# Predict on the held-out test features.
y_pred = model.predict(x=test_x)



In [None]:
# First 50 predictions, flattened to 1-D for a compact display.
y_pred[:50].ravel()

array([1.7940274 , 2.554658  , 1.7956134 , 1.0007962 , 2.1119385 ,
       2.2766237 , 1.9256945 , 2.33584   , 1.4687502 , 2.3990738 ,
       1.5478429 , 1.688168  , 3.8962944 , 0.9031272 , 1.6952629 ,
       2.3728673 , 1.8524054 , 1.1170719 , 1.4836578 , 1.3336068 ,
       1.5303322 , 2.771842  , 1.7354188 , 4.6625934 , 2.6525981 ,
       2.2797272 , 2.9166858 , 2.1794548 , 1.8973312 , 1.9040442 ,
       1.1032089 , 3.153931  , 2.416744  , 2.0227075 , 2.1431808 ,
       2.3155396 , 1.6199954 , 2.6577682 , 2.1236684 , 2.45876   ,
       3.2462153 , 4.053288  , 0.93441707, 1.34983   , 1.1343594 ,
       2.8023098 , 2.554784  , 1.1651134 , 1.2648423 , 2.044886  ],
      dtype=float32)

In [None]:
# Ground-truth targets for the same 50 rows previewed from y_pred, so the two
# arrays can be compared element by element.
test_y[:50]

array([3.656, 2.01 , 2.132, 0.986, 1.53 , 1.326, 2.125, 3.068, 1.573,
       1.766])