In [1]:
# set up
import os
import matplotlib.pyplot as plt
root = ""
chapter_id ="Tuning"
# path of folder
images_path = os.path.join(root,"images",chapter_id)
os.makedirs(images_path,exist_ok=True)
def save_fig(fig_id,tigh_layout,fig_extension="png",resolution=True):
    # path until file name
    path = os.path.join(images_path,fig_id + "." +fig_extension)
    print("save figure",fig_id)
    if tigh_layout:
        plt.tight_layout()
    plt.savefig(path,fig_extension=fig_extension,resolution=resolution)
 

In [2]:
# load data set
import tensorflow as tf 
import keras
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
print(housing.DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [3]:
# SPLIT DATA TRAIN FULL AND TESTING 
X_train_full,X_test,y_train_full,y_test = train_test_split(housing['data'],housing['target'],random_state=42)
#SPLIT DATA TRAIN FULL TO TRAIN AND VALIDATION
X_train,X_valid,y_train,y_valid = train_test_split(X_train_full,y_train_full,random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.fit_transform(X_valid)
X_test = scaler.fit_transform(X_test)

# Hyperparameter Tuning

In [4]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [5]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [6]:
# Regresion model
def build_model(n_hidden=1, n_neurons=30,learning_rate=3e-3,input_shape=[8]):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons,activation='relu'))
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss='mse',optimizer=optimizer)
    return model


In [7]:
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)

  keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)


In [9]:
keras_reg.fit(X_train,y_train,epochs=100,
              validation_data=[X_valid,y_valid],
              callbacks=[keras.callbacks.EarlyStopping(patience=10)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1b98e7ceee0>

In [10]:
mse_test = keras_reg.score(X_test,y_test)



In [12]:
X_new = X_test[:3]
y_pred = keras_reg.predict(X_new)
y_pred



array([0.64688206, 1.642302  , 4.772844  ], dtype=float32)

In [13]:
y_test[:3]

array([0.477  , 0.458  , 5.00001])

In [16]:
y_pred = keras_reg.predict(X_test)



In [18]:
from sklearn.metrics import r2_score
r2_score(y_pred,y_test)

0.6612818771164024

In [38]:
# hyperparameter tuning used randomizedSearchCV
# digunakan untuk sebuah randomisasi dalam format yang ter scale 
# https://docs.scipy.org/doc/scipy-1.2.1/reference/generated/scipy.stats.reciprocal.html
from scipy.stats import reciprocal
# sama kayak reciprocal cuma format standarnya berbentuk exponensial
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html 
from scipy.stats import expon
# reciprocal(3e-4, 3e-2).rvs(1000).tolist()
reciprocal(90,100).rvs(1000).tolist()

[96.61036220656865,
 97.88060632725822,
 92.40666400403123,
 96.12474044941891,
 95.58817519130913,
 98.25411258886575,
 99.01540759060205,
 90.1153497908005,
 96.6237680060932,
 90.49287486699858,
 95.35794968563704,
 92.76921065285431,
 92.95651573338,
 93.40992101020801,
 96.08847322344373,
 93.22401376896615,
 97.22299136009003,
 93.9188286551209,
 90.65049509184983,
 97.74744021720154,
 92.75089163557213,
 94.19868406742253,
 96.74013775071772,
 93.20836048854954,
 90.53817312339861,
 93.61645643649642,
 99.41641690301806,
 96.29564912378136,
 96.59790558679582,
 96.19979667257857,
 91.90685189127967,
 94.05554781781616,
 97.41001947527522,
 90.96641487528532,
 92.67366855332948,
 92.65869480931258,
 94.19126106945197,
 99.79337780043105,
 90.64237087767941,
 95.055437551229,
 91.71698867346237,
 99.69152334033319,
 91.08083291389114,
 93.90966222911773,
 97.27612780880129,
 96.93511994599605,
 94.09910896241736,
 93.3466175581908,
 93.85042725628963,
 92.54120027138087,
 91.96827

In [39]:
expon(10,20).rvs(1000).tolist()

[26.97244906536377,
 42.73950510194796,
 38.55574242691,
 13.34235220819778,
 13.232726846354243,
 16.244260013969708,
 18.959355481473118,
 20.50037007107788,
 32.769770663103024,
 11.167003390427585,
 10.705761370840593,
 19.948666148259477,
 33.89125715066162,
 14.299424294130878,
 30.516782380760525,
 16.017457474444655,
 53.44624912940855,
 55.20775019978245,
 17.05614023175558,
 15.227133048625124,
 20.59690620306006,
 15.50273089236741,
 32.31825219635884,
 44.981435983227506,
 32.36153787377161,
 44.785288930532,
 20.11650168683739,
 13.399568252751191,
 36.784471381437854,
 18.940575837911872,
 32.25042088402471,
 16.311864928669912,
 11.694381250078765,
 108.07636461135083,
 13.396831327292032,
 99.17123290037557,
 85.69013434215007,
 41.57993018123753,
 31.542278591820526,
 27.245856850986517,
 50.21347135269835,
 16.83402428388424,
 22.610254889028177,
 29.319599393525102,
 20.59305072409802,
 21.15437765711922,
 18.018050675033187,
 26.612897333471885,
 48.01915051205327,
