In [33]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from matplotlib import pyplot as plt
import os

# Set the KMP_DUPLICATE_LIB_OK flag for this process.
# NOTE(review): presumably a workaround for a duplicate OpenMP runtime in this
# environment -- confirm it is still needed and whether it must precede the imports.
os.environ.update({"KMP_DUPLICATE_LIB_OK": "True"})

In [34]:
# Load the train and test splits of the California housing data set.
train_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv")
test_df = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_test.csv")

# Scale the label of both splits by the same factor, so median_house_value
# is expressed in units of `scale_factor` from here on.
scale_factor = 1000.0
train_df["median_house_value"] /= scale_factor
test_df["median_house_value"] /= scale_factor

# Shuffle the training rows with a seeded generator so that a fresh
# "Restart & Run All" reproduces the same row order (the global NumPy RNG
# used previously gave a different permutation on every run).
shuffle_rng = np.random.default_rng(42)
train_df = train_df.reindex(shuffle_rng.permutation(train_df.index.to_numpy()))

train_df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
14573,-122.16,37.76,36.0,2781.0,574.0,1438.0,519.0,2.4598,155.5
11607,-121.28,37.94,44.0,1406.0,357.0,1489.0,386.0,1.4688,56.8
1644,-117.24,34.59,4.0,5027.0,797.0,1869.0,686.0,3.5507,186.1
2886,-117.75,34.07,52.0,1548.0,348.0,1131.0,343.0,2.63,127.3
11833,-121.34,38.69,16.0,2686.0,516.0,1553.0,529.0,3.7857,112.7


In [35]:
# Width, in degrees, of every latitude / longitude bucket.
resolution_in_degrees = 0.4


def _degree_bin_edges(values, step):
    """Return bucket edges from int(min(values)) up to, but excluding, int(max(values)), spaced by `step`."""
    start = int(min(values))
    stop = int(max(values))
    return list(np.arange(start, stop, step))


# Latitude: numeric source column, then bucketized using edges derived from the training data.
lat_col = tf.feature_column.numeric_column("latitude")
lat_boundaries = _degree_bin_edges(train_df["latitude"], resolution_in_degrees)
latitude = tf.feature_column.bucketized_column(lat_col, boundaries=lat_boundaries)

# Longitude: same treatment as latitude.
long_col = tf.feature_column.numeric_column("longitude")
long_boundaries = _degree_bin_edges(train_df["longitude"], resolution_in_degrees)
longitude = tf.feature_column.bucketized_column(long_col, boundaries=long_boundaries)

# Cross the two bucketized columns (hash_bucket_size=100).
lat_x_long = tf.feature_column.crossed_column([latitude, longitude], hash_bucket_size=100)

# Wrap the crossed column as an indicator column (one-hot encoding) so it can feed a dense layer.
crossed_feature = tf.feature_column.indicator_column(lat_x_long)

# The crossed feature is the only feature the model receives.
feature_cols = [crossed_feature]
cross_feature_layer = layers.DenseFeatures(feature_cols)

In [36]:
def create_model(_learning_rate, _feature_layer):
    """Build and compile a Sequential regression model.

    The model is the given feature layer followed by one Dense layer with a
    single unit. It is compiled with RMSprop, mean squared error as the loss,
    and root mean squared error as the reported metric.

    Args:
        _learning_rate: Learning rate passed to the RMSprop optimizer.
        _feature_layer: Layer placed first in the model (in this notebook a
            DenseFeatures layer).

    Returns:
        The compiled tf.keras.Sequential model.
    """
    _optimizer = tf.keras.optimizers.RMSprop(learning_rate=_learning_rate)
    _model = tf.keras.Sequential(
        [
            _feature_layer,
            # NOTE(review): input_shape=(1,) is kept as in the original; it is
            # declared on the second layer -- confirm whether it is still wanted.
            layers.Dense(units=1, input_shape=(1,)),
        ]
    )
    _model.compile(
        optimizer=_optimizer,
        loss="mean_squared_error",
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )
    return _model

def train_model(_model, _dataset, _epochs, _batch_size, _label_name):
    """Fit the model on a dataset and return its per-epoch RMSE.

    Every column of `_dataset` except `_label_name` is converted to a NumPy
    array and passed to `fit` as a dict of features keyed by column name; the
    label column is passed as the target.

    Args:
        _model: Object exposing a Keras-style `fit` method.
        _dataset: Table of features and label supporting `.items()` and
            column lookup (in this notebook a pandas DataFrame).
        _epochs: Number of epochs forwarded to `fit`.
        _batch_size: Batch size forwarded to `fit`.
        _label_name: Name of the label column in `_dataset`.

    Returns:
        A tuple `(epochs, rmse)`: the `epoch` attribute of the fit history and
        the "root_mean_squared_error" column of the history as a pandas Series.
    """
    _labels = np.array(_dataset[_label_name])

    _features = {}
    for _column, _values in _dataset.items():
        if _column == _label_name:
            continue  # the label is the target, not an input feature
        _features[_column] = np.array(_values)

    _history = _model.fit(
        x=_features,
        y=_labels,
        batch_size=_batch_size,
        epochs=_epochs,
        shuffle=True,
        verbose=0,
    )

    _metrics = pd.DataFrame(_history.history)
    return _history.epoch, _metrics["root_mean_squared_error"]

def plot_loss(_epochs, _rmse):
    """Plot RMSE against epoch in a new figure and show it.

    Args:
        _epochs: X values, one per trained epoch.
        _rmse: Y values; must provide `.min()` and `.max()` (in this notebook
            a pandas Series).
    """
    # Pad the y-axis 5% below the smallest and 5% above the largest value.
    _y_low = _rmse.min() * 0.95
    _y_high = _rmse.max() * 1.05

    plt.figure()
    plt.plot(_epochs, _rmse, label="Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Root Mean Squared Error")
    plt.ylim(_y_low, _y_high)
    plt.legend()
    plt.show()

In [37]:
# Hyperparameters and label column for this run.
learning_rate = 0.05
epochs = 30
batch_size = 100
label_name = "median_house_value"

# Build the model on the crossed-feature layer, train it, and plot RMSE per epoch.
model = create_model(learning_rate, cross_feature_layer)
epochs_trained, rmse = train_model(model, train_df, epochs, batch_size, label_name)
plot_loss(epochs_trained, rmse)

print("\nEvaluate against test dataset:")
# Split the test frame into the label array and a dict of per-column feature arrays.
test_labels = np.array(test_df[label_name])
test_features = {
    name: np.array(column)
    for name, column in test_df.drop(columns=[label_name]).items()
}
model.evaluate(test_features, test_labels, batch_size=batch_size)

Consider rewriting this model with the Functional API.


ValueError: in user code:

    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\keras\engine\training.py:787 train_step
        y_pred = self(x, training=True)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\keras\engine\base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\keras\engine\sequential.py:383 call
        outputs = layer(inputs, **kwargs)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\keras\engine\base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\keras\feature_column\dense_features.py:168 call  **
        tensor = column.get_dense_tensor(transformation_cache,
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:4303 get_dense_tensor
        return transformation_cache.get(self, state_manager)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:2460 get
        transformed = column.transform_feature(self, state_manager)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:4239 transform_feature
        id_weight_pair = self.categorical_column.get_sparse_tensors(
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:4097 get_sparse_tensors
        transformation_cache.get(self, state_manager), None)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:2460 get
        transformed = column.transform_feature(self, state_manager)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:4045 transform_feature
        ids_and_weights = key.get_sparse_tensors(transformation_cache,
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:2852 get_sparse_tensors
        input_tensor = transformation_cache.get(self, state_manager)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:2460 get
        transformed = column.transform_feature(self, state_manager)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:2774 transform_feature
        source_tensor = transformation_cache.get(self.source_column, state_manager)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:2460 get
        transformed = column.transform_feature(self, state_manager)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:2669 transform_feature
        input_tensor = transformation_cache.get(self.key, state_manager)
    C:\Users\Tenessine\.conda\envs\Pleiades\lib\site-packages\tensorflow\python\feature_column\feature_column_v2.py:2444 get
        raise ValueError('Feature {} is not in features dictionary.'.format(key))

    ValueError: Feature longitue is not in features dictionary.
