In [15]:
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()

In [16]:
print(housing['DESCR'])
print('\n'*3)
print('Keys:', ', '.join(housing.keys()))

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [17]:
features = pd.DataFrame(housing['data'], columns=housing['feature_names'])
target = pd.Series(housing['target'], name=housing['target_names'][0])

In [18]:
features.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [19]:
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   AveOccup    20640 non-null  float64
 6   Latitude    20640 non-null  float64
 7   Longitude   20640 non-null  float64
dtypes: float64(8)
memory usage: 1.3 MB


In [20]:
target.head()

0    4.526
1    3.585
2    3.521
3    3.413
4    3.422
Name: MedHouseVal, dtype: float64

In [21]:
scaler = StandardScaler()
features = pd.DataFrame(scaler.fit_transform(features), columns=features.columns, index=features.index)

features.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,2.344766,0.982143,0.628559,-0.153758,-0.974429,-0.049597,1.052548,-1.327835
1,2.332238,-0.607019,0.327041,-0.263336,0.861439,-0.092512,1.043185,-1.322844
2,1.782699,1.856182,1.15562,-0.049016,-0.820777,-0.025843,1.038503,-1.332827
3,0.932968,1.856182,0.156966,-0.049833,-0.766028,-0.050329,1.038503,-1.337818
4,-0.012881,1.856182,0.344711,-0.032906,-0.759847,-0.085616,1.038503,-1.337818


In [22]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=.2, random_state=42)
X_train.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
14196,-0.321654,0.346478,-0.166259,-0.190451,0.772251,0.059808,-1.367976,1.267645
8267,-0.03062,1.617807,-0.386181,-0.117472,-0.09844,-0.128306,-0.871699,0.703627
17445,0.150349,-1.957806,0.087641,-0.2354,-0.450778,-0.033453,-0.455012,-0.454356
14265,-1.014947,0.584852,-0.576442,-0.13267,-0.006602,0.08894,-1.37734,1.227714
2271,-0.166583,1.141059,0.339282,0.079205,-0.486983,-0.074203,0.537543,-0.114948


In [23]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=X_train.shape[1:]))
model.add(tf.keras.layers.Dense(32, activation='relu'))

model.add(tf.keras.layers.Dense(1))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 64)                576       
                                                                 
 dense_4 (Dense)             (None, 32)                2080      
                                                                 
 dense_5 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2689 (10.50 KB)
Trainable params: 2689 (10.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [36]:
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
model.fit(X_train, y_train, epochs=10, verbose=2)

model.evaluate(X_test, y_test, verbose=2)

Epoch 1/10
516/516 - 1s - loss: 0.2624 - mae: 0.3475 - 1s/epoch - 3ms/step
Epoch 2/10
516/516 - 1s - loss: 0.2726 - mae: 0.3471 - 807ms/epoch - 2ms/step
Epoch 3/10
516/516 - 1s - loss: 0.2606 - mae: 0.3452 - 790ms/epoch - 2ms/step
Epoch 4/10
516/516 - 1s - loss: 0.3167 - mae: 0.3469 - 1s/epoch - 2ms/step
Epoch 5/10
516/516 - 1s - loss: 0.2650 - mae: 0.3446 - 1s/epoch - 2ms/step
Epoch 6/10
516/516 - 1s - loss: 0.2678 - mae: 0.3443 - 1s/epoch - 2ms/step
Epoch 7/10
516/516 - 1s - loss: 0.2608 - mae: 0.3432 - 1s/epoch - 2ms/step
Epoch 8/10
516/516 - 1s - loss: 0.2613 - mae: 0.3432 - 815ms/epoch - 2ms/step
Epoch 9/10
516/516 - 1s - loss: 0.2597 - mae: 0.3416 - 765ms/epoch - 1ms/step
Epoch 10/10
516/516 - 1s - loss: 0.2552 - mae: 0.3419 - 743ms/epoch - 1ms/step
129/129 - 0s - loss: 0.2874 - mae: 0.3617 - 313ms/epoch - 2ms/step


[0.2873994708061218, 0.36171698570251465]

In [25]:
y_pred = model.predict(X_test.head(1))

print(y_pred[0][0], y_test.head(1).values[0])

0.5378594 0.477
