In [6]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt

%matplotlib inline

In [14]:
SEED = 0
# Load and view the dataset
cal_housing = np.loadtxt('data/cal_housing.data', delimiter=',')
print("Dataset dimensions: " + str(cal_housing.shape)) # 20640 observations, 8 + 1 variables, last var is the dependent var
print("First row:")
print(cal_housing[0].view())

# Split into inputs and labels
X_data, Y_data = cal_housing[:,:8], cal_housing[:,-1] 
Y_data = (np.asmatrix(Y_data)).transpose()

# Shuffle data to break any symmetry
idx = np.arange(X_data.shape[0]) # no of rows
np.random.seed(SEED)
np.random.shuffle(idx)
print("\nShuffled incides: " + str(idx))
X_data, Y_data = X_data[idx], Y_data[idx] 

print("Check whether shuffling done correctly, next 2 rows should be the same:")
print(X_data[0].view())
print(cal_housing[20303].view())

# Read and divide data into test and train sets 
m = 3* X_data.shape[0] // 10
n = 7* X_data.shape[0] // 10
trainX, trainY = X_data[m:], Y_data[m:] # take the back 70% for trainset
testX, testY = X_data[:m], Y_data[:m] # take the front 30% for testset
print("\nSize of trainset: " + str(len(trainX)))
print("Size of testset: " + str(len(testX)))

# Normalize input
print("\nBefore normalization: " + str(trainX[0]))
trainX = (trainX- np.mean(trainX, axis=0))/ np.std(trainX, axis=0)
testX = (testX- np.mean(testX, axis=0))/ np.std(testX, axis=0)
print("Normalized: " + str(trainX[0]))

Dataset dimensions: (20640, 9)
First row:
[-1.2223e+02  3.7880e+01  4.1000e+01  8.8000e+02  1.2900e+02  3.2200e+02
  1.2600e+02  8.3252e+00  4.5260e+05]

Shuffled incides: [14740 10101 20566 ...  9845 10799  2732]
Check whether shuffling done correctly, next 2 rows should be the same:
[-117.05     32.58     22.     2101.      399.     1551.      371.
    4.1518]
[-1.1918e+02  3.4160e+01  1.2000e+01  4.6000e+02  1.0100e+02  4.0500e+02
  1.0300e+02  5.2783e+00  1.6740e+05]

Size of trainset: 14448
Size of testset: 6192

Before normalization: [-119.79    36.73    52.     112.      28.     193.      40.       1.975]
Normalized: [-0.11131397  0.51396387  1.8562098  -1.15358908 -1.2069376  -1.07186955
 -1.1994956  -1.00030408]


In [None]:
learning_rate = 1e-2
regularizer = tf.contrib.layers.l2_regularizer(scale=10e-6)
num_epoch = 500
def model_init_fn(inputs, is_training):
    hidden_layer_size, num_classes = 10, 6
    initializer = tf.variance_scaling_initializer(scale=1.0, seed=0)
    layers = [
        tf.layers.Dense(hidden_layer_size, activation=tf.nn.sigmoid,
                        kernel_regularizer=regularizer,
                        kernel_initializer=initializer),
        tf.layers.Dense(num_classes,
                        kernel_regularizer=regularizer,
                        kernel_initializer=initializer),
    ]
    model = tf.keras.Sequential(layers)
    return model(inputs)

def optimizer_init_fn():
    return tf.train.GradientDescentOptimizer(learning_rate)

errors, accuracies = train_part34(model_init_fn, optimizer_init_fn, num_epochs=num_epoch)
# print(errors, accuracies)

fig, [ax1, ax2] = plt.subplots(nrows=2, ncols=1)
ax1.plot(range(len(errors)), errors)
ax2.plot(range(len(accuracies)), accuracies)
plt.xlabel('epoch')
ax1.set_ylabel('Train Errors')
ax2.set_ylabel('Validation accuracy')
plt.show()