In [55]:
import tensorflow as tf
import numpy as np 
from sklearn.datasets import fetch_california_housing
from IPython.display import clear_output, Image, display, HTML
import math

###### Do not modify here ###### 
def strip_consts(graph_def, max_const_size=32):
    """Return a copy of ``graph_def`` with large constant payloads replaced.

    Every node is copied into a fresh ``tf.GraphDef``. For nodes whose op is
    ``'Const'``, a ``tensor_content`` longer than ``max_const_size`` bytes is
    swapped for a short ``<stripped N bytes>`` placeholder so the serialised
    graph stays small enough to embed in HTML.

    Args:
        graph_def: GraphDef-like object exposing an iterable ``node`` field.
        max_const_size: largest ``tensor_content`` length (in bytes) kept as is.

    Returns:
        A new GraphDef; ``graph_def`` itself is not modified.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a bytes field: assigning a text string
                # raises TypeError on Python 3, so encode the placeholder.
                tensor.tensor_content = ("<stripped %d bytes>" % size).encode("ascii")
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inline through an embedded iframe.

    Args:
        graph_def: a GraphDef, or any object offering ``as_graph_def()``
            (in which case that method is called to obtain the definition).
        max_const_size: accepted for interface compatibility; the
            constant-stripping step is disabled here, so it is unused.
    """
    # Accept graph objects as well as already-extracted definitions.
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()

    # Text form of the graph, quoted so it can be dropped into the script.
    pbtxt_literal = repr(str(graph_def))
    # Random suffix gives each rendered widget its own element id.
    element_id = 'graph' + str(np.random.rand())

    widget_template = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    widget_html = widget_template.format(data=pbtxt_literal, id=element_id)

    iframe_template = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """
    # The widget markup lives inside the srcdoc attribute, so its double
    # quotes must be entity-escaped.
    escaped_widget = widget_html.replace('"', '&quot;')
    display(HTML(iframe_template.format(escaped_widget)))
###### Do not modify  here ######

###### Implement Data Preprocess here ######
housing = fetch_california_housing()
print("Shape of dataset:", housing.data.shape)
print("Shape of label:", housing.target.shape)

# Hold-out split: the first 90% of rows train the model, the rest test it.
# Features and targets have the same number of rows, so one index serves both.
split_at = int(housing.data.shape[0] * 0.9)
training_X, testing_X = housing.data[:split_at], housing.data[split_at:]
training_y, testing_y = housing.target[:split_at], housing.target[split_at:]

# Append a constant column of ones so the bias is learned as one more weight.
training_X = np.hstack([training_X, np.ones((training_X.shape[0], 1))])
testing_X = np.hstack([testing_X, np.ones((testing_X.shape[0], 1))])

# Targets become column vectors of shape (n, 1) to line up with the matmuls.
training_y = training_y.reshape(-1, 1)
testing_y = testing_y.reshape(-1, 1)

###### Implement Data Preprocess here ######

# Problem dimensions, taken from the preprocessed training matrix
# (n_dim already includes the appended bias column).
n_samples = training_X.shape[0]
n_dim = training_X.shape[1]

# Start from an empty default graph so re-running the cell does not
# accumulate duplicate ops.
tf.reset_default_graph()

# Input placeholders; the leading None lets any number of rows be fed.
X_train = tf.placeholder(tf.float64, [None, n_dim], name="X_train")
y_train = tf.placeholder(tf.float64, [None, 1], name="y_train")

X_test = tf.placeholder(tf.float64, [None, n_dim], name="X_test")
y_test = tf.placeholder(tf.float64, [None, 1], name="y_test")


# Closed-form least squares fitted on the training set only.
# The normal equations (X^T X) w = X^T y are solved as a linear system
# rather than by forming an explicit inverse, which is cheaper and
# numerically better behaved.
XtX = tf.matmul(X_train, X_train, transpose_a=True, name='XtX')
Xty = tf.matmul(X_train, y_train, transpose_a=True, name='Xty')
w = tf.matrix_solve(XtX, Xty, name='w')

# Predictions. Each op gets its own name; previously both were called
# 'y_hat', so TensorFlow silently renamed the second one to 'y_hat_1'.
y_hat_train = tf.matmul(X_train, w, name='y_hat_train')
y_hat_test = tf.matmul(X_test, w, name='y_hat_test')

# Error rate: mean of |prediction - target| / target.
# NOTE(review): divides by the target, so a zero target would yield inf/nan
# — confirm the labels are strictly positive.
error_rate_train = tf.reduce_mean(tf.abs(y_hat_train - y_train)/y_train)
error_rate_test = tf.reduce_mean(tf.abs(y_hat_test - y_test)/y_test)

# No tf.Variable is created in this cell, so this initializer has nothing
# to initialise; it is kept because the session cell runs it.
init = tf.global_variables_initializer()


###### Start TF session ######
with tf.Session() as sess:

    sess.run(init)

    # Render the structure of the graph that is about to be evaluated.
    show_graph(tf.get_default_graph().as_graph_def())

    # One run evaluates the weights and both error rates together.
    feeds = {
        X_train: training_X,
        y_train: training_y,
        X_test: testing_X,
        y_test: testing_y,
    }
    fetches = [w, error_rate_train, error_rate_test]
    w_res, e_train, e_test = sess.run(fetches, feed_dict=feeds)

    # Report the fitted weights and the error rate on each split.
    print('Result weights:', w_res)
    print('Error rate in training: ', e_train)
    print('Error rate in testing: ', e_test)

###### Start TF session ######

Shape of dataset: (20640, 8)
Shape of label: (20640,)


Result weights: [[  4.42440871e-01]
 [  9.84845976e-03]
 [ -1.13854700e-01]
 [  6.81118426e-01]
 [ -1.69407374e-06]
 [ -5.61851833e-03]
 [ -4.31983935e-01]
 [ -4.46590962e-01]
 [ -3.80329231e+01]]
Error rate in training:  0.316859910108
Error rate in testing:  0.344205405992


In [2]:
# Peek at the first raw feature row (no bias column; that is only added to training_X / testing_X).
housing.data[0]

array([   8.3252    ,   41.        ,    6.98412698,    1.02380952,
        322.        ,    2.55555556,   37.88      , -122.23      ])

In [3]:
# Target value paired with the first sample.
housing.target[0]

4.5259999999999998

In [4]:
# Names of the columns in housing.data, in column order.
housing.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [13]:
# Number of training rows (set from training_X.shape[0] in the main cell).
n_samples

18576

In [10]:
# One preprocessed training row; the trailing 1.0 is the appended bias column.
training_X[10]

array([   3.2031    ,   52.        ,    5.47761194,    1.07960199,
        910.        ,    2.26368159,   37.85      , -122.26      ,    1.        ])

In [5]:
# Shape of the training matrix: rows x (features + bias column).
training_X.shape

(18576, 9)

In [46]:
# NOTE(review): the recorded output below, (2064, 8), does not match the
# (n, 1) reshape applied to testing_y in the preprocessing cell — it looks
# like stale output from an earlier, out-of-order run. Re-run with
# Restart Kernel -> Run All before trusting it.
testing_y.shape

(2064, 8)