In [1]:
import tensorflow as tf
import numpy as np 
from sklearn.datasets import fetch_california_housing # for getting data
from sklearn.model_selection import train_test_split # split data
from IPython.display import clear_output, Image, display, HTML

In [2]:
###### Do not modify here ###### 
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = graph_def
    #strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
###### Do not modify  here ######

In [29]:
### Get data and set boundary
housing = fetch_california_housing()

# data = housing.data
data = np.delete(housing.data, [7], axis=1)
# features = housing.feature_names
features = housing.feature_names[:-1]
print(features)

test_size = 0.1 # param for test size

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude']


In [30]:
### Preprocessing, set training and test data
def feature_normalize(data):
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    norm_data = (data - mean) /std
    # add an extra column to X_data as biases
    return add_column(norm_data)

# add a column to the last column
def add_column(data):
    new_data = np.ones((data.shape[0], data.shape[1] + 1))
    new_data[:, :-1] = data
    return new_data
    
# Split data into training and test data with test size 0.1
X_train, X_test, y_train, y_test = train_test_split(np.float32(feature_normalize(data)),
                                                    np.float32(housing.target),
                                                    test_size=0.1)

# transform (size, ) to (size, 1)
y_train = y_train.reshape([len(y_train), 1])
y_test = y_test.reshape([len(y_test), 1])

In [31]:
### Create TF structure
X = tf.placeholder(tf.float32, [None, len(features) + 1], name='X_train')
y = tf.placeholder(tf.float32, [None, 1], name='y_train')

# set transpose and multiplication first
X_trans = tf.transpose(X, name='transpose_X')
X_square = tf.matmul(X_trans, X, name='square_X')

# Use name_scope to encapsulate the formula, name it as Weights
with tf.name_scope('Weights'):
    # (X^T*X)^-1 * X * y
    Weights = tf.matmul(tf.matmul(tf.matrix_inverse(X_square), X_trans), y)



In [32]:
###### Start TF session ######
# set tf config 
config = tf.ConfigProto(device_count = {'GPU': 1})
config.gpu_options.allow_growth = True
sess = tf.Session()

# Weights = sess.run(Weights)
# print("Weights: ", Weights)

# Get y_ by multiplying X by W, the last column is bias
# W = tf.placeholder(tf.float32, [feature_len + 1, 1], name='Weights')
with tf.name_scope('Predict_Y'):
    y_predict = tf.matmul(X_test, Weights) 

with tf.name_scope('Get_error_rate'):
    error_rate = tf.reduce_mean(tf.abs(y_predict - y_test)/ y_test)

err = sess.run(error_rate, feed_dict={X: X_train, y: y_train})


print("error rate: %.4f" % err)

show_graph(tf.get_default_graph().as_graph_def())
'''
in order to show a more concise graph, we use tf.name_scope to encapsulate the formula.
As what you see, the graph contains a few cells.
X_train, y_train are placeholder with its name
X_trans is transpose of X by using tf.transpose, whose structure is written by tensorflow.
X_square is multiplication of X and X_trans, obtained from tf.transpose
Weights cell is the most important part. When you expand it, 
you can see the illustration of the formula.
First, inverse the square_X by tf.inverse
Second, multiply it by transpose of X
Last, multiply it by y_train, which is the labels
Therefore, the result is the Weights

Because we try to establish the connection between training and prediction, we run the session in the last step.
After getting the Weights, we can use that to predict our y_test and calculate error rate.
As a result, we directly multiply Weight by X_test, and get the y_predict.
Comparing y_predict with y, we use error rate, which you can see in the Get_error_rate cell.

Before training, we first normalize the data, in case that Weights become too large as Nan.
Moreover, add biases to adjust the result.
For the better performance and expression, combine the biases with Weights, which means there is a column denoted bias.
'''
###### Start TF session ######

error rate: 0.3403


'\nin order to show a more concise graph, we use tf.name_scope to encapsulate the formula.\nAs what you see, the graph contains a few cells.\nX_train, y_train are placeholder with its name\nX_trans is transpose of X by using tf.transpose, whose structure is written by tensorflow.\nX_square is multiplication of X and X_trans, obtained from tf.transpose\nWeights cell is the most important part. When you expand it, \nyou can see the illustration of the formula.\nFirst, inverse the square_X by tf.inverse\nSecond, multiply it by transpose of X\nLast, multiply it by y_train, which is the labels\nTherefore, the result is the Weights\n\nBecause we try to establish the connection between training and prediction, we run the session in the last step.\nAfter getting the Weights, we can use that to predict our y_test and calculate error rate.\nAs a result, we directly multiply Weight by X_test, and get the y_predict.\nComparing y_predict with y, we use error rate, which you can see in the Get_error

In [None]:
##### Draw the picture
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
print(y_predict.shape)
print(y_test.)
ax.scatter(y_test, y_predict)
ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=3)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

In [None]:
# clear tf graph in case of overlapped
tf.reset_default_graph()
sess.close()

In [None]:
##### Draw the picture
# Because connect all the formulas, we can't get y_predict
import matplotlib.pyplot as plt
fig, ax = plt.subplots()

print(housing.feature_names)
all_features = feature_normalize(housing.data)
all_prices = housing.target
feature_names = housing.feature_names

X = all_features.T
Y = all_prices
# T = np.arctan2(Y.reshape(10, 1), X) # for color value

markers = [plt.scatter(X[i], Y, s=1, alpha=.5) for i in range(len(feature_names))]

plt.xlim(-2.5, 4)
plt.ylabel('price')
plt.legend(markers,
           feature_names,
           scatterpoints=4,
           bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,
           ncol=3,                        
           fontsize=12)
plt.show()