In [15]:
# Tensorflow: Library created by Google 
import pandas as pd # work with data as tables
import numpy as np # So we can use number matrices. Both pandas and TensorFlow need it. 
import matplotlib.pyplot as plt  # Visualize the things
import tensorflow as tf          # Fire from the gods

In [16]:
# step 1 load data

dataframe = pd.read_csv('data.csv') # dataframe
# we removed the features we don't care about
dataframe = dataframe.drop(["index", "price", "sq_price"], axis=1)
dataframe = dataframe.head(10) # we decided to use the first 10 columns 
# we could've also used = dataframe[0:10]
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [17]:
# step 2 - add labels
# 1 is good buy and 0 is bad buy
dataframe.loc[:, ('y1')] = [1,1,1,0,0,1,0,1,1,1]
# y2 is the negation of y1, opposite ( we don't like a house)
dataframe.loc[:, ('y2')] = dataframe['y1'] == 0
# turn TRUE/FALSE values into 1s and 0s
dataframe.loc[:, ('y2')] = dataframe['y2']. astype(int)
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [18]:
# Step 3- prepare data for tensorflow
# Tensors: they are generic version of vectors and matrices
# vector - is a list of numbers (1D tensor)
# matrix - is a list of list of numbers (2D tensor)
# list of list of list of numbers (3D tensor).... and so on!!

# convert features into input tensors
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
# convert labels into input tensors
inputY = dataframe.loc[:, ['y1', 'y2']].as_matrix()

In [19]:
# Step 4 - Write out our hyperparameters
learning_rate = 0.000001 # how fast we reach convergence
# how many times we are going to train our model. 
# We start with this number then we can tune our hyperparameters
training_epochs = 2000
display_step = 50 # how often do we wanna display the process of training
n_samples = inputY.size # number of the sample

In [24]:
# Step 5 = Create our computation graph/neural network
x = tf.placeholder(tf.float32, [None, 2])   # Okay TensorFlow, we'll feed you an array of examples. Each example will
                                            # be an array of two float values (area, and number of bathrooms).
                                            # "None" means we can feed you any number of examples
                                            # Notice we haven't fed it the values yet
# create weights
W = tf.Variable(tf.zeros([2, 2]))           # 2x2 floar matrix, that we'll keep updating through the training
                                            # variables in tf hold and update parameters
                                            # in memory buffers containing tensors
                                            # Maintain a 2 x 2 float matrix for the weights that we'll keep updating 
                                            # through the training process (make them all zero to begin with)
# create biases 
b = tf.Variable(tf.zeros([2]))              #(example is b in y = mx+b. b here is the bias)
                                            # it makes the model fit better
# first calculation - multiply our weights by our inputs
y_values = tf.add(tf.matmul(x, W), b)       # Weights are how we govern how data flows in our computation graph
                                            # multiply inout by weights and add biases

# apply softmax to value we just created
y = tf.nn.softmax(y_values)                 # softmax is our activation function. It is the "sigmoid function"
                                            # It normalizes our value that translates the
                                            # numbers outputted by the previous layer into probability form
# feed in a matrix of labeles
y_ = tf.placeholder(tf.float32, [None, 2])

In [25]:
# Step 6 - PERFORM TRAINING
# Create our cost function, "mean squared error"
# reduce sum computes the sum of elements across dimensions of a tensor
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [26]:
# initialize variables and tensorflow session
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [29]:
# Training Loop
for i in range(training_epochs):
    sess.run(optimizer, feed_dict={x:inputX, y_:inputY})

    # That's all! The rest of the cell just outputs debug messages. 
    # Display logs per epoch step
    if (i) % display_step == 0:
        cc = sess.run(cost, feed_dict={x: inputX, y_:inputY})
        print("Training step:", '%04d' % (i), "cost=", "{:.9f}".format(cc)) #, \"W=", sess.run(W), "b=", sess.run(b)

print("Optimization Finished!")
training_cost = sess.run(cost, feed_dict={x: inputX, y_: inputY})
print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')

Training step: 0000 cost= 0.109537281
Training step: 0050 cost= 0.109537207
Training step: 0100 cost= 0.109537147
Training step: 0150 cost= 0.109537080
Training step: 0200 cost= 0.109537005
Training step: 0250 cost= 0.109536931
Training step: 0300 cost= 0.109536871
Training step: 0350 cost= 0.109536804
Training step: 0400 cost= 0.109536745
Training step: 0450 cost= 0.109536670
Training step: 0500 cost= 0.109536603
Training step: 0550 cost= 0.109536529
Training step: 0600 cost= 0.109536454
Training step: 0650 cost= 0.109536394
Training step: 0700 cost= 0.109536313
Training step: 0750 cost= 0.109536268
Training step: 0800 cost= 0.109536193
Training step: 0850 cost= 0.109536126
Training step: 0900 cost= 0.109536052
Training step: 0950 cost= 0.109535977
Training step: 1000 cost= 0.109535910
Training step: 1050 cost= 0.109535851
Training step: 1100 cost= 0.109535791
Training step: 1150 cost= 0.109535709
Training step: 1200 cost= 0.109535649
Training step: 1250 cost= 0.109535575
Training ste

In [30]:
sess.run(y, feed_dict={x: inputX })

array([[ 0.71123087,  0.28876919],
       [ 0.66498965,  0.33501032],
       [ 0.73654455,  0.26345551],
       [ 0.64717317,  0.3528268 ],
       [ 0.78332442,  0.21667559],
       [ 0.70069939,  0.29930058],
       [ 0.65866619,  0.34133381],
       [ 0.64829433,  0.3517057 ],
       [ 0.64369309,  0.35630691],
       [ 0.65480596,  0.34519401]], dtype=float32)