# Simple Housing Classification using TensorFlow
#### Author: Tien Dinh

This is a simple project that uses TensorFlow to classify houses as good or bad.

Import pandas to read from data file

Import TensorFlow for all the magic

In [1]:
import pandas as pd
import tensorflow as tf

First we use pandas' read_csv function to read from our input file

Then we are going to drop all the features that we don't need such as the prices

Finally, we choose a small subset of the data to train on; we are not going to train our model on all of the given data.

In [5]:
# Load the raw housing data from the CSV file.
dataframe = pd.read_csv('data.csv')
# axis=1 drops whole columns (not rows); the index and price columns are not used as features.
dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis=1)
# Keep only the first 10 rows. `.copy()` makes this an independent frame rather
# than a slice of the full data, so adding the label columns later does not
# trigger pandas' SettingWithCopyWarning.
dataframe = dataframe[0:10].copy()
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


Now we need to decide which house is good and which is bad

We do that by adding two more columns to our reduced dataset

In [6]:
# y1 holds the hand-assigned label for each of the 10 rows.
dataframe.loc[:, 'y1'] = [1, 1, 0, 1, 0, 1, 0, 0, 1, 1]
# y2 is the complement of y1: 1 exactly where y1 is 0. The comparison yields
# booleans, which are converted straight to integer 0s and 1s.
dataframe['y2'] = (dataframe['y1'] == 0).astype(int)
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,0,1
3,1416.0,2.0,1,0
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,0,1
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


We then convert the input and output into matrices

In [15]:
# Convert the feature and label columns into NumPy arrays to feed to TensorFlow.
# `.values` is used instead of `.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
inputX = dataframe.loc[:, ['area', 'bathrooms']].values
inputY = dataframe.loc[:, ['y1', 'y2']].values

## Step 1. Creating hyperparameters

The learning rate controls how large each update step is. It should be a small value, because if it is too large the optimizer can overshoot and the network will never converge.
The number of training epochs is how many update steps the network performs.

In [32]:
learning_rate = 1e-6      # step size used by gradient descent
training_epochs = 3000    # number of optimization steps to run
display_step = 50         # report the cost once every `display_step` steps
# NOTE(review): `.size` is the total number of elements in inputY
# (rows x columns), not the number of rows. It is used as the divisor in the
# cost, so confirm that normalizing per element is intended.
n_samples = inputY.size

## Step 2. Creating variables
Creating placeholders for input and output matrices, as well as initial weights and biases

In [20]:
# Placeholders are fed at run time: `x` receives rows of 2 input features and
# `y_` receives the matching rows of 2 label columns.
x = tf.placeholder(tf.float32, [None, 2])
y_ = tf.placeholder(tf.float32, [None, 2])

# Trainable parameters, both initialized to zeros.
W = tf.Variable(tf.zeros([2, 2]))
b = tf.Variable(tf.zeros([2]))

# Linear scores for each row, then softmax to normalize each row's two scores
# into values between 0 and 1 that sum to 1.
y_values = tf.matmul(x, W) + b
y = tf.nn.softmax(y_values)

In [21]:
# Error: sum of squared differences between labels and predictions,
# divided by twice `n_samples`.
squared_error = tf.pow(y_ - y, 2)
cost = tf.reduce_sum(squared_error) / (2 * n_samples)

# One run of `optimizer` applies a single gradient descent update that reduces `cost`.
gradient_descent = tf.train.GradientDescentOptimizer(learning_rate)
optimizer = gradient_descent.minimize(cost)

## Step 3. Creating and Running the Session

In [22]:
# Open a session, then run the initializer op so that the variables
# (W and b) hold their starting values before training.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

## Step 4. Begin Training

In [33]:
# The same training data is fed on every step, so build the feed dict once.
train_feed = {x: inputX, y_: inputY}

for i in range(training_epochs):
    # One gradient descent update on the full training set.
    sess.run(optimizer, feed_dict=train_feed)

    # Report the current cost once every `display_step` steps.
    if i % display_step == 0:
        cc = sess.run(cost, feed_dict=train_feed)
        # A single pre-formatted string passed to print() produces the same
        # output on Python 2 and Python 3 (the bare print statement is a
        # SyntaxError on Python 3).
        print('Training step: %04d cost = %.9f' % (i, cc))
print('Optimization Finished!')

# Final cost and learned parameters after training.
training_cost = sess.run(cost, feed_dict=train_feed)
print('Training Cost: {} \nW = {} \nb = {}'.format(training_cost, sess.run(W), sess.run(b)))

Training step: 0000 cost = 0.124016687
Training step: 0050 cost = 0.124016583
Training step: 0100 cost = 0.124016479
Training step: 0150 cost = 0.124016359
Training step: 0200 cost = 0.124016263
Training step: 0250 cost = 0.124016151
Training step: 0300 cost = 0.124016047
Training step: 0350 cost = 0.124015942
Training step: 0400 cost = 0.124015830
Training step: 0450 cost = 0.124015734
Training step: 0500 cost = 0.124015614
Training step: 0550 cost = 0.124015510
Training step: 0600 cost = 0.124015406
Training step: 0650 cost = 0.124015287
Training step: 0700 cost = 0.124015190
Training step: 0750 cost = 0.124015093
Training step: 0800 cost = 0.124014974
Training step: 0850 cost = 0.124014877
Training step: 0900 cost = 0.124014758
Training step: 0950 cost = 0.124014661
Training step: 1000 cost = 0.124014542
Training step: 1050 cost = 0.124014452
Training step: 1100 cost = 0.124014333
Training step: 1150 cost = 0.124014236
Training step: 1200 cost = 0.124014117
Training step: 1250 cost 

### Run the trained model on the data

In [34]:
# Evaluate the model's softmax output for every row of the training inputs.
predictions = sess.run(y, feed_dict={x: inputX})
predictions

array([[ 0.54832923,  0.4516708 ],
       [ 0.53717941,  0.46282056],
       [ 0.5548557 ,  0.44514433],
       [ 0.53264624,  0.46735376],
       [ 0.56847113,  0.43152887],
       [ 0.54615146,  0.45384854],
       [ 0.53571635,  0.46428367],
       [ 0.53334308,  0.46665695],
       [ 0.53230011,  0.46769992],
       [ 0.53482932,  0.46517068]], dtype=float32)