In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [4]:
# Read the house data CSV into a DataFrame and preview its first five rows

data = pd.read_csv(filepath_or_buffer="kc_house_data.csv")
data.head(n=5)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [5]:
# Separate the target from the features: 'price' is the value to predict,
# and 'id' and 'date' are dropped because they are not used as model inputs.

Y = data['price']
X = data.drop(labels=['id', 'price', 'date'], axis='columns')

# Sanity check: same number of rows in both, one column fewer per dropped field
print(X.shape, Y.shape)
X.head()

(21613, 18) (21613,)


Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503


In [6]:
# Split the data into train and test sets, then scale features and target

# 70/30 split; the fixed random_state keeps the split reproducible across runs
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

# MinMaxScaler expects 2-D input, so turn each target Series into a column
# vector. Go through `.values` first: `Series.reshape` was deprecated and later
# removed from pandas, whereas the underlying ndarray always supports reshape.
y_train = y_train.values.reshape(-1, 1)
y_test = y_test.values.reshape(-1, 1)

x_scaler = MinMaxScaler(feature_range=(0, 1))
y_scaler = MinMaxScaler(feature_range=(0, 1))

# Fit the scalers on the training split only ...
x_train_scaled = x_scaler.fit_transform(x_train)
y_train_scaled = y_scaler.fit_transform(y_train)

# ... and reuse those fitted ranges for the test split, so that no test-set
# statistics influence the scaling (test values may fall outside [0, 1]).
x_test_scaled = x_scaler.transform(x_test)
y_test_scaled = y_scaler.transform(y_test)




In [7]:
# Neural network model: three ReLU hidden layers followed by a linear output

# Start from an empty default graph so this cell can be re-run. Without the
# reset, a second run fails because tf.get_variable refuses to re-create
# variables that already exist under the same scope.
tf.reset_default_graph()

# Parameters
learning_rate = 0.0001
epochs = 1000
display_step = 100  # log the training cost once every `display_step` epochs

# One input unit per feature column of the scaled training matrix
input_num = x_train_scaled.shape[1]
output_num = 1

layer_1_nodes = 50
layer_2_nodes = 100
layer_3_nodes = 50


# Input Layer
# NOTE: `X` (and `Y` further down) rebind the names that previously held the
# feature DataFrame and the price Series. Re-run the cell that builds those
# before re-running the train/test split.
with tf.variable_scope('input'):
    X = tf.placeholder(tf.float32, shape=(None, input_num))

# Layer 1
with tf.variable_scope('layer_1'):
    weights = tf.get_variable("weights1", shape=[input_num, layer_1_nodes], initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases1", shape=[layer_1_nodes], initializer=tf.zeros_initializer())
    layer_1_output = tf.nn.relu(tf.matmul(X, weights) + biases)

# Layer 2
with tf.variable_scope('layer_2'):
    weights = tf.get_variable("weights2", shape=[layer_1_nodes, layer_2_nodes], initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases2", shape=[layer_2_nodes], initializer=tf.zeros_initializer())
    layer_2_output = tf.nn.relu(tf.matmul(layer_1_output, weights) + biases)

# Layer 3
with tf.variable_scope('layer_3'):
    weights = tf.get_variable("weights3", shape=[layer_2_nodes, layer_3_nodes], initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases3", shape=[layer_3_nodes], initializer=tf.zeros_initializer())
    layer_3_output = tf.nn.relu(tf.matmul(layer_2_output, weights) + biases)

# Output Layer (no activation: the network regresses a single scaled value)
with tf.variable_scope('output'):
    weights = tf.get_variable("weights4", shape=[layer_3_nodes, output_num], initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases4", shape=[output_num], initializer=tf.zeros_initializer())
    prediction = tf.matmul(layer_3_output, weights) + biases


# The cost function of the neural network: mean squared error between the
# prediction and the target fed through the `Y` placeholder

with tf.variable_scope('cost'):
    Y = tf.placeholder(tf.float32, shape=(None, 1))
    cost = tf.reduce_mean(tf.squared_difference(prediction, Y))

# The optimizer function: running this op performs one Adam update step

with tf.variable_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

In [8]:
# Train the network in a fresh session, then evaluate it on the test split

with tf.Session() as session:

    session.run(tf.global_variables_initializer())

    # Full-batch feeds, built once and reused for every step
    train_feed = {X: x_train_scaled, Y: y_train_scaled}
    test_feed = {X: x_test_scaled, Y: y_test_scaled}

    for epoch in range(epochs):

        # One optimizer step over the entire training set
        session.run(optimizer, feed_dict=train_feed)

        # Only report progress on every `display_step`-th epoch
        if epoch % display_step != 0:
            continue

        # Cost is measured after the update that was just applied
        training_cost = session.run(cost, feed_dict=train_feed)

        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(training_cost))

    print("Training finished!")

    # Both costs are computed on the min-max scaled targets, not raw prices
    testing_cost = session.run(cost, feed_dict=test_feed)
    print("Mean squared error of test data: ", "{:.9f}".format(testing_cost))
    

Epoch: 0001 cost= 0.002811852
Epoch: 0101 cost= 0.000855011
Epoch: 0201 cost= 0.000626880
Epoch: 0301 cost= 0.000515473
Epoch: 0401 cost= 0.000459255
Epoch: 0501 cost= 0.000417316
Epoch: 0601 cost= 0.000384033
Epoch: 0701 cost= 0.000356689
Epoch: 0801 cost= 0.000334096
Epoch: 0901 cost= 0.000313732
Training finished!
Mean squared error of test data:  0.000395173
