In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline
# Fix the seed of NumPy's global random generator so NumPy-based randomness is
# repeatable across runs. This call only affects NumPy; the TensorFlow weight
# initializers in this notebook are given their own explicit seed.
np.random.seed(1)

In [2]:
def load_dataset(data_file="./processed_data.csv", train_end=10000, dev_end=12690):
    """Read the processed CSV and split it row-wise into train / dev / test arrays.

    The first CSV column is used as the index. Of the remaining columns, every
    column except the last is treated as a feature and the last one as the
    target. Rows ``[0, train_end)`` form the training split, rows
    ``[train_end, dev_end)`` the dev split and the rows from ``dev_end`` onward
    the test split, for which only the features are returned.

    Args:
        data_file: Path of the CSV file to read.
        train_end: Row position where the training split ends (exclusive).
        dev_end: Row position where the dev split ends (exclusive).

    Returns:
        Tuple ``(X_train, Y_train, X_dev, Y_dev, X_test)``. Each ``X_*`` has
        shape ``(n_features, n_examples)`` (examples are columns) and each
        ``Y_*`` has shape ``(1, n_examples)``.

    Raises:
        ValueError: If the boundaries do not satisfy ``0 <= train_end <= dev_end``.
    """
    if not 0 <= train_end <= dev_end:
        raise ValueError(
            "Split boundaries must satisfy 0 <= train_end <= dev_end, got "
            "train_end={} and dev_end={}".format(train_end, dev_end)
        )

    values = pd.read_csv(data_file, index_col=0).values
    features = values[:, :-1]
    targets = values[:, -1]

    # Transpose so that every example becomes a column, which is the layout the
    # rest of the notebook feeds to the network.
    X_train = features[:train_end].T
    X_dev = features[train_end:dev_end].T
    X_test = features[dev_end:].T

    # The target slices are 1-D, so they are reshaped into explicit row vectors.
    Y_train = targets[:train_end].reshape((1, -1))
    Y_dev = targets[train_end:dev_end].reshape((1, -1))

    return X_train, Y_train, X_dev, Y_dev, X_test

In [3]:
def create_placeholders(n_x, n_y):
    """Create the float32 graph inputs for the features and the targets.

    Args:
        n_x: Number of rows of the feature placeholder.
        n_y: Number of rows of the target placeholder.

    Returns:
        Tuple ``(X, Y)`` of placeholders named "X" and "Y" with shapes
        ``(n_x, None)`` and ``(n_y, None)``; the second dimension is left
        unspecified so any number of columns can be fed.
    """
    def _float_placeholder(rows, label):
        # Same dtype and open second dimension for both inputs.
        return tf.placeholder(dtype=tf.float32, shape=(rows, None), name=label)

    return _float_placeholder(n_x, "X"), _float_placeholder(n_y, "Y")

In [4]:
def initialize_parameters(n_x=31, hidden_units=(50, 100), n_y=1, seed=1):
    """Create the weight and bias variables of the fully connected network.

    For layer ``l`` (1-based) a weight ``W{l}`` of shape ``(units_l, units_{l-1})``
    and a bias ``b{l}`` of shape ``(units_l, 1)`` are created with
    ``tf.get_variable``. Weights use the Xavier initializer from
    ``tf.contrib.layers`` and biases start at zero. Called without arguments
    this produces W1 (50, 31), W2 (100, 50) and W3 (1, 100) plus matching biases.

    Args:
        n_x: Number of input features (columns of W1).
        hidden_units: Sizes of the hidden layers, in order. Note that
            ``forward_propagation`` in this notebook reads exactly W1..W3, so
            keep two hidden layers unless that function is changed as well.
        n_y: Number of output units.
        seed: Seed passed to every Xavier initializer.

    Returns:
        Dict mapping "W1", "b1", "W2", "b2", ... to the created variables.
    """
    layer_sizes = [n_x] + list(hidden_units) + [n_y]

    parameters = {}
    for layer in range(1, len(layer_sizes)):
        fan_in, fan_out = layer_sizes[layer - 1], layer_sizes[layer]
        weight_name = "W{}".format(layer)
        bias_name = "b{}".format(layer)
        parameters[weight_name] = tf.get_variable(
            name=weight_name,
            shape=(fan_out, fan_in),
            initializer=tf.contrib.layers.xavier_initializer(seed=seed),
        )
        parameters[bias_name] = tf.get_variable(
            name=bias_name,
            shape=(fan_out, 1),
            initializer=tf.zeros_initializer(),
        )
    return parameters

In [5]:
def forward_propagation(X, parameters):
    """Run the input through the three affine + ReLU layers of the network.

    Each layer computes ``relu(W @ activation + b)``; ReLU is applied to the
    output layer as well.

    Args:
        X: Input whose columns are examples.
        parameters: Mapping holding "W1", "b1", "W2", "b2", "W3" and "b3".

    Returns:
        The activation of the third layer.
    """
    activation = X
    for layer in ("1", "2", "3"):
        weights = parameters["W" + layer]
        bias = parameters["b" + layer]
        pre_activation = tf.matmul(weights, activation) + bias
        activation = tf.nn.relu(pre_activation)

    return activation

In [6]:
def compute_cost(Z3, Y):
    """Mean squared error between the network output and the targets.

    Both arguments are transposed before being handed to
    ``tf.losses.mean_squared_error``.

    Args:
        Z3: Network output, laid out like ``Y``.
        Y: Target values.

    Returns:
        The value returned by ``tf.losses.mean_squared_error``.
    """
    return tf.losses.mean_squared_error(
        predictions=tf.transpose(Z3),
        labels=tf.transpose(Y),
    )

In [7]:
def random_mini_batches(X, Y, batch_size, seed):
    """Build a shuffled, batched ``tf.data.Dataset`` of (features, target) pairs.

    ``X`` and ``Y`` are transposed first so that the dataset is sliced along
    the example axis.

    Args:
        X: Feature array whose columns are examples.
        Y: Target array whose columns are examples.
        batch_size: Number of examples per batch.
        seed: Seed passed to the shuffle step.

    Returns:
        A dataset yielding batches of ``(features, targets)``.
    """
    features, targets = X.T, Y.T

    # The shuffle buffer spans the whole dataset: a small fixed buffer only
    # mixes neighbouring examples, which leaves batches correlated with the
    # original row order. At least 1 so an empty input still builds a dataset.
    buffer_size = max(len(features), 1)

    examples = tf.data.Dataset.from_tensor_slices((features, targets))
    return examples.shuffle(buffer_size, seed=seed).batch(batch_size)

In [8]:
def model(X_train, Y_train, X_dev, Y_dev, X_test, num_epochs=20000, mini_batch_size=32, learning_rate=0.001):
    """Train the network, report a dev-set score, save a checkpoint and predict.

    The graph is built inside a fresh ``tf.Graph`` so the function can be
    called repeatedly (for example by re-running the notebook cell) without
    colliding with variables created by an earlier call. Every training step
    feeds the complete training set (full-batch gradient descent with Adam).

    Args:
        X_train: Training features, shape ``(n_x, m)``. ``initialize_parameters()``
            is called with its defaults, whose first weight matrix expects 31
            input features.
        Y_train: Training targets, shape ``(n_y, m)``.
        X_dev: Dev-set features, laid out like ``X_train``.
        Y_dev: Dev-set targets, laid out like ``Y_train``.
        X_test: Test-set features, laid out like ``X_train``.
        num_epochs: Number of optimisation steps.
        mini_batch_size: Currently unused; kept so existing callers keep working.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        Tuple ``(predictions, parameters)``: the network output on ``X_test``
        multiplied by 10000, and a dict of the trained parameter values.
    """
    import os  # local import: only needed to prepare the checkpoint directory

    n_x, _ = X_train.shape
    n_y, _ = Y_train.shape
    checkpoint_path = "./model/model.ckpt"

    # A private graph keeps tf.get_variable from failing on already existing
    # W1..b3 variables when this function is called more than once.
    with tf.Graph().as_default():
        X, Y = create_placeholders(n_x, n_y)
        parameters = initialize_parameters()
        A3 = forward_propagation(X, parameters)
        cost = compute_cost(A3, Y)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
        # Reported as "accuracy" below, but this is 1 - MSE against the dev
        # targets, not a classification accuracy.
        accuracy = 1 - tf.losses.mean_squared_error(predictions=A3, labels=Y_dev)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)
            for epoch in range(num_epochs):
                _, epoch_cost = sess.run([optimizer, cost], feed_dict={X: X_train, Y: Y_train})

                if epoch % 100 == 0:
                    print("Cost after {} epochs: {}".format(epoch, epoch_cost))

            # Replace the variable handles by their trained numeric values.
            parameters = sess.run(parameters)
            print("Accuracy on dev set: {}".format(sess.run(accuracy, feed_dict={X: X_dev})))

            # Make sure the target directory exists before writing the checkpoint.
            os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
            saved_path = saver.save(sess, checkpoint_path)
            print("Model saved in {}".format(saved_path))

            # NOTE(review): the factor 10000 presumably undoes a scaling applied
            # to the target during preprocessing - confirm against that step.
            predictions = sess.run(A3, feed_dict={X: X_test}) * 10000

    return predictions, parameters

In [9]:
# Load the five dataset arrays, then train on them; keeps the test-set
# predictions and the trained parameter values.
dataset_splits = load_dataset()
predictions, parameters = model(*dataset_splits)

W0701 20:30:08.866951 4621616576 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

W0701 20:30:09.014090 4621616576 deprecation.py:323] From /anaconda3/envs/deep-learning/lib/python3.6/site-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Cost after 0 epochs: 0.0075616068206727505
Cost after 100 epochs: 0.0017182441661134362
Cost after 200 epochs: 0.0011704829521477222
Cost after 300 epochs: 0.0009582120110280812
Cost after 400 epochs: 0.0008270435500890017
Cost after 500 epochs: 0.0007324058096855879
Cost after 600 epochs: 0.000670223671477288
Cost after 700 epochs: 0.0006214824388734996
Cost after 800 epochs: 0.0005754194571636617
Cost after 900 epochs: 0.00054593087406829
Cost after 1000 epochs: 0.0005219418671913445
Cost after 1100 epochs: 0.0005017521907575428
Cost after 1200 epochs: 0.0004916427424177527
Cost after 1300 epochs: 0.00047222705325111747
Cost after 1400 epochs: 0.00046763542923144996
Cost after 1500 epochs: 0.00047286361223086715
Cost after 1600 epochs: 0.000443457713117823
Cost after 1700 epochs: 0.0004420379118528217
Cost after 1800 epochs: 0.00043257392826490104
Cost after 1900 epochs: 0.0004177750670351088
Cost after 2000 epochs: 0.00042199130984954536
Cost after 2100 epochs: 0.0004050049756187945

Cost after 17600 epochs: 0.0003307885199319571
Cost after 17700 epochs: 0.0002602778549771756
Cost after 17800 epochs: 0.00025767445913515985
Cost after 17900 epochs: 0.00029383646324276924
Cost after 18000 epochs: 0.0002626281639095396
Cost after 18100 epochs: 0.00031044473871588707
Cost after 18200 epochs: 0.0002662832266651094
Cost after 18300 epochs: 0.000255337858106941
Cost after 18400 epochs: 0.0002643192419782281
Cost after 18500 epochs: 0.00026181346038356423
Cost after 18600 epochs: 0.00025526454555802047
Cost after 18700 epochs: 0.00027145861531607807
Cost after 18800 epochs: 0.0002794539323076606
Cost after 18900 epochs: 0.0002562327135819942
Cost after 19000 epochs: 0.0002849460288416594
Cost after 19100 epochs: 0.00025313725927844644
Cost after 19200 epochs: 0.0002575516700744629
Cost after 19300 epochs: 0.0002567005867604166
Cost after 19400 epochs: 0.00027547136414796114
Cost after 19500 epochs: 0.00026542844716459513
Cost after 19600 epochs: 0.0002519125700928271
Cost 

In [10]:
# Flatten the predictions into one value per row. reshape(-1) always yields a
# 1-D array, whereas np.squeeze collapses a single prediction into a 0-d scalar
# that pandas cannot turn into a column.
df = pd.DataFrame({"COST": np.asarray(predictions).reshape(-1)})

In [11]:
# Write the predicted costs (with the default integer index) to disk.
df.to_csv(path_or_buf="predicted_costs.csv")