In [1]:
from typing import List
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.metrics import mean_squared_log_error


In [2]:
def load_data(mode: str, columns: List[str]):
    if mode == "train":
        data = pd.read_csv('../data/house-prices-advanced-regression-techniques/train.csv')
        return data[columns].to_numpy(), data['SalePrice'].to_numpy()
    else:
        data = pd.read_csv('../data/house-prices-advanced-regression-techniques/test.csv')
        return data[columns].to_numpy()


def randomize(x, y):
    """ Randomizes the order of data samples and their corresponding labels"""
    permutation = np.random.permutation(y.shape[0])
    return x[permutation, :], y[permutation]

In [3]:
features = ['LotArea', 'TotalBsmtSF']

input_layer = tf.placeholder(
    tf.float32,
    shape=[None, len(features)],
    name='input_layer_placeholder'
)

layer1_size = 200
layer1_weights = tf.get_variable(
    'layer1_weights',
    dtype=tf.float32,
    shape=[input_layer.get_shape()[1], layer1_size],
    initializer=tf.truncated_normal_initializer(stddev=0.01)
)
layer1_biases = tf.get_variable(
    'layer1_biases',
    dtype=tf.float32,
    initializer=tf.constant(0., shape=[layer1_size], dtype=tf.float32)
)

layer1 = tf.matmul(input_layer, layer1_weights)
layer1 = tf.add(layer1, layer1_biases)
layer1 = tf.nn.relu(layer1)


output_layer_size = 1
output_layer_weights = tf.get_variable(
    'output_layer_weights',
    dtype=tf.float32,
    shape=[layer1.get_shape()[1], output_layer_size],
    initializer=tf.truncated_normal_initializer(stddev=0.01)
)
output_layer_biases = tf.get_variable(
    'output_layer_biases',
    dtype=tf.float32,
    initializer=tf.constant(0., shape=[output_layer_size], dtype=tf.float32)
)
output_layer = tf.matmul(layer1, output_layer_weights)
output_layer = tf.add(output_layer, output_layer_biases)

In [4]:
output_layer_placeholder = tf.placeholder(
    tf.float32,
    shape=[None],
    name='output_layer_placeholder'
)
loss = tf.reduce_mean(tf.squared_difference(output_layer_placeholder, output_layer))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001, name='Adam-optimizer').minimize(loss)

In [5]:
init = tf.global_variables_initializer()

In [6]:
x_train, y_train = load_data('train', features)

In [7]:
BATCH_SIZE = 100
EPOCHS_NUMBER = 100

session = tf.InteractiveSession()

session.run(init)

iteration_number = len(y_train) // BATCH_SIZE
for epoch in range(EPOCHS_NUMBER):
    x_train, y_train = randomize(x_train, y_train)
    for batch in range(iteration_number):
        start = batch * BATCH_SIZE
        end = (batch + 1) * BATCH_SIZE
        x_batch = x_train[start:end]
        y_batch = y_train[start:end]
        session.run(
            optimizer,
            feed_dict={
                input_layer: x_batch,
                output_layer_placeholder: y_batch,
            },
        )

In [8]:
y_train_prediction = session.run(output_layer, feed_dict={input_layer: x_train})
y_train_prediction_accuracy = np.sqrt(mean_squared_log_error(y_train, y_train_prediction))

print(f'Train accuracy : {y_train_prediction_accuracy}')


Train accuracy : 0.5314663209533911


In [9]:
y_test_prediction = session.run(
    output_layer,
    feed_dict={input_layer: load_data('test', features)}
)

print(y_test_prediction[:5])

[[122864.98]
 [162402.73]
 [140477.86]
 [113432.75]
 [ 95294.49]]


In [10]:
session.close()
