In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from sklearn import metrics, model_selection, ensemble, preprocessing
import matplotlib.pyplot as plt
import tensorflow as tf

df = pd.read_csv('all_treated_DB.csv', sep = '\t', index_col=0)

In [2]:
df = df.drop(df.index[df['total_annual_cost'] > 50000000])
df = df.drop(df.index[df['monthly_rent'] > 3000000])
df = df.drop(df.index[df['floor_plan'] > 9 ])
df = df.drop(df.index[df['floor'] > 70])
df = df.drop(df.index[df['surface'] > 320])
df = df.drop(df.index[(df['walk_min1'] > 40) ])


In [3]:
ward = pd.get_dummies(df['ward'])

In [4]:
df_o = df[['floor_plan', 'plan_DK','plan_L', 'plan_K','plan_S','age','story', 'floor','surface','walk_min1', 'monthly_rent']]
df_o = pd.merge(df_o, ward, left_index=True, right_index=True)
df_o = df_o.dropna()
target = df_o['monthly_rent']/100000
df_o.drop(['monthly_rent'], axis=1, inplace=True) 
data = df_o
data.head()

Unnamed: 0,floor_plan,plan_DK,plan_L,plan_K,plan_S,age,story,floor,surface,walk_min1,...,江戸川,江東,渋谷,港,目黒,練馬,荒川,葛飾,豊島,足立
0,1,0,0,0,0,6,2,1,4.33,1,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,47,5,5,9.72,5,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,47,5,5,9.72,5,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,37,9,3,17.7,4,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,37,9,7,22.88,4,...,0,0,0,0,0,0,0,0,0,0


In [5]:
df_scaler = preprocessing.StandardScaler().fit(df_o)

In [6]:
target_n = target
df_n = df_scaler.transform(df_o)

In [7]:
target_n = target_n.values.reshape(target_n.shape[0], 1)

In [8]:
# parameters
learning_rate = 0.001
n_hidden_1 = 64
n_hidden_2 = 32
n_hidden_3 = 16
n_input = df_n.shape[1]
n_class = 1
#keep_prob = 0.8

# declare placeholder
x = tf.placeholder(tf.float32, [None, n_input])
y_ = tf.placeholder(tf.float32, [None, 1])
keep_prob = tf.placeholder(tf.float32)

# declare variables
# len(df.columns) -> [None, 10]
weights = {
    'w1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, stddev=0.1), name="w1"),
    'w2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, stddev=0.1), name="w2"),
    'w3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, stddev=0.1), name="w3"), 
    'w4': tf.Variable(tf.random_normal([n_hidden_3, n_class], 0, stddev=0.1), name="w4")     
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, stddev=0.1), name="b1"), 
    'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, stddev=0.1), name="b2"), 
    'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, stddev=0.1), name="b3"), 
    'b4': tf.Variable(tf.random_normal([n_class], 0, stddev=0.1), name="b4")     
}

# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    #layer_1 = tf.nn.dropout(layer_1, keep_prob)

    # Hidden layer with RELU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    #layer_2 = tf.nn.dropout(layer_2, keep_prob)

    # Hidden layer with RELU activation
    layer_3 = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    layer_3 = tf.nn.dropout(layer_3, keep_prob)

    # Output layer with linear activation
    out_layer = tf.matmul(layer_3, weights['w4']) + biases['b4']
    return out_layer

# train model
pred = multilayer_perceptron(x, weights, biases)

#cost = tf.reduce_mean(tf.squared_difference(tf.transpose(pred), y_))
cost = tf.reduce_mean(tf.squared_difference(pred, y_))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

In [9]:
data_train, data_test, label_train, label_test = \
    model_selection.train_test_split(df_n, target_n, test_size=0.2)

In [11]:
training_epochs = 1000
batch_size = 1000
total_len = data_train.shape[0]
display_step = 25

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(total_len/batch_size)
    for i in range(total_batch-1):
        batch_x = data_train[batch_size * i: batch_size *(i + 1) - 1]
        batch_y = label_train[batch_size * i: batch_size *(i + 1) - 1]
        _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
        avg_cost += c/total_batch

    # display
    if epoch % display_step == 0:
        accuracy = sess.run(cost, feed_dict={x:data_test, y_:label_test, keep_prob: 1.0})
        print(" Epoch:", '%04d' % (epoch), "cost=", "{:.9f}".format(avg_cost), "accuracy=", "{:.9f}".format(accuracy))

print("finish training")
accuracy = sess.run(cost, feed_dict={x:data_test, y_:label_test, keep_prob: 1.0})
print("Error on test data=", accuracy)


 Epoch: 0000 cost= 0.688060279 accuracy= 0.135817990
 Epoch: 0025 cost= 0.093425657 accuracy= 0.044741888
 Epoch: 0050 cost= 0.092176264 accuracy= 0.045852173
 Epoch: 0075 cost= 0.092760202 accuracy= 0.056625366
 Epoch: 0100 cost= 0.088423654 accuracy= 0.048128974
 Epoch: 0125 cost= 0.094658424 accuracy= 0.046530370
 Epoch: 0150 cost= 0.082987057 accuracy= 0.043260198
 Epoch: 0175 cost= 0.087892422 accuracy= 0.046107329
 Epoch: 0200 cost= 0.084583715 accuracy= 0.041807394
 Epoch: 0225 cost= 0.085510893 accuracy= 0.042212840
 Epoch: 0250 cost= 0.077910864 accuracy= 0.046488255
 Epoch: 0275 cost= 0.093557172 accuracy= 0.047465779
 Epoch: 0300 cost= 0.088881469 accuracy= 0.042632181
 Epoch: 0325 cost= 0.083123201 accuracy= 0.039136462
 Epoch: 0350 cost= 0.081035382 accuracy= 0.044379782
 Epoch: 0375 cost= 0.083920682 accuracy= 0.042366479
 Epoch: 0400 cost= 0.086444777 accuracy= 0.042770538
 Epoch: 0425 cost= 0.084537391 accuracy= 0.046347030
 Epoch: 0450 cost= 0.086975713 accuracy= 0.043

In [12]:
saver = tf.train.Saver(var_list={"b1": biases['b1'], "b2": biases['b2'], "b3": biases['b3'], "b4": biases['b4'],  \
                                 "w1": weights['w1'], "w2": weights['w2'], "w3":weights['w3'], "w4": weights['w4']})
saver.save(sess, "all_tf.ckpt")


'all_tf.ckpt'

In [13]:
target_pre = sess.run(pred, feed_dict={x: data_test, keep_prob: 1.0})
target_pre
# target_pre = target_pre * (target.max() - target.min())  + target.min()


# test_reindex = pd.merge(df, pd.DataFrame(target_pre.reshape(-1)), left_index=True, right_index=True)
# # # test_reindex = test_reindex.sort_values(by='monthly_rent').reset_index()

# # # del test_reindex['index']

# test_reindex.head()

array([[ 1.18864691],
       [ 0.80745316],
       [ 0.774248  ],
       ..., 
       [ 1.04782319],
       [ 1.2896986 ],
       [ 0.80128765]], dtype=float32)

In [14]:
residual_RF = (label_test*100000 - target_pre*100000)**2
residual_RF_total = residual_RF.sum()
#print(mean_absolute_error(Y, clf.predict(X)))
print(residual_RF_total)

1.21746063276e+13
