In [9]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.neural_network import MLPRegressor
import matplotlib.pyplot as plt

bike = pd.read_csv("/Users/apple/Downloads/SupML/Project/Bike-Sharing-Dataset/hour.csv")
bike_new = bike.drop(['instant', 'dteday', 'casual', 'registered'], axis=1)

In [10]:
def split(X, prop = [0.6,0.2,0.2], shuffle=False):
    df_index = np.arange(X.shape[0])
    if shuffle==True:
        np.random.shuffle(df_index)
    cut1 = int(np.floor(X.shape[0]*prop[0]))
    cut2 = int(np.floor(X.shape[0]*(prop[0]+prop[1])))
    train_index = df_index[:cut1]
    val_index = df_index[cut1:cut2]
    test_index = df_index[cut2:]
    X_train = X.iloc[train_index]
    X_val = X.iloc[val_index]
    X_test = X.iloc[test_index]
    return [X_train, X_val, X_test]

In [11]:
bike_train, bike_val, bike_test = split(bike_new, [0.6,0.2,0.2])

bike_train_X = np.array(bike_train.drop(['cnt'], axis=1))
bike_train_Y = np.array(bike_train.loc[:,'cnt'])
bike_train_Y.shape = (bike_train_Y.shape[0], 1)

bike_val_X = np.array(bike_val.drop(['cnt'], axis=1))
bike_val_Y = np.array(bike_val.loc[:,'cnt'])
bike_val_Y.shape = (bike_val_Y.shape[0], 1)

bike_test_X = np.array(bike_test.drop(['cnt'], axis=1))
bike_test_Y = np.array(bike_test.loc[:,'cnt'])
bike_test_Y.shape = (bike_test_Y.shape[0], 1)


In [15]:
# Gradient descent optimizer
tf.set_random_seed(250)
seed=142
learning_rate_1 = 0.001
learning_rate_2 = 0.0001
training_epochs = 1400
batch_size = 128
beta = 5
percent = 75

X = tf.placeholder(tf.float32, [None, 12], name="X")
Y = tf.placeholder(tf.float32, [None, 1], name="Y")

total_len = bike_train_X.shape[0]
n_input = bike_train_X.shape[1]
n_hidden_1 = 20
n_hidden_2 = 25
n_hidden_3 = 25
n_hidden_4 = 20
n_output = 1
    
weights = {
    'w1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.001, seed=seed)),
    'w2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.001, seed=seed)),
    'w3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, 0.001, seed=seed)),
    'w4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], 0, 0.001, seed=seed)),
    'w_out': tf.Variable(tf.random_normal([n_hidden_4, n_output], 0, 0.001, seed=seed))
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.001, seed=seed)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.001, seed=seed)),
    'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, 0.001, seed=seed)),
    'b4': tf.Variable(tf.random_normal([n_hidden_4], 0, 0.001, seed=seed)),
    'b_out': tf.Variable(tf.random_normal([n_output], 0, 0.001, seed=seed))
}

def deep_nn(X, weights, biases):
    
    layer_1 = tf.add(tf.matmul(X, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)

    layer_3 = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)

    layer_4 = tf.add(tf.matmul(layer_3, weights['w4']), biases['b4'])
    layer_4 = tf.nn.relu(layer_4)

    out_layer = tf.matmul(layer_4, weights['w_out']) + biases['b_out']
    return out_layer

pred = deep_nn(X, weights, biases)

reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
cost = tf.sqrt(tf.reduce_mean(tf.square(pred-Y))) + beta * tf.sqrt(tf.reduce_sum(tf.square(reg_losses)))
true_cost = tf.sqrt(tf.reduce_mean(tf.square(pred-Y)))
optimizer_1 = tf.train.GradientDescentOptimizer(learning_rate=learning_rate_1).minimize(cost)
optimizer_2 = tf.train.GradientDescentOptimizer(learning_rate=learning_rate_2).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training cycle
    print("training:")
    
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(total_len/batch_size)
        # Loop over all batches
        if ((epoch*100/training_epochs) < percent):
            for i in range(total_batch-1):
                batch_x = bike_train_X[i*batch_size:(i+1)*batch_size]
                batch_y = bike_train_Y[i*batch_size:(i+1)*batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            
                _, c, p = sess.run([optimizer_1, cost, pred], feed_dict={X: batch_x,
                                                          Y: batch_y})
            # Compute average loss
                avg_cost += c / total_batch
        else:
            for i in range(total_batch-1):
                batch_x = bike_train_X[i*batch_size:(i+1)*batch_size]
                batch_y = bike_train_Y[i*batch_size:(i+1)*batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            
                _, c, p = sess.run([optimizer_2, cost, pred], feed_dict={X: batch_x,
                                                          Y: batch_y})
                avg_cost += c / total_batch
            
        if epoch % 25 == 0:
            print(avg_cost)
    
    _, c, train_pred = sess.run([optimizer_1, true_cost, pred], feed_dict={X: bike_train_X,
                                                          Y: bike_train_Y})
    print("Training set:", c)
    
    _, c, val_pred = sess.run([optimizer_1, true_cost, pred], feed_dict={X: bike_val_X,
                                                          Y: bike_val_Y})
    print("Validation error:", c)
    
    _, c, test_pred = sess.run([optimizer_1, true_cost, pred], feed_dict={X: bike_test_X,
                                                          Y: bike_test_Y})
    
    print("Test error:", c)



training:
184.441672996
183.294072092


KeyboardInterrupt: 

In [15]:
nn_skl = MLPRegressor(hidden_layer_sizes=(30,30,30,30),activation='relu',
                     solver = 'sgd', alpha= 0.1, batch_size= 128, learning_rate_init=0.0001, max_iter=500)
nn_skl.fit(bike_train_X, np.ravel(bike_train_Y))

y_train_pred_skl = nn_skl.predict(X=bike_train_X)
y_val_pred_skl = nn_skl.predict(X=bike_val_X)
print("Train error for skl model:", str(np.sqrt(np.mean((y_train_pred_skl-bike_train_Y)**2))))
print("Validation error for skl model:", str(np.sqrt(np.mean((y_train_pred_skl-bike_val_Y)**2))))

Train error for skl model: 134.614377367
Validation error for skl model: 246.448758432


In [16]:
y_test_pred_skl = nn_skl.predict(X=bike_test_X)
print("Test error for skl model:", str(np.sqrt(np.mean((y_test_pred_skl-bike_test_Y)**2))))

Test error for skl model: 243.857236364


In [17]:
# Explained variance score for TF GD model
print("Exaplained variance for TF GD model: ", end="")
print(1 - (np.var(bike_test_Y - test_pred)/np.var(bike_test_Y)))
# Explained variance score for sklearn model
print("Exaplained variance for sklearn model: ", end="")
print(1 - (np.var(bike_test_Y - y_test_pred_skl)/np.var(bike_test_Y)))

Exaplained variance for TF model: 0.748166987567
Exaplained variance for sklearn model: -1.7763568394e-15


In [19]:
# Adam optimizer
tf.set_random_seed(250)
seed=142
learning_rate_1 = 0.01
learning_rate_2 = 0.0001
training_epochs = 700
batch_size = 128
beta = 5
percent = 65

X = tf.placeholder(tf.float32, [None, 12], name="X")
Y = tf.placeholder(tf.float32, [None, 1], name="Y")

total_len = bike_train_X.shape[0]
n_input = bike_train_X.shape[1]
n_hidden_1 = 20
n_hidden_2 = 20
n_hidden_3 = 20
n_hidden_4 = 20
n_output = 1
    
weights = {
    'w1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.001, seed=seed)),
    'w2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.001, seed=seed)),
    'w3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, 0.001, seed=seed)),
    'w4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], 0, 0.001, seed=seed)),
    'w_out': tf.Variable(tf.random_normal([n_hidden_4, n_output], 0, 0.001, seed=seed))
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.001, seed=seed)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.001, seed=seed)),
    'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, 0.001, seed=seed)),
    'b4': tf.Variable(tf.random_normal([n_hidden_4], 0, 0.001, seed=seed)),
    'b_out': tf.Variable(tf.random_normal([n_output], 0, 0.001, seed=seed))
}

def deep_nn(X, weights, biases):
    
    layer_1 = tf.add(tf.matmul(X, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)

    layer_3 = tf.add(tf.matmul(layer_2, weights['w3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)

    layer_4 = tf.add(tf.matmul(layer_3, weights['w4']), biases['b4'])
    layer_4 = tf.nn.relu(layer_4)

    out_layer = tf.matmul(layer_4, weights['w_out']) + biases['b_out']
    return out_layer

pred = deep_nn(X, weights, biases)

reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
cost = tf.sqrt(tf.reduce_mean(tf.square(pred-Y))) + beta * tf.sqrt(tf.reduce_sum(tf.square(reg_losses)))
true_cost = tf.sqrt(tf.reduce_mean(tf.square(pred-Y)))
optimizer_1 = tf.train.AdamOptimizer(learning_rate=learning_rate_1).minimize(cost)
optimizer_2 = tf.train.AdamOptimizer(learning_rate=learning_rate_2).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Training cycle
    print("training:")
    
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(total_len/batch_size)
        # Loop over all batches
        if ((epoch*100/training_epochs) < percent):
            for i in range(total_batch-1):
                batch_x = bike_train_X[i*batch_size:(i+1)*batch_size]
                batch_y = bike_train_Y[i*batch_size:(i+1)*batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            
                _, c, p = sess.run([optimizer_1, cost, pred], feed_dict={X: batch_x,
                                                          Y: batch_y})
            # Compute average loss
                avg_cost += c / total_batch
        else:
            for i in range(total_batch-1):
                batch_x = bike_train_X[i*batch_size:(i+1)*batch_size]
                batch_y = bike_train_Y[i*batch_size:(i+1)*batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            
                _, c, p = sess.run([optimizer_2, cost, pred], feed_dict={X: batch_x,
                                                          Y: batch_y})
                avg_cost += c / total_batch
            
        if epoch % 25 == 0:
            print(avg_cost)
    
    _, c, train_pred = sess.run([optimizer_1, true_cost, pred], feed_dict={X: bike_train_X,
                                                          Y: bike_train_Y})
    print("Training set:", c)
    
    _, c, val_pred = sess.run([optimizer_1, true_cost, pred], feed_dict={X: bike_val_X,
                                                          Y: bike_val_Y})
    print("Validation error:", c)
    
    _, c, test_pred = sess.run([optimizer_1, true_cost, pred], feed_dict={X: bike_test_X,
                                                          Y: bike_test_Y})
    
    print("Test error:", c)




training:
136.086154985
110.462037263
111.039097256
107.487136794
90.6258015574
84.3991068145
84.0118870205
84.2523394455
76.0717637333
66.7290173519
56.4973723565
51.8107666675
51.2634845074
46.581377571
46.4921313156
43.6744221817
42.8883053344
42.6553045202
44.3981017831
45.1742709595
43.6103914049
42.8187825945
42.2070944044
41.7050616182
41.2798692915
40.8854961631
40.5817648805
40.334003213
Training set: 42.7088
Validation error: 101.814
Test error: 113.681


In [20]:
# Explained variance score for TF Adam model
print("Exaplained variance for TF Adam model: ", end="")
print(1 - (np.var(bike_test_Y - test_pred)/np.var(bike_test_Y)))
# Explained variance score for sklearn model
print("Exaplained variance for sklearn model: ", end="")
print(1 - (np.var(bike_test_Y - y_test_pred_skl)/np.var(bike_test_Y)))

Exaplained variance for TF Adam model: 0.755330183434
Exaplained variance for sklearn model: -1.7763568394e-15
