In [1]:
# Switch the working directory to the project folder (presumably on a mounted
# Google Drive) so that utils.py, gcn.py and test.csv resolve by relative path.
%cd "/content/drive/MyDrive/Colab Notebooks/Bike Demand Submission/"
# List the folder as a quick check that the expected files are present.
!ls

/content/drive/MyDrive/Colab Notebooks/Bike Demand Submission
GCNN_bike_sharing.py  gcn.py	   __pycache__	test.csv
GCNN-DDGF.ipynb       __init__.py  script.py	utils.py


In [2]:
# Select the TensorFlow 1.x runtime; this has to run before TensorFlow is imported.
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from utils import StandardScaler

tf.__version__

'1.15.2'

In [4]:
# Ask TensorFlow for the name of the GPU it can see and report the outcome.
# Anything other than the first GPU device name is reported as "not found".
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    message = 'Found GPU at: {}'.format(device_name)
else:
    message = 'GPU device not found'
print(message)

Found GPU at: /device:GPU:0


In [5]:
# Load the demand table from the CSV in the working directory.
path = "test.csv"
df_demand = pd.read_csv(path)
# Positional rename of the header: assumes the file has exactly three columns
# in the order (time bin, cell id, count) — TODO confirm against the data file.
df_demand.columns = ["timebin", "cell", "count"]

In [6]:
# Distinct time bins and cells, in order of first appearance in the file.
timebins = pd.unique(df_demand['timebin'])
stations = pd.unique(df_demand['cell'])
num_timebins, num_cells, num_counts = df_demand.nunique(axis=0)

# Every (timebin, cell) pair must be present exactly once. The length check
# catches missing pairs; pivot() below raises ValueError on duplicated pairs.
if len(df_demand) != num_timebins * num_cells:
    raise ValueError(
        'df_demand has {} rows but {} timebins x {} cells were expected'.format(
            len(df_demand), num_timebins, num_cells))

# Build the (num_timebins, num_cells) matrix by label rather than by reshaping
# the raw column, so the result does not depend on the row order of the CSV.
# reindex() restores first-appearance order, because pivot() sorts its labels.
demand_wide = df_demand.pivot(index='timebin', columns='cell', values='count')
demand_wide = demand_wide.reindex(index=timebins, columns=stations)

demand_count = demand_wide.values
# Plain positional index (0..num_timebins-1) so the frame lines up row-for-row
# with the calendar feature frames built from `timebins`.
df_demand_count = pd.DataFrame(demand_count, columns=stations).add_prefix('station_')


In [7]:
# Convert the epoch-second time bins to timezone-aware US/Eastern timestamps so
# the calendar features below follow local wall-clock time.
datebins = pd.to_datetime(timebins, unit='s', utc=True, infer_datetime_format=True).tz_convert(tz='US/Eastern')

# Saturday (5) / Sunday (6) indicator, shared by both encodings below.
is_weekend = np.in1d(datebins.dayofweek, [5, 6])

# Calendar features: one-hot indicator columns when `onehot` is True, otherwise
# a single raw-valued column per feature.
onehot = True
if onehot:
    df_month = pd.get_dummies(datebins.month, prefix='month')
    df_hour = pd.get_dummies(datebins.hour, prefix='hour')
    df_dayofweek = pd.get_dummies(datebins.dayofweek, prefix='dayofweek')
    df_weekend = pd.get_dummies(is_weekend, prefix='weekend')
else:
    df_month = pd.DataFrame(datebins.month, columns=['month'])
    df_hour = pd.DataFrame(datebins.hour, columns=['hour'])
    df_dayofweek = pd.DataFrame(datebins.dayofweek, columns=['dayofweek'])
    df_weekend = pd.DataFrame(is_weekend, columns=['weekend'])

# Station counts first, calendar features after; all frames share the same
# positional row index (one row per time bin), so the concat is row-aligned.
df = pd.concat([df_demand_count, df_month, df_hour, df_dayofweek, df_weekend], axis=1)

# Column-name groups, kept for selecting feature families later on.
names_stations = df_demand_count.columns.tolist()
names_month = df_month.columns.tolist()
names_hour = df_hour.columns.tolist()
names_dayofweek = df_dayofweek.columns.tolist()
names_weekend = df_weekend.columns.tolist()
names_all = df.columns.tolist()

# Chronological split on the local timestamps.
# NOTE(review): the strict `<` upper bounds presumably compare against midnight
# of the given date, which leaves 2019-09-30 and 2019-12-31 out of every split
# (a one-day gap between train and validation) — confirm this is intended.
train_filter = (datebins >= '2018-01-01') & (datebins < '2019-09-30')
val_filter = (datebins >= '2019-10-01') & (datebins < '2019-12-31')
# NOTE(review): identical to val_filter, so the "test" rows are the validation
# rows and any test metric computed from them is not an independent estimate.
test_filter = (datebins >= '2019-10-01') & (datebins < '2019-12-31')

df_train = df[train_filter]
df_val = df[val_filter]
df_test = df[test_filter]

# Model width: every column of df is used both as an input and as a target.
# NOTE(review): the targets therefore include the calendar columns as well as
# the station counts; restricting the targets to the first num_cells columns
# would score station demand only — confirm which is intended.
num_input = df.shape[1]
num_output = df.shape[1]

In [8]:

def get_dataset_XY(df, batch_size=None, batch_index=0, num_feature=24, num_horizon=1, num_input = num_input, num_output=num_output):
    """Cut one batch of sliding-window samples out of a row-ordered frame.

    Each sample is anchored at a row ``t``. Its input is rows
    ``t - num_feature + 1 .. t`` of the first ``num_input`` columns and its
    target is rows ``t + 1 .. t + num_horizon`` of the first ``num_output``
    columns. Valid anchors run from ``num_feature - 1`` up to (but excluding)
    ``len(df) - num_horizon``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per time step, in chronological order.
    batch_size : int or None
        Number of consecutive anchors per batch. ``None`` returns every valid
        anchor in a single batch (``batch_index`` is then ignored).
    batch_index : int
        Zero-based batch number; the batch starts ``batch_size * batch_index``
        anchors after the first valid anchor.
    num_feature : int
        Number of past rows (including the anchor row) in each input window.
    num_horizon : int
        Number of future rows in each target.
    num_input, num_output : int
        Number of leading columns used for inputs / targets. The defaults are
        taken from the notebook globals at the time this cell is executed, so
        the cell must be re-run after those globals change.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, num_input, num_feature)
    Y : numpy.ndarray, shape (n_samples, num_output * num_horizon)
        Each target is flattened column-major (all horizons of column 0, then
        column 1, ...).

    A batch that reaches past the last valid anchor is truncated, and a batch
    that lies entirely beyond it yields arrays with ``n_samples == 0``.

    Raises
    ------
    ValueError
        If ``num_feature`` or ``num_horizon`` is smaller than 1, if
        ``batch_size`` is given and smaller than 1, or if ``batch_index`` is
        negative.
    """
    if num_feature < 1 or num_horizon < 1:
        raise ValueError('num_feature and num_horizon must be at least 1')
    if batch_size is not None and batch_size < 1:
        raise ValueError('batch_size must be at least 1 (or None for all samples)')
    if batch_index < 0:
        # A negative offset would make iloc wrap around to the end of the frame.
        raise ValueError('batch_index must be non-negative')

    min_t = num_feature - 1            # first anchor with a full input window
    max_t = df.shape[0] - num_horizon  # exclusive: last anchor needs a full target

    if batch_size is None:
        start, stop = min_t, max_t
    else:
        start = min_t + batch_size * batch_index
        # Clamp so the final batch is truncated instead of indexing out of range.
        stop = min(start + batch_size, max_t)

    n_samples = stop - start
    if n_samples <= 0:
        return (np.empty((0, num_input, num_feature)),
                np.empty((0, num_output * num_horizon)))

    # Slice the rows this batch needs once, then window over the plain arrays;
    # this avoids a DataFrame fancy-index lookup for every sample.
    rows_x = df.iloc[start - min_t:stop, 0:num_input].values
    rows_y = df.iloc[start + 1:stop + num_horizon, 0:num_output].values

    # Column-major flattening keeps each column's time steps contiguous, which
    # is what the (num_input, num_feature) reshape below relies on.
    X = [rows_x[i:i + num_feature].flatten('F') for i in range(n_samples)]
    Y = [rows_y[i:i + num_horizon].flatten('F') for i in range(n_samples)]

    X = np.stack(X).reshape([-1, num_input, num_feature])
    Y = np.stack(Y)

    return X, Y


In [9]:
# Window configuration: 24 past steps per input, 1 future step per target,
# and 100 samples in the example training batch built below.
batch_size = 100
num_feature = 24
num_horizon = 1

# Only the first training batch (batch_index=0) is materialised here.
X_train, Y_train = get_dataset_XY(
    df_train,
    batch_size=batch_size,
    batch_index=0,
    num_feature=num_feature,
    num_horizon=num_horizon,
)

# batch_size=None returns every validation window in one array.
X_val, Y_val = get_dataset_XY(
    df_val,
    batch_size=None,
    batch_index=0,
    num_feature=num_feature,
    num_horizon=num_horizon,
)

# The test arrays are aliases of the validation arrays; df_test is not
# windowed separately.
X_test, Y_test = X_val, Y_val

X_train.shape, Y_train.shape, X_val.shape, Y_val.shape

((100, 225, 24), (100, 225), (8716, 225, 24), (8716, 225))

In [10]:
# Scaler built from a single global mean/std taken over every element of X_train.
# NOTE(review): X_train at this point is only the first 100-sample batch and it
# spans all df columns (station counts and calendar columns alike), so these
# statistics do not describe the full training period — confirm this is intended.
scaler = StandardScaler(mean=X_train.mean(), std=X_train.std())

In [13]:
# Local model module; reload() picks up edits to gcn.py without restarting the kernel.
import gcn
import importlib
importlib.reload(gcn)

# Hyperparameters
learning_rate = 3e-4 # learning rate
decay = 0.9 # decay value forwarded to gcn.gcnn_ddgf (presumably for the optimizer — TODO confirm)
batchsize = 100 # batch size

hidden_num_layer = [10, 20, 20] # determine the number of hidden layers and the vector length at each node of each hidden layer
reg_weight = [0, 0, 0] # regularization weights for adjacency matrices L1 loss

keep = 1 # dropout setting; presumably the keep probability (1 = no dropout) — TODO confirm in gcn.gcnn_ddgf

early_stop_th = 200 # early stopping threshold; presumably the number of consecutive steps without a validation improvement before training stops — TODO confirm
training_epochs = 10 # total training epochs

# Training
# Wall-clock timing around the whole training call.
start_time = datetime.datetime.now()

# Arguments are passed positionally, so their order must match the signature
# of gcn.gcnn_ddgf exactly. The function receives get_dataset_XY and df_train
# (rather than pre-built arrays) together with the validation/test arrays.
val_error, predic_res, test_Y, test_error, bestWeightA = gcn.gcnn_ddgf(
    hidden_num_layer, reg_weight, 
    num_input, num_output, num_feature, num_horizon, 
    learning_rate, decay, batchsize, 
    keep, early_stop_th, training_epochs, 
    get_dataset_XY, df_train, 
    X_val, Y_val, 
    X_test, Y_test, 
    scaler, 'RMSE')

end_time = datetime.datetime.now()
# NOTE(review): a bare expression in the middle of a cell is not displayed.
val_error
print('Total training time: ', end_time-start_time)

Loss 1.904: 100%|██████████| 611/611 [01:35<00:00,  6.39it/s]


Epoch: 0001 Training RMSE = 1.933753786
Validation RMSE: 3.2112274


Loss 1.877: 100%|██████████| 611/611 [01:34<00:00,  6.48it/s]


Epoch: 0002 Training RMSE = 1.836576594
Validation RMSE: 3.1699486


Loss 1.880: 100%|██████████| 611/611 [01:33<00:00,  6.53it/s]


Epoch: 0003 Training RMSE = 1.824286380
Validation RMSE: 3.1945152


Loss 1.882: 100%|██████████| 611/611 [01:32<00:00,  6.64it/s]


Epoch: 0004 Training RMSE = 1.820530386
Validation RMSE: 3.244585


Loss 1.882: 100%|██████████| 611/611 [01:33<00:00,  6.56it/s]


Epoch: 0005 Training RMSE = 1.817398961
Validation RMSE: 3.26584


Loss 1.878: 100%|██████████| 611/611 [01:32<00:00,  6.58it/s]


Epoch: 0006 Training RMSE = 1.813620895
Validation RMSE: 3.248552


Loss 1.869: 100%|██████████| 611/611 [01:32<00:00,  6.62it/s]


Epoch: 0007 Training RMSE = 1.807398697
Validation RMSE: 3.2843664


Loss 1.853: 100%|██████████| 611/611 [01:36<00:00,  6.31it/s]


Epoch: 0008 Training RMSE = 1.795549981
Validation RMSE: 3.3202846


Loss 1.831: 100%|██████████| 611/611 [01:36<00:00,  6.32it/s]


Epoch: 0009 Training RMSE = 1.771026310
Validation RMSE: 3.348918


Loss 1.807: 100%|██████████| 611/611 [01:36<00:00,  6.30it/s]


Epoch: 0010 Training RMSE = 1.731690897
Validation RMSE: 3.6582274
Training RMSE =  1.83657659365757
Validation RMSE =  3.1699486
Test RMSE =  3.1699486
Total training time:  0:15:50.115936
