# **In this notebook, I will build a baseline model for predicting the aggregated demand from T+1 to T+5**

In [1]:
import pandas as pd
import numpy as np
import geohash2
import os
import tensorflow as tf

import keras 
from keras import backend as K
from sklearn.model_selection import train_test_split
import warnings
# Suppress every warning issued through Python's `warnings` module for the
# rest of the session.
warnings.filterwarnings("ignore")
# Switch TensorFlow to eager execution (TF 1.x-style API).
# NOTE(review): presumably this has to run before any other TensorFlow op is
# created -- confirm against the installed TensorFlow version.
tf.enable_eager_execution()

Using TensorFlow backend.


In [2]:
# Directory holding the cleaned challenge data. The GRAB_DATA_DIR environment
# variable, when set, overrides the default so the notebook is not tied to a
# single machine's home directory; the default keeps the original location.
data_dir = os.environ.get(
    'GRAB_DATA_DIR',
    '/home/angps/Documents/GrabChallenge/Traffic Management/Data',
)
# Load the cleaned training table produced by the earlier cleaning step.
df = pd.read_csv(os.path.join(data_dir, 'cleaned_training.csv'))

In [3]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,geohash6,day,demand,latitude,longitude,latitude_error,longitude_error,Hour,Minute,Period
0,qp03wc,18,0.020072,-5.353088,90.653687,0.002747,0.005493,20,0,1712
1,qp03pn,10,0.024721,-5.413513,90.664673,0.002747,0.005493,14,30,922
2,qp09sw,9,0.102821,-5.325623,90.906372,0.002747,0.005493,6,15,793
3,qp0991,32,0.088755,-5.353088,90.752563,0.002747,0.005493,5,0,2996
4,qp090q,15,0.074468,-5.413513,90.719604,0.002747,0.005493,4,0,1360


## **Baseline Model**

**We will only use data from day 2 onwards so that every data point has a full previous day of demand data. We also split the dataset 80-20 into training and validation sets.**

In [4]:
# Keep only rows from day 2 onwards.
df = df.loc[df['day'] >= 2]
# Hold out a random 20% of the rows for validation (fixed seed) and give both
# parts a fresh 0..n-1 index.
# NOTE(review): the split is by random row, so each part holds only a subset
# of any geohash's timeline -- confirm this is intended for the per-period
# demand lookups performed on these frames later.
train_df, val_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=0)
)

In [5]:
# Show the (rows, columns) shape of the training and validation frames.
train_df.shape, val_df.shape

((3310955, 10), (827739, 10))

In the baseline model, we use the aggregated demand of the past 96 periods (one full day of 15-minute intervals) as the input to a simple LSTM model.

In [6]:
def dayhourmin_to_period(day, hour, minute):
    """Convert a (day, hour, minute) timestamp into a 15-minute period index.

    Day 1 at 00:00 maps to period 0. Every day contributes 24 * 4 = 96 periods
    and every hour contributes 4; the minute is floored to its quarter hour.
    """
    periods_in_earlier_days = (day - 1) * 24 * 4
    periods_in_earlier_hours = hour * 4
    quarter_within_hour = minute // 15
    return periods_in_earlier_days + periods_in_earlier_hours + quarter_within_hour

def period_to_dayhourmin(period):
    """Decode a 15-minute period index back into (day, hour, minute).

    Inverse of `dayhourmin_to_period`: period 0 is day 1 at 00:00, there are
    96 periods per day and 4 per hour. Floor division is used throughout, so
    a negative period rolls back into day 0 and earlier.
    """
    days_elapsed, period_of_day = divmod(period, 96)
    hour, quarter = divmod(period_of_day, 4)
    return (days_elapsed + 1, hour, quarter * 15)

def get_demand_from_period(df, geohash, period):
    """Look up the demand stored in `df` for `geohash` at a period index.

    The period is decoded with `period_to_dayhourmin` and matched against the
    'geohash6', 'day', 'Hour' and 'Minute' columns. The first matching
    'demand' value is returned, or 0 when no row matches.

    Note: each call evaluates boolean masks over every row of `df`.
    """
    day, hour, minute = period_to_dayhourmin(period)
    same_place = df['geohash6'] == geohash
    same_time = (df['day'] == day) & (df['Hour'] == hour) & (df['Minute'] == minute)
    matches = df.loc[same_place & same_time, 'demand'].values
    # No record for this geohash/period: report zero demand.
    if len(matches) == 0:
        return 0
    return matches[0]
    
def get_past_demand(df, geohash, day, hour, minute, num_periods=96):
    """Collect the demand of the `num_periods` periods before a timestamp.

    Returns a 1-D array ordered from the most recent period (T-1) back to the
    oldest (T-num_periods); periods with no record contribute 0.
    """
    current = dayhourmin_to_period(day, hour, minute)
    history = []
    for lag in range(1, num_periods + 1):
        history.append(get_demand_from_period(df, geohash, current - lag))
    return np.array(history)

def get_future_demand(df, geohash, day, hour, minute, num_periods=5):
    """Collect the demand of the `num_periods` periods after a timestamp.

    Returns a 1-D array ordered T+1, T+2, ..., T+num_periods, where T is the
    period of (day, hour, minute); periods with no record contribute 0.

    The offsets start at 1 rather than 0: the prediction target is the demand
    from T+1 to T+5, and offset 0 would be the reference period T itself.
    """
    current = dayhourmin_to_period(day, hour, minute)
    return np.array([
        get_demand_from_period(df, geohash, current + step)
        for step in range(1, num_periods + 1)
    ])

In [7]:
def fc_layer(inputs, output_units, batch_norm=True):
    """Apply Dense -> (optional) BatchNormalization -> ReLU to `inputs`.

    `output_units` is the width of the Dense layer; set `batch_norm=False`
    to skip the normalization step. Returns the activated output tensor.
    """
    layers = tf.keras.layers
    hidden = layers.Dense(output_units)(inputs)
    if batch_norm:
        hidden = layers.BatchNormalization()(hidden)
    return layers.Activation('relu')(hidden)

def baseline_model(past_periods=96, future_periods=5, lstm_units=64, fc_units=16):
    """Build the baseline LSTM regressor.

    Architecture: Input(past_periods, 1) -> LSTM(lstm_units)
    -> fc_layer(fc_units) -> Dense(future_periods).

    Parameters
    ----------
    past_periods : int, default 96
        Number of past time steps fed to the LSTM (one feature per step).
    future_periods : int, default 5
        Number of output values, one per predicted period.
    lstm_units : int, default 64
        Width of the LSTM layer.
    fc_units : int, default 16
        Width of the fully connected layer that follows the LSTM.

    Returns
    -------
    tf.keras.models.Model
        The uncompiled model. With the default arguments it is the same
        network as the original hard-coded version.
    """
    _input = tf.keras.layers.Input(shape=(past_periods, 1))
    net = tf.keras.layers.LSTM(units=lstm_units)(_input)
    net = fc_layer(net, fc_units)
    # Linear output head: one regression value per future period.
    net = tf.keras.layers.Dense(future_periods, kernel_initializer='normal')(net)
    model = tf.keras.models.Model(inputs=_input, outputs=net)
    return model

# Instantiate the baseline network (compiled below).
model = baseline_model()

def root_mean_squared_error(y_true, y_pred):
    """RMSE loss: square root of the mean squared error over the last axis.

    The reduction uses axis=-1 only, so one loss value is produced per entry
    of the remaining leading axes.
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error, axis=-1)
    return K.sqrt(mean_squared_error)

# Optimise the custom RMSE loss with RMSProp and also report plain MSE.
model.compile(
    optimizer="rmsprop",
    loss=root_mean_squared_error,
    metrics=["mean_squared_error"],
)

Instructions for updating:
Colocations handled automatically by placer.


In [8]:
# Print the layer-by-layer structure and parameter counts of the model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 96, 1)             0         
_________________________________________________________________
lstm (LSTM)                  (None, 64)                16896     
_________________________________________________________________
dense (Dense)                (None, 16)                1040      
_________________________________________________________________
batch_normalization_v1 (Batc (None, 16)                64        
_________________________________________________________________
activation (Activation)      (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 85        
Total params: 18,085
Trainable params: 18,053
Non-trainable params: 32
_________________________________________________________________


In [9]:
def data_gen(typ, batch_size, past_periods=96):
    """Endlessly yield random (past demand, future demand) batches.

    Parameters
    ----------
    typ : str
        'train' samples rows from `train_df`; 'val' samples from `val_df`.
    batch_size : int
        Number of rows drawn (without replacement) for each batch.
    past_periods : int, default 96
        Number of past periods collected as model input for each row.

    Yields
    ------
    tuple of np.ndarray
        Inputs of shape (batch_size, past_periods, 1) and targets of shape
        (batch_size, 5).

    Raises
    ------
    ValueError
        If `typ` is neither 'train' nor 'val'. Because this is a generator,
        the error surfaces when it is first advanced.
    """
    if typ == 'train':
        df = train_df
    elif typ == 'val':
        df = val_df
    else:
        raise ValueError("typ must be 'train' or 'val', got {!r}".format(typ))
    max_rows = len(df)
    while True:
        past_demands = np.zeros((batch_size, past_periods))
        future_demands = np.zeros((batch_size, 5))
        rows = np.random.choice(a=max_rows, size=batch_size, replace=False)
        # `i` is the slot in the batch; `row` is the randomly drawn row label.
        # Indexing the frame with `i` would always return the first
        # `batch_size` rows and ignore the random draw.
        for i, row in enumerate(rows):
            geohash = df.loc[row, 'geohash6']
            day = df.loc[row, 'day']
            hour = df.loc[row, 'Hour']
            minute = df.loc[row, 'Minute']
            # Forward `past_periods` so the history length always matches the
            # width of the `past_demands` buffer.
            past_demands[i] = get_past_demand(df, geohash, day, hour, minute,
                                              num_periods=past_periods)
            future_demands[i] = get_future_demand(df, geohash, day, hour, minute)
        # Add a trailing feature axis: the LSTM expects (batch, time, features).
        yield np.reshape(past_demands, (batch_size, past_periods, 1)), future_demands

In [10]:
# Independent endless generators for the training and validation splits.
# NOTE(review): batch_size=1 is combined with a BatchNormalization layer in
# the model; batch statistics computed from a single sample are degenerate --
# confirm whether a larger batch size is feasible.
train_gen = data_gen('train', batch_size=1)
val_gen = data_gen('val', batch_size=1)
num_epochs = 10
# `epochs` must be passed explicitly; without it `num_epochs` is unused and
# training stops after the default single epoch.
history = model.fit_generator(generator=train_gen,
                              validation_data=val_gen,
                              epochs=num_epochs,
                              steps_per_epoch=20,
                              validation_steps=20,
                              max_queue_size=20,
                              verbose=1)

Instructions for updating:
Use tf.cast instead.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/angps/anaconda3/envs/deeplearning2/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-7629b887988d>", line 9, in <module>
    verbose=1)
  File "/home/angps/anaconda3/envs/deeplearning2/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1426, in fit_generator
    initial_epoch=initial_epoch)
  File "/home/angps/anaconda3/envs/deeplearning2/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_generator.py", line 225, in model_iteration
    mode='test')
  File "/home/angps/anaconda3/envs/deeplearning2/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_generator.py", line 177, in model_iteration
    batch_data = _get_next_batch(output_generator, mode)
  File "/home/angps/anaconda3/envs/deeplearning2/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_gen

KeyboardInterrupt: 