# Predicting the Number of Vehicles
## Given 20 months of hourly data (DateTime, junction ID, and number of vehicles), predict the number of vehicles for the next 4 months.

In [1]:
import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
import warnings
#warnings.filterwarnings('ignore')

## Read training and test data

In [3]:
# Load the training and test CSV files. Both use the same timestamp layout, so
# one set of read options (UTF-8, DateTime parsed with an explicit format)
# serves both files.
train, test = (
    pd.read_csv(
        csv_path,
        encoding="utf-8",
        parse_dates=["DateTime"],
        date_parser=lambda x: pd.to_datetime(x, format="%Y-%m-%d %H:%M:%S"),
    )
    for csv_path in ('train.csv', 'test.csv')
)

In [4]:
train.head(5)

Unnamed: 0,DateTime,Junction,Vehicles,ID
0,2015-11-01 00:00:00,1,15,20151101001
1,2015-11-01 01:00:00,1,13,20151101011
2,2015-11-01 02:00:00,1,10,20151101021
3,2015-11-01 03:00:00,1,7,20151101031
4,2015-11-01 04:00:00,1,9,20151101041


In [5]:
train.tail(5)

Unnamed: 0,DateTime,Junction,Vehicles,ID
48115,2017-06-30 19:00:00,4,11,20170630194
48116,2017-06-30 20:00:00,4,30,20170630204
48117,2017-06-30 21:00:00,4,16,20170630214
48118,2017-06-30 22:00:00,4,22,20170630224
48119,2017-06-30 23:00:00,4,12,20170630234


## Split DateTime into year, quarter, month, and hour to create additional features

In [6]:
# Derive one calendar-feature column per datetime component of the timestamp.
for _part in ('year', 'quarter', 'month', 'hour'):
    train[_part] = getattr(train['DateTime'].dt, _part)

train.head(5)

Unnamed: 0,DateTime,Junction,Vehicles,ID,year,quarter,month,hour
0,2015-11-01 00:00:00,1,15,20151101001,2015,4,11,0
1,2015-11-01 01:00:00,1,13,20151101011,2015,4,11,1
2,2015-11-01 02:00:00,1,10,20151101021,2015,4,11,2
3,2015-11-01 03:00:00,1,7,20151101031,2015,4,11,3
4,2015-11-01 04:00:00,1,9,20151101041,2015,4,11,4


In [7]:
# Derive the same calendar-feature columns for the test rows.
for _part in ('year', 'quarter', 'month', 'hour'):
    test[_part] = getattr(test['DateTime'].dt, _part)

test.head(5)

Unnamed: 0,DateTime,Junction,ID,year,quarter,month,hour
0,2017-07-01 00:00:00,1,20170701001,2017,3,7,0
1,2017-07-01 01:00:00,1,20170701011,2017,3,7,1
2,2017-07-01 02:00:00,1,20170701021,2017,3,7,2
3,2017-07-01 03:00:00,1,20170701031,2017,3,7,3
4,2017-07-01 04:00:00,1,20170701041,2017,3,7,4


## Split data for training and validation set

In [8]:
from sklearn.model_selection import train_test_split

# Model inputs are the calendar features plus the junction id; the target is
# the vehicle count.
feature_columns = ['year', 'quarter', 'month', 'hour', 'Junction']
X = train[feature_columns]
y = train['Vehicles']
X_test = test[feature_columns]

# Hold out 30% of the labelled rows for validation, with a fixed seed.
# NOTE(review): the split is random, so validation rows are interleaved in time
# with training rows; a chronological hold-out may reflect the forecasting
# task better -- confirm.
X_train, X_validate, y_train, y_validate = train_test_split(
    X, y, test_size=0.3, random_state=100)

In [9]:
X_train.head(5)

Unnamed: 0,year,quarter,month,hour,Junction
41605,2017,2,4,13,3
5461,2016,2,6,13,1
14844,2015,4,11,12,2
38253,2016,4,11,21,3
7745,2016,3,9,17,1


## Standardization (min-max scaling)
### The test set lies in 2017, so the `year` column is included among the scaled features.

In [10]:
from sklearn.preprocessing import MinMaxScaler
# Scale every feature to [0, 1]. The scaler is fitted on the training split
# only, and the same fitted ranges are applied to the validation and test sets.
mmc = MinMaxScaler(feature_range=(0, 1)).fit(X_train)
X_train_std, X_validate_std, X_test_std = (
    mmc.transform(split) for split in (X_train, X_validate, X_test)
)

In [11]:
X_train_std

array([[1.        , 0.33333333, 0.27272727, 0.56521739, 0.66666667],
       [0.5       , 0.33333333, 0.45454545, 0.56521739, 0.        ],
       [0.        , 1.        , 0.90909091, 0.52173913, 0.33333333],
       ...,
       [1.        , 0.        , 0.18181818, 1.        , 0.        ],
       [1.        , 0.33333333, 0.45454545, 0.47826087, 0.        ],
       [0.5       , 1.        , 0.90909091, 0.34782609, 0.66666667]])

## Build a baseline model: SVR

In [12]:
from sklearn import svm
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search over C and gamma for the RBF-kernel SVR, kept here
# for reference. It is disabled because it did not run as written:
#   * the original fit call contained a stray comma (a syntax error), which is
#     corrected in the commented code below;
#   * NOTE(review): StratifiedShuffleSplit stratifies on the target, which is
#     meant for class labels; with a numeric target such as y_train a plain
#     ShuffleSplit or KFold is probably what was intended -- confirm before
#     re-enabling.
#
# C_range = np.logspace(2, 10, 13)
# gamma_range = np.logspace(0.1, 3, 13)
# param_grid = dict(gamma=gamma_range, C=C_range)
# cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
# grid = GridSearchCV(svm.SVR(), param_grid=param_grid, cv=cv)
# grid.fit(X_train_std, y_train.values.ravel())
#
# print("The best parameters are %s with a score of %0.2f"
#       % (grid.best_params_, grid.best_score_))

# A larger C takes longer to train but tends to predict better; watch for
# overfitting.
clf = svm.SVR(kernel='rbf', C=200)
clf.fit(X_train_std, y_train)

SVR(C=200, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [13]:
# Predict vehicle counts for the validation rows with the fitted SVR.
y_pred = clf.predict(X_validate_std)

## Evaluate the prediction

In [14]:
from sklearn.metrics import mean_squared_error
# Root-mean-squared error of the validation predictions.
mse = mean_squared_error(y_validate, y_pred)
rmse = np.sqrt(mse)
print('rmse={:f}'.format(rmse))

rmse=9.239314


## Predict the testing data set and output the data as file name submission.csv

In [15]:
# Predict vehicle counts for the test rows with the fitted SVR
# (this reuses the name y_pred, replacing the validation predictions).
y_pred = clf.predict(X_test_std)

In [16]:
# Assemble the SVR submission: one predicted vehicle count per test-row ID.
# * Predictions are rounded to the nearest integer; a bare astype(int) would
#   truncate toward zero (9.9 -> 9) and bias every count downward.
# * SVR outputs are unbounded, so negative predictions are clipped to 0
#   because a vehicle count cannot be negative.
# * IDs are taken as a plain array so they pair with the predictions by
#   position rather than by index label.
d = {'ID': test['ID'].values,
     'Vehicles': np.clip(np.rint(np.ravel(y_pred)), 0, None).astype(int)}
df = pd.DataFrame(d)
df.to_csv('submission.csv', index=False)

## Build model: Try NN, LSTM, GRU for this prediction

In [17]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, TimeDistributed, LSTM, GRU, ConvLSTM2D
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, Callback

def create_NN_model(optimizer='adam', init='normal'):
    """Build and compile a fully connected regression network.

    The network takes 5 input features, stacks ReLU Dense layers of 512, 256
    and 64 units, and ends in a single ReLU output unit.

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        init: Kernel initializer used by every Dense layer.

    Returns:
        The compiled ``Sequential`` model, with mean squared error as loss.
    """
    model = Sequential()
    for position, units in enumerate((512, 256, 64)):
        # Only the first layer declares the input width.
        first_layer_args = {'input_dim': 5} if position == 0 else {}
        model.add(Dense(units, activation='relu', kernel_initializer=init,
                        **first_layer_args))
        # A Dropout(0.5) after the first hidden layer is left disabled.
    model.add(Dense(1, activation='relu', kernel_initializer=init))
    # NOTE(review): 'accuracy' carries little meaning for a regression target;
    # confirm nothing relies on the logged metric before dropping it.
    model.compile(optimizer=optimizer, loss='mean_squared_error',
                  metrics=['accuracy'])
    return model



  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [18]:
# Parameters
s_optimizer = 'adam' # rmsprop or adam
s_init='uniform'
n_epoches = 5
n_batch_size = 6

model_NN = create_NN_model(optimizer=s_optimizer, init=s_init)
model_NN.fit(X_train_std, y_train.values.ravel(),
          epochs=n_epoches,
          batch_size=n_batch_size,
          callbacks=[EarlyStopping(monitor='acc', min_delta=0.001, patience=2, verbose=1, mode='auto')])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x27231b29240>

In [19]:
# Predict vehicle counts for the validation rows with the trained dense network.
y_pred = model_NN.predict(X_validate_std, batch_size=n_batch_size)

In [20]:
from sklearn.metrics import mean_squared_error
# Root-mean-squared error of the dense network on the validation rows.
mse = mean_squared_error(y_validate, y_pred)
rmse = np.sqrt(mse)
print('rmse={:f}'.format(rmse))

rmse=8.977200


## Predict the testing data set and output the data as file name submission-nn.csv

In [21]:
# Predict vehicle counts for the test rows with the trained dense network.
y_result = model_NN.predict(X_test_std, batch_size=n_batch_size)

In [22]:
# Assemble the dense-network submission: one predicted count per test-row ID.
# Predictions are rounded to the nearest integer; a bare astype(int) would
# truncate toward zero (9.9 -> 9) and bias every count downward. IDs are taken
# as a plain array so they pair with the predictions by position.
d = {'ID': test['ID'].values,
     'Vehicles': np.rint(y_result.ravel()).astype(int)}
df = pd.DataFrame(d)
df.to_csv('submission-nn.csv', index=False)

## NN model auto-tuning: search over the hyper-parameters with a grid search.

In [23]:
from keras.wrappers.scikit_learn import KerasRegressor

# Grid search over epochs, batch size, optimizer and initializer.
# A wider grid that can be swapped in:
#   optimizers = ['rmsprop', 'adam']
#   init = ['glorot_uniform', 'normal', 'uniform']
#   epochs = [5, 10]
#   batches = [6, 12, 24]
optimizers = ['adam']
init = ['glorot_uniform', 'uniform', 'normal']
epochs = [3]
batches = [6]

# Wrap the Keras builder so scikit-learn can construct a model per setting.
# Noted result (presumably from an earlier run):
#   Best: 0.091082 using {'batch_size': 6, 'epochs': 10, 'init': 'uniform', 'optimizer': 'adam'}
model_NN = KerasRegressor(build_fn=create_NN_model, verbose=0)

param_grid = {
    'optimizer': optimizers,
    'epochs': epochs,
    'batch_size': batches,
    'init': init,
}
grid = GridSearchCV(estimator=model_NN, param_grid=param_grid)
grid_result = grid.fit(X_train_std, y_train.values.ravel())

# Report the best setting, then the mean and spread of the score for each one.
print("Best: {:f} using {}".format(grid_result.best_score_,
                                   grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("{:f} ({:f}) with: {!r}".format(mean, stdev, param))

Best: -75.865369 using {'batch_size': 6, 'epochs': 3, 'init': 'glorot_uniform', 'optimizer': 'adam'}
-75.865369 (3.713737) with: {'batch_size': 6, 'epochs': 3, 'init': 'glorot_uniform', 'optimizer': 'adam'}
-75.965592 (3.578463) with: {'batch_size': 6, 'epochs': 3, 'init': 'uniform', 'optimizer': 'adam'}
-76.818805 (1.844773) with: {'batch_size': 6, 'epochs': 3, 'init': 'normal', 'optimizer': 'adam'}


https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
## Stateless LSTM
### stateful = False (the setting used below)
### If stateful is True, the last state for each sample at index i in a batch is used as the initial state for the sample at index i in the following batch.
## To keep the LSTM's state memory with a time-window approach (lags, horizon), you must not shuffle your data.
## Controlling the LSTM state manually

### Set stateful = True and move the epoch loop outside `fit`, resetting the state after each pass. The example below runs 100 epochs.
```python
for i in range(100):
	model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
	model.reset_states()
```

In [24]:
def create_LSTM_model(optimizer='adam', init='normal'):
    """Build and compile a stacked, stateless LSTM regression network.

    Input has shape (timesteps, features) = (1, 5). Three LSTM layers of 512,
    256 and 64 units feed a single ReLU output unit; only the last LSTM layer
    collapses the sequence (``return_sequences=False``).

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        init: Kernel initializer of the output Dense layer only; the LSTM
            layers always use 'glorot_uniform'.

    Returns:
        The compiled ``Sequential`` model, with mean squared error as loss.
    """
    # Settings shared by all three LSTM layers.
    lstm_options = dict(
        activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True,
        kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal',
        bias_initializer='zeros', unit_forget_bias=True,
        kernel_regularizer=None, recurrent_regularizer=None,
        bias_regularizer=None, activity_regularizer=None,
        kernel_constraint=None, recurrent_constraint=None, bias_constraint=None,
        dropout=0.0, recurrent_dropout=0.0, implementation=1,
        return_state=False, go_backwards=False, stateful=False, unroll=False,
    )

    model = Sequential()
    # input_shape=(timesteps/lags, features); the batch-aware form would be
    # batch_input_shape=(batch, timesteps/lags, features).
    model.add(LSTM(512, input_shape=(1, 5), return_sequences=True, **lstm_options))
    # A Dropout(0.5) after the first LSTM layer is left disabled.
    model.add(LSTM(256, return_sequences=True, **lstm_options))
    # The last LSTM layer returns only its final output.
    model.add(LSTM(64, return_sequences=False, **lstm_options))
    model.add(Dense(1, activation='relu', kernel_initializer=init))
    # NOTE(review): 'accuracy' carries little meaning for a regression target;
    # confirm nothing relies on the logged metric before dropping it.
    model.compile(optimizer=optimizer, loss='mean_squared_error',
                  metrics=['accuracy'])
    return model



In [25]:
X_train_std.shape

(33684, 5)

In [26]:
# Parameters
s_optimizer = 'adam' # rmsprop or adam
s_init='uniform'
n_epoches = 5
n_batch_size = 6

# if timesteps/lags = 1
X_3D_train_std = X_train_std.reshape(X_train_std.shape[0],1, X_train_std.shape[1])

model_LSTM = create_LSTM_model(optimizer=s_optimizer, init=s_init)
model_LSTM.fit(X_3D_train_std, y_train.values.ravel(),
          epochs=n_epoches,
          batch_size=n_batch_size,
          callbacks=[EarlyStopping(monitor='acc', min_delta=0.001, patience=2, verbose=1, mode='auto')])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x27235e23c50>

In [27]:
# Predict on the validation rows; a length-1 timestep axis is inserted so the
# input is (samples, 1, features).
y_pred = model_LSTM.predict(X_validate_std[:, np.newaxis, :],
                            batch_size=n_batch_size)

In [28]:
from sklearn.metrics import mean_squared_error
# Root-mean-squared error of the LSTM on the validation rows.
mse = mean_squared_error(y_validate, y_pred)
rmse = np.sqrt(mse)
print('rmse={:f}'.format(rmse))

rmse=8.401580


In [29]:
# Predict on the test rows; a length-1 timestep axis is inserted so the input
# is (samples, 1, features).
y_result = model_LSTM.predict(X_test_std[:, np.newaxis, :],
                              batch_size=n_batch_size)

In [30]:
# Assemble the LSTM submission: one predicted count per test-row ID.
# Predictions are rounded to the nearest integer; a bare astype(int) would
# truncate toward zero (9.9 -> 9) and bias every count downward. IDs are taken
# as a plain array so they pair with the predictions by position.
d = {'ID': test['ID'].values,
     'Vehicles': np.rint(y_result.ravel()).astype(int)}
df = pd.DataFrame(d)
df.to_csv('submission-lstm.csv', index=False)

## Auto Tuning approach by grid search for LSTM model to get better parameters.

In [31]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor

# Grid search over epochs, batch size, optimizer and initializer.
# A wider grid that can be swapped in:
#   optimizers = ['rmsprop', 'adam']
#   init = ['glorot_uniform', 'normal', 'uniform']
#   epochs = [10, 20, 50]
#   batches = [5, 10, 20]
optimizers = ['adam']
init = ['glorot_uniform', 'normal', 'uniform']
epochs = [3]
batches = [6]

# Wrap the Keras builder so scikit-learn can construct a model per setting.
model_LSTM = KerasRegressor(build_fn=create_LSTM_model, verbose=0)

param_grid = {
    'optimizer': optimizers,
    'epochs': epochs,
    'batch_size': batches,
    'init': init,
}
grid = GridSearchCV(estimator=model_LSTM, param_grid=param_grid)
grid_result = grid.fit(X_3D_train_std, y_train.values.ravel())

# Report the best setting, then the mean and spread of the score for each one.
print("Best: {:f} using {}".format(grid_result.best_score_,
                                   grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("{:f} ({:f}) with: {!r}".format(mean, stdev, param))

Best: -75.361852 using {'batch_size': 6, 'epochs': 3, 'init': 'glorot_uniform', 'optimizer': 'adam'}
-75.361852 (5.536853) with: {'batch_size': 6, 'epochs': 3, 'init': 'glorot_uniform', 'optimizer': 'adam'}
-370.314677 (416.153495) with: {'batch_size': 6, 'epochs': 3, 'init': 'normal', 'optimizer': 'adam'}
-82.446533 (2.601921) with: {'batch_size': 6, 'epochs': 3, 'init': 'uniform', 'optimizer': 'adam'}


## GRU model

In [32]:
def create_GRU_model(optimizer='adam', init='normal'):
    """Build and compile a stacked, stateless GRU regression network.

    Input has shape (timesteps, features) = (1, 5). Three GRU layers of 512,
    256 and 64 units feed a single ReLU output unit; only the last GRU layer
    collapses the sequence (``return_sequences=False``).

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        init: Kernel initializer of the output Dense layer only; the GRU
            layers always use 'glorot_uniform'.

    Returns:
        The compiled ``Sequential`` model, with mean squared error as loss.
    """
    # Settings shared by all three GRU layers.
    gru_options = dict(
        activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True,
        kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal',
        bias_initializer='zeros',
        kernel_regularizer=None, recurrent_regularizer=None,
        bias_regularizer=None, activity_regularizer=None,
        kernel_constraint=None, recurrent_constraint=None, bias_constraint=None,
        dropout=0.0, recurrent_dropout=0.0, implementation=1,
        return_state=False, go_backwards=False, stateful=False, unroll=False,
    )

    model = Sequential()
    model.add(GRU(512, input_shape=(1, 5), return_sequences=True, **gru_options))
    # A Dropout(0.5) after the first GRU layer is left disabled.
    model.add(GRU(256, return_sequences=True, **gru_options))
    # The last GRU layer returns only its final output.
    model.add(GRU(64, return_sequences=False, **gru_options))
    model.add(Dense(1, activation='relu', kernel_initializer=init))
    # NOTE(review): 'accuracy' carries little meaning for a regression target;
    # confirm nothing relies on the logged metric before dropping it.
    model.compile(optimizer=optimizer, loss='mean_squared_error',
                  metrics=['accuracy'])
    return model



In [33]:
# Parameters
s_optimizer = 'adam' # rmsprop or adam
s_init='uniform'
n_epoches = 5
n_batch_size = 6

# if timesteps/lags = 1
X_3D_train_std = X_train_std.reshape(X_train_std.shape[0],1, X_train_std.shape[1])

model_GRU = create_GRU_model(optimizer=s_optimizer, init=s_init)
model_GRU.fit(X_3D_train_std, y_train.values.ravel(),
          epochs=n_epoches,
          batch_size=n_batch_size,
          callbacks=[EarlyStopping(monitor='acc', min_delta=0.001, patience=2, verbose=1, mode='auto')])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x27213944588>

In [34]:
# Predict on the validation rows; a length-1 timestep axis is inserted so the
# input is (samples, 1, features).
y_pred = model_GRU.predict(X_validate_std[:, np.newaxis, :],
                           batch_size=n_batch_size)

In [35]:
from sklearn.metrics import mean_squared_error
# Root-mean-squared error of the GRU on the validation rows.
mse = mean_squared_error(y_validate, y_pred)
rmse = np.sqrt(mse)
print('rmse={:f}'.format(rmse))

rmse=8.090658


In [36]:
# Predict on the test rows; a length-1 timestep axis is inserted so the input
# is (samples, 1, features).
y_result = model_GRU.predict(X_test_std[:, np.newaxis, :],
                             batch_size=n_batch_size)

In [37]:
# Assemble the GRU submission: one predicted count per test-row ID.
# Predictions are rounded to the nearest integer; a bare astype(int) would
# truncate toward zero (9.9 -> 9) and bias every count downward. IDs are taken
# as a plain array so they pair with the predictions by position.
d = {'ID': test['ID'].values,
     'Vehicles': np.rint(y_result.ravel()).astype(int)}
df = pd.DataFrame(d)
df.to_csv('submission-gru.csv', index=False)

In [38]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor

# Grid search over epochs, batch size, optimizer and initializer.
# A wider grid that can be swapped in:
#   optimizers = ['rmsprop', 'adam']
#   init = ['glorot_uniform', 'normal', 'uniform']
#   epochs = [10, 20, 50]
#   batches = [5, 10, 20]
optimizers = ['adam']
init = ['glorot_uniform', 'normal', 'uniform']
epochs = [3]
batches = [6]

# Wrap the Keras builder so scikit-learn can construct a model per setting.
model = KerasRegressor(build_fn=create_GRU_model, verbose=0)

param_grid = {
    'optimizer': optimizers,
    'epochs': epochs,
    'batch_size': batches,
    'init': init,
}
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_3D_train_std, y_train.values.ravel())

# Report the best setting, then the mean and spread of the score for each one.
print("Best: {:f} using {}".format(grid_result.best_score_,
                                   grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("{:f} ({:f}) with: {!r}".format(mean, stdev, param))

Best: -75.307071 using {'batch_size': 6, 'epochs': 3, 'init': 'glorot_uniform', 'optimizer': 'adam'}
-75.307071 (5.358852) with: {'batch_size': 6, 'epochs': 3, 'init': 'glorot_uniform', 'optimizer': 'adam'}
-361.536116 (394.932360) with: {'batch_size': 6, 'epochs': 3, 'init': 'normal', 'optimizer': 'adam'}
-77.325250 (4.809811) with: {'batch_size': 6, 'epochs': 3, 'init': 'uniform', 'optimizer': 'adam'}
