In [1]:
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, Normalizer
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Activation

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Notebook magic: select matplotlib's inline backend for this kernel.
%matplotlib inline
# plt.style.use("ggplot")
# Apply seaborn's "darkgrid" style to the plots created below.
sns.set_style("darkgrid")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  (fname, cnt))
  (fname, cnt))


In [2]:
# Read the table, discard the 'Month' column and index the rows by 'pickuptime'.
data = (
    pd.read_csv("final_without_weather.csv")
    .drop(['Month'], axis=1)
    .set_index("pickuptime")
)

# Keep the three pickup-count series aside before removing them from the
# feature frame, so `data` ends up holding only the remaining columns.
num_pickups = data['total']
num_green_pickups = data['green']
num_yellow_pickups = data['yellow']
data = data.drop(['green', 'yellow', 'total'], axis=1)

In [3]:
def series_to_supervised(data, n_in, n_out, dropnan=True):
    """Reframe a time series as a table of lagged and forecast columns.

    Parameters
    ----------
    data : list or array-like
        Observations ordered in time, one row per time step. A flat list is
        treated as a single variable; a 2-D input as one variable per column.
    n_in : int
        Number of lag steps to include; produces columns for t-n_in ... t-1.
    n_out : int
        Number of forward steps to include; produces columns for
        t, t+1, ... t+(n_out-1).
    dropnan : bool, default True
        When true, rows that contain NaN (introduced by shifting at the start
        and end of the series) are removed.

    Returns
    -------
    pandas.DataFrame
        Columns are ordered oldest lag first, then the current step, then the
        future steps, and are named ``var<k>(t-<i>)``, ``var<k>(t)`` and
        ``var<k>(t+<i>)`` with ``k`` counting variables from 1.
    """
    df = pd.DataFrame(data)
    # Take the variable count from the frame itself so flat lists, lists of
    # rows and arrays are all labelled with the right number of names.
    n_vars = df.shape[1]
    cols, names = [], []

    # Input sequence: t-n_in, ..., t-1.
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]

    # Forecast sequence: t, t+1, ..., t+n_out-1.
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            # Future steps are labelled t+i; labelling them t-i would collide
            # with the lag column names generated above.
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]

    agg = pd.concat(cols, axis=1)
    agg.columns = names

    if dropnan:
        agg = agg.dropna()

    return agg

In [4]:
# Append the pickup totals as the last column so they are scaled together
# with the feature columns.
features_and_target = pd.concat([data, num_pickups], axis=1)
n_features = features_and_target.shape[1]

# Raw values as a float32 matrix, then min-max scaled into [0, 1].
# NOTE(review): the scaler is fitted on every row, including rows that are
# later used as the test split.
values = features_and_target.values.astype('float32')
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

# Each supervised row carries the previous n_hours steps plus the current one.
n_hours = 23
reframed = series_to_supervised(scaled, n_hours, 1)


In [7]:
# Remove the row with the smallest target value found after the first
# 365*24*3 rows, together with the n_hours rows that follow it.
total_values = reframed.values
n_p = 365*24*3 + np.argmin(total_values[365*24*3:, -1])

# `a` is everything before the removed window, `b` everything after it.
a = total_values[:n_p, :]
b = total_values[n_p + n_hours + 1:, :]
total_values = np.vstack((a, b))

# Metric table: one row per metric (4), one column per loop iteration (7).
result = np.zeros((4, 7))

In [None]:
# Fit and score one LSTM per group of rows; the four metrics of group i are
# written into column i of `result`.
# Loop-invariant settings are computed once.
n_train_hours = 3754              # rows of each group used for fitting
n_obs = n_hours * n_features      # number of lagged input columns per row

for i in range(7):
    # Keep the rows whose column -3 equals the value stored in column 2 of
    # row 120 + 24*i.
    # NOTE(review): presumably this column is a day-of-week style feature, so
    # each iteration isolates one weekday — confirm against the CSV schema.
    values = total_values[total_values[:, -3] == total_values[120 + 24*i, 2]]
    train = values[:n_train_hours, :]
    test = values[n_train_hours:, :]

    # split into inputs (lagged columns) and output (last column)
    train_X, train_y = train[:, :n_obs], train[:, -1]
    test_X, test_y = test[:, :n_obs], test[:, -1]

    # reshape input to be 3D [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))

    # design network
    model = Sequential()
    model.add(LSTM(24, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    # fit network
    # NOTE(review): the held-out rows are passed as validation data, so the
    # val_loss curve is computed on the same rows the metrics below use.
    history = model.fit(train_X, train_y, epochs=100, batch_size=100,
                        validation_data=(test_X, test_y), verbose=2,
                        shuffle=False)

    history_loss = pd.DataFrame()
    history_loss['train'] = history.history['loss']
    history_loss['test'] = history.history['val_loss']
    history_loss.plot(figsize=(10, 10), fontsize=16,
                      title='LSTM Model Loss');

    # make a prediction
    yhat = model.predict(test_X)
    # reshape back to 2d
    test_X = test_X.reshape((test_X.shape[0], -1))

    # The scaler was fitted on n_features columns, so the prediction must be
    # padded with the other current-step columns before inverting. Those are
    # the columns after the n_obs lagged inputs, excluding the target column.
    # Using n_obs:-1 instead of a literal 115:119 keeps this correct when
    # n_hours or the number of features changes.
    current_step_features = test[:, n_obs:-1]

    # invert scaling for forecast
    inv_yhat = np.concatenate((current_step_features, yhat), axis=1)
    inv_yhat = scaler.inverse_transform(inv_yhat)
    inv_yhat = inv_yhat[:, -1]

    # invert scaling for actual
    test_y = test_y.reshape((len(test_y), 1))
    inv_y = np.concatenate((current_step_features, test_y), axis=1)
    inv_y = scaler.inverse_transform(inv_y)
    inv_y = inv_y[:, -1]

    # Forecast clipped at zero; kept available but not used by the metrics
    # computed in this cell.
    inv_yhat_gte_zero = inv_yhat.copy()
    inv_yhat_gte_zero[inv_yhat_gte_zero < 0] = 0

    # calculate r2
    result[0, i] = r2_score(inv_y, inv_yhat)

    # calculate mean absolute error
    result[1, i] = mean_absolute_error(inv_y, inv_yhat)

    # calculate rmse
    rmse = math.sqrt(mean_squared_error(inv_y, inv_yhat))
    result[2, i] = rmse

    # calculate mean absolute percentage error
    # NOTE(review): any zero in inv_y makes this inf/nan — confirm the
    # filtered data cannot contain zero actuals.
    maep = np.mean(np.abs((inv_y - inv_yhat)/inv_y))*100
    result[3, i] = maep

Train on 3754 samples, validate on 911 samples
Epoch 1/100
 - 3s - loss: 0.1358 - val_loss: 0.0872
Epoch 2/100
 - 1s - loss: 0.1108 - val_loss: 0.0785
Epoch 3/100
 - 1s - loss: 0.1021 - val_loss: 0.0715
Epoch 4/100
 - 1s - loss: 0.0958 - val_loss: 0.0668
Epoch 5/100
 - 2s - loss: 0.0897 - val_loss: 0.0624
Epoch 6/100
 - 2s - loss: 0.0843 - val_loss: 0.0586
Epoch 7/100
 - 2s - loss: 0.0791 - val_loss: 0.0547
Epoch 8/100
 - 1s - loss: 0.0732 - val_loss: 0.0504
Epoch 9/100
 - 2s - loss: 0.0682 - val_loss: 0.0449
Epoch 10/100
 - 2s - loss: 0.0619 - val_loss: 0.0372
Epoch 11/100
 - 1s - loss: 0.0576 - val_loss: 0.0340
Epoch 12/100
 - 1s - loss: 0.0544 - val_loss: 0.0324
Epoch 13/100
 - 1s - loss: 0.0516 - val_loss: 0.0315
Epoch 14/100
 - 1s - loss: 0.0491 - val_loss: 0.0303
Epoch 15/100
 - 1s - loss: 0.0462 - val_loss: 0.0293
Epoch 16/100
 - 1s - loss: 0.0434 - val_loss: 0.0272
Epoch 17/100
 - 1s - loss: 0.0415 - val_loss: 0.0265
Epoch 18/100
 - 1s - loss: 0.0405 - val_loss: 0.0259
Epoch 19

 - 1s - loss: 0.0201 - val_loss: 0.0183
Epoch 55/100
 - 1s - loss: 0.0203 - val_loss: 0.0181
Epoch 56/100
 - 1s - loss: 0.0203 - val_loss: 0.0181
Epoch 57/100
 - 1s - loss: 0.0205 - val_loss: 0.0180
Epoch 58/100
 - 1s - loss: 0.0201 - val_loss: 0.0177
Epoch 59/100
 - 1s - loss: 0.0203 - val_loss: 0.0172
Epoch 60/100
 - 1s - loss: 0.0202 - val_loss: 0.0175
Epoch 61/100
 - 1s - loss: 0.0199 - val_loss: 0.0177
Epoch 62/100
 - 1s - loss: 0.0200 - val_loss: 0.0174
Epoch 63/100
 - 1s - loss: 0.0196 - val_loss: 0.0179
Epoch 64/100
 - 1s - loss: 0.0197 - val_loss: 0.0176
Epoch 65/100
 - 1s - loss: 0.0196 - val_loss: 0.0174
Epoch 66/100
 - 1s - loss: 0.0196 - val_loss: 0.0175
Epoch 67/100
 - 1s - loss: 0.0195 - val_loss: 0.0174
Epoch 68/100
 - 1s - loss: 0.0197 - val_loss: 0.0175
Epoch 69/100
 - 1s - loss: 0.0196 - val_loss: 0.0175
Epoch 70/100
 - 2s - loss: 0.0197 - val_loss: 0.0173
Epoch 71/100
 - 2s - loss: 0.0196 - val_loss: 0.0171
Epoch 72/100
 - 2s - loss: 0.0198 - val_loss: 0.0170
Epoch 

Epoch 8/100
 - 1s - loss: 0.0826 - val_loss: 0.0582
Epoch 9/100
 - 1s - loss: 0.0777 - val_loss: 0.0519
Epoch 10/100
 - 1s - loss: 0.0729 - val_loss: 0.0475
Epoch 11/100
 - 1s - loss: 0.0693 - val_loss: 0.0439
Epoch 12/100
 - 1s - loss: 0.0689 - val_loss: 0.0498
Epoch 13/100
 - 1s - loss: 0.0615 - val_loss: 0.0461
Epoch 14/100
 - 1s - loss: 0.0576 - val_loss: 0.0388
Epoch 15/100
 - 1s - loss: 0.0555 - val_loss: 0.0378
Epoch 16/100
 - 1s - loss: 0.0518 - val_loss: 0.0354
Epoch 17/100
 - 1s - loss: 0.0493 - val_loss: 0.0337
Epoch 18/100
 - 1s - loss: 0.0472 - val_loss: 0.0325
Epoch 19/100
 - 2s - loss: 0.0457 - val_loss: 0.0310
Epoch 20/100
 - 2s - loss: 0.0451 - val_loss: 0.0302
Epoch 21/100
 - 1s - loss: 0.0437 - val_loss: 0.0297
Epoch 22/100
 - 1s - loss: 0.0418 - val_loss: 0.0288
Epoch 23/100
 - 1s - loss: 0.0389 - val_loss: 0.0274
Epoch 24/100
 - 2s - loss: 0.0367 - val_loss: 0.0267
Epoch 25/100
 - 2s - loss: 0.0350 - val_loss: 0.0254
Epoch 26/100
 - 2s - loss: 0.0335 - val_loss: 0.

Epoch 62/100
 - 2s - loss: 0.0199 - val_loss: 0.0184
Epoch 63/100
 - 1s - loss: 0.0197 - val_loss: 0.0185
Epoch 64/100
 - 1s - loss: 0.0196 - val_loss: 0.0181
Epoch 65/100
 - 1s - loss: 0.0195 - val_loss: 0.0185
Epoch 66/100
 - 1s - loss: 0.0196 - val_loss: 0.0182
Epoch 67/100
 - 1s - loss: 0.0198 - val_loss: 0.0178
Epoch 68/100
 - 2s - loss: 0.0198 - val_loss: 0.0180
Epoch 69/100
 - 2s - loss: 0.0200 - val_loss: 0.0179
Epoch 70/100
 - 2s - loss: 0.0204 - val_loss: 0.0186
Epoch 71/100
 - 2s - loss: 0.0211 - val_loss: 0.0195
Epoch 72/100
 - 2s - loss: 0.0213 - val_loss: 0.0206
Epoch 73/100
 - 2s - loss: 0.0216 - val_loss: 0.0200
Epoch 74/100
 - 2s - loss: 0.0217 - val_loss: 0.0199
Epoch 75/100
 - 2s - loss: 0.0218 - val_loss: 0.0186
Epoch 76/100
 - 2s - loss: 0.0226 - val_loss: 0.0178
Epoch 77/100
 - 1s - loss: 0.0236 - val_loss: 0.0176
Epoch 78/100
 - 1s - loss: 0.0225 - val_loss: 0.0180
Epoch 79/100
 - 1s - loss: 0.0221 - val_loss: 0.0177
Epoch 80/100
 - 1s - loss: 0.0217 - val_loss: 

 - 1s - loss: 0.0457 - val_loss: 0.0310
Epoch 16/100
 - 1s - loss: 0.0429 - val_loss: 0.0300
Epoch 17/100
 - 1s - loss: 0.0408 - val_loss: 0.0292
Epoch 18/100
 - 1s - loss: 0.0396 - val_loss: 0.0284
Epoch 19/100
 - 1s - loss: 0.0376 - val_loss: 0.0276
Epoch 20/100
 - 1s - loss: 0.0355 - val_loss: 0.0261
Epoch 21/100
 - 1s - loss: 0.0322 - val_loss: 0.0253
Epoch 22/100
 - 1s - loss: 0.0302 - val_loss: 0.0259
Epoch 23/100
 - 1s - loss: 0.0299 - val_loss: 0.0262
Epoch 24/100
 - 1s - loss: 0.0296 - val_loss: 0.0238
Epoch 25/100
 - 1s - loss: 0.0299 - val_loss: 0.0253
Epoch 26/100
 - 2s - loss: 0.0287 - val_loss: 0.0241
Epoch 27/100
 - 2s - loss: 0.0279 - val_loss: 0.0231
Epoch 28/100
 - 2s - loss: 0.0272 - val_loss: 0.0230
Epoch 29/100
 - 2s - loss: 0.0270 - val_loss: 0.0226
Epoch 30/100
 - 2s - loss: 0.0271 - val_loss: 0.0221
Epoch 31/100
 - 2s - loss: 0.0263 - val_loss: 0.0209
Epoch 32/100
 - 2s - loss: 0.0261 - val_loss: 0.0211
Epoch 33/100
 - 2s - loss: 0.0261 - val_loss: 0.0209
Epoch 

In [None]:
result[3, :]