In [1]:
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, Normalizer
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Activation

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

%matplotlib inline
# plt.style.use("ggplot")
sns.set_style("darkgrid")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  (fname, cnt))
  (fname, cnt))


In [2]:
# Load the pickup table from final_1minutes.csv and discard the 'Month' column.
data = pd.read_csv("final_1minutes.csv").drop(columns=['Month'])

# Keep the three pickup-count series aside before they are removed from `data`.
num_pickups = data['total']
num_green_pickups = data['green']
num_yellow_pickups = data['yellow']

# What is left in `data` is used as the model's input features.
data = data.drop(columns=['Unnamed: 0', 'green', 'yellow', 'total'])

In [3]:
def series_to_supervised(data, n_in, n_out, dropnan=True):
    """Reframe a time series as a supervised-learning table of shifted copies.

    Parameters
    ----------
    data : list, 2-D array or DataFrame
        Observations, one row per time step. Anything `pd.DataFrame` accepts.
    n_in : int
        Number of lagged steps to include as columns (t-n_in ... t-1).
    n_out : int
        Number of forward steps to include as columns (t, t+1, ... t+n_out-1).
    dropnan : bool, default True
        Drop the rows that contain NaN introduced by shifting.

    Returns
    -------
    pandas.DataFrame
        Columns are named ``var<j>(t-<k>)`` for lags, ``var<j>(t)`` for the
        current step and ``var<j>(t+<k>)`` for forward steps.
    """
    df = pd.DataFrame(data)
    # Count variables on the frame itself so 1-D lists, 2-D lists and arrays
    # all yield a name list that matches the concatenated columns.
    n_vars = df.shape[1]
    cols, names = [], []

    # Input sequence: oldest lag first (t-n_in, ..., t-1).
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]

    # Forecast sequence: t, t+1, ..., t+n_out-1.
    for step in range(n_out):
        cols.append(df.shift(-step))
        # Forward steps are labelled t+k so they cannot collide with lag names.
        suffix = '(t)' if step == 0 else '(t+%d)' % step
        names += ['var%d%s' % (j + 1, suffix) for j in range(n_vars)]

    agg = pd.concat(cols, axis=1)
    agg.columns = names

    if dropnan:
        agg = agg.dropna()

    return agg

In [4]:
# Normalize the data and lay it out as lagged samples for the LSTM.
n_hours = 47  # number of lagged rows given to the model per sample

# Features first, target (`num_pickups`) as the last column.
features_and_target = pd.concat([data, num_pickups], axis=1)
n_features = features_and_target.shape[1]

values = features_and_target.values.astype('float32')

# Scale every column into [0, 1]; the fitted scaler is kept for inverting later.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

# One row per sample: n_hours lagged copies of all columns, then the row at t.
reframed = series_to_supervised(scaled, n_hours, 1)


In [5]:
# Exclude significantly deviated data: locate the row with the smallest
# target value (last column) after the first 365*24*3*60 rows.
total_values = reframed.values
search_start = 365*24*3*60
n_p = search_start + np.argmin(total_values[search_start:, -1])

# Remove rows n_p .. n_p + n_hours + 60 (inclusive) and keep everything else.
values = np.delete(total_values, np.s_[n_p:n_p + n_hours + 60 + 1], axis=0)

# Result table (5 x 7), filled in by the training loop.
result = np.zeros((5, 7))

In [None]:
def _invert_target(scaler, rows, scaled_target, n_obs, n_features):
    """Map a scaled target column back to original units.

    `scaler` was fitted on `n_features` columns, so the (n, 1) target is padded
    with the other `n_features - 1` time-t columns of `rows` (the columns that
    start at `n_obs`) before inverting. Only the target column is returned.
    """
    padded = np.concatenate(
        (rows[:, n_obs:n_obs + n_features - 1], scaled_target), axis=1)
    return scaler.inverse_transform(padded)[:, -1]


# Rows of `result` filled per iteration i:
#   0: R^2 (test)   1: MAE (test)   2: RMSE (test)
#   3: MAPE % (test)   4: MAPE % (train, errors below 100% only)
for i in range (1):
    # Keep only the rows whose column -8 equals the reference value read from
    # row 120 + 24*i, column 5.
    # NOTE(review): this filters `total_values`, so the outlier-trimmed `values`
    # built in the previous cell is overwritten without being used — confirm.
    # NOTE(review): the row/column indices are hard-coded; confirm they select
    # the intended subset for every i.
    values = total_values[total_values[:, -8] == total_values[120 + 24*i, 5]]
    n_train_hours = 225257  # number of leading rows used for training
    train = values[:n_train_hours, :]
    test = values[n_train_hours:, :]

    # split into input and outputs: the lag columns are the input,
    # the last column is the target
    n_obs = n_hours * n_features
    train_X, train_y = train[:, :n_obs], train[:, -1]
    test_X, test_y = test[:, :n_obs], test[:, -1]
    # reshape input to be 3D [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
    test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))

    # design network
    model = Sequential()
    model.add(LSTM(24, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')

    # fit network (the test split doubles as the validation set)
    history = model.fit(train_X, train_y, epochs=100, batch_size=100,
                    validation_data=(test_X, test_y), verbose=2, shuffle=False)

    history_loss = pd.DataFrame()
    history_loss['train'] = history.history['loss']
    history_loss['test'] = history.history['val_loss']
    history_loss.plot(figsize=(5,5), fontsize=8,
                title='LSTM Model Loss');

    # ---- test-set metrics ----
    yhat = model.predict(test_X)
    # reshape back to 2d
    test_X = test_X.reshape((test_X.shape[0], -1))
    test_y = test_y.reshape((len(test_y), 1))

    # invert scaling for forecast and actual
    # NOTE(review): predictions may be negative and are scored unclipped —
    # confirm whether they should be clipped at 0 first.
    inv_yhat = _invert_target(scaler, test, yhat, n_obs, n_features)
    inv_y = _invert_target(scaler, test, test_y, n_obs, n_features)

    # calculate r2
    result[0, i] = r2_score(inv_y, inv_yhat)

    # calculate mean absolute error
    result[1, i] = mean_absolute_error(inv_y, inv_yhat)

    # calculate rmse
    rmse = math.sqrt(mean_squared_error(inv_y, inv_yhat))
    result[2, i]= rmse

    # calculate mean absolute error percentage; rows whose actual value is 0
    # are skipped because the percentage error is undefined there and would
    # otherwise turn the mean into inf/nan
    nonzero = inv_y != 0
    maep = np.mean(np.abs((inv_y[nonzero] - inv_yhat[nonzero])/inv_y[nonzero]))*100
    result[3, i] = maep

    # ---- train bias ----
    yhat_t = model.predict(train_X)
    # reshape back to 2d
    train_X = train_X.reshape((train_X.shape[0], -1))
    train_y = train_y.reshape((len(train_y), 1))

    # invert scaling for forecast and actual
    inv_yhat_t = _invert_target(scaler, train, yhat_t, n_obs, n_features)
    inv_y_t = _invert_target(scaler, train, train_y, n_obs, n_features)

    # calculate mean absolute error percentage over the rows whose relative
    # error is below 100% (inf/nan from zero actuals fail the comparison and
    # are therefore excluded too)
    ape_t = np.abs((inv_y_t - inv_yhat_t)/inv_y_t)
    index = ape_t < 1
    maep = np.mean(ape_t[index])*100

    result[4, i] = maep

Train on 225257 samples, validate on 55543 samples
Epoch 1/100
 - 121s - loss: 0.0250 - val_loss: 0.0193
Epoch 2/100
 - 123s - loss: 0.0170 - val_loss: 0.0190
Epoch 3/100
 - 122s - loss: 0.0163 - val_loss: 0.0146
Epoch 4/100
 - 122s - loss: 0.0151 - val_loss: 0.0124
Epoch 5/100
 - 123s - loss: 0.0152 - val_loss: 0.0174
Epoch 6/100
 - 122s - loss: 0.0149 - val_loss: 0.0140
Epoch 7/100
 - 122s - loss: 0.0148 - val_loss: 0.0156
Epoch 8/100
 - 122s - loss: 0.0147 - val_loss: 0.0149
Epoch 9/100
 - 121s - loss: 0.0148 - val_loss: 0.0130
Epoch 10/100
 - 122s - loss: 0.0145 - val_loss: 0.0139
Epoch 11/100
 - 122s - loss: 0.0146 - val_loss: 0.0135
Epoch 12/100
 - 122s - loss: 0.0144 - val_loss: 0.0139
Epoch 13/100
 - 121s - loss: 0.0145 - val_loss: 0.0186
Epoch 14/100
 - 122s - loss: 0.0144 - val_loss: 0.0135
Epoch 15/100
 - 122s - loss: 0.0143 - val_loss: 0.0175
Epoch 16/100
 - 121s - loss: 0.0144 - val_loss: 0.0132
Epoch 17/100
 - 123s - loss: 0.0143 - val_loss: 0.0172
Epoch 18/100
 - 122s - 

In [None]:
result