# Predict Bike Availability Using a Neural Network

In this report, we predict future bike availability (with a forecast horizon of 15 minutes) using a neural network.

In [1]:
# load packages
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from IPython.display import display

# load custom modules
import sys
sys.path.append("../data_preprocessing/")
from prepare_data_for_ML import prepare_data_for_ML
import plotting_tools as vs

In [2]:
# Render matplotlib figures inline, directly below the cell that creates them
%matplotlib inline

# Apply matplotlib's "ggplot" style sheet to the figures drawn in this notebook
plt.style.use("ggplot")

## Prepare the Data

In [3]:
# Data selection: restrict the study to station_id = 2.
# NOTE(review): nrows is forwarded unchanged to the loader; presumably None
# means "read every row" and a small value (e.g. 10002) gives a quicker trial
# run -- confirm against prepare_data_for_ML.
station_ids = [2]
nrows = None

# build the custom data-preparation object from the status and weather files
obj = prepare_data_for_ML(
    status_data_path="../data/status_time_res_15min.csv",
    weather_data_path="../data/weather_fixed.csv",
    nrows=nrows,
    station_ids=station_ids,
)

# build the table that feeds the neural network
df = obj.prepare_data_for_NN()

# preview the first rows
display(df.head())

# report the size of the table (rows, then columns)
print("This table has {npnts} data points and {col} columns.".format(npnts=len(df), col=len(df.columns)))

Unnamed: 0,station_id,bikes_available,time_of_day,day_of_week,month_of_year,mean_temperature_f,mean_humidity,mean_visibility_miles,mean_wind_speed_mph,precipitation_inches,events,bikes_available_future
0,2,2,1215,3,8,68.0,75.0,10.0,11.0,0.0,Normal,2.0
1,2,2,1230,3,8,68.0,75.0,10.0,11.0,0.0,Normal,2.0
2,2,2,1245,3,8,68.0,75.0,10.0,11.0,0.0,Normal,2.0
3,2,2,1300,3,8,68.0,75.0,10.0,11.0,0.0,Normal,3.0
4,2,3,1315,3,8,68.0,75.0,10.0,11.0,0.0,Normal,3.0


This table has 69878 data points and 12 columns.


### Encode Categorical Features

In [4]:
# target: the future availability, kept as a one-column DataFrame
response = df[['bikes_available_future']]

# inputs: every column except station_id and the target, with the categorical
# (non-numeric) columns expanded into one-hot indicator columns
predictors = pd.get_dummies(df.drop(["station_id", "bikes_available_future"], axis=1))

# preview the encoded predictors
display(predictors.head())

# report how many predictor columns exist after one-hot-encoding
print("After one-hot-encoding, the number of predictors become {col}.".format(col=len(predictors.columns)))

Unnamed: 0,bikes_available,day_of_week,month_of_year,mean_temperature_f,mean_humidity,mean_visibility_miles,mean_wind_speed_mph,precipitation_inches,time_of_day_0000,time_of_day_0015,...,time_of_day_2245,time_of_day_2300,time_of_day_2315,time_of_day_2330,time_of_day_2345,events_Fog,events_Fog-Rain,events_Normal,events_Rain,events_Rain-Thunderstorm
0,2,3,8,68.0,75.0,10.0,11.0,0.0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,2,3,8,68.0,75.0,10.0,11.0,0.0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,2,3,8,68.0,75.0,10.0,11.0,0.0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,2,3,8,68.0,75.0,10.0,11.0,0.0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,3,3,8,68.0,75.0,10.0,11.0,0.0,0,0,...,0,0,0,0,0,0,0,1,0,0


After one-hot-encoding, the number of predictors become 109.


### Shuffle and Split the Data

In [5]:
from sklearn.model_selection import train_test_split

# split the data into training (80%) and testing (20%) sets; the fixed
# random_state makes the shuffled split repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(predictors, response, test_size=0.2, random_state=100)

# convert DataFrames into arrays. This step is needed for KerasRegressor that is used later
X_train = X_train.values
X_test = X_test.values
# flatten the single-column response frames into 1-D arrays
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

# Show the results of the split. The print() call form matches the other cells
# of this notebook and runs under both Python 2 and Python 3 (the former
# statement form is a SyntaxError on Python 3).
print("Training set has {} samples.".format(X_train.shape[0]))
print("Testing set has {} samples.".format(X_test.shape[0]))

Training set has 55902 samples.
Testing set has 13976 samples.


## Build a Neural Network Model

In [6]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def baseline_model():
    """Build and compile the single-hidden-layer regression network.

    The hidden layer has as many ReLU units as there are columns in the
    notebook-level ``predictors`` table, and feeds a single output unit
    for which no activation is specified.

    Returns:
        The compiled ``Sequential`` model. It is trained against mean
        absolute error with the Adam optimizer, and reports mean absolute
        error and mean squared error as metrics.
    """
    # the input width and the hidden-layer width are both the feature count
    n_features = predictors.shape[1]

    network = Sequential([
        # hidden layer: one unit per input feature
        Dense(n_features, input_shape=(n_features,), activation="relu"),
        # output layer: one unit carrying the predicted value
        Dense(1),
    ])

    network.compile(loss='mean_absolute_error', optimizer='adam', metrics=["mean_absolute_error", "mean_squared_error"])

    return network

Using TensorFlow backend.


In [7]:
# set a value for random seed
# NOTE(review): this seeds NumPy only; whether it also makes the Keras
# backend's weight initialisation repeatable is not established here.
seed = 100
np.random.seed(seed)

# wrap the model builder so that scikit-learn can train and score it
# NOTE(review): `nb_epoch` is the Keras 1 spelling; newer Keras releases name
# this argument `epochs` -- confirm against the installed version.
estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=10, batch_size=20, verbose=1)

# 10 consecutive, unshuffled folds. `random_state` is deliberately not passed:
# it only has an effect when shuffle=True, and recent scikit-learn versions
# raise a ValueError when it is set while shuffling is off. The folds are
# therefore exactly the ones the previous call produced.
kfold_cv = KFold(n_splits=10)

# one score per fold, as returned by the estimator's own score method
results = cross_val_score(estimator, X_train, y_train, cv=kfold_cv)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

In [10]:
# summarise the cross-validation scores: mean and standard deviation over the folds
print("Results: mean MAE is %.2f with std. value of (%.2f) " % (np.mean(results), np.std(results)))

Results: mean MAE is 0.40 with std. value of (0.07) 
