<a href="https://colab.research.google.com/github/ChideraU/traffic-Predictor/blob/master/traffic_forecasting_LSTM_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [66]:
# from google.colab import drive
# drive.mount('/content/drive/')

from google.colab import files

%matplotlib inline
import matplotlib.pyplot as pyplot
import pandas as pd
import keras as kr
import numpy as np
import datetime
import sklearn
from sklearn.metrics import mean_squared_error
from math import sqrt

# Frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag = 1):
  df = pd.DataFrame(data)
  columns = [df.shift(i) for i in range(1, lag+1)]
  columns.append(df)
  df = pd.concat(columns, axis = 1)
  df.fillna(0, inplace = True)
  return df

# Create a differenced series
def difference(dataset, interval = 1):
  diff = list()
  for i in range(interval, len(dataset)):
    value = dataset.iloc[i,0] - dataset.iloc[i - interval,0]
    diff.append(value)
  return pd.Series(diff)

# Invert differenced value
def inverse_difference(history, yhat, interval = 1):
  return yhat + history.iloc[-interval,0]

# Scale train and test data to [-1,1]
def scale(train,test):
  # Fit scaler
  scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1,1))
  scaler = scaler.fit(train)
  # Transform train
  train = train.reshape(train.shape[0], train.shape[1])
  train_scaled = scaler.transform(train)
  # Transform test
  test = test.reshape(test.shape[0], test.shape[1])
  test_scaled = scaler.transform(test)
  return scaler, train_scaled, test_scaled

# Inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
  new_row = [x for x in X] + [value]
  array = np.array(new_row)
  array = array.reshape(1, len(array))
  inverted = scaler.inverse(array)
  return inverted[0,-1]

# Fit an LSTm network to training data
def fit_lstm(train, batch_size, nb_epoch, neurons):
  X, y = train[:, 0:-1], train[:,-1]
  X = X.reshape(X.shape[0], 1, X.shape[1])
  model = Sequential()
  model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
  model.add(Dense(1))
  model.compile(loss = 'mean_squared_error', optimizer = 'adam')
  for i in range(nb_epoch):
    model.fit(X, y, epochs = 1, batch_size = batch_size, verbose = 0, shuffle = False)
    model.reset_states()
  return model

# Make a one-step forecast
def forecast_lstm(model, batch_size, X):
  X = X.reshape(1, 1, len(X))
  yhat = model.predict(X, batch_size = batch_size)
  return yhat[0,0]
  

# Load the dataset
series = pd.read_csv("/content/drive/My Drive/exp_journey_time_on_north_lanes_02_02_2018.csv")

# Set all values in the date column to datatime datatypes
series['Date'] = pd.to_datetime(series['Date'])

# Set the index to be the date column
series.set_index('Date', inplace = True)

# Transform data to be stationary
raw_values = series.values
diff_values = difference(raw_values,1)

# Transform data to be supervised learning
supervised = timeseries_to_supervised(diff_values,1)
supervised_values = supervised.values

# Split the data into train and test sets
train,test = supervised_values[0:-8], supervised_valudes[-8:]

# Transform the scale of the data
scaler, train_scaled, test_scaled = scale(train,test)

# Fit the model
lstm_model = fit_lstm(train_scaled, 1, 3000, 4)

# Forecast the entire training dataset to build up state for forecasting
train_reshaped = train.scaled[:,0].reshape(len(train_scaled),1,1)
lstm_model.predict(train_reshaped, batch_size = 1)

# Walk-forward validation on the test data
predictions = list()
for i in range(len(test_scaled)):
  # Make one-step forecast
  X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
  yhat = forecast_lstm(lstm_model, 1, X)
  # Invert scaling
  yhat = invert_scale(scaler, X, yhat)
  # Invert differencing 
  yhat = inverse_difference(raw_values, yhat, len(test_scaled)+1-i)
  # Store forecast
  predictions.append(yhat)
  expected = raw_values[len(train) + i + 1]
  print('Month = %d, Predicted = %f, Expected = %f' % (i+1, yhat, exptected))
        
# Report performance
rmse = sqrt(mean_squared_error(raw_values[-8:], predictions))
print('Test RMSE: %3f' % rmse)
# Line plot of observed vs predicted
pyplot.plot(raw_values[-8:])
pyplot.plot(predictions)
pyplot.show()
        


# # Transform to supervised learning 
# #X = series.values
# #supervised = timeseries_to_supervised(X,1)

# # Transform to be stationary
# differenced = difference(series, 1)

# # Invert transform
# inverted = list()
# for i in range(len(differenced)):
#   value = inverse_difference(series, differenced[i], len(series)-i)
#   inverted.append(value)
  
# inverted = pd.Series(inverted)

# # Transform scale
# X = series.values
# X = X.reshape(len(X),1)

# scaled_X = scaler.transform(X)
# scaled_series = pd.Series(scaled_X[:,0])

# # Invert transform
# inverted_X = scaler.inverse_transform(scaled_X)
# inverted_series = pd.Series(inverted_X[:,0])





AttributeError: ignored