In [1]:
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
import matplotlib
import pandas as pd


  from pandas import datetime


In [2]:
# be able to save images on server
matplotlib.use('Agg')
from matplotlib import pyplot
import numpy
 


In [3]:
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
 df = pd.DataFrame(data)
 columns = [df.shift(i) for i in range(1, lag+1)]
 columns.append(df)
 df = concat(columns, axis=1)
 df = df.drop(0)
 return df
 


In [4]:
# create a differenced series
def difference(dataset, interval=2):
 diff = list()
 for i in range(interval, len(dataset)):
     value = dataset[i] - dataset[i - interval]
     diff.append(value)
 return Series(diff)
 


In [5]:
# scale train and test data to [-1, 1]
def scale(train, test):
 # fit scaler
 scaler = MinMaxScaler(feature_range=(-1, 1))
 scaler = scaler.fit(train)
 # transform train
 train = train.reshape(train.shape[0], train.shape[1])
 train_scaled = scaler.transform(train)
 # transform test
 test = test.reshape(test.shape[0], test.shape[1])
 test_scaled = scaler.transform(test)
 return scaler, train_scaled, test_scaled
 


In [6]:
# inverse scaling for a forecasted value
def invert_scale(scaler, X, yhat):
 new_row = [x for x in X] + [yhat]
 array = numpy.array(new_row)
 array = array.reshape(1, len(array))
 inverted = scaler.inverse_transform(array)
 return inverted[0, -1]
 


In [7]:
# evaluate the model on a dataset, returns RMSE in transformed units
def evaluate(model, raw_data, scaled_dataset, scaler, offset, batch_size):
 # separate
 X, y = scaled_dataset[:,0:-1], scaled_dataset[:,-1]
 # reshape
 reshaped = X.reshape(len(X), 1, 1)
 # forecast dataset
 output = model.predict(reshaped, batch_size=batch_size)
 # invert data transforms on forecast
 predictions = list()
 for i in range(len(output)):
     yhat = output[i,0]
     # invert scaling
     yhat = invert_scale(scaler, X[i], yhat)
     # invert differencing
     yhat = yhat + raw_data[i]
     # store forecast
     predictions.append(yhat)
     # report performance
 rmse = sqrt(mean_squared_error(raw_data[1:], predictions))
 return rmse
 


In [8]:
# fit an LSTM network to training data
def fit_lstm(train, test, raw, scaler, batch_size, nb_epoch, neurons):
 X, y = train[:, 0:-1], train[:, -1]
 print (X.shape)
 X = X.reshape(X.shape[0], 1, X.shape[1])
 print(X.shape)
 # prepare model
 model = Sequential()
 model.add(LSTM(neurons, batch_input_shape=(batch_size,1,1), stateful=True))
 model.add(Dense(1))
 model.compile(loss='mean_squared_error', optimizer='adam')
 # fit model
 train_rmse, test_rmse = list(), list()
 for i in range(nb_epoch):
     model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
     model.reset_states()
     # evaluate model on train data
     raw_train = raw[-(len(train)+len(test)+1):-len(test)]
     train_rmse.append(evaluate(model, raw_train, train, scaler, 0, batch_size))
     model.reset_states()
     # evaluate model on test data
     raw_test = raw[-(len(test)+1):]
     test_rmse.append(evaluate(model, raw_test, test, scaler, 0, batch_size))
     model.reset_states()
 history = DataFrame()
 history['train'], history['test'] = train_rmse, test_rmse
 return history
 


In [9]:
# run diagnostic experiments
def run():
 # load dataset
 series = read_csv('weekly.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)
 series = series.set_index('Issuance Date')
 # transform data to be stationary
 raw_values = series.values
 diff_values = difference(raw_values, 1)
 # transform data to be supervised learning
 supervised = timeseries_to_supervised(diff_values, 1)
 supervised_values = supervised.values
 # split data into train and test-sets
 train, test = supervised_values[0:-12], supervised_values[-12:]
 # transform the scale of the data
 scaler, train_scaled, test_scaled = scale(train, test)
 # fit and evaluate model
 train_trimmed = train_scaled[2:, :]
 # config
 repeats = 10
 n_batch = 4
 n_epochs = 500
 n_neurons = 1
 # run diagnostic tests
 for i in range(repeats):
     history = fit_lstm(train_trimmed, test_scaled, raw_values, scaler, n_batch, n_epochs, n_neurons)
     pyplot.plot(history['train'], color='blue')
     pyplot.plot(history['test'], color='orange')
     print('%d) TrainRMSE=%f, TestRMSE=%f' % (i, history['train'].iloc[-1], history['test'].iloc[-1]))
     pyplot.savefig('epochs_diagnostic.png')
 
# entry point
run()



  series = read_csv('weekly.csv', header=0, parse_dates=[0], index_col=0, squeeze=True)


(454, 1)
(454, 1, 1)


InvalidArgumentError:    Invalid input_h shape: [1,4,1] [1,2,1]
	 [[{{node CudnnRNN}}]]
	 [[sequential/lstm/PartitionedCall]] [Op:__inference_train_function_2445]

Function call stack:
train_function -> train_function -> train_function
