In [1]:
import numpy as np
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Turn a single-column series into sliding-window samples.

    Args:
        dataset: 2-D array indexed as ``dataset[row, 0]``; only column 0 is read.
        look_back: number of consecutive rows that make up one input window.

    Returns:
        A pair ``(X, y)`` of NumPy arrays. ``X[k]`` holds rows
        ``k .. k + look_back - 1`` of column 0 and ``y[k]`` is the value at
        row ``k + look_back``.

    Note:
        ``len(dataset) - look_back - 1`` windows are produced, so the last
        row of ``dataset`` is never used as a target.
    """
    n_windows = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, 0] for start in range(n_windows)]
    targets = [dataset[start + look_back, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
# NOTE(review): only NumPy is seeded here; whether the Keras backend draws from
# this generator depends on the backend in use -- confirm if exact reproducibility matters.
np.random.seed(7)
# Load the two input tables (read_csv already splits on commas by default).
dataframe = read_csv('../data/it-data-4metrics.csv')
data = read_csv('LSTM_id2.csv')

In [2]:
# Keep only the first four columns of the loaded table.
dataframe = dataframe.iloc[:,0:4]
# Alternative series selections (uncomment one to replace the active line below):
# indexNames = dataframe[ dataframe['metric_id'] == dataframe.iloc[95744,0] ].index
# indexNames = dataframe[ dataframe['metric_id'] == dataframe.iloc[117264,0] ].index
# indexNames = dataframe[ dataframe['metric_id'] == data.iloc[70,1] ].index
# indexNames = dataframe[ dataframe['metric_id'] == dataframe.iloc[243333,0] ].index
# Index labels of every row whose metric_id equals the value in the first row, first column.
indexNames = dataframe[ dataframe['metric_id'] == dataframe.iloc[0,0] ].index

# indexNames holds index *labels*, so the rows are selected with the label-based
# .loc accessor. The positional .iloc only gave the same rows while the index
# happened to be the default 0..n-1 range.
# The selected rows are ordered by timestamp and reduced to the 'value' column.
dataframe = dataframe.loc[indexNames].sort_values(by='timestamp', ascending=True).loc[:,['value']]
# Single-column float32 array of shape (n_rows, 1) consumed by the following cells.
dataset = dataframe.values.astype('float32')

In [3]:
# normalize the dataset
# The scaler is fitted on the training prefix only (the same 0.67 fraction used
# for the train/test split) so that the minimum and maximum of the hold-out
# portion do not leak into the values the model is trained on. The fitted
# transform is then applied to the whole series; hold-out values may therefore
# fall slightly outside [0, 1].
# NOTE: this cell overwrites `dataset`; re-running it without first re-running
# the cell that builds `dataset` would refit the scaler on already-scaled values.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:int(len(dataset) * 0.67)])
dataset = scaler.transform(dataset)

In [4]:
# Chronological split: the first 67% of the rows form the training set and the
# remaining rows are held out for evaluation.
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

In [5]:
# Build supervised samples: every X row is a window of `look_back` consecutive
# values and the matching y entry is the value that follows that window.
look_back = 30
x_train, y_train = create_dataset(train, look_back=look_back)
x_valid, y_valid = create_dataset(test, look_back=look_back)

In [6]:
# The LSTM expects input shaped [samples, time steps, features]; each window is
# presented as a single time step carrying `look_back` features.
x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
x_valid = x_valid.reshape(x_valid.shape[0], 1, x_valid.shape[1])
# Show the validation inputs and targets.
print(x_valid)
print(y_valid)

[[[28.125 29.625 44.375 ... 13.75  15.375 13.5  ]]

 [[29.625 44.375 16.875 ... 15.375 13.5   13.75 ]]

 [[44.375 16.875 17.875 ... 13.5   13.75  16.125]]

 ...

 [[15.625 18.375 16.75  ...  2.25   2.125 13.75 ]]

 [[18.375 16.75  16.5   ...  2.125 13.75  13.5  ]]

 [[16.75  16.5   27.    ... 13.75  13.5   13.375]]]
[13.75  16.125 14.25  14.125 14.375 15.75  15.    15.    15.125 17.
 18.625 15.375 14.5   14.25  15.125 18.125 16.25  14.375 16.375 14.625
 16.25  15.    15.    14.375 15.75  15.625 16.125 14.5   14.25  15.125
 12.75   6.5   11.5    7.375  8.125  7.375  8.5    5.375  9.875  8.75
  6.75   7.75   6.75  13.     7.875  6.625  7.375 10.375  9.875  9.625
  7.5   12.625  7.5   12.75   8.    10.625 10.875  9.375  9.125  9.875
  9.75  13.5    8.5    5.25  10.375 11.     8.5    7.5    7.5    9.25
 12.5    8.     9.75   8.25   6.625 11.375  7.75   9.375 10.125 10.
 12.625 13.625  9.125 14.625 10.5   12.625  8.875 22.    14.375 11.125
  9.375 11.    10.5   10.875  9.75  14.25  12.75  1

In [7]:
# Network: one LSTM layer with 4 units reading a single time step of
# `look_back` features, followed by a one-unit dense output layer.
model = Sequential([
    LSTM(4, input_shape=(1, look_back)),
    Dense(1),
])
# Trained with Adam on mean squared error, one sample per batch, for 100 silent epochs.
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=100, batch_size=1, verbose=0)

<keras.callbacks.History at 0x7fb8003d7340>

In [8]:
# make predictions (still in the scaler's normalized units)
trainPredict = model.predict(x_train)
testPredict = model.predict(x_valid)
# invert predictions back to the original units of the series
trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)
# Invert the targets into *new* names. Overwriting y_train / y_valid here would
# change their shape and units, so running this cell a second time would fail.
# The targets are passed as one column, matching the single feature the scaler
# was fitted on, and flattened back to 1-D afterwards.
y_train_orig = scaler.inverse_transform(y_train.reshape(-1, 1))[:, 0]
y_valid_orig = scaler.inverse_transform(y_valid.reshape(-1, 1))[:, 0]
# calculate root mean squared error
# The square root is taken explicitly: the `squared=False` argument of
# mean_squared_error is deprecated in recent scikit-learn releases, and the
# second test-score computation that used it only repeated the same number.
trainScore = math.sqrt(mean_squared_error(y_train_orig, trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_valid_orig, testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

NameError: name 'scaler' is not defined

In [None]:
# Overlay the observed series with the train and test predictions.
plt.figure(figsize=(18, 4))
# Train predictions go on a NaN-filled canvas the size of the series, shifted
# right by `look_back` so each prediction sits at the position of its target.
trainPredictPlot = np.full_like(dataset, np.nan)
trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict
# Test predictions go on a second NaN canvas; the slice starts after the train
# predictions plus two look_back offsets plus one, and stops one row before the
# end of the series.
testPredictPlot = np.full_like(dataset, np.nan)
testPredictPlot[len(trainPredict) + 2 * look_back + 1:len(dataset) - 1, :] = testPredict
# Baseline (series mapped back to original units) followed by both prediction traces.
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()