In [None]:
import numpy as np
import time
import matplotlib.pyplot as plt
from pandas import read_csv
import math
import seaborn as sns
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Slice a single-column series into sliding windows and next-step targets.

    Parameters
    ----------
    dataset : 2-D array indexed as ``dataset[row, 0]``
        Only column 0 is read.
    look_back : int, default 1
        Number of consecutive values that make up one input window.

    Returns
    -------
    (X, Y) : tuple of numpy arrays
        ``X[k]`` is ``dataset[k:k + look_back, 0]`` and ``Y[k]`` is
        ``dataset[k + look_back, 0]``.

    Notes
    -----
    Exactly ``len(dataset) - look_back - 1`` samples are produced, so the last
    row of ``dataset`` is never used as a target. The plotting offsets
    elsewhere in this notebook are written against this sample count.
    When that count is zero or negative both returned arrays are empty.
    """
    n_samples = len(dataset) - look_back - 1
    starts = range(n_samples)
    windows = [dataset[start:start + look_back, 0] for start in starts]
    targets = [dataset[start + look_back, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Read the metric export and keep only its first four columns (by position).
# Later cells access the columns 'metric_name', 'metric_id', 'timestamp' and
# 'value' by name, so those are presumably the four retained columns -- TODO confirm.
dataframe = pd.read_csv('../data/metric_date.csv', sep=',').iloc[:, :4]

In [None]:
# Build two lookup tables from the metric rows, in row order:
#   dic_name : metric_name -> list of the distinct metric_ids seen under that name
#   dic_id   : metric_id   -> [metric_name]  (later cells append results to this list)
dic_name, dic_id = {}, {}
name_column = dataframe['metric_name'].values
id_column = dataframe['metric_id'].values
for metric_name, metric_id in zip(name_column, id_column):
    ids_for_name = dic_name.setdefault(metric_name, [])
    if metric_id in ids_for_name:
        # This (name, id) pair was already registered by an earlier row.
        continue
    ids_for_name.append(metric_id)
    dic_id[metric_id] = [metric_name]
keys_name = list(dic_name)
keys_id = list(dic_id)

In [None]:
# Train and score one small dense network per metric_id.
#
# When the cell finishes, every metric with more than look_back*4 points holds
# the following list in dic_id[metric_id]:
#   [0] metric name
#   [1] value series as a 1-D float32 array, ordered by timestamp
#   [2] RMSE on the validation split
#   [3] [x_train, y_train, y_pred_train]
#   [4] [x_valid, y_valid, y_pred_valid]
#   [5] wall-clock seconds spent in model.fit
#   [6] wall-clock seconds spent in model.predict on the validation inputs
# Metrics with too few points are removed from both dic_id and dic_name.

# Ids dropped by an earlier run of this cell no longer exist in dic_id; leave
# them out so that re-running the cell does not raise KeyError. On a fresh
# kernel this filter keeps every id.
keys_id = [metric_id for metric_id in keys_id if metric_id in dic_id]
sample = len(keys_id)
look_back = 10
supr = []
for i in range(sample):
    indx = keys_id[i]
    # Start again from just the metric name so a re-run replaces earlier results
    # instead of appending a second set after them (no-op on a fresh kernel).
    dic_id[indx] = dic_id[indx][:1]

    # Select rows with a boolean mask through .loc. The previous
    # `.iloc[<index labels>]` treated labels as positions, which is only
    # correct while the frame still has its default 0..n-1 index.
    data = (
        dataframe.loc[dataframe['metric_id'] == indx]
        .sort_values(by='timestamp', ascending=True)
        .loc[:, 'value']
    )
    dataset = data.values
    dataset = dataset.astype('float32')
    dic_id[indx].append(dataset)
    # create_dataset reads column 0 of a 2-D array
    dataset = dataset.reshape(-1, 1)

    if len(dataset) > look_back*4:
        # chronological split: first 67% for training, remainder for validation
        train_size = int(len(dataset) * 0.67)
        train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

        # windows of look_back values as inputs, the following value as target
        x_train, y_train = create_dataset(train, look_back)
        x_valid, y_valid = create_dataset(test, look_back)

        # NOTE(review): a new Keras model is built for every metric; if memory
        # grows over many metrics, consider clearing the backend session
        # between iterations.
        model=Sequential()
        model.add(Dense(units=16, input_dim=look_back, activation='relu'))
        model.add(Dense(16, activation='relu'))
        model.add(Dense(1))
        model.compile(loss='mean_squared_error',  optimizer='adam',metrics = ['mse', 'mae'])

        start_fit = time.time()
        model.fit(x_train,y_train, epochs=100, batch_size=30, verbose=0, validation_data=(x_valid,y_valid), shuffle=False)
        end_fit = time.time()

        # make predictions (only the validation prediction is timed)
        y_pred_train = model.predict(x_train)
        start_pred = time.time()
        y_pred_valid = model.predict(x_valid)
        end_pred = time.time()

        # root mean squared error on the validation split
        testScore = math.sqrt(mean_squared_error(y_valid, y_pred_valid[:,0]))

        dic_id[indx].append(testScore)
        dic_id[indx].append([x_train, y_train, y_pred_train])
        dic_id[indx].append([x_valid, y_valid, y_pred_valid])
        dic_id[indx].append(end_fit - start_fit)
        dic_id[indx].append(end_pred - start_pred)
    else:
        # too short to model: schedule removal and unlink the id from its name
        supr.append(indx)
        dic_name[dic_id[indx][0]].remove(indx)
    if (i+1) % 25 == 0:
        print("%.2f" % ((100/sample)*(i+1)),"% completed...")
# Removal is deferred until after the loop so dic_id[indx] stays readable inside it.
for i in supr:
    dic_id.pop(i)

In [None]:
# Write the results table only when `sample` still equals the full number of
# metric ids. Each metric_id becomes one column of the frame; entries that are
# arrays or nested lists are written by to_csv in their string form.
if len(keys_id) == sample:
    pd.DataFrame(dic_id).to_csv('DNN_bis.csv', encoding='utf-8')

In [None]:
# For every metric still present in dic_id, draw the observed series together
# with the train and validation predictions, each shifted to the time steps it
# actually predicts.
keys_id = list(dic_id.keys())
sample = len(keys_id)
for i in range(sample):
    indx = keys_id[i]
    # dic_id[indx] layout used here: [0] metric name, [1] observed series,
    # [3][2] predictions on the train split, [4][2] predictions on the
    # validation split.
    observed = dic_id[indx][1].reshape(-1,1)
    train_pred = dic_id[indx][3][2]
    valid_pred = dic_id[indx][4][2]

    fig, ax = plt.subplots()
    # shift train predictions for plotting: the first prediction targets
    # position look_back of the series
    trainPlot = np.full_like(observed, np.nan)
    trainPlot[look_back:len(train_pred)+look_back, :] = train_pred
    # shift validation predictions for plotting: the slice bounds mirror the
    # sample count produced by create_dataset on each split
    validPlot = np.full_like(observed, np.nan)
    validPlot[len(train_pred)+(look_back*2)+1:len(observed)-1, :] = valid_pred
    # plot baseline and predictions
    ax.set_title("{} (metric_id={})".format(dic_id[indx][0], indx))
    ax.set_xlabel(str(i))
    ax.plot(observed)
    ax.plot(trainPlot)
    ax.plot(validPlot)
    # Render and release each figure before creating the next one; previously
    # every figure stayed open until a single plt.show() after the loop, so
    # memory grew with the number of metrics.
    plt.show()
    plt.close(fig)