# ElasticSearch Test

In [1]:
from elasticsearch import Elasticsearch
import pandas as pd
import numpy as np
import datetime
import keras
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model

Using TensorFlow backend.


Create the Elasticsearch `queues-prediction` index

In [2]:
# Elasticsearch client created with the default connection settings.
es = Elasticsearch()
# Template for an authenticated HTTPS connection, kept for reference:
# es = Elasticsearch(
#       ['localhost'],
#       http_auth=(username, 'password'),
#       verify_certs=False,
#       scheme="https",
#       port=443,
# )
# Create the index that will receive the predictions. ignore=400 suppresses the
# HTTP 400 "already exists" error, so the error dict returned on a re-run can be ignored.
es.indices.create(index='queues-prediction', ignore=400)

{'error': {'root_cause': [{'type': 'resource_already_exists_exception',
    'reason': 'index [queues-prediction/r9x-EYlIQf6w4MNP62DwnA] already exists',
    'index_uuid': 'r9x-EYlIQf6w4MNP62DwnA',
    'index': 'queues-prediction'}],
  'type': 'resource_already_exists_exception',
  'reason': 'index [queues-prediction/r9x-EYlIQf6w4MNP62DwnA] already exists',
  'index_uuid': 'r9x-EYlIQf6w4MNP62DwnA',
  'index': 'queues-prediction'},
 'status': 400}

### Load from Queue Index

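Match documents of the `products` queue. The query below uses a fixed date range; for a rolling window covering yesterday, use these relative bounds instead: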

    "gte" : "now-1d/d", (yesterday)
    "lt" :  "now/d"     (today)

In [3]:
# Restrict the search to documents whose name matches "products" ...
name_clause = {"match": {"name": "products"}}
# ... and whose timestamp lies between the given gte/lte bounds (yyyy-MM-dd).
time_clause = {
    "range": {
        "timestamp": {
            "gte": "2019-12-29",
            "lte": "2019-12-31",
            "format": "yyyy-MM-dd",
        }
    }
}
# Both clauses must hold for a document to be returned.
query_body = {"query": {"bool": {"must": [name_clause, time_clause]}}}
# size caps the number of returned hits at 1000.
res = es.search(index="queues", body=query_body, size=1000)

Get _source Data

In [4]:
# Keep only the stored document (_source) of every search hit.
d = [hit['_source'] for hit in res['hits']['hits']]

In [5]:
# Remove the fields that are not needed for the forecast from every document.
for elem in d:
    del elem['items'], elem['querytime']

Build Dataframe

In [6]:
# One row per queue document, one column per remaining field.
df = pd.DataFrame(data=d)

In [7]:
# Use the document timestamps as the row index (the column itself is kept).
# If the search returned no hits, or the documents lack the field, fail with an
# explanatory message instead of an opaque KeyError.
if "timestamp" not in df.columns:
    raise KeyError(
        "'timestamp' column not found: the 'queues' search produced {} rows "
        "with columns {}".format(len(df), list(df.columns))
    )
df.index = df["timestamp"]

KeyError: 'timestamp'

In [None]:
# Parse the ISO-8601 timestamp strings into a DatetimeIndex.
df.index = pd.to_datetime(df.index, format='%Y-%m-%dT%H:%M:%S.%f%z')
# Sort the rows together with their index. Sorting only the index labels
# (index.sort_values()) would leave the row data in its original order and
# pair values with the wrong timestamps.
df = df.sort_index()

In [None]:
# Drop the three listed columns; they are not used by the cells that follow.
df = df.drop(columns=['timestamp', 'name', 'tier'])

Resample Data to 15min Interval

In [None]:
# Aggregate onto a regular 15-minute grid: average all samples that fall into
# a bin, then forward-fill bins that received no samples.
# '15min' replaces the deprecated 'T' alias and matches the freq string used
# for the forecast timestamps.
df = df.resample('15min').mean().ffill()

In [None]:
# Replace any NaN values that are still present with 0.
df = df.fillna(value=0)

In [None]:
# Preview the first five rows of the resampled series.
df.head(n=5)

## ML Model

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Load the saved Keras model from the HDF5 file in the working directory.
model_path = 'model_lstm_15min_woi.h5'
model = load_model(model_path)

In [None]:
# Raw NumPy array of the frame's values, used as model input below.
train = df.to_numpy()

In [None]:
# Fit a min-max scaler on the data and scale it in one step.
# NOTE(review): the scaler is fitted on this query window, not loaded from the
# model's training run - confirm this matches the scaling used during training.
scaler = MinMaxScaler()
train = scaler.fit_transform(train)

In [None]:
# n_input: length of the input window and number of forecast steps
#          (96 steps of 15 minutes = 24 hours).
# n_features: number of values per time step.
n_input, n_features = 96, 1

In [None]:
# Seed window: the most recent n_input scaled samples, shaped
# (1, n_input, n_features) for the model.
batch = train[-n_input:].reshape((1, n_input, n_features))

# Roll the window forward n_input times: predict one step ahead, record it,
# then drop the oldest step and append the prediction as the newest step.
pred_list = []
for step in range(n_input):
    next_value = model.predict(batch)[0]
    pred_list.append(next_value)
    batch = np.concatenate((batch[:, 1:, :], [[next_value]]), axis=1)

In [None]:
# Map the scaled predictions back to the original value range and flatten
# them into a 1-D array.
unscaled = scaler.inverse_transform(pred_list)
pred = np.array(unscaled).ravel()

In [None]:
# Convert every prediction to a plain Python int (truncates the fraction).
pred = list(map(int, pred))

In [None]:
# Forecast timestamps start one 15-minute step after the last observed bin.
forecast_start = df.index[-1] + datetime.timedelta(minutes=15)
# Create exactly one timestamp per predicted value instead of a hard-coded 96,
# so both sequences always have the same length when combined into a frame.
time_stamps = pd.date_range(start=forecast_start, periods=len(pred), freq='15min', tz='utc')

In [None]:
# Pair every forecast timestamp with its predicted value.
d = dict(timestamp=time_stamps, size=pred)
pred_df = pd.DataFrame(d)

In [None]:
# Display the forecast table for a visual check before it is uploaded.
pred_df

In [None]:
# Index every forecast row as its own document in 'queues-prediction'.
# 'querytime' and 'items' are not written to the prediction documents.
for count, (row_label, row) in enumerate(pred_df.iterrows(), start=1):
    doc_data = {
        'timestamp': row['timestamp'],
        'tier' : 'pic',
        'name' : 'products',
        'size' : row['size'],
    }
    es.index('queues-prediction', body=doc_data)
    # Report progress after every fifth uploaded document.
    if count % 5 == 0:
        print(str(count) + " Elemente hochgeladen")