## Install all the needed packages

In [1]:
%matplotlib inline

# DataFrame
import pandas as pd

# Elasticsearch
from elasticsearch import Elasticsearch, helpers

# datetime
from datetime import datetime

# isnan()
import math

# plot
import numpy as np
import matplotlib.pyplot as plt

## Create the local raw_data_pool

In [2]:
raw_data_pool = {}

def make_sure(src_site, dest_site):
    if src_site not in raw_data_pool:
        raw_data_pool[src_site] = {}
    if dest_site not in raw_data_pool[src_site]:
        raw_data_pool[src_site][dest_site] = pd.DataFrame()

def put_data(src_site, dest_site, timestamp_epoch, column_type, value):
    make_sure(src_site, dest_site)
    raw_data_pool[src_site][dest_site].set_value(timestamp_epoch, column_type, value)

## Create the elasticsearch connection

In [3]:
es = Elasticsearch(['atlas-kibana.mwt2.org:9200'])

## Generate the common part of the queries

In [5]:
src_site = "source_site"
dest_site = "destination_site"

timestamp = { 'gte': '2016-01-01', 'lt': '2016-06-01' }

my_query = {}
my_query['query'] = {}
my_query['query']['filtered'] = {}
my_query['query']['filtered']['query'] = { "match_all": {} }
my_query['query']['filtered']['filter'] = {}
my_query['query']['filtered']['filter']['bool'] = {}
my_query['query']['filtered']['filter']['bool']['must'] = []
my_query['query']['filtered']['filter']['bool']['must'].append({ 'term': { 'srcSite': src_site } })
my_query['query']['filtered']['filter']['bool']['must'].append({ 'term': { 'destSite': dest_site } })
my_query['query']['filtered']['filter']['bool']['must'].append({ 'range': { 'timestamp': timestamp } })
my_query['fielddata_fields'] = [ 'timestamp' ]

print(my_query)

my_index = "network_weather_2-*"

print(my_index)

{'fielddata_fields': ['timestamp'], 'query': {'filtered': {'query': {'match_all': {}}, 'filter': {'bool': {'must': [{'term': {'srcSite': 'source_site'}}, {'term': {'destSite': 'destination_site'}}, {'range': {'timestamp': {'lt': '2016-06-01', 'gte': '2016-01-01'}}}]}}}}}
network_weather_2-*


## xxx

In [17]:
my_query['query']['filtered']['filter']['bool']['must'][0]['term']['srcSite'] = "Australia-ATLAS"
my_query['query']['filtered']['filter']['bool']['must'][0]['term']['destSite'] = "SFU-LCG2"

my_query

{'fielddata_fields': ['timestamp'], 'query': {'filtered': {'query': {'match_all': {}}, 'filter': {'bool': {'must': [{'term': {'srcSite': 'Australia-ATLAS', 'destSite': 'SFU-LCG2'}}, {'term': {'destSite': 'destination_site'}}, {'range': {'timestamp': {'lt': '2016-06-01', 'gte': '2016-01-01'}}}]}}}}}


In [None]:


scroll = list(helpers.scan(client=es, query=my_query))
count = 0

for res in scroll:
    count += 1
#     print(res)
#     print("====================")
    column_type = res['_type']
    timestamp_epoch = res['fields']['timestamp'][0]
    if column_type == 'latency':
        put_data(src_site, dest_site, timestamp_epoch, 'iso_8601', res['_source']['timestamp'])
        put_data(src_site, dest_site, timestamp_epoch, 'delay_median', res['_source']['delay_median'])
        put_data(src_site, dest_site, timestamp_epoch, 'delay_mean', res['_source']['delay_mean'])
        put_data(src_site, dest_site, timestamp_epoch, 'delay_sd', res['_source']['delay_sd'])
#         print(res['_source']['delay_median'])
#         print(res['_source']['delay_mean'])
#         print(res['_source']['delay_sd'])
#         print(res['fields']['timestamp'][0])
#         print(res['_source']['timestamp'])
    elif column_type == 'packet_loss_rate':
        put_data(src_site, dest_site, timestamp_epoch, 'iso_8601', res['_source']['timestamp'])
        put_data(src_site, dest_site, timestamp_epoch, 'packet_loss', res['_source']['packet_loss'])
#         print(res['_source']['packet_loss'])
#         print(res['fields']['timestamp'][0])
#         print(res['_source']['timestamp'])
    elif column_type == 'throughput':
        put_data(src_site, dest_site, timestamp_epoch, 'iso_8601', res['_source']['timestamp'])
        put_data(src_site, dest_site, timestamp_epoch, 'throughput', res['_source']['throughput'])
#         print(res['_source']['throughput'])
#         print(res['fields']['timestamp'][0])
#         print(res['_source']['timestamp'])
    else:
        print("Error: type should be latency, packet_loss_rate, or throughput. ")
    
#     if count >= 111:
#         break;

print(count)