## Import all the needed packages

In [11]:
# DataFrame
import pandas as pd

# Elasticsearch
from elasticsearch import Elasticsearch

## Create the local raw_data_pool

In [2]:
# Nested cache of measurements: raw_data_pool[src_site][dest_site] holds one
# DataFrame per (source, destination) pair. put_data() fills those frames
# using timestamps as row labels and column_type values as column labels.
raw_data_pool = {}

def make_sure(src_site, dest_site):
    """Ensure ``raw_data_pool[src_site][dest_site]`` exists.

    Missing levels are created on demand: an empty dict for an unseen source
    site, and an empty DataFrame for an unseen destination under that source.
    Entries that already exist are left untouched.
    """
    if src_site not in raw_data_pool:
        raw_data_pool[src_site] = {}
    if dest_site not in raw_data_pool[src_site]:
        raw_data_pool[src_site][dest_site] = pd.DataFrame()

def put_data(src_site, dest_site, timestamp, column_type, value):
    """Store a single value in the frame for the (src_site, dest_site) pair.

    Parameters
    ----------
    src_site, dest_site : hashable
        Keys selecting the frame inside ``raw_data_pool``; the frame is
        created first if it does not exist yet.
    timestamp : hashable
        Row label of the cell to write.
    column_type : hashable
        Column label of the cell to write.
    value : scalar
        Value stored at ``[timestamp, column_type]``; an existing value at
        that position is overwritten.
    """
    make_sure(src_site, dest_site)
    frame = raw_data_pool[src_site][dest_site]
    # DataFrame.set_value was deprecated in pandas 0.21 and removed in 1.0.
    # Label-based .loc assignment writes in place and enlarges the frame when
    # the row and/or column label is not present yet.
    frame.loc[timestamp, column_type] = value

## Create the Elasticsearch connection

In [12]:
# Search settings: the index pattern to query, a timeout value (presumably
# seconds, intended for the search request -- confirm), and an empty
# placeholder for the query body that a later cell fills in.
my_index, my_timeout = "network_weather_2-*", 600
my_query = dict()

# Client object for the Elasticsearch node at this host:port.
es = Elasticsearch(['atlas-kibana.mwt2.org:9200'])

In [22]:
# Query parameters, named once so that the range filter and the histogram's
# extended bounds cannot drift apart when the window is changed.
my_src_site = "Australia-ATLAS"
my_dest_site = "SFU-LCG2"
# Time window in epoch milliseconds. The lower bound is the key of the
# 2016-02-01T00:00:00-06:00 bucket; the upper bound is exactly 29 days later.
my_time_min_ms = 1454306400000
my_time_max_ms = 1456812000000

# Search body: no hits are returned ("size": 0); only a per-day document count
# for records between the two sites inside the time window.
my_query = {
    "size": 0,
    "sort": [
        {"timestamp": {"order": "desc", "unmapped_type": "boolean"}},
    ],
    "query": {
        "filtered": {
            # Match-all query string; the actual selection happens in the filter.
            "query": {
                "query_string": {
                    "analyze_wildcard": True,
                    "lowercase_expanded_terms": False,
                    "query": "*",
                },
            },
            "filter": {
                "bool": {
                    "must": [
                        # Exact-phrase match on the source site.
                        {
                            "query": {
                                "match": {
                                    "srcSite": {"query": my_src_site, "type": "phrase"},
                                },
                            },
                        },
                        # Exact-phrase match on the destination site.
                        {
                            "query": {
                                "match": {
                                    "destSite": {"query": my_dest_site, "type": "phrase"},
                                },
                            },
                        },
                        # Inclusive time window.
                        {
                            "range": {
                                "timestamp": {
                                    "gte": my_time_min_ms,
                                    "lte": my_time_max_ms,
                                    "format": "epoch_millis",
                                },
                            },
                        },
                    ],
                    "must_not": [],
                },
            },
        },
    },
    "aggs": {
        # Daily buckets in America/Chicago time; min_doc_count 0 together with
        # extended_bounds requests empty days inside the window as well.
        "2": {
            "date_histogram": {
                "field": "timestamp",
                "interval": "1d",
                "time_zone": "America/Chicago",
                "min_doc_count": 0,
                "extended_bounds": {
                    "min": my_time_min_ms,
                    "max": my_time_max_ms,
                },
            },
        },
    },
    "fields": ["*", "_source"],
    "script_fields": {
        # Divides the raw throughput value by 2**30 (1073741824).
        "throughput [Gbps]": {
            "script": "doc['throughput'].value/1073741824.0",
            "lang": "expression",
        },
    },
    "fielddata_fields": ["timestamp"],
}

In [14]:
# Echo the query dict to check its contents.
# NOTE(review): the saved output below shows 'size': 10000 while the
# definition cell sets "size": 0, and this cell's execution count is lower
# than the definition cell's -- the output looks stale; re-run to refresh.
my_query

{'aggs': {'2': {'date_histogram': {'extended_bounds': {'max': 1456812000000,
     'min': 1454306400000},
    'field': 'timestamp',
    'interval': '1d',
    'min_doc_count': 0,
    'time_zone': 'America/Chicago'}}},
 'fielddata_fields': ['timestamp'],
 'fields': ['*', '_source'],
 'query': {'filtered': {'filter': {'bool': {'must': [{'query': {'match': {'srcSite': {'query': 'Australia-ATLAS',
          'type': 'phrase'}}}},
      {'query': {'match': {'destSite': {'query': 'SFU-LCG2',
          'type': 'phrase'}}}},
      {'range': {'timestamp': {'format': 'epoch_millis',
         'gte': 1454306400000,
         'lte': 1456812000000}}}],
     'must_not': []}},
   'query': {'query_string': {'analyze_wildcard': True,
     'lowercase_expanded_terms': False,
     'query': '*'}}}},
 'script_fields': {'throughput [Gbps]': {'lang': 'expression',
   'script': "doc['throughput'].value/1073741824.0"}},
 'size': 10000,
 'sort': [{'timestamp': {'order': 'desc', 'unmapped_type': 'boolean'}}]}

In [15]:
# Check the Python type of the query body (the saved output shows dict).
type(my_query)

dict

In [19]:
# Drill into the filter clause: the bool filter holding the two site matches
# and the timestamp range.
my_query["query"]["filtered"]["filter"]

{'bool': {'must': [{'query': {'match': {'srcSite': {'query': 'Australia-ATLAS',
       'type': 'phrase'}}}},
   {'query': {'match': {'destSite': {'query': 'SFU-LCG2', 'type': 'phrase'}}}},
   {'range': {'timestamp': {'format': 'epoch_millis',
      'gte': 1454306400000,
      'lte': 1456812000000}}}],
  'must_not': []}}

In [23]:
# Run the query against every index matching my_index. The request timeout
# comes from my_timeout (set alongside the connection settings) instead of a
# second hard-coded 600, so the value is defined in exactly one place.
response = es.search(index=my_index, body=my_query, request_timeout=my_timeout)

In [24]:
# Display the raw search response.
# NOTE(review): the saved output reports 5 failed shards out of 900, with the
# reason "Field [throughput] used in expression does not exist in mappings"
# (shown for index network_weather_2-2016.1.16) -- the throughput script field
# cannot be evaluated on indices that lack that mapping.
response

{'_shards': {'failed': 5,
  'failures': [{'index': 'network_weather_2-2016.1.16',
    'node': 'TxVZqrDUTaGMLEC3wuZwwg',
    'reason': {'caused_by': {'reason': 'Field [throughput] used in expression does not exist in mappings',
      'type': 'script_exception'},
     'reason': "Error during search with inline script [doc['throughput'].value/1073741824.0] using lang [expression]",
     'type': 'script_exception'},
    'shard': 0}],
  'successful': 895,
  'total': 900},
 'aggregations': {'2': {'buckets': [{'doc_count': 527,
     'key': 1454306400000,
     'key_as_string': '2016-02-01T00:00:00.000-06:00'},
    {'doc_count': 571,
     'key': 1454392800000,
     'key_as_string': '2016-02-02T00:00:00.000-06:00'},
    {'doc_count': 754,
     'key': 1454479200000,
     'key_as_string': '2016-02-03T00:00:00.000-06:00'},
    {'doc_count': 746,
     'key': 1454565600000,
     'key_as_string': '2016-02-04T00:00:00.000-06:00'},
    {'doc_count': 1029,
     'key': 1454652000000,
     'key_as_string':

In [None]:
def generate_query