In [15]:
from neo4j.v1 import GraphDatabase
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
import pandas as pd
import time


class HelloWorldExample(object):
    """Small helper that writes device nodes and hop links to Neo4j.

    Every device becomes a node labelled ``device`` plus a second label equal
    to its IP, with ``flow`` and ``text`` properties. Links are ``hop``
    relationships between two existing device nodes matched on ``text``.
    """

    def __init__(self, uri, user, password):
        """Create the Neo4j driver for ``uri`` using basic ``user``/``password`` auth."""
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self._driver.close()

    def add_device(self, ip, flow):
        """Create one device node for ``ip`` in its own write transaction.

        Args:
            ip: Device address as a string; used as the extra label and as
                the ``text`` property.
            flow: Numeric value stored in the ``flow`` property.
        """
        with self._driver.session() as session:
            session.write_transaction(self._create_device, ip, flow)

    def add_link(self, ip1, ip2, flow, hop, std_hop):
        """Create a ``hop`` relationship from device ``ip1`` to device ``ip2``.

        Args:
            ip1: ``text`` value of the source device node.
            ip2: ``text`` value of the destination device node.
            flow: Numeric value stored in the relationship's ``flow`` property.
            hop: Numeric value stored in the ``hop`` property.
            std_hop: Numeric value stored in the ``std_hop`` property.
        """
        with self._driver.session() as session:
            session.write_transaction(self._create_link, ip1, ip2, flow, hop, std_hop)

    @staticmethod
    def _create_device(tx, ip, flow):
        """Run the CREATE statement for a single device inside ``tx``."""
        # Labels cannot be supplied as query parameters, so the IP label is
        # still spliced into the statement. Doubling backticks keeps the value
        # inside the quoted identifier instead of terminating it early.
        label = ip.replace('`', '``')
        statement = 'CREATE (n:device:`' + label + '` {flow: $flow, text: $text})'
        # Property values travel as parameters, so quotes in ``ip`` can no
        # longer break out of a string literal.
        tx.run(statement, flow=flow, text=ip)

    @staticmethod
    def _create_link(tx, ip1, ip2, flow, hop, std_hop):
        """Run the MATCH/CREATE statement for a single link inside ``tx``."""
        statement = (
            'MATCH (a:device), (b:device) '
            'WHERE a.text = $ip1 AND b.text = $ip2 '
            'CREATE (a)-[r:hop {flow: $flow, hop: $hop, std_hop: $std_hop}]->(b)'
        )
        # Passing numbers as parameters also avoids splicing ``str(value)``
        # into the query text, which produced the bare token ``nan`` for NaN.
        # NOTE(review): confirm the target server accepts NaN float properties.
        tx.run(statement, ip1=ip1, ip2=ip2, flow=flow, hop=hop, std_hop=std_hop)


# Query window in epoch milliseconds: ten days long, ending one hour before
# the current time.
interval = 10 * 24 * 60 * 60 * 1000
now = 1000 * int(time.time() - 3600)
past = now - interval

print('past:', past, '\tnow', now)
chicago_index = "ps_trace"
es_chicago = Elasticsearch(['atlas-kibana.mwt2.org:9200'], timeout=60)

# Every clause must match: timestamp inside the window and the fixed
# source/destination site pair.
must_clauses = [
    {"range": {"timestamp": {"gte": past, "lte": now, "format": "epoch_millis"}}},
    {"term": {"src_site": "UKI-SOUTHGRID-OX-HEP"}},
    {"term": {"dest_site": "UKI-SCOTGRID-DURHAM"}},
]

# Only the fields the processing loop reads are requested.
query_chicago = {
    "_source": ["rtts", "hops"],
    "query": {"bool": {"must": must_clauses}},
}

# Lazy iterator over all matching documents.
scroll_chicago = scan(
    client=es_chicago,
    query=query_chicago,
    index=chicago_index,
    scroll='5m',
    timeout='5m',
    size=10000,
)

# Parallel lists, one element per consecutive hop pair:
s = []  # first hop of the pair
d = []  # second hop of the pair
r = []  # rtt increment recorded for the pair

counter = 0  # documents fully processed
start_time = time.time()
uDev = {}  # hop address -> number of appearances across all documents
for entry in scroll_chicago:
    source = entry['_source']
    rtts = source['rtts']
    hops = source['hops']

    # The two arrays are indexed in lockstep below, so a length mismatch
    # stops the whole scan.
    if len(rtts) != len(hops):
        print("issue!", rtts, hops)
        break

    for hop in hops:
        uDev[hop] = uDev.get(hop, 0) + 1

    # NOTE(review): the pair (hops[k], hops[k + 1]) is stored with
    # rtts[k] - rtts[k - 1] (or rtts[0] for the first pair) — confirm this
    # index alignment is the intended one.
    for k, (first, second) in enumerate(zip(hops, hops[1:])):
        s.append(first)
        d.append(second)
        if k == 0:
            r.append(rtts[k])
        else:
            r.append(rtts[k] - rtts[k - 1])

    if counter % 10000 == 0:
        print("Processing event number ", counter)
    counter += 1

print("%s entries processes in %i seconds" % (counter, time.time() - start_time))

df = pd.DataFrame({'s': s, 'd': d, 'r': r})
print(df.head())


# Connection settings for the Neo4j instance that receives the graph.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment instead.
uri = 'bolt://128.135.98.56:7687'
user = 'neo4j'
passw = 'trace'
hwe = HelloWorldExample(uri, user, passw)
try:
    # One node per distinct hop; flow is the hop's appearance count divided
    # by the number of processed documents.
    for ud in uDev:
        device_flow = uDev[ud] / counter
        print(ud, device_flow)
        hwe.add_device(ud, device_flow)

    # One relationship per distinct (source, destination) pair, carrying the
    # pair's relative frequency and the mean / standard deviation of its rtt
    # increments.
    gr = df.groupby(['s', 'd'])
    for name, group in gr:
        increments = group['r']
        n_seen = increments.count()
        mean_r = increments.mean()
        # std() is NaN for a pair that was observed only once.
        std_r = increments.std()
        print(name, n_seen, mean_r, std_r)
        hwe.add_link(name[0], name[1], n_seen / counter, mean_r, std_r)
finally:
    # Release the driver even when a write fails part-way through.
    hwe.close()

print("Done.")


past: 1518446274000 	now 1519310274000
Processing event number  0
3660 entries processes in 0 seconds
               d    r              s
0   172.24.25.38  0.6    163.1.5.254
1   172.31.4.242  0.1   172.24.25.38
2  193.63.109.89 -0.2   172.31.4.242
3  193.63.108.97 -0.2  193.63.109.89
4  193.63.108.93  0.2  193.63.108.97


ServiceUnavailable: Cannot acquire connection to Address(host='128.135.98.56', port=7687)