In [1]:
#!/usr/bin/env python3
import os
from glob import glob
import json
import pickle

#class LogRecord(object):
#   def __init__ (self, uuid=None, uname=None, hname=None, source, test, start, local_tstamp):

def pickle_to_file(data, filename):
    """Serialize ``data`` with pickle and write it to ``filename``.

    The file is opened in binary write mode, so any existing content at
    ``filename`` is replaced.
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(data, sink)

def pickle_load_file(filename):
    """Read ``filename`` in binary mode and return the object pickled in it.

    Warning: unpickling can execute arbitrary code, so only use this on
    files from a trusted source.
    """
    with open(filename, mode='rb') as source:
        payload = pickle.load(source)
    return payload

def process_log(logfile, central_log=None):
    """Merge the JSON records found in one log file into ``central_log``.

    For every line, the text from column 24 onwards is split once on ``,``
    and each half is split once on ``:``; the value of the first half is the
    record's source and the value of the second half is a JSON payload. The
    first 24 characters (the timestamp prefix) are not used.

    Records are keyed by their ``uuid`` field. The first record seen for a
    uuid creates an entry seeded with ``{'source': source}``; every record,
    first or later, is then merged into that entry with ``dict.update``.

    Lines that do not match this layout, whose payload is not valid JSON, or
    whose payload is not a JSON object containing ``uuid`` are skipped, so a
    single bad line does not abort the rest of the file.

    Args:
        logfile: Path of the log file to read.
        central_log: Dict to merge records into; it is mutated in place.
            When omitted, a fresh dict is used for this call only (a shared
            ``{}`` default would leak records between calls).

    Returns:
        Tuple ``(new, hits)``: the number of records whose uuid was not yet
        in ``central_log`` and the number whose uuid was already present.
    """
    if central_log is None:
        central_log = {}

    new = 0
    hits = 0
    with open(logfile, 'r') as f:
        # Iterate lazily instead of materialising the whole file in memory.
        for line in f:
            line = line.strip()
            try:
                [[_, source], [_, msg]] = [i.split(':', 1) for i in line[24:].split(',', 1)]
                # json.JSONDecodeError is a ValueError, so malformed payloads
                # are skipped the same way as malformed line layouts.
                record = json.loads(msg)
            except ValueError:
                continue

            if not isinstance(record, dict) or 'uuid' not in record:
                continue

            uuid = record['uuid']
            if uuid not in central_log:
                new += 1
                central_log[uuid] = {'source': source}
            else:
                hits += 1
            central_log[uuid].update(record)

    return (new, hits)

def read_files():
    """Process every file matching ``*log`` in the current directory.

    Each file is passed to ``process_log`` with a shared dict, and a one-line
    summary of new records and hits is printed per file. If ``process_log``
    raises for a file, the error is printed and that file's summary line is
    skipped; anything it merged into the shared dict before failing is kept.

    Returns:
        The dict populated by ``process_log`` across all processed files.
    """
    data = {}
    for logfile in glob("*log"):
        try:
            new, hits = process_log(logfile, central_log=data)
        except Exception as e:
            print("[ERROR] Failed to process logfile : ", logfile, "-", repr(e))
            # Without this, the summary below would use unbound or stale
            # counts left over from a previous file.
            continue

        print("Log:{} NewRecords:{} Hits:{}".format(logfile, new, hits))

    return data


In [2]:
# Load the pickled log data from disk. The cells below use `data` as a dict
# whose values are per-record dicts (they call data.values() and obj['uuid']).
# NOTE(review): presumably produced earlier with read_files() + pickle_to_file() — confirm.
# Unpickling can execute arbitrary code; only load files from a trusted source.
data = pickle_load_file("data_oct_2021.pkl")

In [3]:
import pprint
# Pretty-print a sample of records; the loop stops after the 11th one.
cnt = 0
for cnt, e in enumerate(data, start=1):
    pprint.pprint(data[e])
    if cnt > 10:
        break

{'c_time': None,
 'end': 1526009623.843923,
 'failed': 0,
 'hname': 'a1b4b182f4',
 'parsl_v': '0.5.0',
 'python_v': '3.5.5',
 'sites': 1,
 'source': '34.233.56.198',
 'start': 1526008996.9807324,
 't_apps': 0,
 'test': True,
 'uname': '9611d66d2d',
 'uuid': '2affd23c-8938-47f1-9bbe-88c2ddc10f47'}
{'hname': '628b5e4576',
 'parsl_v': '0.5.2',
 'python_v': '3.6.5',
 'source': '200.20.187.20',
 'start': 1534445687.047495,
 'test': False,
 'uname': '4813494d13',
 'uuid': '8bd85c01-2db2-4f79-80c7-4ca393e4d18f'}
{'c_time': None,
 'end': 1524092402.324931,
 'failed': 0,
 'hname': 'e4156a049b',
 'parsl_v': '0.5.0',
 'python_v': '3.5.5',
 'sites': 1,
 'source': '52.3.55.28',
 'start': 1524092400.1022968,
 't_apps': 4,
 'test': True,
 'uname': '9611d66d2d',
 'uuid': 'f71a228f-562c-41ee-b1ca-b1e2a6e2d932'}
{'c_time': None,
 'end': 1524674932.2870443,
 'failed': 0,
 'hname': '76104c53e4',
 'parsl_v': '0.5.0',
 'python_v': '3.5.5',
 'sites': 0,
 'source': '34.233.56.198',
 'start': 1524674869.375399

In [4]:
from collections import Counter

# Single pass over the records: collect every uuid, and separately count the
# uuids of records that carry an 'end' field.
unique_uuid = set()
end_cnt = Counter()

for e, obj in data.items():
    unique_uuid.add(obj['uuid'])
    if 'end' in obj:
        end_cnt[obj['uuid']] += 1

print("Total unique uuids:", len(unique_uuid))
print("Total unique uuids with end:", len(end_cnt))

Total unique uuids: 536066
Total unique uuids with end: 131355


In [5]:
# Fraction of unique uuids whose record contains an 'end' field.
print(len(end_cnt)/len(unique_uuid))

0.24503512627176505


In [6]:
# Count the uuids of records that have an 'end' field but no 'start' field.
wierd_cnt = Counter(
    rec['uuid']
    for rec in data.values()
    if 'start' not in rec and 'end' in rec
)
print("Total unique uuids with end but no start:", len(wierd_cnt))

Total unique uuids with end but no start: 623


In [7]:
import numpy as np

# end - start for every record that has both fields, then summary statistics.
start_end = [
    rec['end'] - rec['start']
    for rec in data.values()
    if 'start' in rec and 'end' in rec
]

print("Mean:", np.mean(start_end))
print("Std:", np.std(start_end))
print("Min:", np.min(start_end))
print("Max:", np.max(start_end))


Mean: 745.5609406649937
Std: 9190.300066370284
Min: 0.00035071372985839844
Max: 1730707.6883428097
