In [None]:
import json
import glob
import os
import time

In [None]:
def read_json_from_file(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for it).
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)

In [None]:
def get_traces(directory):
    """Load every trace stored in the ``*.json`` files directly inside ``directory``.

    Each file is expected to contain a JSON array of trace objects that all
    carry an ``"id"`` key::

        [ {id: string, ...}, {id: string, ...}, {id: string, ...}, ...]

    Files are read in sorted name order. ``glob.glob`` makes no ordering
    promise, so without sorting, the trace kept for an ID that occurs in more
    than one file would depend on the file system. With sorting, the trace
    from the file whose name sorts last always wins.

    Args:
        directory: Folder to scan for ``*.json`` files. A folder that is
            missing or holds no JSON files yields an empty result.

    Returns:
        A tuple ``(trace_ids, trace_dict)``: the set of all trace IDs found
        and a dict mapping each of those IDs to its trace object.

    Raises:
        json.JSONDecodeError: If a matched file does not contain valid JSON.
        KeyError: If a trace object has no ``"id"`` key.
    """
    # Index linking trace IDs to trace objects
    trace_dict = {}

    for file_name in sorted(glob.glob(os.path.join(directory, "*.json"))):
        with open(file_name, encoding="utf-8") as traces_file:
            traces = json.load(traces_file)

        for trace in traces:
            trace_dict[trace["id"]] = trace

    # The ID set is exactly the key set of the index, so derive it once here
    # instead of maintaining a second collection in the loop.
    return set(trace_dict), trace_dict

In [None]:
def write_json_to_file(directory, data, filename=None):
    """Serialize ``data`` as JSON into ``directory``, creating the folder if needed.

    Args:
        directory: Target folder; created (including parents) when missing.
        data: JSON-serializable object to write.
        filename: Name of the file inside ``directory``. When ``None``, the
            current ``time.time()`` value followed by ``.json`` is used.

    Raises:
        OSError: If the folder cannot be created or the file cannot be opened.
        TypeError: If ``data`` is not JSON-serializable.
    """
    # exist_ok=True replaces the former "check, then create" pair, which could
    # fail if the folder appeared between the check and the makedirs call.
    os.makedirs(directory, exist_ok=True)

    if filename is None:
        filename = '{0}.json'.format(time.time())

    # ensure_ascii=False keeps non-ASCII characters readable; the file is
    # opened as UTF-8 so they are encoded consistently.
    with open(os.path.join(directory, filename), 'w', encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False)

In [None]:
# Get external traces tuple (to be merged)
ext_trace_ids, ext_trace_dict = get_traces(os.path.join('..', 'merge_sets', 'tracesold'))
# Internal traces tuple (to be merged to)
int_trace_ids, int_trace_dict = get_traces('traces')

# IDs that exist only in the external set, i.e. the traces still missing locally
unique_ext_ids = ext_trace_ids.difference(int_trace_ids)

# Walk the dict rather than the set: the output then follows the order in which
# get_traces inserted the traces instead of set iteration order, which can
# differ from one kernel start to the next.
unique_merge_traces = [
    trace for trace_id, trace in ext_trace_dict.items() if trace_id in unique_ext_ids
]

# Skip the write when nothing is new; otherwise every re-run of this cell would
# leave another empty merged_*.json file in the 'traces' folder.
if unique_merge_traces:
    write_json_to_file("traces", unique_merge_traces, 'merged_{0}.json'.format(time.time()))

In [None]:
# Check successfulness of merge

print("Set to merge is same size as id difference count? {}".format(len(unique_merge_traces) == len(unique_ext_ids)))

full_ids_set = ext_trace_ids.union(int_trace_ids)

# Expected size of the union by inclusion-exclusion. On its own this always
# equals len(full_ids_set), so it cannot detect a failed merge.
union_traces_size = len(ext_trace_dict) + len(int_trace_dict) - len(ext_trace_ids.intersection(int_trace_ids))

# Re-read the target folder so the check reflects what is actually on disk
# after the merge, not just the in-memory sets the merge was computed from.
merged_trace_ids = get_traces('traces')[0]

print("Merged set is same size as expected? {}".format(len(merged_trace_ids) == len(full_ids_set) == union_traces_size))

In [None]:
# TODO: empty the merge folder ('../merge_sets/tracesold') once the merge has been verified