In [13]:
import json
from tld import get_tld

# Input: the JSON file to load (relative path, resolved against the current working directory)
filePath = "../data/timvlummens.github.io_test.json"
# Output: where the combined, sorted result is written when the output is saved
outputPath = "SortedOutput/timvlummens.github.io_test.json"

In [14]:
# Load the data from the .json file.
# The with-block guarantees the file is closed even if json.load raises
# (e.g. on malformed JSON), which the manual open()/close() pair did not.
with open(filePath, encoding="utf-8") as f:
    allData = json.load(f)

In [15]:
# Group event listeners by domain and then by URL, collecting listener types.

srsData = allData["data"]["srs"][0]

# Shape: {domain: {url: {listenerType, ...}}}
sortedListeners = {}

for listener in srsData:
    res = get_tld(listener["url"], as_object=True, fail_silently=True)

    # Drop entries whose URL get_tld could not resolve (None result) and
    # entries flagged "once". The None check comes first, so "once" is only
    # read for resolvable URLs.
    if res is None or listener["once"]:
        continue

    listenerUrl = listener["url"]
    listenerKind = listener["listenerType"]

    # setdefault creates the per-domain dict / per-URL set on first sight,
    # then the listener type is added to that URL's set.
    urlsForDomain = sortedListeners.setdefault(res.domain, {})
    urlsForDomain.setdefault(listenerUrl, set()).add(listenerKind)
        

In [16]:
# Group requests by the domain of each initiator, then by initiator URL.

requestData = allData["data"]["requests"]

# Shape: {domain: {initiator: [request, ...]}}
sortedRequests = {}

for request in requestData:
    for initiator in request["initiators"]:
        res = get_tld(initiator, as_object=True, fail_silently=True)

        # Initiators that get_tld cannot resolve (None result) are skipped.
        if res is not None:
            # A request with several initiators is filed under each of them.
            initiatorsForDomain = sortedRequests.setdefault(res.domain, {})
            initiatorsForDomain.setdefault(initiator, []).append(request)
            


In [17]:
# Sort the output from apis per source domain: count the entries of
# inputElementResults per domain and source URL, and collect their timestamps.

apiData = allData["data"]["apis"]
callStats = apiData["callStats"]  # not used by the loop below; kept so the name stays defined
inputResults = apiData["inputElementResults"]

# Shape: {domain: {url: {"count": int, "timestamps": [timestamp, ...]}}}
sortedApis = {}

# NOTE: an earlier variant that copied callStats[url]["Input element value"]
# into sortedApis was disabled; counts are derived from inputResults instead.

for call in inputResults:
    # Entries without a "source" cannot be attributed to a URL/domain.
    if "source" not in call:
        continue

    url = call["source"]
    res = get_tld(url, as_object=True, fail_silently=True)

    # Sources that get_tld cannot resolve (None result) are skipped.
    if res is None:
        continue

    # Create the per-domain / per-URL record on first sight, then update it
    # in one place instead of repeating the logic in every branch.
    stats = sortedApis.setdefault(res.domain, {}).setdefault(
        url, {"count": 0, "timestamps": []}
    )
    stats["count"] += 1
    if "timestamp" in call:
        stats["timestamps"].append(call["timestamp"])

        


In [18]:
# Sort output from filling in fields.
# The second "srs" element is bound only to fillData; the previous chained
# assignment also rebound srsData, replacing the listener list it held with
# unrelated data.
fillData = allData["data"]["srs"][1]

# Positional entries of fillData mapped to named keys.
sortedFill = {
    "filledFields": fillData[0],
    "startTime": fillData[1],
    "stopTime": fillData[2],
    "listOfFilledTypes": fillData[3],
    "failedFields": fillData[4],
}

In [19]:
# Combine the per-section results into the single dictionary that is saved.
finalDict = {
    "srs": sortedListeners,
    "requests": sortedRequests,
    "apis": sortedApis,
    "filled": sortedFill,
}

In [20]:
# Save output
def _json_default(obj):
    """Convert values that json cannot serialise natively into JSON arrays.

    Sets are emitted in a stable order (sorted by their string form) so that
    repeated runs write identical files; set iteration order for strings can
    otherwise differ between interpreter runs. Anything else falls back to
    tuple(), as before.
    """
    if isinstance(obj, (set, frozenset)):
        return sorted(obj, key=str)
    return tuple(obj)


# The with-block closes the file on exit, so no explicit close() is needed.
# encoding is given explicitly to match the utf-8 read of the input file.
with open(outputPath, "w", encoding="utf-8") as fp:
    json.dump(finalDict, fp, default=_json_default, sort_keys=True, indent=4)