In [183]:
PATH = r"C:\Users\jfmgi\Desktop\2AMV10-group-16\2amv10website\src\json"

In [184]:
import os
os.chdir(PATH)

In [185]:
def save_to_file(filename, content):
    with open(filename, 'w') as file:
        file.write(json.dumps(content))

In [220]:
import json
import numpy as np

with open('predsxtnumerical.json') as f:
    json_file = json.load(f)

import sys
np.set_printoptions(threshold=sys.maxsize)

def get_duplicates(file, scale=True):
    """
        Calculates how many object person x and person y have in common (for any x and y)

        Input: 
            file    numerical predictions (which object assigned to which person)
            scale   whether to scale the amount of common item per person, by the amount
                    of items that person has (i.e. people with high amount of items will
                    always have more items in common)
        Output: 
            40x40 matrix, where i,j contains the count of items persons i and j have in common
    """
    matrix = np.zeros((39,39))
    for person in range(1, 40):
        for i in range(1,40):
            if person == i:
                count = -1
            else:
                count = len(set(file[str(person)]) & set(file[str(i)]))
                
            matrix[person-1][i-1] = count
    
    dupls = {}
    i = 1
    for row in matrix:
        dupls[f'person{i}'] = list(row)
        i += 1
    
    return dupls
#print(matrix)
# print(links)
dupls = get_duplicates(json_file)
save_to_file('closeness.json', dupls)


In [321]:
def get_digits(person):
    numbers = [word for word in person if word.isdigit()]
    return ''.join(numbers)
    

def get_edges(dups, cutoff=0):
    """
        Calculates the edges for the network graph

        Input:
            Matrix containing the counts person i and j have in common
        Output:
            Array of dictionaries for edges
    """
    edges = {}
    node_counts = {}
    i = 1
    for person in dups:
        edge_source = person
        arr = dups[person]
        count = 0
        for j in range(1,39):
            if arr[j] > cutoff:
                count += 1
                edge_name = f"edge{i}"
                edge_target = f"person{j}"
                edges[edge_name] = { 
                    "source": edge_source,
                    "target": edge_target,
                    "label": str(arr[j]),
                    "width": max(arr[j] //2, 0.5),
                    "s": get_digits(person),
                    "t": j }
                i += 1
        node_counts[person] = count
    return edges, node_counts

edgs, node_counts = get_edges(dupls)

#save_to_file('edges.json', edgs)


In [328]:
def get_nodes():
    nodes = {}

    ids = []
    for i in range(1, 40):
        amount_of_items = max(len(json_file[str(i)]) // 10, len(json_file[str(i)]))
        node_name = f"person{i}"
        node_label = f"Person {i}"
        size = node_counts[f"person{i}"] // amount_of_items
        if size > 1:
            ids.append(i)
            nodes[node_name] = { "name": node_label, "size": size*4, "color": 'red' }
    # Now we need to clean the edges
    i = 0
    new_edges = []
    for edge in edgs:
        if int(edgs[edge]['s']) in ids and int(edgs[edge]['t']) in ids:
            # Edges with width < 1 are not of interest
            if edgs[edge]['width'] > 0:
                new_edges.append(edgs[edge])

    print(f"Cut amount of edges from {len(edgs)} to {len(new_edges)}")

    return nodes, new_edges

nds, edges = get_nodes()
save_to_file('nodes.json', nds)
save_to_file('edges.json', edges)


Cut amount of edges from 1343 to 46


In [325]:
"""
    Need array with entries of form 
        { from: "A", to: "B", value: 10 },
    
    1. Get 8 most connected people
    2. Get all edges with both source and target in this array
    3. Create entries:
        from:       source
        to:         target
        value:      edge.label
            Entries can be created by looping over all edges found in 2
"""
def get_chord_data(nodes, edges):
    """
        Need edges in the form { from, to, value } 
            TODO: need better way of selecting x most important

        Input:
            given nodes and edges of network graph
        Output:
            array containing dictionary entries for chord diagram
    """
    most_connected = {k: v for k,v in nodes.items() if int(v['size']) > 11}.keys()
    filtered_edges = {k: v for k,v in edges.items() if v['source'] in most_connected and v['target'] in most_connected}

    print(filtered_edges)

    chord_entries = []

    for key, value in filtered_edges.items():
        if value['source'] != value['target']:
            chord_entries.append(
                {
                    'from': value['source'],
                    'to': value['target'],
                    'value': value['label']
                }
            )
            chord_entries.append(
                {
                    'from': value['target'],
                    'to': value['source'],
                    'value': value['label']
                }
            )
    return chord_entries

chord_data = get_chord_data(nds, edgs)
save_to_file('chord_nodes.json', chord_data)

{'edge1': {'source': 'person1', 'target': 'person1', 'label': '3.0', 'width': 1.0, 's': '1', 't': 1}, 'edge8': {'source': 'person1', 'target': 'person8', 'label': '6.0', 'width': 3.0, 's': '1', 't': 8}, 'edge16': {'source': 'person1', 'target': 'person16', 'label': '7.0', 'width': 3.0, 's': '1', 't': 16}, 'edge18': {'source': 'person1', 'target': 'person18', 'label': '1.0', 'width': 0.5, 's': '1', 't': 18}, 'edge19': {'source': 'person1', 'target': 'person19', 'label': '5.0', 'width': 2.0, 's': '1', 't': 19}, 'edge21': {'source': 'person1', 'target': 'person21', 'label': '1.0', 'width': 0.5, 's': '1', 't': 21}, 'edge22': {'source': 'person1', 'target': 'person22', 'label': '7.0', 'width': 3.0, 's': '1', 't': 22}, 'edge26': {'source': 'person1', 'target': 'person26', 'label': '3.0', 'width': 1.0, 's': '1', 't': 26}, 'edge28': {'source': 'person1', 'target': 'person28', 'label': '2.0', 'width': 1.0, 's': '1', 't': 28}, 'edge35': {'source': 'person1', 'target': 'person36', 'label': '2.0',

In [196]:
def get_scatter_data():
    """ 
        Heatmap data needs to be transposed to get the person-data on the y
        and object-data on the x axis.
    """
    scatter_transposed = []
    with open('predsxttuples_translated.json') as f:
        json_heatmap = json.load(f)
    for duo in json_heatmap:
        scatter_transposed.append([duo[1], duo[0]])

    return scatter_transposed
        

heatmap_data = get_scatter_data()
save_to_file('predsxttuples_translated_transposed.json', heatmap_data)

In [205]:
def count_occurrences(value):
    counter = 0
    for row in json_file:
        if value in json_file[row]:
            counter += 1
    return counter

def item_translator(value):
    items =  {
        20: 'miniCards',
        30: 'redDart',
        24: 'pinkCandle',
        14: 'gyroscope',
        8: 'cupcakePaper',
        9: 'eyeball',
        35: 'spiderRing',
        19: 'metalKey',
        40: 'voiceRecorder',
        33: 'sign',
        17: 'lavenderDie',
        16: 'hairRoller',
        21: 'noisemaker',
        42: 'yellowBalloon',
        1: 'blueSunglasses',
        31: 'redWhistle',
        3: 'cactusPaper',
        2: 'brownDie',
        38: 'turtle',
        36: 'stickerBox',
        4: 'canadaPencil',
        39: 'vancouverCards',
        26: 'plaidPencil',
        32: 'rubiksCube',
        29: 'redBow',
        12: 'giftBag',
        34: 'silverStraw',
        6: 'cloudSign',
        25: 'pinkEraser',
        15: 'hairClip',
        28: 'rainbowPens',
        27: 'pumpkinNotes',
        41: 'yellowBag',
        23: 'partyFavor',
        22: 'paperPlate',
        37: 'trophy',
        11: 'gClamp',
        5: 'carabiner',
        0: 'birdCall',
        7: 'cowbell',
        10: 'foamDart',
        13: 'glassBead',
        18: 'legoBracelet'
    }
    return items[value]

for i in range(43):
    print(f"Item {item_translator(i)} occurs {count_occurrences(i)} times")


Item birdCall occurs 17 times
Item blueSunglasses occurs 29 times
Item brownDie occurs 13 times
Item cactusPaper occurs 4 times
Item canadaPencil occurs 5 times
Item carabiner occurs 1 times
Item cloudSign occurs 6 times
Item cowbell occurs 10 times
Item cupcakePaper occurs 15 times
Item eyeball occurs 9 times
Item foamDart occurs 7 times
Item gClamp occurs 8 times
Item giftBag occurs 5 times
Item glassBead occurs 5 times
Item gyroscope occurs 15 times
Item hairClip occurs 18 times
Item hairRoller occurs 3 times
Item lavenderDie occurs 16 times
Item legoBracelet occurs 15 times
Item metalKey occurs 17 times
Item miniCards occurs 12 times
Item noisemaker occurs 10 times
Item paperPlate occurs 10 times
Item partyFavor occurs 24 times
Item pinkCandle occurs 7 times
Item pinkEraser occurs 7 times
Item plaidPencil occurs 21 times
Item pumpkinNotes occurs 6 times
Item rainbowPens occurs 6 times
Item redBow occurs 20 times
Item redDart occurs 15 times
Item redWhistle occurs 5 times
Item rubik

In [207]:
for row in json_file:
    if len(json_file[row]) == 8:
        print(f"{row} has {len(json_file[row])} items")

22 has 8 items
19 has 8 items
36 has 8 items
