In [1]:
import json
import networkx as nx

# Load JSON data

In [2]:
# Number of event dump files to read.
n = 5
# File stems look like "events-00001": the index is zero-padded to five digits.
files = ['events-' + format(index, '05d') for index in range(1, n + 1)]

In [3]:
# Load every batch file into memory: `data` receives one parsed JSON document
# per entry of `files`, in the same order.
data = []
for file in files:
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # default locale encoding (which is not UTF-8 on every system).
    with open(f'./data/{file}.json', encoding='utf-8') as f:
        data.append(json.load(f))

# Create graph

In [4]:
# Directed graph; edges are later added from each event to the events listed as similar to it.
G = nx.DiGraph()

## Add nodes

In [5]:
def get_features(event):
    """Return the attributes of an event that are stored on its graph node.

    Reads ``event['info']`` and returns a new dict holding its
    ``articleCounts``, ``multiLingInfo`` and ``eventDate`` entries.
    A missing key raises ``KeyError``.
    """
    wanted_keys = ('articleCounts', 'multiLingInfo', 'eventDate')
    details = event['info']
    return {key: details[key] for key in wanted_keys}

In [6]:
# Populate G: one node per event, plus one node and one weighted edge for each
# event listed as similar to it.
for batch in data:
    for event in batch:
        info, similar_events = event['info'], event['similarEvents']['similarEvents']
        # Tag the node with `node_type`, the attribute key used for every other
        # node added in this cell, so all event nodes can be selected with a
        # single key. `type` is still set so existing lookups on it keep working.
        G.add_node(info['uri'], **get_features(event), type='event', node_type='event')

        # Concept nodes are currently disabled:
        # concepts = info['concepts']

        # for concept in concepts:
        #     G.add_node(concept['uri'], **concept, node_type='concept')
        #     G.add_edge(info['uri'], concept['uri'], edge_type='concept', weight=concept['score'])

        for similar_event in similar_events:
            # add_node on a URI that is already in the graph only updates its
            # attributes, so features stored for that event are preserved.
            G.add_node(similar_event['uri'], similar_event=similar_event, node_type='event')
            # Directed edge from the event to its similar event, weighted by the score.
            G.add_edge(info['uri'], similar_event['uri'], edge_type='similar', weight=similar_event['sim'])
        

In [7]:
# Centre the ego network on the node with the highest degree.
# NOTE(review): on a DiGraph, G.degree() counts incoming plus outgoing edges,
# whereas ego_graph presumably follows only outgoing edges unless
# undirected=True is passed -- confirm this is the intended neighbourhood.
radius = 1
degrees = G.degree()
focus_node = max(degrees, key=lambda node_and_degree: node_and_degree[1])[0]

ego = nx.ego_graph(G, focus_node, radius=radius)

## Similarity score

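We confirmed that the similarity score is symmetric: whenever two events are linked in both directions, both edges carry the same weight (see the output of the check below).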

In [11]:
# Disabled symmetry check: for each node, prints the first other node it is
# linked to in both directions, followed by the attributes of the two directed
# edges (the `break` only leaves the inner loop).
# for n1 in G.nodes():
#     for n2 in G.nodes():
#         if n1 == n2:
#             continue
#         if G.has_edge(n1, n2) and G.has_edge(n2, n1):
#             print(n1, n2, G.edges[n1, n2], G.edges[n2, n1])
#             break

15 4029 {'edge_type': 'similar', 'weight': 0.35} {'edge_type': 'similar', 'weight': 0.35}
4029 15 {'edge_type': 'similar', 'weight': 0.35} {'edge_type': 'similar', 'weight': 0.35}
4313 15 {'edge_type': 'similar', 'weight': 0.34} {'edge_type': 'similar', 'weight': 0.34}
3821 15 {'edge_type': 'similar', 'weight': 0.26} {'edge_type': 'similar', 'weight': 0.26}
70 2969 {'edge_type': 'similar', 'weight': 0.84} {'edge_type': 'similar', 'weight': 0.84}
2969 70 {'edge_type': 'similar', 'weight': 0.84} {'edge_type': 'similar', 'weight': 0.84}
2812 70 {'edge_type': 'similar', 'weight': 0.79} {'edge_type': 'similar', 'weight': 0.79}
2407 70 {'edge_type': 'similar', 'weight': 0.71} {'edge_type': 'similar', 'weight': 0.71}
2081 70 {'edge_type': 'similar', 'weight': 0.71} {'edge_type': 'similar', 'weight': 0.71}
2851 70 {'edge_type': 'similar', 'weight': 0.67} {'edge_type': 'similar', 'weight': 0.67}
118 3121 {'edge_type': 'similar', 'weight': 0.31} {'edge_type': 'similar', 'weight': 0.31}
3121 118 