In [1]:
import os
from pathlib import Path

import networkx as nx

from generate_graph.proceedings_base import crawl_proceedings
from generate_graph.snowball import snowball_generator
from generate_graph.utils import expand_graph

In [2]:
# Working directory for every artefact this notebook reads or writes.
data_dir = Path("../data")
data_dir.mkdir(exist_ok=True)

# Checkpoint file that stores the node index at which snowballing last stopped.
# It is created empty on first use and left untouched if it already exists.
idx_storage = data_dir / "temp_index.txt"
if not idx_storage.is_file():
    idx_storage.write_text("")

In [3]:
chiwork_base_path = Path("../data/chiwork_base.gml")
expanded_graph_path = Path("../data/expanded_graph.gml")

# The base graph is crawled only when no usable cache exists on disk.
# A zero-byte file (e.g. a leftover placeholder) counts as "no cache" because
# it holds no graph to load.
base_is_cached = chiwork_base_path.is_file() and chiwork_base_path.stat().st_size > 0

if not base_is_cached:

    # A fresh crawl starts a fresh expansion, so reset the expanded-graph file.
    # NOTE(review): the checkpoint in temp_index.txt is not reset here — confirm
    # whether a stale index should be cleared when the base graph is re-crawled.
    with open(expanded_graph_path, "w+"): pass

    chiwork_source = nx.Graph()
    expanded_graph = nx.Graph()

    chiwork22_nodes, chiwork22_edges, _ = crawl_proceedings("https://dl.acm.org/doi/proceedings/10.1145/3533406", "https://dl.acm.org")
    chiwork23_nodes, chiwork23_edges, _ = crawl_proceedings("https://dl.acm.org/doi/proceedings/10.1145/3596671", "https://dl.acm.org")

    print(f"Number of Nodes in CHIWORK'22: {len(chiwork22_nodes)}")
    print(f"Number of Edges in CHIWORK'22: {len(chiwork22_edges)}")
    print(f"Number of Nodes in CHIWORK'23: {len(chiwork23_nodes)}")
    print(f"Number of Edges in CHIWORK'23: {len(chiwork23_edges)}\n")

    # Merge both proceedings into the single base graph.
    expand_graph(chiwork_source, chiwork22_nodes, chiwork22_edges)
    expand_graph(chiwork_source, chiwork23_nodes, chiwork23_edges)

    print(f"Number of Nodes: {len(chiwork_source.nodes)}")
    print(f"Number of Edges: {len(chiwork_source.edges)}\n")

    # Persist the crawl so later runs take the cached branch below.
    nx.write_gml(chiwork_source, chiwork_base_path)

else:
    chiwork_source = nx.read_gml(chiwork_base_path)

    # The expanded graph may not have been saved yet (missing or still the
    # empty placeholder); start from an empty graph in that case.
    if expanded_graph_path.is_file() and expanded_graph_path.stat().st_size > 0:
        expanded_graph = nx.read_gml(expanded_graph_path)
    else:
        expanded_graph = nx.Graph()

https://doi.org/10.1145/3533406
https://doi.org/10.1145/3596671
https://doi.org/10.1145/3596671
Number of Nodes in CHIWORK'22: 82
Number of Edges in CHIWORK'22: 220
Number of Nodes in CHIWORK'23: 49
Number of Edges in CHIWORK'23: 91

Number of Nodes: 117
Number of Edges: 307

{}
[('https://dl.acm.org/profile/99659365633', 'https://dl.acm.org/profile/99660481980', {'paperinfo': ['10.1145/3533406.3533409', 'The end of the active work break? Remote work, sedentariness and the role of technology in creating active break-taking norms', 'https://doi.org/10.1145/3533406.3533409', 'https://doi.org/10.1145/3533406']}), ('https://dl.acm.org/profile/99659365633', 'https://dl.acm.org/profile/81508705985', {'paperinfo': ['10.1145/3533406.3533409', 'The end of the active work break? Remote work, sedentariness and the role of technology in creating active break-taking norms', 'https://doi.org/10.1145/3533406.3533409', 'https://doi.org/10.1145/3533406']}), ('https://dl.acm.org/profile/99659365633', 'h

In [None]:
def snowball(graph_source, graph_output, idx=False):
    """Run ``snowball_generator`` over the nodes of ``graph_source``.

    Args:
        graph_source: Graph whose nodes are visited in iteration order.
        graph_output: Graph handed to ``snowball_generator`` for every node.
        idx: When truthy, resume from the position stored on the last line
            of the ``idx_storage`` checkpoint file (if that file is non-empty).

    The loop stops at the first node for which ``snowball_generator`` returns
    a falsy value; that node's position in ``graph_source.nodes`` is written
    to ``idx_storage``, replacing its previous content. Nothing is written
    when every node yields a truthy result.
    """
    pending_nodes = list(graph_source.nodes)

    if idx:
        with open(idx_storage) as checkpoint:
            saved_lines = checkpoint.readlines()
        if saved_lines:
            # Only the most recent entry matters; skip everything before it.
            resume_at = int(saved_lines[-1])
            pending_nodes = pending_nodes[resume_at:]

    for current_node in pending_nodes:
        succeeded = snowball_generator(graph_output, current_node)
        if succeeded:
            continue

        # Record the absolute position of the failing node so that a later
        # call with idx=True starts again from this node.
        with open(idx_storage, "w") as checkpoint:
            failed_position = list(graph_source.nodes).index(current_node)
            checkpoint.write(f"{failed_position}")
        break

# Expand the base graph, resuming from the stored checkpoint if one exists.
snowball(chiwork_source, expanded_graph, True)

print(f"Number of Nodes: {len(expanded_graph.nodes)}")
print(f"Number of Edges: {len(expanded_graph.edges)}")

# Save the expanded graph (not the `expand_graph` helper function) as GML,
# the format in which this notebook reloads `expanded_graph_path` via nx.read_gml.
nx.write_gml(expanded_graph, expanded_graph_path)