In [None]:
import os
from pathlib import Path

import networkx as nx

from generate_graph.proceedings_base import crawl_proceedings
from generate_graph.snowball import snowball_generator
from generate_graph.utils import expand_graph

In [2]:
# Root directory for every artifact this notebook reads or writes.
data_dir = Path("../data")
# parents/exist_ok make the cell safe to re-run and safe on a fresh checkout.
data_dir.mkdir(parents=True, exist_ok=True)

# Checkpoint file: `snowball` stores the node index to resume from here.
idx_storage = Path("../data/intermediate_data/temp_index.txt")
# The nested directory has to exist before the checkpoint file can be created;
# without this the file creation below fails with FileNotFoundError.
idx_storage.parent.mkdir(parents=True, exist_ok=True)
if not idx_storage.is_file():
    idx_storage.touch()

In [None]:
chiwork_base_path = Path("../data/chiwork_base.gml")
expanded_graph_path = Path("../data/chiwork_snowball_abstracts.gml")


def _has_saved_graph(path):
    """Return True when ``path`` is an existing, non-empty file.

    An empty file is treated as "no cache": it cannot be parsed as GML and is
    typically the residue of an interrupted earlier run.
    """
    return os.path.isfile(path) and os.path.getsize(path) > 0


if not _has_saved_graph(chiwork_base_path):
    # No usable cache: crawl both proceedings and build the base graph.
    # The file is only written once the graph is complete, so a failed crawl
    # never leaves an unreadable placeholder behind.
    chiwork_source = nx.Graph()

    # crawl_proceedings returns three values; the third is not used here.
    chiwork22_nodes, chiwork22_edges, _unused22 = crawl_proceedings("https://dl.acm.org/doi/proceedings/10.1145/3533406", "https://dl.acm.org")
    chiwork23_nodes, chiwork23_edges, _unused23 = crawl_proceedings("https://dl.acm.org/doi/proceedings/10.1145/3596671", "https://dl.acm.org")

    print(f"Number of Nodes in CHIWORK'22: {len(chiwork22_nodes)}")
    print(f"Number of Edges in CHIWORK'22: {len(chiwork22_edges)}")
    print(f"Number of Nodes in CHIWORK'23: {len(chiwork23_nodes)}")
    print(f"Number of Edges in CHIWORK'23: {len(chiwork23_edges)}\n")

    expand_graph(chiwork_source, chiwork22_nodes, chiwork22_edges)
    expand_graph(chiwork_source, chiwork23_nodes, chiwork23_edges)

    print(f"Number of Nodes: {len(chiwork_source.nodes)}")
    print(f"Number of Edges: {len(chiwork_source.edges)}\n")

    nx.write_gml(chiwork_source, chiwork_base_path)

else:
    # Cached base graph available: load it instead of crawling again.
    chiwork_source = nx.read_gml(chiwork_base_path)

    print(f"Number of Nodes: {len(chiwork_source.nodes)}")
    print(f"Number of Edges: {len(chiwork_source.edges)}\n")


if not _has_saved_graph(expanded_graph_path):
    # No expanded graph saved yet: start the expansion from the base graph.
    # Re-read from disk so the working graph is independent of chiwork_source.
    expanded_graph = nx.read_gml(chiwork_base_path)

else:
    # Continue from the previously saved expansion.
    expanded_graph = nx.read_gml(expanded_graph_path)

    print(f"Number of Nodes: {len(expanded_graph.nodes)}")
    print(f"Number of Edges: {len(expanded_graph.edges)}\n")

In [None]:
def snowball(graph_source,
             graph_output,
             idx=False):
    """Run ``snowball_generator`` over the nodes of ``graph_source``.

    Each node is passed, in iteration order, to
    ``snowball_generator(graph_output, node)`` and the returned value is
    printed. On the first falsy result the position of that node within
    ``graph_source.nodes`` is written to ``idx_storage`` and the loop stops,
    so a later call can pick up from the same node.

    Args:
        graph_source: Object exposing an iterable ``nodes`` attribute; the
            nodes to process.
        graph_output: Passed through unchanged to ``snowball_generator``.
        idx: When truthy, read the last recorded index from ``idx_storage``
            and skip every node before it.

    Returns:
        None.
    """
    nodes_list = list(graph_source.nodes)
    remaining = nodes_list

    if idx:
        with open(idx_storage) as index_store:
            # Skip blank lines so a trailing newline in the checkpoint file
            # does not make int() fail.
            recorded = [line.strip() for line in index_store if line.strip()]
        if recorded:
            index_start = int(recorded[-1])
            remaining = nodes_list[index_start:]

            print(f"Starting from {index_start}")

    # `remaining` is always a suffix of `nodes_list`, so this is the absolute
    # position of its first element in the source ordering.
    offset = len(nodes_list) - len(remaining)

    for position, node in enumerate(remaining, start=offset):
        succeeded = snowball_generator(graph_output, node)
        print(succeeded)
        if not succeeded:
            # Record where to resume; the failing node is retried next time.
            with open(idx_storage, "w") as index_store:
                index_store.write(f"{position}")
            break

# idx=True makes the run read its starting index from idx_storage.
snowball(graph_source=chiwork_source, graph_output=expanded_graph, idx=True)

In [None]:
# Size of the expanded graph after the snowball cell has run.
node_total = len(expanded_graph.nodes)
edge_total = len(expanded_graph.edges)
print(f"Number of Nodes: {node_total}")
print(f"Number of Edges: {edge_total}")

In [None]:
# Export to a sibling temp file and swap it in only after the export succeeds.
# Truncating the target first would destroy the previous good export if
# write_gml failed part-way through.
tmp_graph_path = expanded_graph_path.with_name(expanded_graph_path.name + ".tmp")
try:
    nx.write_gml(expanded_graph, tmp_graph_path)
    os.replace(tmp_graph_path, expanded_graph_path)
finally:
    # Only present here if the export failed before the swap.
    if os.path.isfile(tmp_graph_path):
        os.remove(tmp_graph_path)