In [3]:
import networkx as nx
from spikexplore import graph_explore
from spikexplore.backends.wikipedia import WikipediaNetwork
from spikexplore.config import SamplingConfig, GraphConfig, DataCollectionConfig, WikipediaConfig


def main(initial_nodes=None, output_path="out1.gexf"):
    """Sample a Wikipedia subgraph with spikexplore and save it as GEXF.

    Builds the Wikipedia backend and the sampling configuration, runs
    ``graph_explore.explore`` from the seed pages, prints the number of
    nodes, edges and connected components of the resulting graph, and writes
    that graph to ``output_path``.

    Args:
        initial_nodes: Wikipedia page titles to start the exploration from.
            May be a single title or an iterable of titles. Defaults to
            ``["Jazz", "Ukraine", "Lausanne"]``.
        output_path: Where the GEXF file is written. Defaults to
            ``"out1.gexf"``.

    Returns:
        None. Nothing is returned on purpose so that calling ``main()`` as
        the last statement of a notebook cell does not display an object repr.
    """
    if initial_nodes is None:
        initial_nodes = ["Jazz", "Ukraine", "Lausanne"]
    elif isinstance(initial_nodes, str):
        # A bare title must stay one seed, not be split into characters.
        initial_nodes = [initial_nodes]
    else:
        initial_nodes = list(initial_nodes)

    wiki_config = WikipediaConfig(
        user_agent="SpikexploreBot/1.0 (https://github.com/epfl-lts2/spikexplore; lts2@epfl.ch)",
        lang="en",
    )
    # Pages excluded via the config's `pages_ignored` attribute. All of them
    # are "<name> (identifier)" pages.
    # NOTE(review): presumably these are linked from most articles through
    # citation templates and would otherwise dominate the graph -- confirm.
    wiki_config.pages_ignored = [
        "BNF (identifier)",
        "Bibcode (identifier)",
        "CANTIC (identifier)",
        "CiNii (identifier)",
        "BNE (identifier)",
        "BIBSYS (identifier)",
        "ArXiv (identifier)",
        "Doi (identifier)",
        "ISBN (identifier)",
        "PMC (identifier)",
        "PMID (identifier)",
        "NDL (identifier)",
        "NKC (identifier)",
        "NLA (identifier)",
        "NLI (identifier)",
        "NLK (identifier)",
        "LCCN (identifier)",
        "LNB (identifier)",
        "MGP (identifier)",
        "NLP (identifier)",
    ]
    sampling_backend = WikipediaNetwork(wiki_config)

    graph_config = GraphConfig(min_degree=1, min_weight=1, community_detection=False)
    # NOTE(review): random_subset_mode="percent" suggests the sampling is
    # randomised, so node/edge counts may differ between runs -- confirm, and
    # seed the RNG if reproducible output is needed.
    data_collection_config = DataCollectionConfig(
        exploration_depth=2,
        random_subset_mode="percent",
        random_subset_size=15,
        expansion_type="coreball",
        degree=2,
        max_nodes_per_hop=100,
    )
    sampling_config = SamplingConfig(graph_config, data_collection_config)

    # explore() returns two values; only the first (the sampled graph) is used here.
    g_sub, _ = graph_explore.explore(sampling_backend, initial_nodes, sampling_config)
    print(
        f"Sampled graph with {g_sub.number_of_nodes()} nodes, {g_sub.number_of_edges()} edges and {nx.number_connected_components(g_sub)} connected components"
    )
    nx.write_gexf(g_sub, output_path)


In [4]:
# Run the exploration: prints the graph summary line and writes the GEXF file.
main()

Sampled graph with 449 nodes, 5218 edges and 1 connected components
