In [None]:
import sys
from pathlib import Path

# Parent of the current working directory; the notebook is presumably run
# from a sub-folder one level below the project root — TODO confirm.
project_root = Path.cwd().parents[0]
# Folder under the project root that is put on the import path below;
# presumably where the local `cpnet_new` package lives — verify layout.
pkg_parent    = project_root / "core-periphery-detection-master"
# Insert at position 0 so this folder is searched before any other
# sys.path entry when resolving imports.
sys.path.insert(0, str(pkg_parent))             

import cpnet_new
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
## social networks
# https://networkrepository.com/fb-pages-food.php
# https://networkrepository.com/fb-pages-tvshow.php

## biological networks
# https://networkrepository.com/bio-celegans.php
# https://networkrepository.com/bio-CE-GT.php 

## technological networks
# https://networkrepository.com/inf-USAir97.php explanation https://nps.edu/web/faculty-rgera/networkprofilesummaryresources?utm_source=chatgpt.com
# https://networks.skewed.de/net/webkb?utm_source=chatgpt.com webkb_wisconsin_link1 and webkb_wisconsin_cocite

#!/usr/bin/env python3
"""
Load six empirical networks as undirected, unweighted simple graphs (no loops) with NetworkX.

Sources (relative to ../empirical_networks):
- bio-CE-GN/bio-CE-GN.edges              (space-separated, 3 columns -> ignore 3rd)
- bio-celegans/bio-celegans.mtx          (MatrixMarket, skip first 2 lines)
- fb-pages-food/fb-pages-food.edges      (CSV, two columns)
- fb-pages-tvshow/fb-pages-tvshow.edges  (CSV, two columns)
- tech-routers-rf/tech-routers-rf.
- web-edu/web-edu.mtx                    (MatrixMarket, skip first 2 lines)

Each NetworkX Graph has its .name set to the *filename* used as the data source (e.g., "bio-CE-GN.edges").
All graphs are also returned in a dict keyed by that filename.
"""
from pathlib import Path
from typing import Dict
import networkx as nx
from scipy.io import mmread

def _simple_graph(raw, source_name):
    """Return an undirected, unweighted, loop-free copy of *raw*.

    Every loaded network goes through this helper so that all of them
    satisfy the same contract, regardless of which reader produced them:

    * a plain ``nx.Graph`` (undirected, no parallel edges),
    * no edge attributes (in particular no ``weight`` coming from the
      values stored in a MatrixMarket file),
    * no self-loops (nodes that only had a self-loop are kept, isolated),
    * ``.name`` set to *source_name*.

    Parameters
    ----------
    raw : networkx graph
        Graph as returned by ``nx.read_edgelist`` or
        ``nx.from_scipy_sparse_array``.
    source_name : str
        File name of the data source; stored in ``.name``.

    Returns
    -------
    networkx.Graph
        A new graph; *raw* is left untouched.
    """
    simple = nx.Graph()
    simple.add_nodes_from(raw.nodes())
    simple.add_edges_from((u, v) for u, v in raw.edges() if u != v)
    simple.name = source_name
    return simple


# --- edge-list sources (node labels are strings) ---------------------------
G_fb_food = _simple_graph(
    nx.read_edgelist('empirical_networks/fb-pages-food/fb-pages-food.edges', delimiter=','),
    'fb-pages-food.edges',
)
# data=False: any columns after the first two are ignored.
G_bio_CE_GT = _simple_graph(
    nx.read_edgelist('empirical_networks/bio-CE-GT/bio-CE-GT.edges', data=False),
    'bio-CE-GT.edges',
)
G_web_wisconsin = _simple_graph(
    nx.read_edgelist('empirical_networks/webkb_wisconsin_link1.csv/edges.csv', delimiter=',', data=False),
    'edges.csv',
)

# --- MatrixMarket sources (node labels are 0-based integer indices) --------
# The raw matrices stay available under their original names.
matrix_bio_celegans = mmread('empirical_networks/bio-celegans/bio-celegans.mtx')
G_bio_celegans = _simple_graph(nx.from_scipy_sparse_array(matrix_bio_celegans), 'bio-celegans.mtx')
matrix_rt_retweet = mmread('empirical_networks/rt-retweet/rt-retweet.mtx')
G_rt_retweet = _simple_graph(nx.from_scipy_sparse_array(matrix_rt_retweet), 'rt-retweet.mtx')
matrix_USAir = mmread('empirical_networks/inf-USAir97/inf-USAir97.mtx')
G_USAir = _simple_graph(nx.from_scipy_sparse_array(matrix_USAir), 'inf-USAir97.mtx')

# Dict keyed by source file name. The loops below (and the test section)
# iterate with ``graphs.items()``, which a plain list does not support.
graphs = {
    G.name: G
    for G in (G_fb_food, G_bio_celegans, G_bio_CE_GT, G_rt_retweet, G_USAir, G_web_wisconsin)
}

# One summary line per network as a quick sanity check of the loaded sizes.
for fname, G in graphs.items():
    print(f"{fname}: nodes={G.number_of_nodes()}, edges={G.number_of_edges()}, name={G.name}")

## test
# Run detection plus the significance test on a single network only.
for fname, G in graphs.items():
    # Guard clause: every entry except "bio-CE-GT.edges" is skipped.
    if fname != "bio-CE-GT.edges":
        continue

    # Detector from the local cpnet_new package.
    # NOTE(review): presumably the Borgatti-Everett algorithm — confirm.
    alg = cpnet_new.BE()
    alg.detect(G)

    # Per-node results read back from the fitted detector.
    # NOTE(review): assumed to be node -> coreness and node -> pair-id
    # mappings; verify against cpnet_new.
    x = alg.get_coreness()
    c = alg.get_pair_id()

    # Long-running step: the recorded run was interrupted after 8 of 100
    # progress-bar iterations at roughly 46 s each.
    sig_c, sig_x, significant, p_values = cpnet_new.qstest(
        c,
        x,
        G,
        alg,
        significance_level=0.01,
        num_of_thread=16,
    )
    


bio-CE-GT.edges: nodes=924, edges=3239, name=bio-CE-GT.edges
bio-celegans.mtx: nodes=453, edges=2025, name=bio-celegans.mtx
fb-pages-food.edges: nodes=620, edges=2091, name=fb-pages-food.edges
inf-USAir97.mtx: nodes=332, edges=2126, name=inf-USAir97.mtx
rt-retweet.mtx: nodes=96, edges=117, name=rt-retweet.mtx
edges.csv: nodes=297, edges=1130, name=edges.csv


  8%|▊         | 8/100 [06:12<1:11:20, 46.53s/it]


KeyboardInterrupt: 