# Knowledge Graph extraction from documents
Based on https://github.com/lamm-mit/GraphReasoning

This notebook creates a knowledge graph using functions in the GraphReasoning_Mod module which is adpated from https://github.com/lamm-mit/GraphReasoning to work with huggingface endpoints and for use with huggingface documents. 
- documents are scrapped using data.ipynb from pdfs, blogs, and youtube videos transcripts. The only metadata used in the knowledge graph creating is adding relationships between source document and node.
- LLM model Mistral-Nemo-Instruct-2407
- Embedding model: dunzhang/stella_en_1.5B_v5 (chosen based on size and position on leaderboard DEC 2024)
- KG created with refinement loops in when identifying nodes. 
- KG uses simplify graph and additonal tools in GraphReasoning_Mod

## Setup

In [9]:
%pip install -r requirements.txt -q

Note: you may need to restart the kernel to use updated packages.


In [10]:
import GraphReasoning_Mod
import importlib

In [11]:
# Reload the module to reflect any changes
importlib.reload(GraphReasoning_Mod)

# Re-import all objects from the module
from GraphReasoning_Mod import *

In [12]:
from langchain_huggingface import HuggingFaceEndpoint

#Initialize the model endpoint
HOST_URL_INF = ":8080"
MAX_NEW_TOKENS = 1500

TEMPERATURE = 0.2
TIMEOUT = 180
TOP_P = .9

llm = HuggingFaceEndpoint(
    endpoint_url=HOST_URL_INF,
    task="text-generation",
    max_new_tokens=MAX_NEW_TOKENS,
    do_sample=False,
    temperature = TEMPERATURE,
    timeout=TIMEOUT,
    top_p=TOP_P
)
#print(llm.invoke("What is HuggingFace?"))

In [13]:
from langchain_huggingface import HuggingFaceEmbeddings

model_name = "dunzhang/stella_en_1.5B_v5" #"BAAI/bge-small-en-v1.5" #dunzhang/stella_en_1.5B_v5
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embd = HuggingFaceEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

## Import Documents

In [6]:
import pickle
import os

# load pickled documents
pickle_file_path = 'data/storage/full_all_documents.pkl'
if os.path.exists(pickle_file_path):
    with open(pickle_file_path, 'rb') as f:
        all_pdf_docs, all_yt_docs, all_blog_docs = pickle.load(f)

#check if the documents are loaded
print("Number of PDF documents:", len(all_pdf_docs))
print("Number of YouTube documents:", len(all_yt_docs))
print("Number of blog documents:", len(all_blog_docs))

#print dictionary keys from metadata
print("Metadata keys for PDF documents:", all_pdf_docs[0].metadata.keys())
print("Metadata keys for yt documents:", all_yt_docs[0].metadata.keys())
print("Metadata keys for blog documents:", all_blog_docs[0].metadata.keys())

# Combine all documents into a single list
all_docs = all_pdf_docs+  all_yt_docs+  all_blog_docs

print(f"Total number of documents: {len(all_docs)}")


Number of PDF documents: 2048
Number of YouTube documents: 442
Number of blog documents: 11
Metadata keys for PDF documents: dict_keys(['source', 'page'])
Metadata keys for yt documents: dict_keys(['source', 'title', 'description', 'upload_date', 'duration', 'uploader', 'view_count', 'like_count', 'dislike_count', 'categories', 'tags', 'sequence'])
Metadata keys for blog documents: dict_keys(['source', 'title', 'description', 'language'])
Total number of documents: 2501


## Create create and save networkx graph

In [7]:

# Initialize variables
G_existing = None
existing_node_embeddings = None
failed_batches = []  
output_directory = 'data_w_refine'


# Process documents in chunks
chunk_size = 1500
batch_size = 10

# Split all_docs into batches of size batch_size
doc_batches = [all_docs[i:i + batch_size] for i in range(0, len(all_docs), batch_size)]

for batch_idx, doc_batch in tqdm(enumerate(doc_batches), total=len(doc_batches), desc="Processing batches..."):
    try:
        G_existing, existing_node_embeddings, res = add_new_subgraph_from_docs(
            input_docs=doc_batch,
            llm=llm,
            embd=embd,
            data_dir_output=f"./{output_directory}/",
            verbatim=False,
            size_threshold=10,
            chunk_size=chunk_size,
            do_Louvain_on_new_graph=True,
            include_contextual_proximity=False,
            repeat_refine=2,
            similarity_threshold=0.95,
            do_simplify_graph=True,
            return_only_giant_component=False,
            save_common_graph=False,
            G_exisiting=G_existing,
            graph_GraphML_exisiting=None,
            existing_node_embeddings=existing_node_embeddings
        )

        print(f"Processed batch {batch_idx}, updated graph stats:", res)
        with open(f'{output_directory}/embeddings.pkl', 'wb') as f:
            pickle.dump(existing_node_embeddings, f)
        with open(f'{output_directory}/failed_batches.pkl', 'wb') as f:
            pickle.dump(failed_batches, f)
    except Exception as e:
        # Log the failed batch index
        failed_batches.append(batch_idx)
        print(f"Error processing batch {batch_idx} with batch size {batch_size}: {e}")

# Final graph statistics and saving
print("Final graph statistics:", res if 'res' in locals() else "No successful batches")
print("Failed batch indices:", failed_batches)

Processing batches...:   0%|          | 1/251 [10:06<42:06:55, 606.46s/it]

Processed batch 0, updated graph stats: ({'Number of Nodes': 28, 'Number of Edges': 70, 'Average Degree': 5.0, 'Density': 0.18518518518518517, 'Connected Components': 1, 'Number of Communities': 5}, False)


Processing batches...:   1%|          | 2/251 [23:44<50:32:41, 730.77s/it]

Processed batch 1, updated graph stats: ({'Number of Nodes': 55, 'Number of Edges': 120, 'Average Degree': 4.363636363636363, 'Density': 0.08080808080808081, 'Connected Components': 1, 'Number of Communities': 7}, False)


Processing batches...:   1%|          | 3/251 [35:58<50:26:39, 732.26s/it]

Processed batch 2, updated graph stats: ({'Number of Nodes': 91, 'Number of Edges': 227, 'Average Degree': 4.989010989010989, 'Density': 0.05543345543345543, 'Connected Components': 1, 'Number of Communities': 7}, False)


Processing batches...:   2%|▏         | 4/251 [49:05<51:44:23, 754.10s/it]

Processed batch 3, updated graph stats: ({'Number of Nodes': 107, 'Number of Edges': 268, 'Average Degree': 5.009345794392523, 'Density': 0.047257979192382296, 'Connected Components': 1, 'Number of Communities': 7}, False)


Processing batches...:   2%|▏         | 5/251 [1:05:05<56:35:02, 828.06s/it]

Processed batch 4, updated graph stats: ({'Number of Nodes': 115, 'Number of Edges': 293, 'Average Degree': 5.095652173913043, 'Density': 0.04469870327993898, 'Connected Components': 1, 'Number of Communities': 6}, False)


Processing batches...:   2%|▏         | 6/251 [1:19:10<56:45:16, 833.95s/it]

Processed batch 5, updated graph stats: ({'Number of Nodes': 120, 'Number of Edges': 314, 'Average Degree': 5.233333333333333, 'Density': 0.04397759103641457, 'Connected Components': 1, 'Number of Communities': 7}, False)


Processing batches...:   3%|▎         | 7/251 [1:34:39<58:37:52, 865.05s/it]

Processed batch 6, updated graph stats: ({'Number of Nodes': 130, 'Number of Edges': 339, 'Average Degree': 5.2153846153846155, 'Density': 0.04042933810375671, 'Connected Components': 1, 'Number of Communities': 9}, False)


Processing batches...:   3%|▎         | 8/251 [1:50:01<59:36:36, 883.11s/it]

Processed batch 7, updated graph stats: ({'Number of Nodes': 138, 'Number of Edges': 360, 'Average Degree': 5.217391304347826, 'Density': 0.03808314820691844, 'Connected Components': 1, 'Number of Communities': 9}, False)


Processing batches...:   4%|▎         | 9/251 [2:02:42<56:47:41, 844.88s/it]

Processed batch 8, updated graph stats: ({'Number of Nodes': 149, 'Number of Edges': 389, 'Average Degree': 5.221476510067114, 'Density': 0.035280246689642664, 'Connected Components': 1, 'Number of Communities': 8}, False)


Processing batches...:   4%|▍         | 10/251 [2:18:24<58:34:41, 875.03s/it]

Processed batch 9, updated graph stats: ({'Number of Nodes': 162, 'Number of Edges': 422, 'Average Degree': 5.209876543209877, 'Density': 0.03235948163484395, 'Connected Components': 1, 'Number of Communities': 8}, False)


Processing batches...:   4%|▍         | 11/251 [2:32:02<57:10:41, 857.67s/it]

Processed batch 10, updated graph stats: ({'Number of Nodes': 178, 'Number of Edges': 457, 'Average Degree': 5.134831460674158, 'Density': 0.029010347235447216, 'Connected Components': 1, 'Number of Communities': 7}, False)


Processing batches...:   5%|▍         | 12/251 [2:46:19<56:55:21, 857.41s/it]

Processed batch 11, updated graph stats: ({'Number of Nodes': 188, 'Number of Edges': 488, 'Average Degree': 5.191489361702128, 'Density': 0.027761975196268062, 'Connected Components': 1, 'Number of Communities': 7}, False)


Processing batches...:   5%|▌         | 13/251 [3:03:01<59:34:05, 901.03s/it]

Processed batch 12, updated graph stats: ({'Number of Nodes': 222, 'Number of Edges': 575, 'Average Degree': 5.18018018018018, 'Density': 0.023439729322082263, 'Connected Components': 1, 'Number of Communities': 9}, False)


Processing batches...:   6%|▌         | 14/251 [3:18:05<59:22:27, 901.89s/it]

Processed batch 13, updated graph stats: ({'Number of Nodes': 232, 'Number of Edges': 606, 'Average Degree': 5.224137931034483, 'Density': 0.022615315718763993, 'Connected Components': 1, 'Number of Communities': 14}, False)


Processing batches...:   6%|▌         | 15/251 [3:34:10<60:22:51, 921.07s/it]

Processed batch 14, updated graph stats: ({'Number of Nodes': 258, 'Number of Edges': 664, 'Average Degree': 5.147286821705427, 'Density': 0.02002835339184991, 'Connected Components': 1, 'Number of Communities': 9}, False)


Processing batches...:   6%|▋         | 16/251 [3:50:20<61:04:33, 935.63s/it]

Processed batch 15, updated graph stats: ({'Number of Nodes': 280, 'Number of Edges': 716, 'Average Degree': 5.114285714285714, 'Density': 0.018330773169482846, 'Connected Components': 1, 'Number of Communities': 10}, False)


Processing batches...:   7%|▋         | 17/251 [4:05:49<60:41:29, 933.72s/it]

Processed batch 16, updated graph stats: ({'Number of Nodes': 286, 'Number of Edges': 731, 'Average Degree': 5.111888111888112, 'Density': 0.017936449515396885, 'Connected Components': 1, 'Number of Communities': 9}, False)


Processing batches...:   7%|▋         | 18/251 [4:21:35<60:39:57, 937.33s/it]

Processed batch 17, updated graph stats: ({'Number of Nodes': 314, 'Number of Edges': 808, 'Average Degree': 5.146496815286624, 'Density': 0.016442481837976437, 'Connected Components': 1, 'Number of Communities': 13}, False)


Processing batches...:   8%|▊         | 19/251 [4:35:29<58:24:41, 906.38s/it]

Processed batch 18, updated graph stats: ({'Number of Nodes': 332, 'Number of Edges': 860, 'Average Degree': 5.180722891566265, 'Density': 0.015651730790230408, 'Connected Components': 1, 'Number of Communities': 12}, False)


Processing batches...:   8%|▊         | 20/251 [4:49:31<56:55:38, 887.18s/it]

Processed batch 19, updated graph stats: ({'Number of Nodes': 345, 'Number of Edges': 893, 'Average Degree': 5.176811594202898, 'Density': 0.01504887091338052, 'Connected Components': 1, 'Number of Communities': 10}, False)


Processing batches...:   8%|▊         | 21/251 [5:04:48<57:15:04, 896.11s/it]

Processed batch 20, updated graph stats: ({'Number of Nodes': 353, 'Number of Edges': 920, 'Average Degree': 5.212464589235127, 'Density': 0.014808138037599793, 'Connected Components': 1, 'Number of Communities': 12}, False)


Processing batches...:   9%|▉         | 22/251 [5:19:41<56:56:06, 895.05s/it]

Processed batch 21, updated graph stats: ({'Number of Nodes': 368, 'Number of Edges': 957, 'Average Degree': 5.201086956521739, 'Density': 0.014171899064091931, 'Connected Components': 1, 'Number of Communities': 10}, False)


Processing batches...:   9%|▉         | 23/251 [5:35:39<57:53:45, 914.15s/it]

Processed batch 22, updated graph stats: ({'Number of Nodes': 381, 'Number of Edges': 994, 'Average Degree': 5.217847769028872, 'Density': 0.013731178339549661, 'Connected Components': 1, 'Number of Communities': 16}, False)


Processing batches...:  10%|▉         | 24/251 [5:49:42<56:16:54, 892.58s/it]

Processed batch 23, updated graph stats: ({'Number of Nodes': 392, 'Number of Edges': 1021, 'Average Degree': 5.209183673469388, 'Density': 0.013322720392504829, 'Connected Components': 1, 'Number of Communities': 18}, False)


Processing batches...:  10%|▉         | 25/251 [6:01:00<52:00:05, 828.34s/it]

Processed batch 24, updated graph stats: ({'Number of Nodes': 401, 'Number of Edges': 1043, 'Average Degree': 5.201995012468828, 'Density': 0.013004987531172069, 'Connected Components': 1, 'Number of Communities': 19}, False)


Processing batches...:  10%|█         | 26/251 [6:16:00<53:06:41, 849.78s/it]

Processed batch 25, updated graph stats: ({'Number of Nodes': 416, 'Number of Edges': 1086, 'Average Degree': 5.221153846153846, 'Density': 0.01258109360518999, 'Connected Components': 1, 'Number of Communities': 17}, False)


Processing batches...:  11%|█         | 27/251 [6:29:04<51:38:41, 830.01s/it]

Processed batch 26, updated graph stats: ({'Number of Nodes': 425, 'Number of Edges': 1107, 'Average Degree': 5.209411764705882, 'Density': 0.012286348501664816, 'Connected Components': 1, 'Number of Communities': 15}, False)


Processing batches...:  11%|█         | 28/251 [6:42:36<51:04:57, 824.65s/it]

Processed batch 27, updated graph stats: ({'Number of Nodes': 427, 'Number of Edges': 1114, 'Average Degree': 5.217798594847775, 'Density': 0.012248353509032335, 'Connected Components': 1, 'Number of Communities': 18}, False)


Processing batches...:  12%|█▏        | 29/251 [6:57:05<51:40:36, 838.00s/it]

Processed batch 28, updated graph stats: ({'Number of Nodes': 436, 'Number of Edges': 1139, 'Average Degree': 5.224770642201835, 'Density': 0.012010966993567436, 'Connected Components': 1, 'Number of Communities': 16}, False)


Processing batches...:  12%|█▏        | 30/251 [7:13:40<54:19:40, 884.98s/it]

Processed batch 29, updated graph stats: ({'Number of Nodes': 448, 'Number of Edges': 1167, 'Average Degree': 5.209821428571429, 'Density': 0.011655081495685523, 'Connected Components': 1, 'Number of Communities': 19}, False)


Processing batches...:  12%|█▏        | 31/251 [7:28:13<53:52:26, 881.58s/it]

Processed batch 30, updated graph stats: ({'Number of Nodes': 466, 'Number of Edges': 1207, 'Average Degree': 5.180257510729613, 'Density': 0.011140338732751857, 'Connected Components': 1, 'Number of Communities': 13}, False)


Processing batches...:  13%|█▎        | 32/251 [7:45:03<55:57:32, 919.88s/it]

Processed batch 31, updated graph stats: ({'Number of Nodes': 486, 'Number of Edges': 1257, 'Average Degree': 5.172839506172839, 'Density': 0.010665648466335751, 'Connected Components': 1, 'Number of Communities': 17}, False)


Processing batches...:  13%|█▎        | 33/251 [7:59:40<54:56:10, 907.20s/it]

Processed batch 32, updated graph stats: ({'Number of Nodes': 501, 'Number of Edges': 1304, 'Average Degree': 5.205588822355289, 'Density': 0.010411177644710579, 'Connected Components': 1, 'Number of Communities': 19}, False)


Processing batches...:  14%|█▎        | 34/251 [8:14:50<54:43:49, 907.97s/it]

Processed batch 33, updated graph stats: ({'Number of Nodes': 510, 'Number of Edges': 1330, 'Average Degree': 5.215686274509804, 'Density': 0.01024692784775993, 'Connected Components': 1, 'Number of Communities': 19}, False)


Processing batches...:  14%|█▍        | 35/251 [8:29:11<53:38:26, 894.01s/it]

Processed batch 34, updated graph stats: ({'Number of Nodes': 519, 'Number of Edges': 1353, 'Average Degree': 5.213872832369942, 'Density': 0.010065391568281741, 'Connected Components': 1, 'Number of Communities': 18}, False)


Processing batches...:  14%|█▍        | 36/251 [8:40:52<49:55:33, 835.97s/it]

Processed batch 35, updated graph stats: ({'Number of Nodes': 531, 'Number of Edges': 1380, 'Average Degree': 5.19774011299435, 'Density': 0.009807056816970472, 'Connected Components': 1, 'Number of Communities': 17}, False)


Processing batches...:  15%|█▍        | 37/251 [8:57:52<52:58:57, 891.30s/it]

Processed batch 36, updated graph stats: ({'Number of Nodes': 541, 'Number of Edges': 1405, 'Average Degree': 5.194085027726432, 'Density': 0.009618675977271172, 'Connected Components': 1, 'Number of Communities': 18}, False)


Processing batches...:  15%|█▌        | 38/251 [9:13:00<53:01:36, 896.23s/it]

Processed batch 37, updated graph stats: ({'Number of Nodes': 547, 'Number of Edges': 1427, 'Average Degree': 5.217550274223035, 'Density': 0.009555952883192371, 'Connected Components': 1, 'Number of Communities': 18}, False)


Processing batches...:  16%|█▌        | 39/251 [9:25:29<50:10:15, 851.96s/it]

Processed batch 38, updated graph stats: ({'Number of Nodes': 559, 'Number of Edges': 1463, 'Average Degree': 5.2343470483005365, 'Density': 0.009380550265771571, 'Connected Components': 1, 'Number of Communities': 21}, False)


Processing batches...:  16%|█▌        | 40/251 [9:40:17<50:34:26, 862.88s/it]

Processed batch 39, updated graph stats: ({'Number of Nodes': 587, 'Number of Edges': 1528, 'Average Degree': 5.206132879045996, 'Density': 0.008884185800419789, 'Connected Components': 1, 'Number of Communities': 15}, False)


Processing batches...:  16%|█▋        | 41/251 [9:57:07<52:54:38, 907.04s/it]

Processed batch 40, updated graph stats: ({'Number of Nodes': 608, 'Number of Edges': 1575, 'Average Degree': 5.180921052631579, 'Density': 0.008535290037284314, 'Connected Components': 1, 'Number of Communities': 17}, False)


Processing batches...:  17%|█▋        | 42/251 [10:13:07<53:34:16, 922.76s/it]

Processed batch 41, updated graph stats: ({'Number of Nodes': 636, 'Number of Edges': 1645, 'Average Degree': 5.172955974842767, 'Density': 0.008146387361957113, 'Connected Components': 1, 'Number of Communities': 18}, False)


Processing batches...:  17%|█▋        | 43/251 [10:27:15<52:01:16, 900.37s/it]

Processed batch 42, updated graph stats: ({'Number of Nodes': 664, 'Number of Edges': 1726, 'Average Degree': 5.198795180722891, 'Density': 0.007841320031256246, 'Connected Components': 1, 'Number of Communities': 21}, False)


Processing batches...:  18%|█▊        | 44/251 [10:42:55<52:27:11, 912.23s/it]

Processed batch 43, updated graph stats: ({'Number of Nodes': 674, 'Number of Edges': 1760, 'Average Degree': 5.222551928783383, 'Density': 0.007760106877835636, 'Connected Components': 1, 'Number of Communities': 18}, False)


Processing batches...:  18%|█▊        | 45/251 [10:59:02<53:09:12, 928.89s/it]

Processed batch 44, updated graph stats: ({'Number of Nodes': 702, 'Number of Edges': 1829, 'Average Degree': 5.210826210826211, 'Density': 0.0074334182750730535, 'Connected Components': 1, 'Number of Communities': 21}, False)


Processing batches...:  18%|█▊        | 46/251 [11:12:48<51:07:48, 897.89s/it]

Processed batch 45, updated graph stats: ({'Number of Nodes': 721, 'Number of Edges': 1882, 'Average Degree': 5.220527045769765, 'Density': 0.007250732008013561, 'Connected Components': 1, 'Number of Communities': 18}, False)


Processing batches...:  19%|█▊        | 47/251 [11:26:18<49:22:58, 871.47s/it]

Processed batch 46, updated graph stats: ({'Number of Nodes': 744, 'Number of Edges': 1942, 'Average Degree': 5.220430107526882, 'Density': 0.007026150884962156, 'Connected Components': 1, 'Number of Communities': 24}, False)


Processing batches...:  19%|█▉        | 48/251 [11:41:45<50:05:11, 888.23s/it]

Processed batch 47, updated graph stats: ({'Number of Nodes': 762, 'Number of Edges': 1992, 'Average Degree': 5.228346456692913, 'Density': 0.006870363280805405, 'Connected Components': 1, 'Number of Communities': 19}, False)


Processing batches...:  20%|█▉        | 49/251 [11:55:15<48:30:45, 864.58s/it]

Processed batch 48, updated graph stats: ({'Number of Nodes': 794, 'Number of Edges': 2056, 'Average Degree': 5.178841309823677, 'Density': 0.00653069522045861, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  20%|█▉        | 50/251 [12:09:24<48:01:03, 860.02s/it]

Processed batch 49, updated graph stats: ({'Number of Nodes': 804, 'Number of Edges': 2080, 'Average Degree': 5.174129353233831, 'Density': 0.006443498571897672, 'Connected Components': 1, 'Number of Communities': 26}, False)


Processing batches...:  20%|██        | 51/251 [12:21:42<45:44:35, 823.38s/it]

Processed batch 50, updated graph stats: ({'Number of Nodes': 848, 'Number of Edges': 2176, 'Average Degree': 5.132075471698113, 'Density': 0.006059120981934018, 'Connected Components': 1, 'Number of Communities': 26}, False)


Processing batches...:  21%|██        | 52/251 [12:38:10<48:15:16, 872.95s/it]

Processed batch 51, updated graph stats: ({'Number of Nodes': 852, 'Number of Edges': 2187, 'Average Degree': 5.133802816901408, 'Density': 0.006032670760166167, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  21%|██        | 53/251 [12:54:58<50:14:08, 913.38s/it]

Processed batch 52, updated graph stats: ({'Number of Nodes': 865, 'Number of Edges': 2228, 'Average Degree': 5.151445086705202, 'Density': 0.005962320702205095, 'Connected Components': 1, 'Number of Communities': 28}, False)


Processing batches...:  22%|██▏       | 54/251 [13:09:50<49:38:07, 907.04s/it]

Processed batch 53, updated graph stats: ({'Number of Nodes': 878, 'Number of Edges': 2265, 'Average Degree': 5.159453302961276, 'Density': 0.005883071041004875, 'Connected Components': 1, 'Number of Communities': 24}, False)


Processing batches...:  22%|██▏       | 55/251 [13:17:18<41:52:52, 769.25s/it]

Processed batch 54, updated graph stats: ({'Number of Nodes': 897, 'Number of Edges': 2307, 'Average Degree': 5.1438127090301, 'Density': 0.005740862398471094, 'Connected Components': 1, 'Number of Communities': 19}, False)


Processing batches...:  22%|██▏       | 56/251 [13:27:27<39:03:11, 720.98s/it]

Processed batch 55, updated graph stats: ({'Number of Nodes': 906, 'Number of Edges': 2337, 'Average Degree': 5.158940397350993, 'Density': 0.005700486626907175, 'Connected Components': 1, 'Number of Communities': 23}, False)


Processing batches...:  23%|██▎       | 57/251 [13:40:37<39:58:19, 741.75s/it]

Processed batch 56, updated graph stats: ({'Number of Nodes': 917, 'Number of Edges': 2359, 'Average Degree': 5.145038167938932, 'Density': 0.005616853895129838, 'Connected Components': 1, 'Number of Communities': 29}, False)


Processing batches...:  23%|██▎       | 58/251 [13:55:41<42:22:21, 790.37s/it]

Processed batch 57, updated graph stats: ({'Number of Nodes': 947, 'Number of Edges': 2428, 'Average Degree': 5.127771911298838, 'Density': 0.005420477707504058, 'Connected Components': 1, 'Number of Communities': 21}, False)


Processing batches...:  24%|██▎       | 59/251 [14:09:00<42:18:06, 793.16s/it]

Processed batch 58, updated graph stats: ({'Number of Nodes': 963, 'Number of Edges': 2460, 'Average Degree': 5.109034267912772, 'Density': 0.00531084643234176, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  24%|██▍       | 60/251 [14:20:30<40:26:15, 762.18s/it]

Processed batch 59, updated graph stats: ({'Number of Nodes': 975, 'Number of Edges': 2494, 'Average Degree': 5.115897435897436, 'Density': 0.005252461433159585, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  24%|██▍       | 61/251 [14:31:49<38:54:26, 737.19s/it]

Processed batch 60, updated graph stats: ({'Number of Nodes': 984, 'Number of Edges': 2515, 'Average Degree': 5.111788617886178, 'Density': 0.005200191879843519, 'Connected Components': 1, 'Number of Communities': 31}, False)


Processing batches...:  25%|██▍       | 62/251 [14:45:53<40:22:59, 769.21s/it]

Processed batch 61, updated graph stats: ({'Number of Nodes': 1000, 'Number of Edges': 2555, 'Average Degree': 5.11, 'Density': 0.005115115115115115, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  25%|██▌       | 63/251 [14:59:17<40:42:33, 779.54s/it]

Processed batch 62, updated graph stats: ({'Number of Nodes': 1014, 'Number of Edges': 2586, 'Average Degree': 5.100591715976331, 'Density': 0.005035134961477129, 'Connected Components': 1, 'Number of Communities': 32}, False)


Processing batches...:  25%|██▌       | 64/251 [15:10:10<38:31:44, 741.74s/it]

Processed batch 63, updated graph stats: ({'Number of Nodes': 1029, 'Number of Edges': 2624, 'Average Degree': 5.100097181729835, 'Density': 0.0049611840289200725, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  26%|██▌       | 65/251 [15:20:41<36:36:06, 708.42s/it]

Processed batch 64, updated graph stats: ({'Number of Nodes': 1036, 'Number of Edges': 2639, 'Average Degree': 5.094594594594595, 'Density': 0.004922313617965792, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  26%|██▋       | 66/251 [15:32:22<36:17:59, 706.38s/it]

Processed batch 65, updated graph stats: ({'Number of Nodes': 1061, 'Number of Edges': 2701, 'Average Degree': 5.091423185673893, 'Density': 0.004803229420447069, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  27%|██▋       | 67/251 [15:44:56<36:50:00, 720.65s/it]

Processed batch 66, updated graph stats: ({'Number of Nodes': 1077, 'Number of Edges': 2735, 'Average Degree': 5.078922934076138, 'Density': 0.004720188600442507, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  27%|██▋       | 68/251 [15:57:37<37:15:01, 732.79s/it]

Processed batch 67, updated graph stats: ({'Number of Nodes': 1090, 'Number of Edges': 2765, 'Average Degree': 5.073394495412844, 'Density': 0.004658764458597653, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  27%|██▋       | 69/251 [16:09:33<36:46:42, 727.48s/it]

Processed batch 68, updated graph stats: ({'Number of Nodes': 1098, 'Number of Edges': 2784, 'Average Degree': 5.0710382513661205, 'Density': 0.004622641979367475, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  28%|██▊       | 70/251 [16:22:01<36:53:49, 733.86s/it]

Processed batch 69, updated graph stats: ({'Number of Nodes': 1108, 'Number of Edges': 2807, 'Average Degree': 5.066787003610108, 'Density': 0.004577043363694768, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  28%|██▊       | 71/251 [16:34:40<37:03:49, 741.28s/it]

Processed batch 70, updated graph stats: ({'Number of Nodes': 1114, 'Number of Edges': 2819, 'Average Degree': 5.061041292639138, 'Density': 0.004547206911625461, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  29%|██▊       | 72/251 [16:48:07<37:50:38, 761.11s/it]

Processed batch 71, updated graph stats: ({'Number of Nodes': 1131, 'Number of Edges': 2872, 'Average Degree': 5.07869142351901, 'Density': 0.004494417188954876, 'Connected Components': 1, 'Number of Communities': 35}, False)


Processing batches...:  29%|██▉       | 73/251 [16:58:49<35:51:52, 725.35s/it]

Processed batch 72, updated graph stats: ({'Number of Nodes': 1152, 'Number of Edges': 2923, 'Average Degree': 5.074652777777778, 'Density': 0.00440890771309972, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  29%|██▉       | 74/251 [17:11:25<36:06:27, 734.39s/it]

Processed batch 73, updated graph stats: ({'Number of Nodes': 1161, 'Number of Edges': 2946, 'Average Degree': 5.074935400516796, 'Density': 0.004374944310790341, 'Connected Components': 1, 'Number of Communities': 32}, False)


Processing batches...:  30%|██▉       | 75/251 [17:25:48<37:47:35, 773.04s/it]

Processed batch 74, updated graph stats: ({'Number of Nodes': 1176, 'Number of Edges': 2986, 'Average Degree': 5.078231292517007, 'Density': 0.004321898972354899, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  30%|███       | 76/251 [17:37:53<36:52:34, 758.60s/it]

Processed batch 75, updated graph stats: ({'Number of Nodes': 1179, 'Number of Edges': 3000, 'Average Degree': 5.089058524173028, 'Density': 0.004320083636819209, 'Connected Components': 1, 'Number of Communities': 25}, False)


Processing batches...:  31%|███       | 77/251 [17:52:21<38:15:01, 791.39s/it]

Processed batch 76, updated graph stats: ({'Number of Nodes': 1186, 'Number of Edges': 3024, 'Average Degree': 5.099494097807757, 'Density': 0.004303370546673213, 'Connected Components': 1, 'Number of Communities': 29}, False)


Processing batches...:  31%|███       | 78/251 [18:05:56<38:22:21, 798.51s/it]

Processed batch 77, updated graph stats: ({'Number of Nodes': 1192, 'Number of Edges': 3043, 'Average Degree': 5.105704697986577, 'Density': 0.0042869057077972944, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  31%|███▏      | 79/251 [18:19:02<37:58:03, 794.67s/it]

Processed batch 78, updated graph stats: ({'Number of Nodes': 1210, 'Number of Edges': 3105, 'Average Degree': 5.132231404958677, 'Density': 0.004245021840329758, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  32%|███▏      | 80/251 [18:31:25<37:00:53, 779.26s/it]

Processed batch 79, updated graph stats: ({'Number of Nodes': 1221, 'Number of Edges': 3140, 'Average Degree': 5.143325143325144, 'Density': 0.004215840281414052, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  32%|███▏      | 81/251 [18:42:15<34:58:27, 740.63s/it]

Processed batch 80, updated graph stats: ({'Number of Nodes': 1235, 'Number of Edges': 3183, 'Average Degree': 5.154655870445344, 'Density': 0.004177192763732045, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  33%|███▎      | 82/251 [18:55:48<35:46:39, 762.13s/it]

Processed batch 81, updated graph stats: ({'Number of Nodes': 1255, 'Number of Edges': 3229, 'Average Degree': 5.145816733067729, 'Density': 0.004103522115683995, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  33%|███▎      | 83/251 [19:07:55<35:04:55, 751.76s/it]

Processed batch 82, updated graph stats: ({'Number of Nodes': 1279, 'Number of Edges': 3280, 'Average Degree': 5.129007036747459, 'Density': 0.0040133075404909695, 'Connected Components': 1, 'Number of Communities': 32}, False)


Processing batches...:  33%|███▎      | 84/251 [19:24:27<38:12:54, 823.80s/it]

Processed batch 83, updated graph stats: ({'Number of Nodes': 1289, 'Number of Edges': 3313, 'Average Degree': 5.140418929402638, 'Density': 0.003991008485561054, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  34%|███▍      | 85/251 [19:38:51<38:32:07, 835.71s/it]

Processed batch 84, updated graph stats: ({'Number of Nodes': 1301, 'Number of Edges': 3347, 'Average Degree': 5.145272867025365, 'Density': 0.003957902205404127, 'Connected Components': 1, 'Number of Communities': 35}, False)


Processing batches...:  34%|███▍      | 86/251 [19:52:06<37:44:54, 823.60s/it]

Processed batch 85, updated graph stats: ({'Number of Nodes': 1314, 'Number of Edges': 3376, 'Average Degree': 5.138508371385083, 'Density': 0.003913563116058708, 'Connected Components': 1, 'Number of Communities': 29}, False)


Processing batches...:  35%|███▍      | 87/251 [20:06:19<37:55:05, 832.35s/it]

Processed batch 86, updated graph stats: ({'Number of Nodes': 1323, 'Number of Edges': 3407, 'Average Degree': 5.150415721844293, 'Density': 0.0038959271723481795, 'Connected Components': 1, 'Number of Communities': 25}, False)


Processing batches...:  35%|███▌      | 88/251 [20:16:52<34:59:17, 772.74s/it]

Processed batch 87, updated graph stats: ({'Number of Nodes': 1333, 'Number of Edges': 3430, 'Average Degree': 5.146286571642911, 'Density': 0.0038635785072394227, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  35%|███▌      | 89/251 [20:30:22<35:16:32, 783.90s/it]

Processed batch 88, updated graph stats: ({'Number of Nodes': 1349, 'Number of Edges': 3478, 'Average Degree': 5.156412157153447, 'Density': 0.0038252315705886103, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  36%|███▌      | 90/251 [20:42:34<34:21:46, 768.37s/it]

Processed batch 89, updated graph stats: ({'Number of Nodes': 1358, 'Number of Edges': 3509, 'Average Degree': 5.167893961708395, 'Density': 0.0038083227426001435, 'Connected Components': 1, 'Number of Communities': 28}, False)


Processing batches...:  36%|███▋      | 91/251 [20:58:20<36:31:04, 821.65s/it]

Processed batch 90, updated graph stats: ({'Number of Nodes': 1376, 'Number of Edges': 3559, 'Average Degree': 5.1729651162790695, 'Density': 0.00376215644820296, 'Connected Components': 1, 'Number of Communities': 32}, False)


Processing batches...:  37%|███▋      | 92/251 [21:09:36<34:21:11, 777.81s/it]

Processed batch 91, updated graph stats: ({'Number of Nodes': 1403, 'Number of Edges': 3619, 'Average Degree': 5.158945117605132, 'Density': 0.0036797040781776976, 'Connected Components': 1, 'Number of Communities': 28}, False)


Processing batches...:  37%|███▋      | 93/251 [21:21:16<33:07:02, 754.57s/it]

Processed batch 92, updated graph stats: ({'Number of Nodes': 1415, 'Number of Edges': 3666, 'Average Degree': 5.181625441696113, 'Density': 0.003664515871072216, 'Connected Components': 1, 'Number of Communities': 28}, False)


Processing batches...:  37%|███▋      | 94/251 [21:34:03<33:03:46, 758.13s/it]

Processed batch 93, updated graph stats: ({'Number of Nodes': 1427, 'Number of Edges': 3707, 'Average Degree': 5.195515066573231, 'Density': 0.0036434187002617325, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  38%|███▊      | 95/251 [21:47:28<33:27:59, 772.31s/it]

Processed batch 94, updated graph stats: ({'Number of Nodes': 1447, 'Number of Edges': 3778, 'Average Degree': 5.221838286109191, 'Density': 0.0036112297967560106, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  38%|███▊      | 96/251 [21:57:08<30:46:22, 714.73s/it]

Processed batch 95, updated graph stats: ({'Number of Nodes': 1459, 'Number of Edges': 3810, 'Average Degree': 5.222755311857437, 'Density': 0.0035821367022341815, 'Connected Components': 1, 'Number of Communities': 31}, False)


Processing batches...:  39%|███▊      | 97/251 [22:08:40<30:16:52, 707.87s/it]

Processed batch 96, updated graph stats: ({'Number of Nodes': 1468, 'Number of Edges': 3837, 'Average Degree': 5.227520435967302, 'Density': 0.0035634086134746436, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  39%|███▉      | 98/251 [22:18:06<28:16:09, 665.16s/it]

Processed batch 97, updated graph stats: ({'Number of Nodes': 1483, 'Number of Edges': 3884, 'Average Degree': 5.238031018206338, 'Density': 0.0035344338854293783, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  39%|███▉      | 99/251 [22:26:39<26:09:11, 619.42s/it]

Processed batch 98, updated graph stats: ({'Number of Nodes': 1492, 'Number of Edges': 3914, 'Average Degree': 5.246648793565684, 'Density': 0.0035188791372003244, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  40%|███▉      | 100/251 [22:35:38<24:58:35, 595.47s/it]

Processed batch 99, updated graph stats: ({'Number of Nodes': 1494, 'Number of Edges': 3919, 'Average Degree': 5.246318607764391, 'Density': 0.0035139441445173414, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  40%|████      | 101/251 [22:45:04<24:26:42, 586.68s/it]

Processed batch 100, updated graph stats: ({'Number of Nodes': 1502, 'Number of Edges': 3941, 'Average Degree': 5.247669773635153, 'Density': 0.0034961157719088296, 'Connected Components': 1, 'Number of Communities': 32}, False)


Processing batches...:  41%|████      | 102/251 [22:59:14<27:32:57, 665.62s/it]

Processed batch 101, updated graph stats: ({'Number of Nodes': 1506, 'Number of Edges': 3951, 'Average Degree': 5.247011952191235, 'Density': 0.003486386679196834, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  41%|████      | 103/251 [23:12:06<28:40:48, 697.62s/it]

Processed batch 102, updated graph stats: ({'Number of Nodes': 1550, 'Number of Edges': 4048, 'Average Degree': 5.223225806451613, 'Density': 0.0033719985838938753, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  41%|████▏     | 104/251 [23:24:12<28:49:31, 705.93s/it]

Processed batch 103, updated graph stats: ({'Number of Nodes': 1555, 'Number of Edges': 4062, 'Average Degree': 5.22443729903537, 'Density': 0.0033619287638580243, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  42%|████▏     | 105/251 [23:36:17<28:52:09, 711.84s/it]

Processed batch 104, updated graph stats: ({'Number of Nodes': 1561, 'Number of Edges': 4078, 'Average Degree': 5.224855861627162, 'Density': 0.0033492665779661294, 'Connected Components': 1, 'Number of Communities': 29}, False)


Processing batches...:  42%|████▏     | 106/251 [23:48:20<28:47:59, 715.03s/it]

Processed batch 105, updated graph stats: ({'Number of Nodes': 1566, 'Number of Edges': 4088, 'Average Degree': 5.2209450830140485, 'Density': 0.0033360671456958775, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  43%|████▎     | 107/251 [23:58:45<27:31:19, 688.05s/it]

Processed batch 106, updated graph stats: ({'Number of Nodes': 1576, 'Number of Edges': 4114, 'Average Degree': 5.220812182741117, 'Density': 0.003314801385867376, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  43%|████▎     | 108/251 [24:10:44<27:42:17, 697.46s/it]

Processed batch 107, updated graph stats: ({'Number of Nodes': 1581, 'Number of Edges': 4129, 'Average Degree': 5.223276407337129, 'Density': 0.0033058711438842584, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  43%|████▎     | 109/251 [24:23:01<27:58:27, 709.21s/it]

Processed batch 108, updated graph stats: ({'Number of Nodes': 1587, 'Number of Edges': 4144, 'Average Degree': 5.2224322621298045, 'Density': 0.003292832447748931, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  44%|████▍     | 110/251 [24:34:24<27:28:28, 701.48s/it]

Processed batch 109, updated graph stats: ({'Number of Nodes': 1600, 'Number of Edges': 4176, 'Average Degree': 5.22, 'Density': 0.0032645403377110694, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  44%|████▍     | 111/251 [24:45:25<26:48:10, 689.22s/it]

Processed batch 110, updated graph stats: ({'Number of Nodes': 1616, 'Number of Edges': 4213, 'Average Degree': 5.214108910891089, 'Density': 0.003228550409220489, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  45%|████▍     | 112/251 [24:58:40<27:50:23, 721.04s/it]

Processed batch 111, updated graph stats: ({'Number of Nodes': 1625, 'Number of Edges': 4237, 'Average Degree': 5.214769230769231, 'Density': 0.003211064797271694, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  45%|████▌     | 113/251 [25:15:21<30:51:17, 804.91s/it]

Processed batch 112, updated graph stats: ({'Number of Nodes': 1633, 'Number of Edges': 4255, 'Average Degree': 5.211267605633803, 'Density': 0.0031931786799226734, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  45%|████▌     | 114/251 [25:30:52<32:04:20, 842.78s/it]

Processed batch 113, updated graph stats: ({'Number of Nodes': 1640, 'Number of Edges': 4277, 'Average Degree': 5.215853658536585, 'Density': 0.0031823390229021266, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  46%|████▌     | 115/251 [25:45:45<32:24:26, 857.84s/it]

Processed batch 114, updated graph stats: ({'Number of Nodes': 1647, 'Number of Edges': 4303, 'Average Degree': 5.225258044930176, 'Density': 0.0031745188608324278, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  46%|████▌     | 116/251 [26:00:07<32:13:07, 859.16s/it]

Processed batch 115, updated graph stats: ({'Number of Nodes': 1657, 'Number of Edges': 4326, 'Average Degree': 5.221484610742305, 'Density': 0.0031530704171149186, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  47%|████▋     | 117/251 [26:12:22<30:35:39, 821.94s/it]

Processed batch 116, updated graph stats: ({'Number of Nodes': 1673, 'Number of Edges': 4369, 'Average Degree': 5.222952779438135, 'Density': 0.0031237755857883584, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  47%|████▋     | 118/251 [26:28:28<31:57:45, 865.16s/it]

Processed batch 117, updated graph stats: ({'Number of Nodes': 1677, 'Number of Edges': 4379, 'Average Degree': 5.22242098986285, 'Density': 0.0031160029772451373, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  47%|████▋     | 119/251 [26:37:06<27:53:49, 760.83s/it]

Processed batch 118, updated graph stats: ({'Number of Nodes': 1682, 'Number of Edges': 4393, 'Average Degree': 5.223543400713436, 'Density': 0.0031074023799604025, 'Connected Components': 1, 'Number of Communities': 31}, False)


Processing batches...:  48%|████▊     | 120/251 [26:49:53<27:45:03, 762.62s/it]

Processed batch 119, updated graph stats: ({'Number of Nodes': 1695, 'Number of Edges': 4430, 'Average Degree': 5.227138643067847, 'Density': 0.0030856780655654347, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  48%|████▊     | 121/251 [27:01:54<27:05:23, 750.18s/it]

Processed batch 120, updated graph stats: ({'Number of Nodes': 1695, 'Number of Edges': 4428, 'Average Degree': 5.2247787610619465, 'Density': 0.003084284982917324, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  49%|████▊     | 122/251 [27:15:34<27:37:49, 771.08s/it]

Processed batch 121, updated graph stats: ({'Number of Nodes': 1703, 'Number of Edges': 4447, 'Average Degree': 5.22254844392249, 'Density': 0.0030684773466054583, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  49%|████▉     | 123/251 [27:29:45<28:16:37, 795.29s/it]

Processed batch 122, updated graph stats: ({'Number of Nodes': 1707, 'Number of Edges': 4455, 'Average Degree': 5.219683655536028, 'Density': 0.003059603549552185, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  49%|████▉     | 124/251 [27:43:42<28:29:20, 807.56s/it]

Processed batch 123, updated graph stats: ({'Number of Nodes': 1712, 'Number of Edges': 4480, 'Average Degree': 5.233644859813084, 'Density': 0.0030588222441923344, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  50%|████▉     | 125/251 [27:56:01<27:32:54, 787.10s/it]

Processed batch 124, updated graph stats: ({'Number of Nodes': 1717, 'Number of Edges': 4494, 'Average Degree': 5.234711706464764, 'Density': 0.0030505312974736386, 'Connected Components': 1, 'Number of Communities': 35}, False)


Processing batches...:  50%|█████     | 126/251 [28:10:24<28:07:29, 810.00s/it]

Processed batch 125, updated graph stats: ({'Number of Nodes': 1720, 'Number of Edges': 4501, 'Average Degree': 5.2337209302325585, 'Density': 0.0030446311403330762, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  51%|█████     | 127/251 [28:23:05<27:23:30, 795.24s/it]

Processed batch 126, updated graph stats: ({'Number of Nodes': 1729, 'Number of Edges': 4520, 'Average Degree': 5.228455754771544, 'Density': 0.0030257267099372365, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  51%|█████     | 128/251 [28:35:20<26:33:06, 777.12s/it]

Processed batch 127, updated graph stats: ({'Number of Nodes': 1731, 'Number of Edges': 4526, 'Average Degree': 5.229347198151357, 'Density': 0.003022744045174195, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  51%|█████▏    | 129/251 [28:47:43<25:59:30, 766.97s/it]

Processed batch 128, updated graph stats: ({'Number of Nodes': 1737, 'Number of Edges': 4539, 'Average Degree': 5.226252158894646, 'Density': 0.0030105139164139667, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  52%|█████▏    | 130/251 [29:00:28<25:45:34, 766.40s/it]

Processed batch 129, updated graph stats: ({'Number of Nodes': 1744, 'Number of Edges': 4562, 'Average Degree': 5.231651376146789, 'Density': 0.003001521156710722, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  52%|█████▏    | 131/251 [29:12:28<25:04:45, 752.38s/it]

Processed batch 130, updated graph stats: ({'Number of Nodes': 1751, 'Number of Edges': 4582, 'Average Degree': 5.2335808109651625, 'Density': 0.002990617606265807, 'Connected Components': 1, 'Number of Communities': 32}, False)


Processing batches...:  53%|█████▎    | 132/251 [29:25:46<25:19:08, 765.95s/it]

Processed batch 131, updated graph stats: ({'Number of Nodes': 1759, 'Number of Edges': 4601, 'Average Degree': 5.231381466742468, 'Density': 0.0029757573758489573, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  53%|█████▎    | 133/251 [29:38:04<24:50:18, 757.78s/it]

Processed batch 132, updated graph stats: ({'Number of Nodes': 1769, 'Number of Edges': 4633, 'Average Degree': 5.237987563595252, 'Density': 0.0029626626490923368, 'Connected Components': 1, 'Number of Communities': 31}, False)


Processing batches...:  53%|█████▎    | 134/251 [29:49:12<23:45:08, 730.84s/it]

Processed batch 133, updated graph stats: ({'Number of Nodes': 1779, 'Number of Edges': 4657, 'Average Degree': 5.235525576166386, 'Density': 0.0029446150597111282, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  54%|█████▍    | 135/251 [30:02:17<24:03:58, 746.89s/it]

Processed batch 134, updated graph stats: ({'Number of Nodes': 1783, 'Number of Edges': 4674, 'Average Degree': 5.242849130678631, 'Density': 0.002942115112614271, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  54%|█████▍    | 136/251 [30:16:31<24:53:31, 779.23s/it]

Processed batch 135, updated graph stats: ({'Number of Nodes': 1795, 'Number of Edges': 4700, 'Average Degree': 5.236768802228412, 'Density': 0.002919046155088301, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  55%|█████▍    | 137/251 [30:29:15<24:31:26, 774.44s/it]

Processed batch 136, updated graph stats: ({'Number of Nodes': 1802, 'Number of Edges': 4719, 'Average Degree': 5.237513873473918, 'Density': 0.002908114310646262, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  55%|█████▍    | 138/251 [30:41:34<23:58:51, 764.00s/it]

Processed batch 137, updated graph stats: ({'Number of Nodes': 1815, 'Number of Edges': 4752, 'Average Degree': 5.236363636363636, 'Density': 0.00288663927032174, 'Connected Components': 1, 'Number of Communities': 25}, False)


Processing batches...:  55%|█████▌    | 139/251 [30:52:17<22:38:21, 727.69s/it]

Processed batch 138, updated graph stats: ({'Number of Nodes': 1817, 'Number of Edges': 4759, 'Average Degree': 5.238304898183819, 'Density': 0.0028845291289558476, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  56%|█████▌    | 140/251 [31:04:08<22:16:34, 722.48s/it]

Processed batch 139, updated graph stats: ({'Number of Nodes': 1822, 'Number of Edges': 4774, 'Average Degree': 5.2403951701427, 'Density': 0.0028777568205067, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  56%|█████▌    | 141/251 [31:14:11<20:59:11, 686.83s/it]

Processed batch 140, updated graph stats: ({'Number of Nodes': 1831, 'Number of Edges': 4797, 'Average Degree': 5.239759694156199, 'Density': 0.0028632566634733325, 'Connected Components': 1, 'Number of Communities': 42}, False)


Processing batches...:  57%|█████▋    | 142/251 [31:25:00<20:27:07, 675.49s/it]

Processed batch 141, updated graph stats: ({'Number of Nodes': 1838, 'Number of Edges': 4815, 'Average Degree': 5.239390642002176, 'Density': 0.002852145150790515, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  57%|█████▋    | 143/251 [31:35:20<19:45:59, 658.88s/it]

Processed batch 142, updated graph stats: ({'Number of Nodes': 1839, 'Number of Edges': 4817, 'Average Degree': 5.238716693855356, 'Density': 0.0028502267104762546, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  57%|█████▋    | 144/251 [31:45:00<18:52:28, 635.03s/it]

Processed batch 143, updated graph stats: ({'Number of Nodes': 1845, 'Number of Edges': 4829, 'Average Degree': 5.234688346883469, 'Density': 0.0028387680839932045, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  58%|█████▊    | 145/251 [31:55:59<18:54:45, 642.32s/it]

Processed batch 144, updated graph stats: ({'Number of Nodes': 1851, 'Number of Edges': 4844, 'Average Degree': 5.233927606699082, 'Density': 0.002829150057675179, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  58%|█████▊    | 146/251 [32:08:19<19:35:14, 671.57s/it]

Processed batch 145, updated graph stats: ({'Number of Nodes': 1852, 'Number of Edges': 4847, 'Average Degree': 5.234341252699784, 'Density': 0.002827845085197074, 'Connected Components': 1, 'Number of Communities': 43}, False)


Processing batches...:  59%|█████▊    | 147/251 [32:20:28<19:53:51, 688.76s/it]

Processed batch 146, updated graph stats: ({'Number of Nodes': 1859, 'Number of Edges': 4866, 'Average Degree': 5.235072619688005, 'Density': 0.002817584832986009, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  59%|█████▉    | 148/251 [32:32:39<20:04:04, 701.40s/it]

Processed batch 147, updated graph stats: ({'Number of Nodes': 1860, 'Number of Edges': 4869, 'Average Degree': 5.235483870967742, 'Density': 0.0028162904093425186, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  59%|█████▉    | 149/251 [32:46:08<20:47:13, 733.66s/it]

Processed batch 148, updated graph stats: ({'Number of Nodes': 1866, 'Number of Edges': 4887, 'Average Degree': 5.237942122186495, 'Density': 0.0028085480547916865, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  60%|█████▉    | 150/251 [32:57:12<20:00:17, 713.04s/it]

Processed batch 149, updated graph stats: ({'Number of Nodes': 1868, 'Number of Edges': 4894, 'Average Degree': 5.23982869379015, 'Density': 0.002806549916331093, 'Connected Components': 1, 'Number of Communities': 43}, False)


Processing batches...:  60%|██████    | 151/251 [33:10:17<20:24:14, 734.54s/it]

Processed batch 150, updated graph stats: ({'Number of Nodes': 1877, 'Number of Edges': 4914, 'Average Degree': 5.236014917421417, 'Density': 0.002791052727836576, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  61%|██████    | 152/251 [33:23:47<20:49:10, 757.08s/it]

Processed batch 151, updated graph stats: ({'Number of Nodes': 1896, 'Number of Edges': 4955, 'Average Degree': 5.226793248945148, 'Density': 0.002758202242187413, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  61%|██████    | 153/251 [33:38:11<21:28:57, 789.16s/it]

Processed batch 152, updated graph stats: ({'Number of Nodes': 1936, 'Number of Edges': 5053, 'Average Degree': 5.220041322314049, 'Density': 0.0026976957738057104, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  61%|██████▏   | 154/251 [33:54:57<23:00:48, 854.10s/it]

Processed batch 153, updated graph stats: ({'Number of Nodes': 1953, 'Number of Edges': 5095, 'Average Degree': 5.217613927291347, 'Density': 0.0026729579545549933, 'Connected Components': 1, 'Number of Communities': 44}, False)


Processing batches...:  62%|██████▏   | 155/251 [34:01:55<19:17:18, 723.32s/it]

Processed batch 154, updated graph stats: ({'Number of Nodes': 1955, 'Number of Edges': 5102, 'Average Degree': 5.219437340153453, 'Density': 0.002671155240610774, 'Connected Components': 1, 'Number of Communities': 44}, False)


Processing batches...:  62%|██████▏   | 156/251 [34:15:52<19:59:26, 757.54s/it]

Processed batch 155, updated graph stats: ({'Number of Nodes': 1970, 'Number of Edges': 5137, 'Average Degree': 5.215228426395939, 'Density': 0.002648668576127953, 'Connected Components': 1, 'Number of Communities': 43}, False)


Processing batches...:  63%|██████▎   | 157/251 [34:28:35<19:49:10, 759.05s/it]

Processed batch 156, updated graph stats: ({'Number of Nodes': 1971, 'Number of Edges': 5139, 'Average Degree': 5.2146118721461185, 'Density': 0.002647011102612243, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  63%|██████▎   | 158/251 [34:41:36<19:46:42, 765.62s/it]

Processed batch 157, updated graph stats: ({'Number of Nodes': 1975, 'Number of Edges': 5151, 'Average Degree': 5.216202531645569, 'Density': 0.0026424531568619907, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  63%|██████▎   | 159/251 [34:54:44<19:44:30, 772.50s/it]

Processed batch 158, updated graph stats: ({'Number of Nodes': 1981, 'Number of Edges': 5163, 'Average Degree': 5.212518929833418, 'Density': 0.0026325853180976856, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  64%|██████▎   | 160/251 [35:07:15<19:21:48, 766.03s/it]

Processed batch 159, updated graph stats: ({'Number of Nodes': 1988, 'Number of Edges': 5184, 'Average Degree': 5.2152917505030185, 'Density': 0.002624706467288887, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  64%|██████▍   | 161/251 [35:18:57<18:40:00, 746.68s/it]

Processed batch 160, updated graph stats: ({'Number of Nodes': 2004, 'Number of Edges': 5225, 'Average Degree': 5.214570858283433, 'Density': 0.002603380358603811, 'Connected Components': 1, 'Number of Communities': 45}, False)


Processing batches...:  65%|██████▍   | 162/251 [35:33:59<19:36:55, 793.43s/it]

Processed batch 161, updated graph stats: ({'Number of Nodes': 2008, 'Number of Edges': 5234, 'Average Degree': 5.213147410358566, 'Density': 0.002597482516371981, 'Connected Components': 1, 'Number of Communities': 42}, False)


Processing batches...:  65%|██████▍   | 163/251 [35:45:27<18:37:26, 761.89s/it]

Processed batch 162, updated graph stats: ({'Number of Nodes': 2009, 'Number of Edges': 5239, 'Average Degree': 5.215530114484818, 'Density': 0.00259737555502232, 'Connected Components': 1, 'Number of Communities': 44}, False)


Processing batches...:  65%|██████▌   | 164/251 [36:05:47<21:43:52, 899.22s/it]

Processed batch 163, updated graph stats: ({'Number of Nodes': 2023, 'Number of Edges': 5281, 'Average Degree': 5.2209589718240235, 'Density': 0.0025820766428407632, 'Connected Components': 1, 'Number of Communities': 42}, False)


Processing batches...:  66%|██████▌   | 165/251 [36:18:56<20:41:28, 866.15s/it]

Processed batch 164, updated graph stats: ({'Number of Nodes': 2026, 'Number of Edges': 5292, 'Average Degree': 5.224086870681145, 'Density': 0.002579795985521553, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  66%|██████▌   | 166/251 [36:31:29<19:38:58, 832.22s/it]

Processed batch 165, updated graph stats: ({'Number of Nodes': 2038, 'Number of Edges': 5316, 'Average Degree': 5.216879293424927, 'Density': 0.002561060036045619, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  67%|██████▋   | 167/251 [36:43:25<18:36:03, 797.19s/it]

Processed batch 166, updated graph stats: ({'Number of Nodes': 2046, 'Number of Edges': 5334, 'Average Degree': 5.214076246334311, 'Density': 0.0025496705361047977, 'Connected Components': 1, 'Number of Communities': 45}, False)


Processing batches...:  67%|██████▋   | 168/251 [36:58:33<19:09:06, 830.69s/it]

Processed batch 167, updated graph stats: ({'Number of Nodes': 2056, 'Number of Edges': 5357, 'Average Degree': 5.211089494163424, 'Density': 0.002535809972828917, 'Connected Components': 1, 'Number of Communities': 31}, False)


Processing batches...:  67%|██████▋   | 169/251 [37:11:25<18:31:13, 813.09s/it]

Processed batch 168, updated graph stats: ({'Number of Nodes': 2064, 'Number of Edges': 5381, 'Average Degree': 5.214147286821706, 'Density': 0.0025274586945330615, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  68%|██████▊   | 170/251 [37:27:00<19:06:47, 849.48s/it]

Processed batch 169, updated graph stats: ({'Number of Nodes': 2075, 'Number of Edges': 5415, 'Average Degree': 5.219277108433735, 'Density': 0.0025165270532467384, 'Connected Components': 1, 'Number of Communities': 32}, False)


Processing batches...:  68%|██████▊   | 171/251 [37:39:47<18:19:53, 824.92s/it]

Processed batch 170, updated graph stats: ({'Number of Nodes': 2082, 'Number of Edges': 5438, 'Average Degree': 5.223823246878002, 'Density': 0.002510246634732341, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  69%|██████▊   | 172/251 [37:54:58<18:39:47, 850.48s/it]

Processed batch 171, updated graph stats: ({'Number of Nodes': 2092, 'Number of Edges': 5462, 'Average Degree': 5.221797323135755, 'Density': 0.0024972727513800836, 'Connected Components': 1, 'Number of Communities': 42}, False)


Processing batches...:  69%|██████▉   | 173/251 [38:04:57<16:47:52, 775.29s/it]

Processed batch 172, updated graph stats: ({'Number of Nodes': 2096, 'Number of Edges': 5476, 'Average Degree': 5.2251908396946565, 'Density': 0.0024941245058208382, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  69%|██████▉   | 174/251 [38:19:50<17:20:02, 810.42s/it]

Processed batch 173, updated graph stats: ({'Number of Nodes': 2111, 'Number of Edges': 5507, 'Average Degree': 5.217432496447182, 'Density': 0.0024727168229607497, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  70%|██████▉   | 175/251 [38:32:16<16:42:09, 791.18s/it]

Processed batch 174, updated graph stats: ({'Number of Nodes': 2122, 'Number of Edges': 5534, 'Average Degree': 5.2158341187558905, 'Density': 0.002459139141327624, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  70%|███████   | 176/251 [38:46:33<16:53:46, 811.02s/it]

Processed batch 175, updated graph stats: ({'Number of Nodes': 2140, 'Number of Edges': 5582, 'Average Degree': 5.216822429906542, 'Density': 0.0024389071668567282, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  71%|███████   | 177/251 [39:00:03<16:39:36, 810.50s/it]

Processed batch 176, updated graph stats: ({'Number of Nodes': 2150, 'Number of Edges': 5615, 'Average Degree': 5.223255813953489, 'Density': 0.0024305517980239594, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  71%|███████   | 178/251 [39:13:36<16:27:14, 811.43s/it]

Processed batch 177, updated graph stats: ({'Number of Nodes': 2159, 'Number of Edges': 5656, 'Average Degree': 5.239462714219546, 'Density': 0.0024279252614548405, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  71%|███████▏  | 179/251 [39:25:17<15:33:48, 778.18s/it]

Processed batch 178, updated graph stats: ({'Number of Nodes': 2161, 'Number of Edges': 5669, 'Average Degree': 5.246645071726053, 'Density': 0.0024290023480213205, 'Connected Components': 1, 'Number of Communities': 42}, False)


Processing batches...:  72%|███████▏  | 180/251 [39:36:06<14:34:57, 739.41s/it]

Processed batch 179, updated graph stats: ({'Number of Nodes': 2166, 'Number of Edges': 5684, 'Average Degree': 5.248384118190212, 'Density': 0.0024241958975474423, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  72%|███████▏  | 181/251 [39:46:45<13:47:43, 709.49s/it]

Processed batch 180, updated graph stats: ({'Number of Nodes': 2172, 'Number of Edges': 5706, 'Average Degree': 5.254143646408839, 'Density': 0.0024201490771113957, 'Connected Components': 1, 'Number of Communities': 44}, False)


Processing batches...:  73%|███████▎  | 182/251 [40:00:17<14:11:01, 740.02s/it]

Processed batch 181, updated graph stats: ({'Number of Nodes': 2184, 'Number of Edges': 5738, 'Average Degree': 5.254578754578755, 'Density': 0.002407044779926136, 'Connected Components': 1, 'Number of Communities': 44}, False)


Processing batches...:  73%|███████▎  | 183/251 [40:10:06<13:07:19, 694.70s/it]

Processed batch 182, updated graph stats: ({'Number of Nodes': 2191, 'Number of Edges': 5772, 'Average Degree': 5.268827019625742, 'Density': 0.002405857086587097, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  73%|███████▎  | 184/251 [40:25:48<14:18:37, 768.91s/it]

Processed batch 183, updated graph stats: ({'Number of Nodes': 2201, 'Number of Edges': 5801, 'Average Degree': 5.271240345297592, 'Density': 0.002396018338771633, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  74%|███████▎  | 185/251 [40:37:51<13:50:52, 755.34s/it]

Processed batch 184, updated graph stats: ({'Number of Nodes': 2207, 'Number of Edges': 5821, 'Average Degree': 5.275033982782057, 'Density': 0.002391221207063489, 'Connected Components': 1, 'Number of Communities': 29}, False)


Processing batches...:  74%|███████▍  | 186/251 [40:50:30<13:39:26, 756.41s/it]

Processed batch 185, updated graph stats: ({'Number of Nodes': 2211, 'Number of Edges': 5830, 'Average Degree': 5.27363184079602, 'Density': 0.0023862587514914117, 'Connected Components': 1, 'Number of Communities': 44}, False)


Processing batches...:  75%|███████▍  | 187/251 [41:02:28<13:14:31, 744.87s/it]

Processed batch 186, updated graph stats: ({'Number of Nodes': 2219, 'Number of Edges': 5857, 'Average Degree': 5.278954484001803, 'Density': 0.002380051615870966, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  75%|███████▍  | 188/251 [41:14:44<12:59:17, 742.18s/it]

Processed batch 187, updated graph stats: ({'Number of Nodes': 2224, 'Number of Edges': 5878, 'Average Degree': 5.2859712230215825, 'Density': 0.0023778548011792995, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  75%|███████▌  | 189/251 [41:27:09<12:47:46, 743.02s/it]

Processed batch 188, updated graph stats: ({'Number of Nodes': 2229, 'Number of Edges': 5905, 'Average Degree': 5.2983400628084345, 'Density': 0.0023780700461438213, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  76%|███████▌  | 190/251 [41:39:24<12:33:02, 740.69s/it]

Processed batch 189, updated graph stats: ({'Number of Nodes': 2244, 'Number of Edges': 5939, 'Average Degree': 5.293226381461675, 'Density': 0.002359886928872793, 'Connected Components': 1, 'Number of Communities': 40}, False)


Processing batches...:  76%|███████▌  | 191/251 [41:54:54<13:17:25, 797.43s/it]

Processed batch 190, updated graph stats: ({'Number of Nodes': 2254, 'Number of Edges': 5968, 'Average Degree': 5.29547471162378, 'Density': 0.002350410435696307, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  76%|███████▋  | 192/251 [42:06:01<12:25:31, 758.15s/it]

Processed batch 191, updated graph stats: ({'Number of Nodes': 2261, 'Number of Edges': 5984, 'Average Degree': 5.293233082706767, 'Density': 0.002342138532171136, 'Connected Components': 1, 'Number of Communities': 42}, False)


Processing batches...:  77%|███████▋  | 193/251 [42:19:13<12:22:39, 768.27s/it]

Processed batch 192, updated graph stats: ({'Number of Nodes': 2271, 'Number of Edges': 6013, 'Average Degree': 5.2954645530603255, 'Density': 0.002332803767867985, 'Connected Components': 1, 'Number of Communities': 43}, False)


Processing batches...:  77%|███████▋  | 194/251 [42:37:43<13:47:26, 870.99s/it]

Processed batch 193, updated graph stats: ({'Number of Nodes': 2277, 'Number of Edges': 6036, 'Average Degree': 5.301712779973649, 'Density': 0.0023293992882133787, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  78%|███████▊  | 195/251 [42:53:51<14:00:06, 900.11s/it]

Processed batch 194, updated graph stats: ({'Number of Nodes': 2306, 'Number of Edges': 6112, 'Average Degree': 5.3009540329575024, 'Density': 0.002299763137942517, 'Connected Components': 1, 'Number of Communities': 45}, False)


Processing batches...:  78%|███████▊  | 196/251 [43:05:01<12:41:37, 830.86s/it]

Processed batch 195, updated graph stats: ({'Number of Nodes': 2317, 'Number of Edges': 6143, 'Average Degree': 5.302546396201985, 'Density': 0.0022895278049231372, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  78%|███████▊  | 197/251 [43:15:55<11:40:01, 777.80s/it]

Processed batch 196, updated graph stats: ({'Number of Nodes': 2318, 'Number of Edges': 6147, 'Average Degree': 5.3037100949094045, 'Density': 0.0022890419054421253, 'Connected Components': 1, 'Number of Communities': 42}, False)


Processing batches...:  79%|███████▉  | 198/251 [43:28:27<11:20:14, 770.08s/it]

Processed batch 197, updated graph stats: ({'Number of Nodes': 2324, 'Number of Edges': 6165, 'Average Degree': 5.305507745266781, 'Density': 0.002283903463308989, 'Connected Components': 1, 'Number of Communities': 31}, False)


Processing batches...:  79%|███████▉  | 199/251 [43:41:53<11:16:43, 780.83s/it]

Processed batch 198, updated graph stats: ({'Number of Nodes': 2327, 'Number of Edges': 6174, 'Average Degree': 5.306403094112591, 'Density': 0.002281342688784433, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  80%|███████▉  | 200/251 [43:54:37<10:59:27, 775.83s/it]

Processed batch 199, updated graph stats: ({'Number of Nodes': 2331, 'Number of Edges': 6184, 'Average Degree': 5.305877305877305, 'Density': 0.002277200560462363, 'Connected Components': 1, 'Number of Communities': 43}, False)


Processing batches...:  80%|████████  | 201/251 [44:11:25<11:44:39, 845.59s/it]

Processed batch 200, updated graph stats: ({'Number of Nodes': 2341, 'Number of Edges': 6214, 'Average Degree': 5.308842375053396, 'Density': 0.0022687360577151264, 'Connected Components': 1, 'Number of Communities': 45}, False)


Processing batches...:  80%|████████  | 202/251 [44:25:25<11:29:17, 844.02s/it]

Processed batch 201, updated graph stats: ({'Number of Nodes': 2362, 'Number of Edges': 6263, 'Average Degree': 5.303132938187976, 'Density': 0.002246138474454882, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  81%|████████  | 203/251 [44:36:34<10:33:10, 791.46s/it]

Processed batch 202, updated graph stats: ({'Number of Nodes': 2385, 'Number of Edges': 6316, 'Average Degree': 5.29643605870021, 'Density': 0.0022216594205957253, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  81%|████████▏ | 204/251 [44:51:20<10:42:12, 819.83s/it]

Processed batch 203, updated graph stats: ({'Number of Nodes': 2395, 'Number of Edges': 6338, 'Average Degree': 5.292693110647182, 'Density': 0.002210815835692223, 'Connected Components': 1, 'Number of Communities': 48}, False)


Processing batches...:  82%|████████▏ | 205/251 [45:10:58<11:50:48, 927.15s/it]

Processed batch 204, updated graph stats: ({'Number of Nodes': 2406, 'Number of Edges': 6370, 'Average Degree': 5.2950955943474645, 'Density': 0.0022017029498326256, 'Connected Components': 1, 'Number of Communities': 43}, False)


Processing batches...:  82%|████████▏ | 206/251 [45:47:37<16:21:31, 1308.70s/it]

Processed batch 205, updated graph stats: ({'Number of Nodes': 2421, 'Number of Edges': 6422, 'Average Degree': 5.305245766212309, 'Density': 0.0021922503166166567, 'Connected Components': 1, 'Number of Communities': 52}, False)


Processing batches...:  82%|████████▏ | 207/251 [46:20:11<18:21:48, 1502.47s/it]

Processed batch 206, updated graph stats: ({'Number of Nodes': 2426, 'Number of Edges': 6443, 'Average Degree': 5.311624072547403, 'Density': 0.002190360442287589, 'Connected Components': 1, 'Number of Communities': 48}, False)


Processing batches...:  83%|████████▎ | 208/251 [46:51:51<19:22:13, 1621.72s/it]

Processed batch 207, updated graph stats: ({'Number of Nodes': 2441, 'Number of Edges': 6497, 'Average Degree': 5.323228185170012, 'Density': 0.0021816508955614803, 'Connected Components': 1, 'Number of Communities': 43}, False)


Processing batches...:  83%|████████▎ | 209/251 [47:25:14<20:15:11, 1735.98s/it]

Processed batch 208, updated graph stats: ({'Number of Nodes': 2452, 'Number of Edges': 6533, 'Average Degree': 5.328711256117455, 'Density': 0.0021740967997215237, 'Connected Components': 1, 'Number of Communities': 45}, False)


Processing batches...:  84%|████████▎ | 210/251 [47:53:57<19:43:29, 1731.95s/it]

Processed batch 209, updated graph stats: ({'Number of Nodes': 2464, 'Number of Edges': 6592, 'Average Degree': 5.35064935064935, 'Density': 0.0021724114294150833, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  84%|████████▍ | 211/251 [48:23:24<19:21:38, 1742.46s/it]

Processed batch 210, updated graph stats: ({'Number of Nodes': 2478, 'Number of Edges': 6634, 'Average Degree': 5.354317998385795, 'Density': 0.0021616140486014515, 'Connected Components': 1, 'Number of Communities': 41}, False)


Processing batches...:  84%|████████▍ | 212/251 [48:58:57<20:08:49, 1859.73s/it]

Processed batch 211, updated graph stats: ({'Number of Nodes': 2485, 'Number of Edges': 6677, 'Average Degree': 5.373843058350101, 'Density': 0.0021633828737319246, 'Connected Components': 1, 'Number of Communities': 46}, False)


Processing batches...:  85%|████████▍ | 213/251 [49:39:23<21:25:20, 2029.49s/it]

Processed batch 212, updated graph stats: ({'Number of Nodes': 2501, 'Number of Edges': 6790, 'Average Degree': 5.429828068772491, 'Density': 0.0021719312275089962, 'Connected Components': 1, 'Number of Communities': 42}, False)


Processing batches...:  85%|████████▌ | 214/251 [52:06:25<41:48:14, 4067.42s/it]

Processed batch 213, updated graph stats: ({'Number of Nodes': 2552, 'Number of Edges': 7025, 'Average Degree': 5.505485893416928, 'Density': 0.00215816773556132, 'Connected Components': 1, 'Number of Communities': 37}, False)


Processing batches...:  86%|████████▌ | 215/251 [54:41:59<56:28:29, 5647.50s/it]

Processed batch 214, updated graph stats: ({'Number of Nodes': 2602, 'Number of Edges': 7218, 'Average Degree': 5.54803996925442, 'Density': 0.0021330411262031604, 'Connected Components': 1, 'Number of Communities': 38}, False)


Processing batches...:  86%|████████▌ | 216/251 [57:23:30<66:41:55, 6860.44s/it]

Processed batch 215, updated graph stats: ({'Number of Nodes': 2678, 'Number of Edges': 7542, 'Average Degree': 5.632561613144137, 'Density': 0.0021040573825715867, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  86%|████████▋ | 217/251 [58:40:25<58:25:47, 6186.68s/it]

Processed batch 216, updated graph stats: ({'Number of Nodes': 2701, 'Number of Edges': 7657, 'Average Degree': 5.669751943724546, 'Density': 0.0020999081273053877, 'Connected Components': 1, 'Number of Communities': 25}, False)


Processing batches...:  87%|████████▋ | 218/251 [59:27:16<47:25:44, 5174.09s/it]

Processed batch 217, updated graph stats: ({'Number of Nodes': 2719, 'Number of Edges': 7723, 'Average Degree': 5.6807649871276205, 'Density': 0.0020900533433140618, 'Connected Components': 1, 'Number of Communities': 34}, False)


Processing batches...:  87%|████████▋ | 219/251 [59:59:59<37:25:47, 4210.86s/it]

Processed batch 218, updated graph stats: ({'Number of Nodes': 2743, 'Number of Edges': 7807, 'Average Degree': 5.6923076923076925, 'Density': 0.002075969253212142, 'Connected Components': 1, 'Number of Communities': 31}, False)


Processing batches...:  88%|████████▊ | 220/251 [60:24:59<29:15:24, 3397.57s/it]

Processed batch 219, updated graph stats: ({'Number of Nodes': 2761, 'Number of Edges': 7875, 'Average Degree': 5.704454907642159, 'Density': 0.0020668314882761442, 'Connected Components': 1, 'Number of Communities': 32}, False)


Processing batches...:  88%|████████▊ | 221/251 [60:53:26<24:05:10, 2890.34s/it]

Processed batch 220, updated graph stats: ({'Number of Nodes': 2776, 'Number of Edges': 7935, 'Average Degree': 5.71685878962536, 'Density': 0.002060129293558688, 'Connected Components': 1, 'Number of Communities': 36}, False)


Processing batches...:  88%|████████▊ | 222/251 [61:19:09<20:01:37, 2486.12s/it]

Processed batch 221, updated graph stats: ({'Number of Nodes': 2779, 'Number of Edges': 7949, 'Average Degree': 5.72076286433969, 'Density': 0.0020593098863713787, 'Connected Components': 1, 'Number of Communities': 39}, False)


Processing batches...:  89%|████████▉ | 223/251 [61:51:18<18:02:15, 2319.11s/it]

Processed batch 222, updated graph stats: ({'Number of Nodes': 2802, 'Number of Edges': 8033, 'Average Degree': 5.733761598857958, 'Density': 0.0020470409135515738, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  89%|████████▉ | 224/251 [62:27:21<17:02:29, 2272.20s/it]

Processed batch 223, updated graph stats: ({'Number of Nodes': 2809, 'Number of Edges': 8079, 'Average Degree': 5.752224991100036, 'Density': 0.0020485131734686735, 'Connected Components': 1, 'Number of Communities': 33}, False)


Processing batches...:  90%|████████▉ | 225/251 [63:19:58<18:19:37, 2537.60s/it]

Processed batch 224, updated graph stats: ({'Number of Nodes': 2822, 'Number of Edges': 8115, 'Average Degree': 5.7512402551382, 'Density': 0.00203872394723084, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  90%|█████████ | 226/251 [64:04:24<17:53:24, 2576.19s/it]

Processed batch 225, updated graph stats: ({'Number of Nodes': 2834, 'Number of Edges': 8153, 'Average Degree': 5.753705010585745, 'Density': 0.0020309583517775305, 'Connected Components': 1, 'Number of Communities': 28}, False)


Processing batches...:  90%|█████████ | 227/251 [64:47:02<17:08:14, 2570.61s/it]

Processed batch 226, updated graph stats: ({'Number of Nodes': 2858, 'Number of Edges': 8256, 'Average Degree': 5.777466759972008, 'Density': 0.0020222144767140385, 'Connected Components': 1, 'Number of Communities': 29}, False)


Processing batches...:  91%|█████████ | 228/251 [65:50:39<18:48:45, 2944.60s/it]

Processed batch 227, updated graph stats: ({'Number of Nodes': 2863, 'Number of Edges': 8304, 'Average Degree': 5.800908138316451, 'Density': 0.0020268721657290185, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  91%|█████████ | 229/251 [66:42:03<18:15:02, 2986.47s/it]

Processed batch 228, updated graph stats: ({'Number of Nodes': 2875, 'Number of Edges': 8371, 'Average Degree': 5.823304347826087, 'Density': 0.0020262019303500653, 'Connected Components': 1, 'Number of Communities': 31}, False)


Processing batches...:  92%|█████████▏| 230/251 [67:37:52<18:03:19, 3095.20s/it]

Processed batch 229, updated graph stats: ({'Number of Nodes': 2901, 'Number of Edges': 8469, 'Average Degree': 5.838676318510858, 'Density': 0.0020133366615554685, 'Connected Components': 1, 'Number of Communities': 28}, False)


Processing batches...:  92%|█████████▏| 231/251 [68:14:40<15:43:00, 2829.05s/it]

Processed batch 230, updated graph stats: ({'Number of Nodes': 2910, 'Number of Edges': 8516, 'Average Degree': 5.852920962199312, 'Density': 0.0020120044558952603, 'Connected Components': 1, 'Number of Communities': 28}, False)


Processing batches...:  92%|█████████▏| 232/251 [69:08:44<15:35:17, 2953.56s/it]

Processed batch 231, updated graph stats: ({'Number of Nodes': 2924, 'Number of Edges': 8567, 'Average Degree': 5.859781121751026, 'Density': 0.0020047147183547814, 'Connected Components': 1, 'Number of Communities': 30}, False)


Processing batches...:  93%|█████████▎| 233/251 [69:54:32<14:27:33, 2891.87s/it]

Processed batch 232, updated graph stats: ({'Number of Nodes': 2946, 'Number of Edges': 8692, 'Average Degree': 5.900882552613713, 'Density': 0.0020036952640454034, 'Connected Components': 1, 'Number of Communities': 28}, False)


Processing batches...:  93%|█████████▎| 234/251 [70:28:18<12:25:45, 2632.09s/it]

Processed batch 233, updated graph stats: ({'Number of Nodes': 2952, 'Number of Edges': 8722, 'Average Degree': 5.909214092140921, 'Density': 0.002002444626276151, 'Connected Components': 1, 'Number of Communities': 24}, False)


Processing batches...:  94%|█████████▎| 235/251 [71:25:28<12:45:42, 2871.44s/it]

Processed batch 234, updated graph stats: ({'Number of Nodes': 2959, 'Number of Edges': 8748, 'Average Degree': 5.912808381209868, 'Density': 0.0019989210213691238, 'Connected Components': 1, 'Number of Communities': 23}, False)


Processing batches...:  94%|█████████▍| 236/251 [72:36:39<13:42:48, 3291.21s/it]

Processed batch 235, updated graph stats: ({'Number of Nodes': 2992, 'Number of Edges': 8872, 'Average Degree': 5.93048128342246, 'Density': 0.0019827754207363624, 'Connected Components': 1, 'Number of Communities': 27}, False)


Processing batches...:  94%|█████████▍| 237/251 [73:45:00<13:44:40, 3534.34s/it]

Processed batch 236, updated graph stats: ({'Number of Nodes': 3030, 'Number of Edges': 9004, 'Average Degree': 5.9432343234323435, 'Density': 0.0019621110344774984, 'Connected Components': 1, 'Number of Communities': 29}, False)
Error occurred: (ProtocolError('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)), '(Request ID: f01d5712-3e2b-477d-8fa5-d8db6ad4e513)')


Traceback (most recent call last):
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connection.py", line 516, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\http\client.py", line 1428, in getresponse
    response.begin()
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\http\client.py", line 331, in begin
    version, status, reason = self._read_status()
       

Processed batch 237, updated graph stats: None
Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3C2F29CD0>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 8e8d8a18-b3c2-4837-86f2-08f640bd7cdd)')
Processed batch 238, updated graph stats: None


Traceback (most recent call last):
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\util\connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\socket.py", line 976, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
socket.gaierror: [Errno 11001] getaddrinfo failed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\jonathan

Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3C2F28170>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 405eec9e-dab8-43a8-93b9-b7a3dddbb46c)')
Processed batch 239, updated graph stats: None
Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3C2F29F70>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 389a6e91-34ed-472f-a50f-7ffb24e79c3d)')
Processed batch 240, updated graph stats: None


Traceback (most recent call last):
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\util\connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\socket.py", line 976, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
socket.gaierror: [Errno 11001] getaddrinfo failed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\jonathan

Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3BECF7B30>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 1c6666df-1fd1-4188-8964-06974738b56c)')
Processed batch 241, updated graph stats: None
Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3C2F2B230>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 4d77dcf6-6dee-4a27-abc1-344c68224f5c)')
Processed batch 242, updated graph stats: None
Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<

Traceback (most recent call last):
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\util\connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\socket.py", line 976, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
socket.gaierror: [Errno 11001] getaddrinfo failed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\jonathan

Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3A7A8F170>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 8d6256ff-a4ce-46db-93f2-d36ca0845972)')
Processed batch 248, updated graph stats: None
Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3A7A8F590>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 88eb552c-4d66-44c7-a0d6-5d25a03e9cfc)')
Processed batch 249, updated graph stats: None
Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<




### Redload and continue buidling graph
Processed batch 236, updated graph stats: ({'Number of Nodes': 3030, 'Number of Edges': 9004,

In [22]:
# Initialize variables
cur_batch = 236

g_graphml_existing = "C:\\Users\\jonathan.kasprisin\\github\\Learning\\KG_ilp\\data_w_refine\\final_augmented_graph.graphml"
failed_batches_path = "C:\\Users\\jonathan.kasprisin\\github\\Learning\\KG_ilp\\data_w_refine\\failed_batches.pkl"
node_embd_path = "C:\\Users\\jonathan.kasprisin\\github\\Learning\\KG_ilp\\data_w_refine\\embeddings.pkl"
new_dir = "C:\\Users\\jonathan.kasprisin\\github\\Learning\\KG_ilp\\data_w_refine2"

G_existing = None
failed_batches = []
existing_node_embeddings = {}

if os.path.exists(failed_batches_path):
    with open(failed_batches_path, 'rb') as f:
        failed_batches = pickle.load(f)

#existing embeddings is a NoneType so just regenerating them
# if os.path.exists(node_embd_path):
#     with open(node_embd_path, 'rb') as f:
#         existing_node_embeddings = pickle.load(f)
# else:
#     print("No existing node embeddings file found.")


# Process documents in chunks
chunk_size = 1500
batch_size = 10

# Split all_docs into batches of size batch_size
doc_batches = [all_docs[i:i + batch_size] for i in range(0, len(all_docs), batch_size)]

#load existing graph and embeddings
G_existing = nx.read_graphml(g_graphml_existing)

#if existing_node_embeddings is nonetype use regenerate_embeddings
if not existing_node_embeddings:
    for node in tqdm(G_existing.nodes(), desc="Generating embeddings for existing nodes..."):
        existing_node_embeddings[node]= embd.embed_query(node)
print(f"Existing graph loaded with {len(G_existing.nodes())} nodes and {len(G_existing.edges())} edges.")
print(f"Num failed_batches: {len(failed_batches)}")
print(f"Num existing_node_embeddings: {len(existing_node_embeddings)}")


Generating embeddings for existing nodes...: 100%|██████████| 3030/3030 [22:55<00:00,  2.20it/s]

Existing graph loaded with 3030 nodes and 9004 edges.
Num failed_batches: 0
Num existing_node_embeddings: 3030





In [23]:
output_directory = 'data_w_refine2'

start_batch = cur_batch + 1
for batch_idx, doc_batch in tqdm(enumerate(doc_batches[start_batch:]), total=len(doc_batches[start_batch:])):
    try:
        G_existing, existing_node_embeddings, res = add_new_subgraph_from_docs(
            input_docs=doc_batch,
            llm=llm,
            embd=embd,
            data_dir_output=f"./{output_directory}/",
            verbatim=False,
            size_threshold=10,
            chunk_size=chunk_size,
            do_Louvain_on_new_graph=True,
            include_contextual_proximity=False,
            repeat_refine=2,
            similarity_threshold=0.95,
            do_simplify_graph=True,
            return_only_giant_component=False,
            save_common_graph=False,
            G_exisiting=G_existing,
            graph_GraphML_exisiting=None,
            existing_node_embeddings=existing_node_embeddings
        )

        print(f"Processed batch {batch_idx}, updated graph stats:", res)
        with open(f'{output_directory}/embeddings.pkl', 'wb') as f:
            pickle.dump(existing_node_embeddings, f)
        with open(f'{output_directory}/failed_batches.pkl', 'wb') as f:
            pickle.dump(failed_batches, f)
    except Exception as e:
        # Log the failed batch index
        failed_batches.append(batch_idx)
        print(f"Error processing batch {batch_idx} with batch size {batch_size}: {e}")

# Final graph statistics and saving
print("Final graph statistics:", res if 'res' in locals() else "No successful batches")
print("Failed batch indices:", failed_batches)

  7%|▋         | 1/14 [1:05:07<14:06:32, 3907.11s/it]

Processed batch 0, updated graph stats: ({'Number of Nodes': 3049, 'Number of Edges': 9074, 'Average Degree': 5.952115447687767, 'Density': 0.001952793782049792, 'Connected Components': 1, 'Number of Communities': 29}, False)


 14%|█▍        | 2/14 [2:06:23<12:34:17, 3771.48s/it]

Processed batch 1, updated graph stats: ({'Number of Nodes': 3075, 'Number of Edges': 9158, 'Average Degree': 5.956422764227642, 'Density': 0.001937678192657008, 'Connected Components': 1, 'Number of Communities': 25}, False)


 21%|██▏       | 3/14 [3:06:05<11:15:34, 3684.97s/it]

Processed batch 2, updated graph stats: ({'Number of Nodes': 3096, 'Number of Edges': 9229, 'Average Degree': 5.9618863049095605, 'Density': 0.0019262960597446076, 'Connected Components': 1, 'Number of Communities': 25}, False)


 29%|██▊       | 4/14 [4:09:31<10:22:07, 3732.73s/it]

Processed batch 3, updated graph stats: ({'Number of Nodes': 3119, 'Number of Edges': 9313, 'Average Degree': 5.971785828791279, 'Density': 0.0019152616513121487, 'Connected Components': 1, 'Number of Communities': 22}, False)


 36%|███▌      | 5/14 [5:14:26<9:28:40, 3791.14s/it] 

Processed batch 4, updated graph stats: ({'Number of Nodes': 3132, 'Number of Edges': 9362, 'Average Degree': 5.978288633461047, 'Density': 0.0019093863409329439, 'Connected Components': 1, 'Number of Communities': 27}, False)


 43%|████▎     | 6/14 [5:59:52<7:37:13, 3429.21s/it]

Processed batch 5, updated graph stats: ({'Number of Nodes': 3141, 'Number of Edges': 9397, 'Average Degree': 5.983444762814391, 'Density': 0.001905555656947258, 'Connected Components': 1, 'Number of Communities': 27}, False)


 50%|█████     | 7/14 [7:06:01<7:00:38, 3605.46s/it]

Processed batch 6, updated graph stats: ({'Number of Nodes': 3167, 'Number of Edges': 9504, 'Average Degree': 6.001894537417114, 'Density': 0.0018957342190199348, 'Connected Components': 1, 'Number of Communities': 27}, False)


 57%|█████▋    | 8/14 [8:08:59<6:06:02, 3660.44s/it]

Processed batch 7, updated graph stats: ({'Number of Nodes': 3205, 'Number of Edges': 9667, 'Average Degree': 6.032449297971919, 'Density': 0.0018827869219637699, 'Connected Components': 1, 'Number of Communities': 24}, False)


 64%|██████▍   | 9/14 [9:09:52<5:04:50, 3658.06s/it]

Processed batch 8, updated graph stats: ({'Number of Nodes': 3221, 'Number of Edges': 9741, 'Average Degree': 6.0484321639242475, 'Density': 0.0018783950819640518, 'Connected Components': 1, 'Number of Communities': 28}, False)


 71%|███████▏  | 10/14 [10:22:16<4:17:59, 3869.77s/it]

Processed batch 9, updated graph stats: ({'Number of Nodes': 3253, 'Number of Edges': 9847, 'Average Degree': 6.054103904088533, 'Density': 0.0018616555670628947, 'Connected Components': 1, 'Number of Communities': 27}, False)


 79%|███████▊  | 11/14 [11:14:02<3:01:48, 3636.00s/it]

Processed batch 10, updated graph stats: ({'Number of Nodes': 3281, 'Number of Edges': 9940, 'Average Degree': 6.059128314538251, 'Density': 0.0018472952178470276, 'Connected Components': 1, 'Number of Communities': 25}, False)


Traceback (most recent call last):
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connection.py", line 516, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\http\client.py", line 1428, in getresponse
    response.begin()
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\http\client.py", line 331, in begin
    version, status, reason = self._read_status()
       

Error occurred: (ProtocolError('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)), '(Request ID: ec1dd24c-5cf3-4677-8a42-92651c77f5a7)')
Processed batch 11, updated graph stats: None


Traceback (most recent call last):
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\site-packages\urllib3\util\connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\jonathan.kasprisin\AppData\Local\miniconda3\envs\kg_ilp\Lib\socket.py", line 976, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
socket.gaierror: [Errno 11001] getaddrinfo failed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\jonathan

Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3A86D1100>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: da911d63-2c35-42b4-852d-0fbb8cdd6929)')
Processed batch 12, updated graph stats: None
Error occurred: (MaxRetryError('HTTPConnectionPool(host=\'trac-crimson.ern.nps.edu\', port=8090): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x000001C3A86D3320>: Failed to resolve \'trac-crimson.ern.nps.edu\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 2c92612e-acaf-4aa7-b30f-fdaf8f572d60)')
Processed batch 13, updated graph stats: None
Final graph statistics: None
Failed batch indices: []





In [24]:
# Initialize variables
cur_batch = cur_batch+ 10

g_graphml_existing = "C:\\Users\\jonathan.kasprisin\\github\\Learning\\KG_ilp\\data_w_refine2\\final_augmented_graph.graphml"
failed_batches_path = "C:\\Users\\jonathan.kasprisin\\github\\Learning\\KG_ilp\\data_w_refine2\\failed_batches.pkl"
node_embd_path = "C:\\Users\\jonathan.kasprisin\\github\\Learning\\KG_ilp\\data_w_refine2\\embeddings.pkl"
new_dir = "C:\\Users\\jonathan.kasprisin\\github\\Learning\\KG_ilp\\data_w_refine2"

G_existing = None
failed_batches = []
existing_node_embeddings = {}

if os.path.exists(failed_batches_path):
    with open(failed_batches_path, 'rb') as f:
        failed_batches = pickle.load(f)

#existing embeddings is a NoneType so just regenerating them
# if os.path.exists(node_embd_path):
#     with open(node_embd_path, 'rb') as f:
#         existing_node_embeddings = pickle.load(f)
# else:
#     print("No existing node embeddings file found.")


# Process documents in chunks
chunk_size = 1500
batch_size = 10

# Split all_docs into batches of size batch_size
doc_batches = [all_docs[i:i + batch_size] for i in range(0, len(all_docs), batch_size)]

#load existing graph and embeddings
G_existing = nx.read_graphml(g_graphml_existing)

#if existing_node_embeddings is nonetype use regenerate_embeddings
if not existing_node_embeddings:
    for node in tqdm(G_existing.nodes(), desc="Generating embeddings for existing nodes..."):
        existing_node_embeddings[node]= embd.embed_query(node)
print(f"Existing graph loaded with {len(G_existing.nodes())} nodes and {len(G_existing.edges())} edges.")
print(f"Num failed_batches: {len(failed_batches)}")
print(f"Num existing_node_embeddings: {len(existing_node_embeddings)}")


Generating embeddings for existing nodes...: 100%|██████████| 3281/3281 [53:46<00:00,  1.02it/s] 

Existing graph loaded with 3281 nodes and 9940 edges.
Num failed_batches: 0
Num existing_node_embeddings: 3281





In [25]:
output_directory = 'data_w_refine2'

start_batch = cur_batch + 1
for batch_idx, doc_batch in tqdm(enumerate(doc_batches[start_batch:]), total=len(doc_batches[start_batch:])):
    try:
        G_existing, existing_node_embeddings, res = add_new_subgraph_from_docs(
            input_docs=doc_batch,
            llm=llm,
            embd=embd,
            data_dir_output=f"./{output_directory}/",
            verbatim=False,
            size_threshold=10,
            chunk_size=chunk_size,
            do_Louvain_on_new_graph=True,
            include_contextual_proximity=False,
            repeat_refine=2,
            similarity_threshold=0.95,
            do_simplify_graph=True,
            return_only_giant_component=False,
            save_common_graph=False,
            G_exisiting=G_existing,
            graph_GraphML_exisiting=None,
            existing_node_embeddings=existing_node_embeddings
        )

        print(f"Processed batch {batch_idx}, updated graph stats:", res)
        with open(f'{output_directory}/embeddings.pkl', 'wb') as f:
            pickle.dump(existing_node_embeddings, f)
        with open(f'{output_directory}/failed_batches.pkl', 'wb') as f:
            pickle.dump(failed_batches, f)
    except Exception as e:
        # Log the failed batch index
        failed_batches.append(batch_idx)
        print(f"Error processing batch {batch_idx} with batch size {batch_size}: {e}")

# Final graph statistics and saving
print("Final graph statistics:", res if 'res' in locals() else "No successful batches")
print("Failed batch indices:", failed_batches)

  0%|          | 0/4 [00:00<?, ?it/s]

Processed batch 0, updated graph stats: ({'Number of Nodes': 3307, 'Number of Edges': 10027, 'Average Degree': 6.064106440882975, 'Density': 0.0018342729706240096, 'Connected Components': 1, 'Number of Communities': 28}, False)


 50%|█████     | 2/4 [1:54:57<1:56:23, 3491.73s/it]

Processed batch 1, updated graph stats: ({'Number of Nodes': 3339, 'Number of Edges': 10109, 'Average Degree': 6.055106319257263, 'Density': 0.0018139923065480115, 'Connected Components': 1, 'Number of Communities': 24}, False)


 75%|███████▌  | 3/4 [3:55:20<1:26:35, 5195.48s/it]

Processed batch 2, updated graph stats: ({'Number of Nodes': 3427, 'Number of Edges': 10500, 'Average Degree': 6.127808578932011, 'Density': 0.0017886189664133132, 'Connected Components': 1, 'Number of Communities': 23}, False)


100%|██████████| 4/4 [4:07:36<00:00, 3434.96s/it]  

Processed batch 3, updated graph stats: ({'Number of Nodes': 3431, 'Number of Edges': 10537, 'Average Degree': 6.142232585252113, 'Density': 0.0017907383630472633, 'Connected Components': 1, 'Number of Communities': 24}, False)


100%|██████████| 4/4 [4:07:36<00:00, 3714.25s/it]

Final graph statistics: ({'Number of Nodes': 3431, 'Number of Edges': 10537, 'Average Degree': 6.142232585252113, 'Density': 0.0017907383630472633, 'Connected Components': 1, 'Number of Communities': 24}, False)
Failed batch indices: []



