In [None]:
import json
import os
from typing import List
from pydantic import BaseModel, Field
from itext2kg import itext2kg_star
from itext2kg.documents_distiller import DocumentsDistiller, Article
import asyncio
from itext2kg import iText2KG_Star
from itext2kg.logging_config import setup_logging, get_logger
from itext2kg import itext2kg_star
from langchain_ollama import ChatOllama, OllamaEmbeddings
import time
from datetime import datetime

In [None]:
# Create models for distillation

In [None]:
from typing import List, Optional
from pydantic import BaseModel, Field, field_validator
class ContentSource(BaseModel):
    # One author/speaker/presenter credited for a piece of content.
    # Plain comments are used here instead of a class docstring, because Pydantic
    # would emit a docstring as the model's schema description.
    name: str = Field(description="Name of the author/speaker/presenter")
    # In Pydantic v2, Optional[...] only allows None as a value; the key is still
    # required unless a default is given. default=None lets a source that omits
    # "role"/"affiliation" validate instead of rejecting the whole document.
    role: Optional[str] = Field(default=None, description="Role or position")
    affiliation: Optional[str] = Field(default=None, description="Organization or company affiliation")
class Content(BaseModel):
    # Structured summary of one piece of content (article/video).
    # All fields are required; descriptions are kept verbatim.
    title: str = Field(
        description="Title of the content (article/video)",
    )
    sources: List[ContentSource] = Field(
        description="Authors/speakers involved",
    )
    summary: str = Field(
        description="Brief summary of the content",
    )
    key_concepts: List[str] = Field(
        description="Main concepts or topics covered",
    )
    insights: str = Field(
        description="Key insights and findings",
    )
    challenges: str = Field(
        description="Challenges or limitations discussed",
    )
    solutions: str = Field(
        description="Proposed solutions or approaches",
    )
    practical_applications: str = Field(
        description="Practical applications or implementations mentioned",
    )

In [None]:
# Create function for creating semantic blocks from batches (output from .py script)

In [None]:
def _iter_semantic_blocks(fields):
    """Yield one "<field> - <text>" string per usable entry of a dumped Content dict.

    Empty values are skipped. The "sources" list is reduced to the comma-joined
    names of its entries, "key_concepts" is comma-joined as is, strings are used
    directly, and every other value type is ignored. Curly braces in the final
    text are replaced by square brackets.
    """
    for field_name, field_value in fields.items():
        if not field_value:
            continue

        if isinstance(field_value, list):
            if field_name == "sources":
                text = ", ".join(f"{src['name']}" for src in field_value if src.get('name'))
                if not text:
                    continue
            elif field_name == "key_concepts":
                text = ", ".join(field_value)
            else:
                continue
        elif isinstance(field_value, str):
            text = field_value
        else:
            continue

        yield f"{field_name} - {text}".replace("{", "[").replace("}", "]")


def load_and_create_semantic_blocks(results_dir="distilled_results"):
    """Read every ``*.json`` file in ``results_dir`` and flatten it into semantic blocks.

    Files are visited in plain string-sort order of their names (so ``batch_10``
    comes before ``batch_2``). Each file is parsed, validated through ``Content``
    and converted into "<field> - <text>" strings. A file that fails at any step
    is reported on stdout and skipped; blocks already produced from it are kept.

    Args:
        results_dir: Directory containing the distilled JSON files.

    Returns:
        A flat list of block strings from all files, in processing order.
    """
    json_names = sorted(name for name in os.listdir(results_dir) if name.endswith('.json'))
    print(f"Found {len(json_names)} JSON files")

    collected = []
    for json_name in json_names:
        json_path = os.path.join(results_dir, json_name)

        try:
            with open(json_path, 'r', encoding='utf-8') as handle:
                payload = json.load(handle)

            dumped = Content(**payload).model_dump()
            # Append block by block so a failure midway keeps the earlier blocks.
            for block in _iter_semantic_blocks(dumped):
                collected.append(block)

            print(f"✓ Processed {json_name}")
        except Exception as err:
            print(f"✗ Error with {json_name}: {err}")

    print(f"\nTotal semantic blocks created: {len(collected)}")
    return collected

In [None]:
# Create semantic blocks

In [None]:
# Load every distilled batch JSON under vnp_itext2kg/distilled_results (a path relative
# to the working directory) and flatten the batches into one list of text blocks.
semantic_blocks = load_and_create_semantic_blocks("vnp_itext2kg/distilled_results")

Found 28 JSON files
✓ Processed batch_1.json
✓ Processed batch_10.json
✓ Processed batch_11.json
✓ Processed batch_12.json
✓ Processed batch_13.json
✓ Processed batch_14.json
✓ Processed batch_15.json
✓ Processed batch_17.json
✓ Processed batch_18.json
✓ Processed batch_19.json
✓ Processed batch_2.json
✓ Processed batch_21.json
✓ Processed batch_22.json
✓ Processed batch_23.json
✓ Processed batch_24.json
✓ Processed batch_25.json
✓ Processed batch_26.json
✓ Processed batch_27.json
✓ Processed batch_28.json
✓ Processed batch_29.json
✓ Processed batch_3.json
✓ Processed batch_30.json
✓ Processed batch_31.json
✓ Processed batch_4.json
✓ Processed batch_5.json
✓ Processed batch_6.json
✓ Processed batch_8.json
✓ Processed batch_9.json

Total semantic blocks created: 223


In [None]:
# Preview only the first few blocks; displaying the whole list (223 long strings)
# floods the cell output and bloats the saved notebook.
semantic_blocks[:5]

['title - Scrum/Agile Software Development Tools for Project Management Project Initiation in Scrum/Agile Software Development Scrum/Agile Software Development Directives Extraction',
 'sources - Scrum.org, Agile Alliance, YouTube Transcript',
 "summary - In this response, we will discuss the Scrum/Agile software development tools that are essential for project management. We will cover the key concepts, methodologies, best practices, and insights discussed in a YouTube transcript about Scrum/Agile software development. In this context, initiation tools are being discussed as part of a feasibility study for a project. The goal is to determine if the project is worthwhile and whether it's a good idea to start with the business case. This involves summarizing the current issue or problem, outlining solutions, and recommending the project. In this specific case, Billy will be working on putting together the business case, which includes creating a project charter, obtaining resources, and

In [None]:
# Set up connection to Ollama using langchain

In [None]:
# Ollama-served models: a deterministic (temperature 0) chat model and an embedding model.
llm_text = ChatOllama(model="gemma2:2b", temperature=0)
embeddings = OllamaEmbeddings(model="nomic-embed-text:latest")

In [None]:
# Graph builder wired to the chat and embedding models defined above.
# NOTE(review): this rebinds `itext2kg_star`, a name the import cell also imports from
# `itext2kg`; from here on it refers to this iText2KG_Star instance.
itext2kg_star = iText2KG_Star(llm_model=llm_text, embeddings_model=embeddings)

In [None]:
# First batch: blocks 0-39 (the slice end is exclusive).
sb = semantic_blocks[0:40]

In [None]:
# Create graph

In [None]:
kg = await itext2kg_star.build_graph(
    sections=sb,
    ent_threshold=0.8,
    rel_threshold=0.7,
)

[2025-08-26 13:46:16] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-26 13:46:26] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-26 13:46:35] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 3
[2025-08-26 13:47:03] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 4
[2025-08-26 13:47:37] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Relation was matched --- [Relationship_between_Product_Owner_and_Development_Team] --merged --> [Software_Development_Tool_Relationship]
[2025-08-26 13:47:37] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Relation was matched --- [Relationship_between_Key_Requirements_and_Development_Team] --merged --> [Software_Development_Tool_Relationship]
[2025-08-26 13:47:37] [    INFO] [i

In [None]:
# Ran into blocking here, had to remove several blocks to continue (later automated in .py script)

In [None]:
# NOTE(review): the recorded output is 17 although `sb` was last assigned
# semantic_blocks[0:40]; presumably blocks were removed in cells that are no longer
# in the notebook - confirm before relying on Restart & Run All.
len(sb)

17

In [None]:
# Total number of semantic blocks available for batching.
len(semantic_blocks)

223

In [None]:
# Timing of the first build_graph run: started 13:46:16, finished 13:58:13 (11:58 min).

In [None]:
# Number of entities in the current knowledge graph.
len(kg.entities)

429

In [None]:
# Second batch: blocks 41-46 and 49-79 (slice ends are exclusive).
# NOTE(review): indices 40, 47 and 48 are in no batch (the first batch was [0:40]);
# presumably these were blocks that stalled extraction - confirm the omission is intended.
sb = semantic_blocks[41:47] + semantic_blocks[49:80]

In [None]:
# Drop the block at position 16. `del` removes exactly that position, whereas
# sb.remove(sb[16]) removed the first element *equal* to it, which is a different
# position when the same text occurs earlier in the batch.
# Not idempotent: re-create `sb` before re-running this cell.
del sb[16]

In [None]:
# Drop the block at position 33 of the already-shortened batch. `del` removes exactly
# that position, whereas sb.remove(sb[33]) removed the first element *equal* to it.
# Not idempotent: re-create `sb` before re-running this cell.
del sb[33]

In [None]:
# Continue to create graph

In [None]:
kg = await itext2kg_star.build_graph(
    sections=sb,
    ent_threshold=0.9,
    rel_threshold=0.8,
    existing_knowledge_graph=kg.model_copy(),
)

[2025-08-26 14:56:57] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-26 14:57:07] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-26 14:57:27] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched --- [methodology:Scrum] --merged--> [software development methodology:Scrum]
[2025-08-26 14:57:27] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Relation was matched --- [Feature_of_Scrum_Agile_Software_Development] --merged --> [Relationship__Scrum_and_Agile_are_related_methodologies_in_software_development_]
[2025-08-26 14:57:27] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Relation was matched --- [Scrum_Methodology_in_Agile_Software_Development] --merged --> [Relationship__Scrum_and_Agile_are_related_methodologies_in_software_development_]
[2025-08-26 14:57:27] [    INFO] [itext2kg.itext2kg.itext2kg_star] ---

In [None]:
# Next batch: blocks 101-119 (the slice end is exclusive).
# NOTE(review): the previous batch stopped at index 79, so blocks 80-100 are never
# passed to build_graph in this notebook - confirm whether a cell is missing.
sb = semantic_blocks[101:120]

In [None]:
# Drop the block at position 9. `del` removes exactly that position, whereas
# sb.remove(sb[9]) removed the first element *equal* to it.
# Not idempotent: re-create `sb` before re-running this cell.
del sb[9]

In [None]:
# Continue to create graph

In [None]:
kg = await itext2kg_star.build_graph(
    sections=sb,
    ent_threshold=0.9,
    rel_threshold=0.8,
    existing_knowledge_graph=kg.model_copy(),
)

[2025-08-26 15:46:29] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-26 15:46:42] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-26 15:47:10] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 3
[2025-08-26 15:47:38] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 4
[2025-08-26 15:47:48] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 5
[2025-08-26 15:47:56] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 6
[2025-08-26 15:48:11] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched --- [software development process:Scrum_Agile_software_development] --merged--> [software development

In [None]:
# Attempt to store in Neo4j

In [None]:
import os
from getpass import getpass

from itext2kg.graph_integration import Neo4jStorage

# Connection settings are read from the environment so that no credential is stored in
# the notebook. URI and user fall back to the local defaults used so far; the password
# is taken from NEO4J_PASSWORD or, if that is unset or empty, prompted for interactively.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# Note: Graph visualization remains synchronous
graph_integrator = Neo4jStorage(uri=URI, username=USERNAME, password=PASSWORD)
graph_integrator.visualize_graph(knowledge_graph=kg)

CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input '80_20_Rule': expected a node label/relationship type name, '$', '%' or '(' (line 1, column 10 (offset: 9))
"MERGE (n:80_20_Rule {name: "rule"}) SET n.embeddings = "0.0499981502,0.06644653959999999,-0.18358426,-0.030401406800000003,0.022782035200000002,0.00766889764,0.025469878600000002,0.00608047044,0.019255195200000002,-0.01572532,-0.021883343,0.028402123600000002,0.031376094,-0.00644968686,0.0332855606,-0.0166330877,-0.0187204554,-0.045099414399999996,-0.0109418124,0.01653050192,-0.0199832836,-0.0362484106,-0.039525116,-0.01325837042,0.141187064,0.0243714856,-0.0014968228000000038,-0.006360986400000001,-0.061427711600000004,-0.002482877799999998,0.00948941428,-0.0020669404,0.0361590642,-0.033189496799999996,-0.0246137606,-0.0565259652,0.07293151640000001,0.008264540800000001,0.011205588200000002,-0.0236812164,0.0310420824,0.0322139372,0.00046596460000000034,0.002924427784,-0.0007954043999999999,-0.0140901176,0.09085309800000001,0.0299561584,0.0018165182599999999,-0.0053895652799999996,-0.03266922,-0.0714799176,-0.005430412399999999,-0.0478928708,0.0187362646,0.01518883828,-0.0011971900000000042,-0.011679648799999996,0.0386715158,-0.024000000799999997,0.0724295598,0.011454851359999999,-0.020915914372,0.08470124879999999,0.025761633900000004,-0.0056151375999999985,0.0025848906000000013,0.040311608,0.040224814600000006,0.00581072306,0.02423563452,-0.0238524546,0.0290768608,-0.0350358468,-0.010432536580000002,-0.00961765228,-0.059299753600000005,-0.0056358905600000005,0.00986466124,0.0383815942,-0.0004489992000000012,-0.0010400746200000003,0.076333952,0.021194247399999998,0.009849156999999999,-0.0262090512,0.0231315188,-0.006882545399999999,-0.011427724399999999,0.0512864042,-0.0012513668000000001,-0.0121940144,0.06597251,0.025747568800000002,-0.025004291999999997,0.0035058097800000003,0.032431104599999996,0.019869523599999998,-0.0544838666,-0.031078912400000002,-0.011342398079999998,0.00197453862,0.010876877519999998,-0.0264174272,0.0024246592199999996,0.07492304359999999,0.0296604764,0.035401302600000004,-0.0070503612,-0.0494698548,-0.029118798200000004,0.0229668572,0.00
08438657999999995,-0.09752655199999999,-0.0250115298,0.0020881549,0.06452304880000001,-0.0338564064,0.0299115744,0.0508290304,-0.0146301434,-0.0218732134,0.030889279199999998,0.0603178682,-0.014247599780000002,0.013580807628000001,-0.05425431,0.029190135800000003,0.0223622864,-0.028158031800000004,-0.0147010586,-0.0037858512399999997,-0.048236871199999996,0.00831194898,-0.0438860782,0.005936849000000002,0.026978003200000003,-0.0344718304,0.0160033786,-0.009968802799999999,0.0124188072,-5.759700000000038e-05,0.00176215968,-0.0386444368,-0.0068939112,-0.0692265982,0.082488924,-0.030963376800000005,-0.0001850838052,0.0033472316399999996,-0.0163123084,7.065149999999965e-05,-0.004640727799999999,0.0399166572,-0.0075532459,-0.047470658,0.016430324,0.0199212536,0.0020251407999999993,-0.020180059200000003,0.013721184999999999,0.01110679028,-0.08240835799999999,0.0197851688,-0.0547491126,-0.05914543339999999,-0.031173300799999998,0.09912009199999999,-0.0005597742000000001,0.0214436292,-0.049868281800000004,-0.0361760078,-0.051987994,-0.0288564224,0.030549476399999997,-0.0154058744,-0.029122161039999997,-0.023528423,0.0414179148,-0.039667973999999995,-0.0033529632399999995,-0.047178394799999995,0.04491399,0.0379725348,-0.0018529699999999994,-0.0341364398,0.011920872380000001,-0.0446967344,-0.031921308759999996,-0.02265838996,-0.052945054,0.018387916615999998,-0.038860632,-0.0308127436,-0.005311617400000001,0.014490495980000002,0.045870442799999994,0.0150501894,0.0054877295999999996,-0.01219334696,-0.0218322912,-0.006382044400000001,-0.00839772408,-0.0193676814,-0.01085743948,0.034294754999999996,-0.020110578400000002,0.076623988,0.0100523964,-0.0132916644,0.0442086168,-0.003299965400000002,-0.0281630624,0.032523613800000004,-0.020671477359999996,-0.039106656000000004,-0.0106884047,-0.0014853423279999997,-0.0048138443999999996,0.0201426336,0.027099695800000004,0.034308387600000004,0.004747021800000004,-0.024231173,0.0304326284,-0.0246681488,-0.04239361,0.012686705199999999,-0.
029116066,0.00477347096,0.00196486508,-0.096056104,0.0116581193,0.0020839665800000003,-0.0019412188000000018,-0.0016871224000000002,0.024369009999999997,0.0614425108,0.058490059999999996,-0.010476826219999999,-0.010032846,0.0405348308,-0.006351332,-0.0173210242,-0.00169045118,0.0092304506,-0.0043688706,-0.01330237548,0.0155578106,0.0712531412,0.01301673,-0.000980481200000001,0.014602475440000002,0.0465962236,0.012392810680000001,-0.031183362,-0.0646252578,0.01015187084,-0.014710409399999999,-0.0225749888,0.043322457,-0.0198832208,0.0271549388,-0.047569751800000004,-0.033807036,-0.057668096,-0.03962223200000001,0.0221189954,-0.0101450306,-0.00252840588,0.008401786800000002,0.0428268804,-0.0312219254,0.04062735420000001,0.0124603644,-0.00560101036,0.018111551400000002,-0.0308203192,-0.0353908996,0.051120812,0.01555279404,-0.0449057832,-0.0626262816,-0.0003920914000000008,-0.0191762654,-0.008987678999999998,0.0332903812,0.0002159529000000009,-0.0480616688,0.0144739662,0.0188981218,0.0196165128,-0.010954979240000001,0.060339798,0.0366415054,0.0346205676,0.064398354,0.028526730799999997,0.012125199000000001,0.02859969252,0.0104142918,-0.014249211720000002,-0.0211657418,0.01432474772,0.0210961628,-0.01808205098,0.067628409,-0.0303124272,0.0433349088,-0.02838061,-0.0134831272,0.023700772600000003,-0.00686706326,0.0258626416,-0.02575172708,0.056175304,-0.00295806446,0.0431413248,0.050466835599999996,-0.0368401608,0.0349087456,-0.009072547980000002,-0.0025135786799999997,-0.013306337600000002,0.010789440800000001,0.0574392018,-0.039160072399999996,0.02883627708,-0.0188689362,-0.010061552799999998,-0.0107423644,0.0514357222,0.02183087328,-0.02428747916,-0.0207646994,0.037630159600000004,0.0330989564,-0.0025818924000000016,-0.02100283188,0.0484577862,0.0627408914,-0.0038214658,0.027786743000000003,-0.041026098,0.0243309146,0.004778050600000001,-0.01863248702,0.046677667,0.0020267602,0.0368141316,-0.040832008999999996,0.00014416839999999978,-0.0316124808,0.010014480999999999,0.
011738226348000001,0.008760383799999999,0.0210876148,0.039179832000000005,0.0034793828399999997,-0.0080678852,0.01076068812,-0.004029409,-0.0106902951,-0.035959606,0.0277893118,0.0663042696,0.0296872752,0.0099302574,0.05923316199999999,0.023116180400000002,-0.044615639,-0.0271603256,-0.0114495054,0.0163464134,-0.010960409,0.004024995260000001,-0.019493463,-0.028410018199999998,0.0030445262400000004,0.039794745,0.027920391000000003,0.0099918896,-0.0275488592,-0.026111681600000002,0.0085477074,-0.0178753774,-0.0180905828,-0.0371399218,0.0385930612,0.0241174854,-0.0301199556,-0.0392163356,-0.0173647212,0.018982363,0.0434397652,-0.0372300644,0.0404730284,-0.01069021024,-0.0123831513,0.0116964932,-0.0310882564,-0.0074179642,-0.00013620899999999998,-0.0158551232,0.011520423,0.0471926736,-0.0078839475,-0.0286075718,0.045532928,0.0263820128,0.009145963,0.0209737238,-0.0275901,-0.0395849708,2.8012820000000195e-05,0.012970309710000003,0.06162744199999999,0.06545477999999999,0.01053591362,-0.0202863534,0.0409427116,0.0155190146,0.00017915580000000013,0.019681187599999997,-0.024055156199999997,-0.024857091919999998,0.0319335296,-0.0009330952000000002,-0.021622695,-0.0872708484,-0.001590505199999999,0.016045103800000002,-0.0202287898,-0.0289648206,-0.0445606024,-0.017221047,-0.0032603517999999984,0.0231986488,0.00745848574,0.067347844,0.0300591722,-0.083965646,-0.0114915912,0.00953920722,0.025775964600000003,0.131969794,0.07624654,-0.0492853924,0.0016525158000000022,0.008634506600000001,0.026595636600000003,0.0098826185,0.0173034666,0.0017035447999999998,0.073812156,-0.049052778799999995,-0.0108891212,0.0235918875,0.0334575028,0.014509355880000002,0.015086133679999999,-0.013678860599999999,-0.008346003780000001,0.027851584000000002,-0.0109623464,-0.0360163802,0.01205127308,-0.00904363128,0.014677946119999999,0.097608222,-0.022300158,0.00456632268,0.0350294052,-0.023714772400000004,-0.005108647400000001,-0.04924165679999999,-0.0156940182,-0.0177049389,0.024001064599999997,0.00507
9691002,-0.02044203712,-0.038019544,-0.013067442696,-0.0570922592,0.012819956600000001,0.01139529208,0.0333433344,0.008838064319999998,0.059987732200000005,-0.0546907678,0.004867642400000001,0.028256358000000002,-0.014272882399999999,0.015805699599999997,-0.0415589736,-0.024555124799999998,-0.06291852,0.010304096799999999,0.0101325065,-0.012673075920000002,0.01357075806,0.038062168,0.003891698628,-0.00276602318,-0.008577899799999998,-0.0295111308,-0.0156778832628,-0.077839588,-0.0704734548,-0.0022283733999999998,-0.017554790200000003,0.0166669524,0.11210144799999999,-0.0012843062000000008,0.021632489919999998,-0.0385272352,0.0127496892,-0.0032870441999999995,-0.0509307972,0.048954772800000004,0.0267187056,-0.05694003,-0.057148856799999995,-0.0369642856,-0.0500588808,-0.014426553080000001,-0.0260847508,-0.037796902800000004,0.0262991962,-0.0084283174,0.038815868,-0.0104383036,-0.0577367828,0.0293208118,0.0009786926000000008,-0.018660376399999998,-0.049140272,0.0364039446,0.02335093656,0.004821683,0.03891491800000001,0.016383165,-0.00305249834,-0.0340792932,-0.00099456568,-0.002212754000000001,-0.0771795396,0.0013673636600000003,-0.0014489778000000009,-0.00535773162,0.0314108348,-0.008552553199999997,-0.016106783399999998,-0.009486496,0.024686482200000002,-0.0339630898,-0.027322345799999998,0.0238099868,-0.011252907780000002,0.024161574800000002,0.041326326999999996,0.039654748000000004,0.0455023292,0.009838842200000001,-0.014600506,-0.02669797464,-0.011364078560000001,-0.00692603822,0.0140884828,0.004074495439999999,0.0269911704,-0.0429601762,0.0080790221,-0.017559371,-0.0501494728,0.015456214599999999,-0.047186631,-0.0251504396,-0.017939191931999998,-0.034537251000000005,0.009263994000000001,0.015233456440000001,-0.0326873008,0.006817922160000001,0.002186326800000001,0.014472085959999999,0.0078061724,-0.0221181128,0.02799538128,-0.0232054316,0.0037021184280000004,-0.015969140200000002,-0.0018022030599999998,0.004577303999999999,0.019176424999999997,0.0258894842,0.00
534578656,-0.079141429,-0.0425822168,-0.0184905452,0.010047277399999999,-0.0147209877588,0.0392178236,-0.0525201442,0.0014302873999999998,0.0765042688,-0.015619566520000003,0.0016620536000000012,-0.00274610086,0.0171299408,0.027570241399999997,-0.0324351032,0.014400368400000002,-0.0266594992,0.00887335696,-0.026742938,0.0477007382,0.0024398123999999992,-0.044114230000000004,-0.013509883200000001,0.0011760888000000025,-0.0394958904,0.0300058236,-0.02031434892,0.06725027959999999,0.00526739424,-0.0438080056,-0.0222875062,0.00046423867999999995,0.0312304808,-0.011609196919999999,-0.009431896799999999,-0.0743371988,-0.0592297504,-0.0451501638,0.0028045309399999997,-0.0231060508,0.0267769504,0.0351576856,0.0483086706,0.038439862000000005,0.008561049999999999,0.00847332144,0.0174759716,0.01478895488,-0.0367084756,-0.003849089000000001,0.05953916619999999,0.0433958172,-0.0313237344,0.09879590699999999,0.06741121039999999,0.007031321180000001,0.0328495458,-0.01371333004,-0.0280354592,0.0268853354,-0.009720692,-0.052738568,-0.0419604144,0.017847161879999997,0.0183191999,-0.0209070334,-0.00786278346,-0.006175628000000002,-0.021341008095999998,-0.0021071963999999993,0.0009158300000000003,-0.085749036,-0.0435275496,0.037339662,0.0016444979160000004,-0.013939327118,0.00881797288,0.0426462768,0.037822787,0.0011475407999999993,0.01573384912,0.0122583406,-0.0284100496,5.373020000000103e-05,-0.004081936639999999,-0.0210236054,-0.0095805339,0.0303349132,-0.0355832954,0.0170040818,-0.062017533799999996,0.02928371952,0.009000435400000003,-0.01162610496,-0.043409528999999995,0.00285102152,-0.034846063999999996,-0.011293345199999999,0.00852147924,0.0101758728,0.022859498800000003,-0.020305051,0.0460560364,0.009677020000000001,0.038733584,0.022901877599999995,0.0243939928,-0.055218686,-0.0322165106,-0.0194839578,-0.032210373,-0.0202481392,-0.0378237988,0.0327066728,0.00587258184,0.0012937077199999996,0.033531163600000004,0.06438347180000001,0.047337891800000004,0.00670301824,-0.0341541944
00000004,0.030447639,0.014998518879999999,0.0327501722,0.002139097799999998,-0.040802082,-0.0482360912,-0.027736616,-0.011246922080000001,0.0621580986,-0.02159697852,0.0300561842,0.0048182089600000005,-0.0121516058,-0.036622517,-0.0333530468,0.010604625199999999,-0.030315138,0.0011956547999999977,-0.0237356732,0.010215945580000002,-0.079984298,0.0196707204,-0.0185215432,0.0300505634,0.008574349451999998,-0.004832724599999999,-0.00738452352,-0.029049982199999998,-0.036828206,0.025512748,0.0388858638,-0.012271645000000001,-0.0110927152,-0.01052391584,-0.066821328,-0.00069097276,0.006213025799999998,-0.0024144679999999995,-0.00751069512,-0.0003830789199999996,0.07985621000000001,0.030953111999999998,0.042441343000000006,-0.014464289679999999,0.022819404,-0.0299352814,0.0238050328,-0.0496697932,-0.036073240199999995,-0.027422020999999998""
          ^}

In [None]:
# Save to Neo4j (Unstructured V0.1)

In [None]:
# Preprocessing function (Neo4j does not support entities starting with a number)
def sanitize_node_labels(kg):
    """Prefix entity labels that start with a digit with ``N_``.

    Neo4j rejected a label such as ``80_20_Rule`` with a CypherSyntaxError, so
    digit-leading labels are rewritten before the graph is stored.

    Args:
        kg: Graph object exposing an iterable ``entities`` whose items carry a
            string ``label`` attribute.

    Returns:
        The same ``kg`` object; labels are modified in place.
    """
    for node in kg.entities:
        # Empty/missing labels are left untouched (indexing [0] would raise on them).
        if node.label and node.label[0].isdigit():
            node.label = "N_" + node.label
    # Nodes are intentionally not printed: each repr contains the full embedding
    # vector, and dumping every node exceeded Jupyter's IOPub data rate limit.
    return kg

# Apply sanitization before visualization
# sanitize_node_labels rewrites labels on `kg` itself and returns that same object,
# so `kg_sanitized` and `kg` refer to one and the same graph.
kg_sanitized = sanitize_node_labels(kg)
graph_integrator.visualize_graph(knowledge_graph=kg_sanitized)

label='Project_approach' name='project approach' properties=EntityProperties(embeddings=array([ 2.00312084e-02,  1.86238598e-02, -1.54179206e-01, -4.01706382e-02,
        1.29347814e-02, -1.63441010e-02,  2.58935140e-02, -3.35554564e-02,
       -3.22125628e-02,  3.70242578e-02,  3.72991220e-02, -8.49881136e-03,
        7.70653704e-02,  4.46910258e-02, -2.01025742e-02, -2.22439560e-02,
        1.12706904e-02, -6.26680160e-02, -4.54972334e-02,  2.21353700e-02,
        2.44593980e-02, -3.70186102e-02,  2.88806856e-02, -1.62740032e-02,
        1.03475774e-01, -5.36636037e-03, -1.40473782e-02, -1.12636489e-02,
       -4.53472092e-03, -5.04384020e-03,  1.27913800e-02, -4.10469700e-02,
       -1.60941062e-03, -7.22077616e-04, -2.93573764e-02, -7.45512720e-02,
        3.39461332e-02,  2.63852712e-02, -3.57755408e-02, -5.29985280e-02,
       -3.86579160e-02, -1.17407898e-02, -1.16858118e-03, -5.49015006e-02,
        1.22921772e-02, -4.43401016e-02,  3.52799586e-02, -9.74585520e-03,
        3.08

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [None]:
# Next batch: blocks 121-139 (the slice end is exclusive).
# NOTE(review): the previous batch was [101:120], i.e. it stopped at index 119, so
# block 120 is in no batch - confirm whether this start should be 120.
sb = semantic_blocks[121:140]

In [None]:
# Drop the block at position 5. `del` removes exactly that position, whereas
# sb.remove(sb[5]) removed the first element *equal* to it.
# Not idempotent: re-create `sb` before re-running this cell.
del sb[5]

In [None]:
# Continue to create graph

In [None]:
kg = await itext2kg_star.build_graph(
    sections=sb,
    ent_threshold=0.9,
    rel_threshold=0.8,
    existing_knowledge_graph=kg.model_copy(),
)

[2025-08-26 16:06:13] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-26 16:06:21] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-26 16:06:41] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 3
[2025-08-26 16:07:03] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 4
[2025-08-26 16:07:22] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched --- [scrum agile software development:Scrum_Agile_Software_Development] --merged--> [scrum/agile software development:Scrum_Agile_software_development]
[2025-08-26 16:07:22] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Relation was matched --- [relationship_2__Insights_and_Fish_Bone_Diagrams] --merged --> [Relationship_between_Opportunity_Analysis_and_Fish_B

In [None]:
# Next batch: blocks 141-159 (the slice end is exclusive).
# NOTE(review): the previous batch was [121:140], i.e. it stopped at index 139, so
# block 140 is in no batch - confirm whether this start should be 140.
sb = semantic_blocks[141:160]

In [None]:
# Drop the block at position 12. `del` removes exactly that position, whereas
# sb.remove(sb[12]) removed the first element *equal* to it.
# Not idempotent: re-create `sb` before re-running this cell.
del sb[12]

In [None]:
# Continue to create graph

In [None]:
kg = await itext2kg_star.build_graph(
    sections=sb,
    ent_threshold=0.9,
    rel_threshold=0.8,
    existing_knowledge_graph=kg.model_copy(),
)

[2025-08-26 16:20:09] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-26 16:20:25] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-26 16:20:50] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 3
[2025-08-26 16:21:04] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 4
[2025-08-26 16:21:14] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 5
[2025-08-26 16:21:19] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 6
[2025-08-26 16:21:42] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 7
[2025-08-26 16:21:54] [    INFO] [itext2kg.itext2kg.graph_matc

In [None]:
# Next batch: blocks 161-179 (the slice end is exclusive).
# NOTE(review): the previous batch was [141:160], i.e. it stopped at index 159, so
# block 160 is in no batch - confirm whether this start should be 160.
sb = semantic_blocks[161:180]

In [None]:
# Continue to create graph

In [None]:
kg = await itext2kg_star.build_graph(
    sections=sb,
    ent_threshold=0.9,
    rel_threshold=0.8,
    existing_knowledge_graph=kg.model_copy(),
)

[2025-08-26 16:28:22] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-26 16:28:38] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-26 16:29:00] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched --- [scrum/agile:Agile_development] --merged--> [scrum/agile software development:Scrum_Agile_Software_Development]
[2025-08-26 16:29:00] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched --- [stakeholder matrix:key_concepts] --merged--> [stakeholder matrix:Stakeholder_Matrix]
[2025-08-26 16:29:00] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Relation was matched --- [Relationship_5__Stakeholder_Matrix_and_Success_in_Agile_Project] --merged --> [Relationship__Stakeholder_Matrix_to_Project_Impact_and_Influence]
[2025-08-26 16:29:00] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Relation was 

In [None]:
# Next batch: blocks 181-199 (the slice end is exclusive).
# NOTE(review): the previous batch was [161:180], i.e. it stopped at index 179, so
# block 180 is in no batch - confirm whether this start should be 180.
sb = semantic_blocks[181:200]

In [None]:
# Drop the block at position 12. `del` removes exactly that position, whereas
# sb.remove(sb[12]) removed the first element *equal* to it.
# Not idempotent: re-create `sb` before re-running this cell.
del sb[12]

In [None]:
# Continue to create graph

In [None]:
kg = await itext2kg_star.build_graph(
    sections=sb,
    ent_threshold=0.9,
    rel_threshold=0.8,
    existing_knowledge_graph=kg.model_copy(),
)

[2025-08-26 16:46:54] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-26 16:47:09] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-26 16:47:23] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 3
[2025-08-26 16:47:30] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 4
[2025-08-26 16:47:38] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 5
[2025-08-26 16:48:06] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 6
[2025-08-26 16:48:56] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched --- [stakeholder engagement matrix:Stakeholder_Engagement_Matrix] --merged--> [stakeholderengagementm

In [None]:
# Last batch: blocks 201-221 (the slice end is exclusive).
# NOTE(review): the previous batch was [181:200], so block 200 is in no batch, and with
# 223 blocks in total the final block (index 222) is excluded as well - confirm intent.
sb = semantic_blocks[201:222]

In [None]:
# Drop the block at position 19. `del` removes exactly that position, whereas
# sb.remove(sb[19]) removed the first element *equal* to it.
# Not idempotent: re-create `sb` before re-running this cell.
del sb[19]

In [None]:
# Continue to create graph

In [None]:
kg = await itext2kg_star.build_graph(
    sections=sb,
    ent_threshold=0.9,
    rel_threshold=0.8,
    existing_knowledge_graph=kg.model_copy(),
)

[2025-08-26 17:03:35] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-26 17:04:14] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-26 17:04:26] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 3
[2025-08-26 17:04:46] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 4
[2025-08-26 17:04:55] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 5
[2025-08-26 17:05:04] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 6
[2025-08-26 17:05:10] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 7
[2025-08-26 17:05:18] [    INFO] [itext2kg.itext2kg.graph_matc

In [None]:
# Save to Neo4j (Unstructured V0.2)

In [None]:
# Preprocessing function
def sanitize_node_labels(kg):
    """Prefix entity labels that start with a digit with ``N_``.

    Neo4j rejected a label such as ``80_20_Rule`` with a CypherSyntaxError, so
    digit-leading labels are rewritten before the graph is stored.

    Args:
        kg: Graph object exposing an iterable ``entities`` whose items carry a
            string ``label`` attribute.

    Returns:
        The same ``kg`` object; labels are modified in place.
    """
    for node in kg.entities:
        # Empty/missing labels are left untouched (indexing [0] would raise on them).
        if node.label and node.label[0].isdigit():
            node.label = "N_" + node.label
    # Nodes are intentionally not printed: each repr contains the full embedding
    # vector, and dumping every node exceeded Jupyter's IOPub data rate limit.
    return kg

# Apply sanitization before visualization
# sanitize_node_labels rewrites labels on `kg` itself and returns that same object,
# so `kg_sanitized` and `kg` refer to one and the same graph.
kg_sanitized = sanitize_node_labels(kg)
graph_integrator.visualize_graph(knowledge_graph=kg_sanitized)

label='Project_approach' name='project approach' properties=EntityProperties(embeddings=array([ 2.00312084e-02,  1.86238598e-02, -1.54179206e-01, -4.01706382e-02,
        1.29347814e-02, -1.63441010e-02,  2.58935140e-02, -3.35554564e-02,
       -3.22125628e-02,  3.70242578e-02,  3.72991220e-02, -8.49881136e-03,
        7.70653704e-02,  4.46910258e-02, -2.01025742e-02, -2.22439560e-02,
        1.12706904e-02, -6.26680160e-02, -4.54972334e-02,  2.21353700e-02,
        2.44593980e-02, -3.70186102e-02,  2.88806856e-02, -1.62740032e-02,
        1.03475774e-01, -5.36636037e-03, -1.40473782e-02, -1.12636489e-02,
       -4.53472092e-03, -5.04384020e-03,  1.27913800e-02, -4.10469700e-02,
       -1.60941062e-03, -7.22077616e-04, -2.93573764e-02, -7.45512720e-02,
        3.39461332e-02,  2.63852712e-02, -3.57755408e-02, -5.29985280e-02,
       -3.86579160e-02, -1.17407898e-02, -1.16858118e-03, -5.49015006e-02,
        1.22921772e-02, -4.43401016e-02,  3.52799586e-02, -9.74585520e-03,
        3.08

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



label='frequency' name='twice a week' properties=EntityProperties(embeddings=array([-5.83171000e-03,  2.66235356e-02, -1.91138094e-01, -3.27898112e-02,
        1.20154743e-02, -3.88019198e-02,  1.43131552e-02, -2.43655512e-02,
        1.81544708e-02, -4.18990820e-02, -2.49128508e-02,  2.60779722e-02,
        1.57822764e-02,  2.86135212e-02, -2.94753872e-02, -9.68494712e-03,
       -2.65395820e-02, -1.88610944e-02, -2.04827548e-02,  4.24140602e-02,
       -2.06525208e-02,  1.68638032e-02,  3.97482220e-03, -3.54495468e-02,
        6.70778680e-02,  7.36709108e-02,  1.01457185e-02,  9.33145400e-04,
       -9.86863268e-03,  2.69537264e-02, -1.26503088e-02,  6.09471644e-03,
        1.75061198e-02, -4.50820120e-02, -3.32991344e-03, -2.35612138e-02,
        5.04586980e-03, -1.50594570e-02,  5.86159940e-03,  7.04553840e-03,
        2.47052448e-02,  3.93341780e-03,  1.77562936e-02, -2.47694505e-02,
        3.37569976e-02, -5.49528586e-02,  1.00583128e-02,  5.69569760e-02,
        1.00839386e-02,