In [None]:
from itext2kg import itext2kg_star
from langchain_ollama import ChatOllama, OllamaEmbeddings

In [None]:
import time
from datetime import datetime

In [None]:
# Ollama Serve
# Set up connection using langchain

In [None]:
# Chat model handed to DocumentsDistiller further down (temperature fixed at 0).
llm = ChatOllama(model="llama3.2:1b", temperature=0)

In [None]:
# Chat model and embedding model that are later passed to iText2KG_Star
# as `llm_model` and `embeddings_model` respectively.
llm_text = ChatOllama(model="gemma2:2b", temperature=0)
embeddings = OllamaEmbeddings(model="nomic-embed-text:latest")

In [None]:
from langchain.document_loaders import PyPDFLoader

# Location of the PDF to ingest. This is a plain string: the original used an
# f-string with no placeholders, which added nothing.
scrum_guide_path = "./data/input/2020-Scrum-Guide-US.pdf"

# `pages` is iterated later in the notebook and each item's `page_content`
# is fed to the document distiller.
loader = PyPDFLoader(scrum_guide_path)
pages = loader.load_and_split()

In [None]:
from itext2kg.documents_distiller import Article, DocumentsDistiller

# The distiller is driven by `llm`; `Article` is imported here because it is
# used as the output data structure when distilling.
document_distiller = DocumentsDistiller(llm_model=llm)

In [None]:
# Run llama3.2:1b
# Distill text

In [None]:
# Record start time
start_time = time.time()
start_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"Process started at: {start_timestamp}")

IE_query = '''
# DIRECTIVES :
- Act like an experienced information extractor.
- You have a document describing the Scrum software development process.
- If you do not find the right information, keep its place empty.
'''
# we have replaced the curly braces with square brackets to avoid the error in the query
distilled_text = await document_distiller.distill(documents=[page.page_content.replace("{", '[').replace("}", "]") for page in pages], IE_query=IE_query, output_data_structure=Article)

end_time = time.time()
end_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
elapsed_time = end_time - start_time

print(f"Process finished at: {end_timestamp}")
print(f"Time elapsed: {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")

Process started at: 2025-08-20 09:50:24
Process finished at: 2025-08-20 09:52:28
Time elapsed: 123.76 seconds (2.06 minutes)


In [None]:
# Display the result returned by document_distiller.distill().
distilled_text

Article(title="Scrum Guide Purpose of the Scrum Guide Purpose of the Scrum Guide Definition of Done Scrum Software Development Process Scrum Software Development Process Scrum Software Development Process Scrum Master's Role and Responsibilities Scrum Software Development Process Scrum Software Development Process Scrum Software Development Process Scrum Software Development Process # Context: 11 Definition of Done Scrum Software Development Process", authors=[Author(name='Ken Schwaber', affiliation='Scrum.org'), Author(name='Jeff Sutherland', affiliation='Scrum.org'), Author(name='Ken Schwaber', affiliation='Scrum Founder'), Author(name='Jeff Sutherland', affiliation='Scrum Co-Founder'), Author(name='Scrum Guide', affiliation='Scrum.org'), Author(name='Unknown', affiliation=''), Author(name='Scrum Team', affiliation='Scrum Team'), Author(name='Stakeholders', affiliation='Stakeholders'), Author(name='Scrum Team', affiliation='5 Scrum Team'), Author(name='Product Owner', affiliation='5 

In [None]:
# Turn every non-empty field of the distilled Article into a "field - value"
# string. Curly braces in the resulting text are swapped for square brackets.
brace_to_bracket = str.maketrans("{}", "[]")
article_fields = distilled_text.model_dump()
semantic_blocks = [
    f"{field_name} - {field_value}".translate(brace_to_bracket)
    for field_name, field_value in article_fields.items()
    # Skip fields that are None, an empty string, or an empty list.
    if field_value is not None and field_value != "" and field_value != []
]

In [None]:
# Run gemma2:2b
# Construct graph

In [None]:
import asyncio
from itext2kg import iText2KG_Star
from itext2kg.logging_config import setup_logging, get_logger

# Initialize iText2KG_Star with the llm model and embeddings model
itext2kg_star = iText2KG_Star(llm_model=llm_text, embeddings_model=embeddings)

# Your text sections (can be facts from document distiller or raw text)
sections=semantic_blocks

logger.info("Starting knowledge graph construction with iText2KG_Star...")

# Build the knowledge graph - entities are automatically derived from relationships
kg = await itext2kg_star.build_graph(
    sections=sections,
    ent_threshold=0.8,      # Higher threshold for more distinct entities
    rel_threshold=0.7,      # Threshold for relationship merging
    observation_date="2025-01-15"  # Optional: add temporal context
)

logger.info(f"Knowledge graph completed! Entities: {len(kg.entities)}, Relationships: {len(kg.relationships)}")

[2025-08-20 09:56:36] [    INFO] [itext2kg.__main__] Starting knowledge graph construction with iText2KG_Star...
[2025-08-20 09:56:36] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 1
[2025-08-20 09:57:06] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 2
[2025-08-20 09:58:01] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched --- [scrum guide:authors] --merged--> [scrum guide:title]
[2025-08-20 09:58:01] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched --- [scrum software development process:affiliation] --merged--> [scrum software development process:Scrum_Master_s_Role_and_Responsibilities]
[2025-08-20 09:58:01] [    INFO] [itext2kg.itext2kg.itext2kg_star] ------- Extracting Relations and Deriving Entities from Document 3
[2025-08-20 09:58:36] [    INFO] [itext2kg.itext2kg.graph_matching.matcher] Entity was matched

In [None]:
# Display the knowledge graph returned by build_graph().
kg

  frac_strs, _, exp_strs = zip(*(s.partition('e') for s in strs))


KnowledgeGraph(entities=[Entity(name=scrum founder, label=affiliation, properties=embeddings=array([ 6.74313120e-03,  4.99462040e-02, -1.72024868e-01, -2.53042780e-03,
        3.60448368e-02, -1.61948824e-02,  7.83391560e-03,  9.71247880e-03,
        1.73319120e-03, -1.10868506e-02, -5.38109972e-02,  2.85127998e-02,
        5.85486180e-02,  3.51381454e-02,  8.07583524e-03, -1.92313200e-02,
       -2.73865800e-03, -2.48687404e-02, -1.77498841e-02,  7.53878980e-02,
       -4.77512284e-02, -1.49457920e-03, -1.44615836e-02, -1.64189876e-03,
        1.44615716e-01, -2.40232206e-02, -1.39157464e-02, -4.89436210e-02,
        1.45671072e-02, -1.51803052e-02,  5.63431588e-02,  4.28511776e-02,
        1.89251439e-02, -1.28549106e-02,  2.58690648e-02, -1.58900797e-02,
        2.02771189e-02,  7.70809560e-03, -1.36530778e-02, -9.13309660e-03,
       -4.33294548e-02, -1.51957268e-02, -3.26094156e-02, -8.59676390e-02,
        1.84562366e-02, -1.72669918e-02,  1.84802286e-02, -2.37657650e-03,
       