In [1]:
import os
from dotenv import load_dotenv
from pprint import pprint

from src.config import LLMConf, KnowledgeGraphConfig, EmbedderConf
from src.graph.graph_model import Ontology
from src.graph.knowledge_graph import KnowledgeGraph
from src.ingestion.embedder import ChunkEmbedder
from src.ingestion.graph_miner import GraphMiner
from src.schema import Chunk, ProcessedDocument

# Load environment variables from config.env (presumably where the NEO4J_*
# settings read further down are defined — confirm).
# The return value of load_dotenv is kept in `env` and echoed as the cell
# output; it was True in the recorded run.
env = load_dotenv('config.env')
env

True

In [2]:
# Configuration objects used by the rest of the notebook.

# LLM settings; handed to GraphMiner for knowledge-graph extraction.
llm_conf = LLMConf(
    type="ollama",
    model="llama3.2:latest",
    temperature=0.0,
)

# Embedding model settings; handed to ChunkEmbedder.
embedder_conf = EmbedderConf(
    type="ollama",
    model="mxbai-embed-large",
)

# Neo4j connection settings are read from the environment.
# os.environ[...] is used instead of os.getenv(...) so that a missing variable
# raises KeyError (naming the variable) right here, instead of silently
# passing None into the config and failing later when connecting.
kg_config = KnowledgeGraphConfig(
    uri=os.environ["NEO4J_URI"],
    user=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
    index_name="vector",
)


# Node labels and relation types passed to GraphMiner alongside llm_conf.
ontology = Ontology(
    allowed_labels=["Person", "City", "Country", "Region"],
    allowed_relations=["LIVES_IN", "BORN_IN"],
)

## Preparation
Here we assume that a document has already been ingested into a `ProcessedDocument`, split into chunks, and is now waiting to be embedded.

In [3]:
# Raw text of the sample document; passed as `source` to the ProcessedDocument
# built later in this cell.
text= """ 
    Meet Marco Rossi, a proud native of Rome, Italy. With a passion for pasta and a love for football, 
    Marco embodies the vibrant spirit of his homeland. 
    Growing up amidst the ancient ruins and bustling piazzas of Rome, Marco developed 
    a deep appreciation for his city's rich history and cultural heritage. 
    From the iconic Colosseum to the majestic Vatican City, he has explored every corner of the Eternal City, 
    finding beauty in its timeless landmarks and hidden gems. 
    Italy, with its delectable cuisine and stunning landscapes, has always been Marco's playground. 
    From the rolling hills of Tuscany to the sparkling waters of the Amalfi Coast,
    he has embraced the diversity and beauty of his country, taking pride in its traditions and way of life.
"""

# The same sample text, pre-split by hand into two chunk strings; each entry
# becomes the `text` of one Chunk below.
chunks = [
    "Meet Marco Rossi, a proud native of Rome, Italy. With a passion for pasta and a love for football, Marco embodies the vibrant spirit of his homeland. Growing up amidst the ancient ruins and bustling piazzas of Rome, Marco developed a deep appreciation for his city's rich history and cultural heritage.",
    "From the iconic Colosseum to the majestic Vatican City, he has explored every corner of the Eternal City, finding beauty in its timeless landmarks and hidden gems. Italy, with its delectable cuisine and stunning landscapes, has always been Marco's playground. From the rolling hills of Tuscany to the sparkling waters of the Amalfi Coast, he has embraced the diversity and beauty of his country, taking pride in its traditions and way of life."
]

# Wrap the sample text and its hand-made chunks in a ProcessedDocument.
# Chunk ids start at 1 and follow the order of `chunks`; every chunk carries
# the same size / overlap / embedding-model metadata.
doc = ProcessedDocument(
    filename="test.pdf",
    source=text,
    chunks=[
        Chunk(
            chunk_id=position,
            text=chunk_text,
            chunk_size=500,
            chunk_overlap=50,
            embeddings_model='mxbai-embed-large',
        )
        for position, chunk_text in enumerate(chunks, start=1)
    ],
)

In [4]:
# Embeddings: build the chunk embedder from embedder_conf and run it over the
# document. `doc` is rebound to whatever embed_document_chunks returns, so
# re-running this cell feeds the already-processed document back in.

embedder = ChunkEmbedder(conf=embedder_conf)

doc = embedder.embed_document_chunks(doc)

In [5]:
# Extract the knowledge graph: GraphMiner is configured with the LLM settings
# and the ontology, then run over the document's chunks. `doc` is rebound to
# the returned value; the following cells read .nodes / .relationships from it.

graph_miner = GraphMiner(conf=llm_conf, ontology=ontology)

doc= graph_miner.mine_graph_from_doc_chunks(doc)

In [6]:
# Inspect the nodes present on the first chunk after the graph-mining step.
doc.chunks[0].nodes

[Node(id='Marco_Rossi', type='Person', properties={'city': 'Rome', 'country': 'Italy', 'name': 'Marco_Rossi'}),
 Node(id='Rome', type='City', properties={'country': 'Italy', 'name': 'Rome'}),
 Node(id='Italy', type='Country', properties={'name': 'Italy'})]

In [7]:
# Inspect the relationships present on the first chunk after the graph-mining step.
doc.chunks[0].relationships

[Relationship(source=Node(id='Marco_Rossi', type='Person', properties={'city': 'Rome', 'country': 'Italy', 'name': 'Marco_Rossi'}), target=Node(id='Rome', type='City', properties={'country': 'Italy', 'name': 'Rome'}), type='LIVES_IN', properties={'city': 'Rome'}),
 Relationship(source=Node(id='Rome', type='City', properties={'country': 'Italy', 'name': 'Rome'}), target=Node(id='Italy', type='Country', properties={'name': 'Italy'}), type='BORN_IN', properties={})]

## Interaction with Neo4j (Aura) 

In [8]:
# Create the knowledge-graph client from the Neo4j settings in kg_config and
# the embedder's `embeddings` object.
knowledge_graph = KnowledgeGraph(
    conf=kg_config, 
    embeddings_model=embedder.embeddings
)

# Sanity-check the connection before using the graph.
# NOTE(review): this reaches into the private `_driver` attribute (presumably
# a neo4j Driver — confirm); a public health-check on KnowledgeGraph would be
# a safer thing for a notebook to depend on.
knowledge_graph._driver.verify_connectivity()

# Last expression of the cell, so its result is the cell output
# (True in the recorded run).
knowledge_graph._driver.verify_authentication()

True

In [9]:
# Label count reported by the knowledge graph (2 in the recorded run).
knowledge_graph.number_of_labels

2

In [10]:
# Relationship count reported by the knowledge graph (0 in the recorded run).
knowledge_graph.number_of_relationships

0

In [11]:
# Name of the vector index the knowledge graph is using.
# NOTE(review): the recorded output is 'vectors', but kg_config is created with
# index_name="vector" — either the output is stale or KnowledgeGraph does not
# take the name from the config; confirm before relying on it.
knowledge_graph.index_name

'vectors'

In [12]:
# Hand the processed document to the knowledge graph for storage, passing the
# embedder's `embeddings` object along.
# NOTE(review): presumably this writes the chunks (and their embeddings) to
# Neo4j — confirm in KnowledgeGraph.store_chunks_for_doc.
knowledge_graph.store_chunks_for_doc(
    doc=doc, 
    embeddings_model=embedder.embeddings
)