In [None]:
import os
from dotenv import load_dotenv
from pprint import pprint

from src.config import Source, ChunkerConf, LLMConf, EmbedderConf, KnowledgeGraphConfig

from src.graph.knowledge_graph import KnowledgeGraph

from src.ingestion.local_ingestor import LocalIngestor
from src.ingestion.chunker import Chunker
from src.ingestion.cleaner import Cleaner
from src.ingestion.embedder import ChunkEmbedder
from src.ingestion.graph_miner import GraphMiner

# Load environment variables (the Neo4j settings read in the configuration cell) from config.env.
# NOTE(review): python-dotenv documents load_dotenv's return value as a bool, so `env`
# is presumably a "something was loaded" flag rather than the variables themselves — confirm.
env = load_dotenv(dotenv_path='config.env')

### Configuration


In [None]:
# Folder descriptor handed to the LocalIngestor further down.
source = Source(folder='source_folder')

# Neo4j connection settings are read from the process environment
# (config.env is loaded in the first cell). A missing variable yields None.
kg_config = KnowledgeGraphConfig(
    uri=os.environ.get("NEO4J_URI"),
    user=os.environ.get("NEO4J_USERNAME"),
    password=os.environ.get("NEO4J_PASSWORD"),
    index_name="vectors",
)

# Chunking parameters consumed by the Chunker.
chunker_conf = ChunkerConf(type="recursive", chunk_size=1000, chunk_overlap=100)

# LLM settings consumed by the GraphMiner.
llm_conf = LLMConf(type="ollama", model="llama3.2:latest", temperature=0.0)

# Embedding model settings consumed by the ChunkEmbedder.
embedder_conf = EmbedderConf(type="ollama", model="mxbai-embed-large")

### Press Releases
For demonstration purposes, we ingest a couple of EU Commission press releases in `.pdf` format into the knowledge graph.

### Load Documents from local folder

In [None]:
# Build an ingestor over the configured `source` and ingest its contents in one batch.
ingestor = LocalIngestor(source=source)

docs = ingestor.batch_ingest()

# Last expression of the cell: displays the ingested documents.
docs

In [None]:
# Print the `source` attribute of the first ingested document (before cleaning).
print(docs[0].source)

### Clean 

In [None]:
# Run the ingested documents through the Cleaner; `docs` is rebound to the result.
cleaner = Cleaner()

docs = cleaner.clean_documents(docs)

# Pretty-print the first document's `source` attribute to inspect it after cleaning.
pprint(docs[0].source)

### Chunk 

In [None]:
# Split every document into chunks using the settings in `chunker_conf`;
# `docs` is rebound to the chunker's output.
chunker = Chunker(conf=chunker_conf)
docs = chunker.chunk_documents(docs)

# Report how many chunks each document produced.
for i, doc in enumerate(docs):
    n_chunks = len(doc.chunks)
    print(f"Number of chunks in doc {i}: {n_chunks}")

### Embed

In [None]:
# Embed the chunks of every document with the model configured in `embedder_conf`;
# `docs` is rebound to the embedder's output.
embedder = ChunkEmbedder(conf=embedder_conf)

docs = embedder.embed_documents_chunks(docs)

### Extract Nodes and Relationships from Chunks
Here, we use the `GraphMiner` class to extract a graph of entities and relationships from each chunk of the press releases. Suppose we have no prior knowledge of this domain: we therefore do not define an `Ontology` and pass `ontology=None`.

Since we are using a local Llama model, this step might take a while.

In [None]:
# Create the miner with the LLM settings and no ontology (ontology=None).
graph_miner = GraphMiner(conf=llm_conf, ontology=None)

# Mine a graph from the documents; `docs` is rebound to the miner's output.
docs = graph_miner.mine_graph_from_docs(docs=docs)

In [None]:
# Display the `nodes` attribute of the first chunk of the first document
# (presumably populated by the graph-mining step above — confirm).
docs[0].chunks[0].nodes

In [None]:
# Display the `relationships` attribute of the first chunk of the first document
# (presumably populated by the graph-mining step above — confirm).
docs[0].chunks[0].relationships

### Knowledge Graph Interaction

In [None]:
# Instantiate the KnowledgeGraph from the Neo4j settings in `kg_config`, passing it
# the `embeddings` object held by the ChunkEmbedder created earlier.
knowledge_graph = KnowledgeGraph(conf=kg_config, embeddings_model=embedder.embeddings)

In [None]:
# Check the database connection before any documents are added in the next cell.
# NOTE(review): this reaches into the private `_driver` attribute of KnowledgeGraph
# (presumably the Neo4j Python driver — confirm); a public wrapper would be preferable.
knowledge_graph._driver.verify_connectivity()

# Last expression of the cell, so its return value is displayed as the cell output.
knowledge_graph._driver.verify_authentication()

In [None]:
# Hand the processed documents to the knowledge graph.
# NOTE(review): re-running this cell may add the same documents again — confirm
# whether add_documents is idempotent.
knowledge_graph.add_documents(docs)

In [None]:
# Display the graph's `index_name` (kg_config above was built with index_name="vectors").
knowledge_graph.index_name

In [None]:
# Display the `number_of_labels` value reported by the KnowledgeGraph.
knowledge_graph.number_of_labels

In [None]:
# Display the `number_of_nodes` value reported by the KnowledgeGraph.
knowledge_graph.number_of_nodes

In [None]:
# Display the `number_of_relationships` value reported by the KnowledgeGraph.
knowledge_graph.number_of_relationships

### Results in Neo4j Aura

Connecting to Neo4j Aura confirms visually what we already know: a Knowledge Graph made of Documents, Chunks and the entities + relationships mentioned in those Chunks has been created and it's ready to be queried either via **vector search** or via **graph search**, or even employing a **hybrid approach**.

![img](../assets/graph_from_press_releases.png)