In [1]:
import os
from dotenv import load_dotenv

from pprint import pprint

from src.config import LLMConf, EmbedderConf, KnowledgeGraphConfig
from src.factory.embeddings import get_embeddings
from src.graph.knowledge_graph import KnowledgeGraph
from src.agents.graph_qa import GraphAgentResponder
# Load the variables defined in config.env before anything below reads the environment.
# NOTE(review): python-dotenv's load_dotenv returns a success flag, not the loaded
# values, so `env` is a bool — confirm nothing relies on it holding the settings.
env = load_dotenv('config.env')

# NOTE(review): this import sits after load_dotenv, presumably so the environment is
# populated before the module is loaded — confirm before hoisting it to the top.
from pgs.utils import get_configuration_from_env

from src.utils.logger import get_logger
logger = get_logger(__name__)

# Project configuration built from the environment; later cells read `conf.qa_model`.
conf = get_configuration_from_env()

2025-04-13 10:23:06.900 
  command:

    streamlit run /Users/dylan.tartarini/Desktop/Dylan/knowledge-graphs/.venv/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]


In [2]:
# Neo4j connection settings for the knowledge graph, read from the environment.
_neo4j_settings = {
    "uri": os.getenv("NEO4J_URI"),
    "user": os.getenv("NEO4J_USERNAME"),
    "password": os.getenv("NEO4J_PASSWORD"),
    "index_name": "vector",
}
kg_config = KnowledgeGraphConfig(**_neo4j_settings)

# Embedding backend type and model name.
embedder_conf = EmbedderConf(
    type=os.getenv("EMBEDDINGS_TYPE"),
    model=os.getenv("EMBEDDINGS_MODEL_NAME"),
)

# LLM settings taken from the RE_* variables.
# NOTE(review): os.getenv yields strings (or None), so `temperature` is handed over
# as text — confirm LLMConf converts it to a number.
_llm_settings = {
    "type": os.getenv("RE_MODEL_TYPE"),
    "model": os.getenv("RE_MODEL_NAME"),
    "temperature": os.getenv("RE_MODEL_TEMPERATURE"),
    "deployment": os.getenv("RE_MODEL_DEPLOYMENT"),
    "api_key": os.getenv("RE_API_KEY"),
    "endpoint": os.getenv("RE_MODEL_ENDPOINT"),
    # An unset or empty variable is normalised to None.
    "api_version": os.getenv("RE_MODEL_API_VERSION") or None,
}
model_conf = LLMConf(**_llm_settings)

# Build the embeddings model, then the graph wrapper that uses it.
embeddings = get_embeddings(conf=embedder_conf)
kg = KnowledgeGraph(conf=kg_config, embeddings_model=embeddings)

## Cypher Chains
Here we answer the user's question using only Cypher queries run against the graph.

In [15]:
# Responder without a rephrasing step: the QA model config is reused for Cypher
# generation. Add rephrase_llm_conf=conf.qa_model to enable question rephrasing.
_responder_kwargs = dict(
    qa_llm_conf=conf.qa_model,
    cypher_llm_conf=conf.qa_model,
    graph=kg,
)
responder = GraphAgentResponder(**_responder_kwargs)

2025-04-12 17:46:07,632 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 17:46:07,679 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'
2025-04-12 17:46:07,679 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 17:46:07,712 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'


In [11]:
# Relationship question answered purely through a generated Cypher query.
responder.answer_with_cypher(query="Who works for Europe Direct?")

'Eva Hrncirova and Quentin Cortes work for Europe Direct. \n'

In [12]:
# intermediate_steps=True makes the call return a tuple: the answer plus the
# generated Cypher query and the rows it retrieved (see the recorded output).
responder.answer_with_cypher(
    query="What legislation has been discussed that regards Apple?", 
    intermediate_steps=True
)

('The Digital Markets Act has been discussed regarding Apple. \n',
 [{'query': "MATCH (c:Chunk)-[:MENTIONS]->(l:Legislation) WHERE c.text CONTAINS 'Apple' RETURN l.id, l.name\n"},
  {'context': [{'l.id': 'Digital Markets Act',
     'l.name': 'Digital Markets Act'},
    {'l.id': 'Digital Markets Act', 'l.name': 'Digital Markets Act'}]}])

In [13]:
# Responder with the rephrasing step enabled (the same model config is used for
# QA, Cypher generation and rephrasing).
# FIXME: the rephraser is not working as expected — in the recorded runs it rewrites
# the question into a pseudo-query ("FIND Person WHERE ...") and the final answer
# degrades to "I don't know the answer."
_rephrase_kwargs = dict(
    qa_llm_conf=conf.qa_model,
    cypher_llm_conf=conf.qa_model,
    graph=kg,
    rephrase_llm_conf=conf.qa_model,
)
responder_with_rephrase = GraphAgentResponder(**_rephrase_kwargs)

2025-04-12 17:45:55,032 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 17:45:55,081 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'
2025-04-12 17:45:55,082 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 17:45:55,115 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'
2025-04-12 17:45:55,115 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 17:45:55,146 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'


In [14]:
# Same question as asked of the plain responder, now going through the rephrasing
# step (the rephrased question is logged before the answer).
responder_with_rephrase.answer_with_cypher(query="Who works for Europe Direct?")

2025-04-12 17:45:58,271 - src.agents.graph_qa - INFO - Rephrased Question: FIND Person WHERE Person.WORKS_FOR = 'Europe Direct' 



"I don't know the answer. \n"

In [8]:
# Inspect the intermediate steps of the rephrasing responder.
# NOTE(review): in the recorded output the Cypher step does return the
# 'Digital Markets Act' node, yet the final answer is "I don't know the answer." —
# the failure appears to be in the answering step, not in retrieval; TODO confirm.
responder_with_rephrase.answer_with_cypher(
    query="What legislation has been discussed that regards Apple?", 
    intermediate_steps=True
)

2025-04-06 18:53:42,630 - src.agents.graph_qa - INFO - Rephrased Question: FIND  Legislation  MENTIONS 'Apple'  



("I don't know the answer. \n",
 [{'query': "MATCH (l:Legislation)<-[:MENTIONS]-(c:Chunk) WHERE c.text CONTAINS 'Apple' RETURN l\n"},
  {'context': [{'l': {'community_leiden': 2,
      'closeness': 0.07395602614661552,
      'name': 'Digital Markets Act',
      'id': 'Digital Markets Act',
      'betweenness': 0.013308240354886411,
      'pagerank': 0.005754031618527281,
      'community_louvain': 1}},
    {'l': {'community_leiden': 2,
      'closeness': 0.07395602614661552,
      'name': 'Digital Markets Act',
      'id': 'Digital Markets Act',
      'betweenness': 0.013308240354886411,
      'pagerank': 0.005754031618527281,
      'community_louvain': 1}}]}])

## Vanilla RAG
Uses only vanilla RAG to answer the user's question.  
If `use_adjacent_chunks=True`, the graph is also queried for additional context beyond the Chunks retrieved by the similarity search.
Latency will be higher due to the expanded context.

In [5]:
# Fresh responder for the vanilla RAG section (no rephrasing step; pass
# rephrase_llm_conf=conf.qa_model to enable one).
_responder_kwargs = dict(
    qa_llm_conf=conf.qa_model,
    cypher_llm_conf=conf.qa_model,
    graph=kg,
)
responder = GraphAgentResponder(**_responder_kwargs)

2025-04-09 11:52:09,281 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-09 11:52:09,332 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'
2025-04-09 11:52:09,333 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-09 11:52:09,370 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'


In [6]:
# Baseline: use_adjacent_chunks is not passed, so no graph expansion is requested.
responder.answer_with_context("Who works for Europe Direct?")

"I don't know.  \n"

In [7]:
# Same question with use_adjacent_chunks=True, which also pulls extra context
# from the graph.
responder.answer_with_context(
    query="Who works for Europe Direct?", 
    use_adjacent_chunks=True
)

'The provided text does not say who works for Europe Direct. \n\n\n'

In [6]:
# Baseline run for the Apple legislation question (no adjacent chunks requested).
responder.answer_with_context(
    query="What legislation has been discussed that regards Apple?"
)

'The legislation discussed that regards Apple is the Digital Markets Act (DMA).  \n'

In [7]:
# Apple legislation question again, this time with graph-expanded context.
responder.answer_with_context(
    query="What legislation has been discussed that regards Apple?",
    use_adjacent_chunks=True
)

'The legislation discussed that regards Apple is the Digital Markets Act (DMA).  \n'

In [8]:
# Entity lookup without adjacent-chunk expansion.
responder.answer_with_context("Who is Eva Hrncirova?")

'Eva Hrncirova is a press contact for Glenn Micallef, Commissioner for Intergenerational Fairness, Youth, Culture and Sport.  \n'

In [5]:
# Same lookup with adjacent chunks; note the recorded answer differs from the
# one produced without expansion.
responder.answer_with_context("Who is Eva Hrncirova?", use_adjacent_chunks=True)

'Eva Hrncirova is a press contact for Discover EU.  \n'

In [6]:
# Role-based question ("who holds this title?") without adjacent-chunk expansion.
responder.answer_with_context("Who is the Commissioner for Intergenerational Fairness?")

"I don't know. \n"

In [9]:
# Role-based question again, with graph-expanded context.
responder.answer_with_context(
    query="Who is the Commissioner for Intergenerational Fairness?",
    use_adjacent_chunks=True
)

'The provided text does not contain the answer to your question. \n'

In [8]:
# Reverse of the role question: ask about the person by name, no expansion.
responder.answer_with_context(query="Who is Glenn Micallef?")

"I don't know.  \n"

In [7]:
# With adjacent chunks the recorded run identifies Glenn Micallef, whereas the
# run without expansion returned "I don't know."
responder.answer_with_context(query="Who is Glenn Micallef?", use_adjacent_chunks=True)

'Glenn Micallef is the Commissioner for Intergenerational Fairness, Youth, Culture and Sport.  \n'

In [10]:
# Definitional question without adjacent-chunk expansion.
responder.answer_with_context("what is the Digital Markets Act?")

'The Digital Markets Act (DMA) aims to ensure contestable and fair markets in the digital sector. It regulates gatekeepers, which are large digital platforms that provide an important gateway between business users and consumers, whose position can grant them the power to create a bottleneck in the digital economy.  \n'

In [8]:
# Definitional question with adjacent chunks; the recorded answer is identical
# to the one produced without expansion.
responder.answer_with_context("what is the Digital Markets Act?", use_adjacent_chunks=True)

'The Digital Markets Act (DMA) aims to ensure contestable and fair markets in the digital sector. It regulates gatekeepers, which are large digital platforms that provide an important gateway between business users and consumers, whose position can grant them the power to create a bottleneck in the digital economy.  \n'

## Community Reports  

Queries two vector indexes to get the user's answer out of an ensemble of contexts:
1. one made of a list of `CommunityReport`
2. one made of a list of `Chunk` from the same communities of the reports. 

If `use_adjacent_chunks=True`, the graph is also queried for additional context beyond the Chunks retrieved by the similarity search.
Latency will be higher due to the expanded context.

In [6]:
# Fresh responder for the community-report section (no rephrasing step; pass
# rephrase_llm_conf=conf.qa_model to enable one).
_responder_kwargs = dict(
    qa_llm_conf=conf.qa_model,
    cypher_llm_conf=conf.qa_model,
    graph=kg,
)
responder = GraphAgentResponder(**_responder_kwargs)

2025-04-12 17:42:38,205 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 17:42:38,248 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'
2025-04-12 17:42:38,248 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 17:42:38,280 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'


In [4]:
# Community-report retrieval over the "leiden" communities, without
# adjacent-chunk expansion.
responder.answer_with_community_reports(
    query="what is the Digital Markets Act?", 
    use_adjacent_chunks=False, 
    community_type="leiden"
)

2025-04-12 17:28:45,288 - src.agents.graph_qa - INFO - Retrieved 1 Community Reports
2025-04-12 17:28:45,458 - src.agents.graph_qa - INFO - Retrieved 4 Chunks for community: 4


'The Digital Markets Act requires Apple to comply with interoperability obligations.  \n'

In [5]:
# Same query with adjacent-chunk expansion enabled.
# NOTE(review): in the recorded run the expanded context produced a worse answer
# ("does not contain information") than the run without expansion.
responder.answer_with_community_reports(
    query="what is the Digital Markets Act?", 
    use_adjacent_chunks=True,
    community_type="leiden"
)

2025-04-12 17:28:55,890 - src.agents.graph_qa - INFO - Retrieved 1 Community Reports
2025-04-12 17:28:56,061 - src.agents.graph_qa - INFO - Retrieved 4 Chunks for community: 4


'The provided text does not contain information about what the Digital Markets Act is. \n\n\n'

In [6]:
# Only the query is passed here; use_adjacent_chunks and community_type are left
# at whatever the method defaults to.
responder.answer_with_community_reports(query="What is the European Commission telling Apple to do?")

2025-04-12 17:29:14,713 - src.agents.graph_qa - INFO - Retrieved 2 Community Reports
2025-04-12 17:29:14,894 - src.agents.graph_qa - INFO - Retrieved 4 Chunks for community: 4
2025-04-12 17:29:15,038 - src.agents.graph_qa - INFO - Retrieved 4 Chunks for community: 1


'The European Commission is requiring Apple to improve interoperability between its iOS and iPadOS operating systems and third-party apps and hardware.  \n'

In [7]:
# Same Apple question with explicit "leiden" communities and adjacent-chunk expansion.
responder.answer_with_community_reports(
    query="What is the European Commission telling Apple to do?", 
    use_adjacent_chunks=True,
    community_type="leiden"
)

2025-04-12 17:29:38,631 - src.agents.graph_qa - INFO - Retrieved 2 Community Reports
2025-04-12 17:29:38,941 - src.agents.graph_qa - INFO - Retrieved 4 Chunks for community: 4
2025-04-12 17:29:39,449 - src.agents.graph_qa - INFO - Retrieved 4 Chunks for community: 1


'The European Commission is requiring Apple to improve interoperability between its iOS and iPadOS operating systems and third-party apps and hardware.  \n'

In [8]:
# Role-based question against the community reports.
# In the recorded run no report passed the 0.8 relevance-score threshold, so zero
# reports were retrieved and the answer was "I don't know."
responder.answer_with_community_reports(
    query="Who is the Commissioner for Intergenerational Fairness?", 
    use_adjacent_chunks=True,
    community_type="leiden"
)

No relevant docs were retrieved using the relevance score threshold 0.8
2025-04-12 17:43:13,631 - src.agents.graph_qa - INFO - Retrieved 0 Community Reports


"I don't know.  \n"

## Answer with Communities Subgraph

Answers the question using the most relevant community:
* read the most relevant community report
* fetch the chunks belonging to the most relevant community (the one from the community report)
* follow the `MENTIONS` relationship of each Chunk and collect the result in a dictionary
* fetch the community subgraph as another dictionary
* pass the dictionaries and the report to a reconciler agent, which decides how to answer

In [3]:
# Fresh responder for the community-subgraph section (no rephrasing step; pass
# rephrase_llm_conf=conf.qa_model to enable one).
_responder_kwargs = dict(
    qa_llm_conf=conf.qa_model,
    cypher_llm_conf=conf.qa_model,
    graph=kg,
)
responder = GraphAgentResponder(**_responder_kwargs)

2025-04-12 20:36:38,604 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 20:36:38,712 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'
2025-04-12 20:36:38,712 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-12 20:36:38,743 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'


In [6]:
# Answer from the best-matching community; the log lines in the output show the
# selected community id, its chunks and the entities retrieved per chunk.
responder.answer_with_community_subgraph(query="What is the European Commission telling Apple to do?")

2025-04-12 20:37:36,389 - src.agents.graph_qa - INFO - Retrieved Community Reports of type leiden with community id: 4
2025-04-12 20:37:36,838 - src.agents.graph_qa - INFO - Retrieved 4 Chunks for community: 4
2025-04-12 20:37:36,931 - src.graph.graph_queries - INFO - Retrieved 0 entities for chunk 5
2025-04-12 20:37:37,000 - src.graph.graph_queries - INFO - Retrieved 5 entities for chunk 4
2025-04-12 20:37:37,128 - src.graph.graph_queries - INFO - Retrieved 10 entities for chunk 1
2025-04-12 20:37:37,199 - src.graph.graph_queries - INFO - Retrieved 0 entities for chunk 3


"The European Commission is requiring Apple to comply with the Digital Markets Act's interoperability obligation.  This means Apple must enable interoperability with iOS for third-party connected devices. \n"

In [7]:
# Role-based question via the community subgraph.
# NOTE(review): the recorded run retrieved entities for several chunks but still
# answered "I don't know." — TODO check what the reconciler receives.
responder.answer_with_community_subgraph(query="Who is the Commissioner for Intergenerational Fairness?")

2025-04-12 20:38:14,608 - src.agents.graph_qa - INFO - Retrieved Community Reports of type leiden with community id: 1
2025-04-12 20:38:14,925 - src.agents.graph_qa - INFO - Retrieved 4 Chunks for community: 1
2025-04-12 20:38:15,059 - src.graph.graph_queries - INFO - Retrieved 0 entities for chunk 7
2025-04-12 20:38:15,128 - src.graph.graph_queries - INFO - Retrieved 6 entities for chunk 6
2025-04-12 20:38:15,196 - src.graph.graph_queries - INFO - Retrieved 10 entities for chunk 1
2025-04-12 20:38:15,263 - src.graph.graph_queries - INFO - Retrieved 7 entities for chunk 2


"I don't know.  \n"

## Full Answer

In [3]:
# Fresh responder for the full-answer section (no rephrasing step; pass
# rephrase_llm_conf=conf.qa_model to enable one).
_responder_kwargs = dict(
    qa_llm_conf=conf.qa_model,
    cypher_llm_conf=conf.qa_model,
    graph=kg,
)
responder = GraphAgentResponder(**_responder_kwargs)

2025-04-13 10:23:15,336 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-13 10:23:15,437 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'
2025-04-13 10:23:15,437 - src.factory.llm - INFO - Fetching LLM model 'gemma2-9b-it'..
2025-04-13 10:23:15,468 - src.factory.llm - INFO - Initialized LLM of type: 'ModelType.GROQ'


In [4]:
# Top-level `answer` entry point.
# NOTE(review): how it combines the strategies shown in the earlier sections is not
# visible in this notebook — see GraphAgentResponder.answer.
responder.answer(query="What is Europe doing for remote areas?")

'Additional arrangements are offered to those living in remote areas, islands or outermost regions.  \n'

In [None]:
# FIXME: the recorded output for this cell shows a generated Cypher query with an
# unquoted label containing a space (`Trade commissioner`), flagged by a caret, and
# a final answer that names a different commissioner than the one asked about.
# The Cypher-generation step likely needs attention — TODO confirm.
responder.answer(query="Who is the Commissioner for Intergenerational Fairness?")

"MATCH (c:Trade commissioner)-[:ENGAGED_WITH]->(country:Country) WHERE c.name = 'Commissioner for Intergenerational Fairness' RETURN c, country"
                ^}


'Valdis Dombrovskis is the Commissioner for Economy and Productivity.  \n'

In [6]:
# Same question with adjacent chunks. The recorded output shows the same malformed
# Cypher query (label `Trade commissioner` with a space) and no usable answer.
responder.answer(query="Who is the Commissioner for Intergenerational Fairness?", use_adjacent_chunks=True)

"MATCH (c:Trade commissioner)-[:ENGAGED_WITH]->(country:Country) WHERE c.name = 'Commissioner for Intergenerational Fairness' RETURN c, country"
                ^}


'The provided text does not contain the answer to the question.  \n'

In [7]:
# Relationship question through the full pipeline, with adjacent chunks.
responder.answer(query="Who works for Europe Direct?", use_adjacent_chunks=True)

'Eva Hrncirova and Quentin Cortes \n'

In [8]:
# Entity lookup through the full pipeline, with adjacent chunks.
responder.answer(query="Who is Eva Hrncirova?", use_adjacent_chunks=True)

'Eva Hrncirova is a person who works at the European Commission.  \n'

In [5]:
# Open-ended question through the full pipeline; use_adjacent_chunks is not passed.
responder.answer(query="What is the European Commission telling Apple to do?")

"The European Commission is telling Apple to take measures to comply with the Digital Markets Act's interoperability obligation.  \n"