In [1]:
from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph

In [2]:
# Load the local .env file into the process environment, then read the three
# Neo4j connection settings from it (each is None when its variable is unset).
load_dotenv(".env")

NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = (
    os.getenv(setting)
    for setting in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)


#### <font color="yellow">Set up (project the database into an in-memory graph)</font>

	•	The graph projection (gds.graph.project) creates an in-memory graph representation used for fast computation.
	•	Algorithms like Leiden run on that in-memory graph.

##### Check whether a graph projection exists for the given database

In [3]:
# Name of the Neo4j database to work on; it is passed to Neo4jGraph and to the
# community detector in the cells that follow.
kg_db_name = "t20"
# kg_db_name = "t30documentsgraph"

In [4]:
# Open the LangChain Neo4j wrapper against the selected database, using the
# credentials read from the environment.
kg = Neo4jGraph(
    database=kg_db_name,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

  kg = Neo4jGraph(


#### Detect Hierarchical communities

In [5]:
from src.hierarchical_community_detector import HierarchicalCommunityDetector

In [6]:
# Build the detector from the open graph connection and the database name.
hierarchical_community_detector = HierarchicalCommunityDetector(kg=kg, kg_db_name=kg_db_name)

In [7]:
# Run the detector; the recorded output shows it reporting progress for the
# base projection and the C3 -> C0 community levels.
hierarchical_community_detector.detect_hierarchical_communities()


▶ Base graph 't20_projection' already projected.

  Skipping Leiden on t20_projection: C3_CommunityId already exists.

▶ Graph 'C3_graph' already projected.

▶ Running Leiden on C3_graph, writing C2_CommunityId...

✅ Full C3 → C0 community hierarchy generated.


#### Old code 

In [5]:
# Cypher statement that calls the GDS graph-catalog listing procedure.
cypher = "\nCALL gds.graph.list()\n"

In [6]:
# Run the catalog listing and report how many rows (projected graphs) came back.
graphs =  kg.query(query=cypher)
print(len(graphs))

3


In [7]:
# Keep only the "database" field of each listed graph.
graphs_db = [graph["database"] for graph in graphs]

In [8]:
# kg_db_name = "t30documentsgraph"

# Check for the projection by *name*. Comparing kg_db_name with the `database`
# column of gds.graph.list() is true as soon as any projection lives in this
# database, even when no graph called kg_db_name has been projected.
projection_exists = kg.query(
    query="CALL gds.graph.exists($graph_name) YIELD exists RETURN exists",
    params={"graph_name": kg_db_name},
)[0]["exists"]

if projection_exists:
    print("There exist a Graph associated to the database")
else:
    print("There is not a Graph associated to the database")
    print("Creating Graph in memory")

    # Every node label present in the database.
    labels = kg.query(query=
        """
        CALL db.labels() YIELD label
        RETURN collect(label) AS allLabels;
        """
    )[0]["allLabels"]

    # Every relationship type present in the database.
    relationships = kg.query(query=
        """
        CALL db.relationshipTypes() YIELD relationshipType
        RETURN collect(relationshipType) AS allTypes;
        """
    )[0]["allTypes"]

    # Pass the graph name, labels and relationship types as query parameters
    # rather than interpolating Python reprs into the Cypher text, so a name
    # containing a quote cannot break the statement.
    project_query = """
    CALL gds.graph.project(
    $graph_name,
    $labels,
    $relationships
    )
    """

    # Project the database into an in-memory graph named after the database.
    kg.query(
        query=project_query,
        params={
            "graph_name": kg_db_name,
            "labels": labels,
            "relationships": relationships,
        },
    )

    print("Graph created succesfully")


There is not a Graph associated to the database
Creating Graph in memory
Graph created succesfully


In [None]:
# A projected graph can be dropped with:
#   CALL gds.graph.drop('myGraphProjection');
# Run it against the database that holds the in-memory projection.

#### <font color="yellow">Identify communities and write the result back into the database</font>

In [9]:
# Run the Leiden algorithm on the in-memory graph named after the database and
# write each node's community back as the `communityId` property.
# NOTE(review): `gds.alpha.leiden.write` is the alpha-tier procedure name; newer
# GDS releases may expose it under a different tier — confirm against the
# installed GDS version.

# kg_db_name = "t30documentsgraph"

leiben_property_cypher = f"""
    CALL gds.alpha.leiden.write(
    '{kg_db_name}',
    {{writeProperty: 'communityId' }}
    )
    YIELD communityCount, modularity;
"""

# Last expression of the cell: the result rows (communityCount, modularity) are displayed.
kg.query(query=leiben_property_cypher)



[{'communityCount': 63, 'modularity': 0.6137139162605811}]

In [10]:
# Count the nodes in each community that has been written back to the database.
# `IS NOT NULL` replaces the legacy `exists(n.prop)` predicate, which newer
# Neo4j versions reject; it selects the same nodes.
community_id_cypher = """
MATCH (n)
WHERE n.communityId IS NOT NULL
RETURN n.communityId AS communityId, count(*) AS nodesCount
ORDER BY communityId
"""

# Keep only the ids (ordered by the query) and show them with their count.
community_ids = [com['communityId'] for com in kg.query(query=community_id_cypher)]
print(community_ids)
print(len(community_ids))




[0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 66, 67, 68, 69, 72, 73, 74, 76, 79]
63


#### Get Nodes in Each Community

In [72]:
# Group internal node ids by community.
# `IS NOT NULL` replaces the legacy `exists(n.prop)` predicate, which newer
# Neo4j versions reject; it selects the same nodes.
# NOTE(review): `id(n)` is kept so the result stays a list of integers; check
# whether the target Neo4j version still supports it.
community_nodes_cypher = """
MATCH (n)
WHERE n.communityId IS NOT NULL
RETURN n.communityId AS communityId, collect(id(n)) AS nodeIds
"""
# Mapping: communityId -> list of node ids in that community.
community_nodes = {res['communityId']: res['nodeIds'] for res in kg.query(community_nodes_cypher)}



In [73]:
# Dump the whole communityId -> node-id-list mapping.
print(community_nodes)

{22: [0, 1, 2, 3, 4, 5, 43, 86, 87, 88, 89, 90, 91, 92, 93, 140, 141, 142, 145], 16: [6, 7, 8, 9, 10, 11, 12], 9: [13, 14, 15, 16, 17, 18, 21, 101, 102, 103, 105, 106, 107, 108], 20: [19], 46: [20], 0: [22, 23, 24, 25, 27], 8: [26], 14: [28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 48], 15: [31, 32, 33, 46, 47, 49, 50, 51, 52, 75, 76, 77, 78, 79, 80, 81, 132, 133, 134, 135, 136, 137, 138, 139], 37: [45], 17: [53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 82, 83, 84], 29: [63, 64, 65, 66, 69, 70], 2: [67], 35: [68], 10: [71, 72, 73, 74], 31: [85], 41: [94, 95, 96, 97, 98, 99, 100], 33: [104], 40: [109, 110, 111, 112, 116, 117, 118], 43: [113, 114, 115], 1: [119, 123, 124, 125, 126, 127, 128, 129, 130, 131], 21: [120, 121, 122], 38: [143], 3: [144], 4: [146, 147, 148, 149, 150, 151, 155, 158, 160, 161, 162, 163, 165], 12: [152], 5: [153], 28: [154], 7: [156], 27: [157], 19: [159], 13: [164], 6: [166]}


In [None]:
# for each node community get the ranked edge 

### Test CommunitySummaries 

In [1]:
from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph

In [2]:
# Populate the environment from .env, then look up the Neo4j connection
# settings (a missing variable yields None).
load_dotenv(".env")

NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD = map(
    os.getenv, ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)


In [3]:
from src.utils.community_summnaries_utils import prioritize_edges, format_kg_output

In [4]:
# Target database for the community-summary test, and the connection to it.
kg_db_name = "t20"
kg = Neo4jGraph(
    database=kg_db_name,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

  kg = Neo4jGraph(


In [5]:
from src.community_summaries import create_leaf_level_community_summaries

In [6]:
# Build the leaf-level community summaries for the selected database.
# NOTE(review): the recorded output of this cell ends in a CypherSyntaxError
# ("Unknown function 'degree'") raised inside the call; the failing query is
# not part of this notebook, so it has to be fixed in the imported module.
create_leaf_level_community_summaries(
    kg=kg,
    kg_db_name=kg_db_name
)


▶ Summarizing C3 Community 6...


CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Unknown function 'degree' (line 5, column 10 (offset: 131))
"WITH `r` AS `r`, `source` AS `source`, `target` AS `target`, (`degree`((`source`))) + (`degree`((`target`))) AS `combined_degree`"
               ^}

#### old version of the code 

In [6]:
# Fetch the prioritised edges of community 12 and render them as the two
# text tables (entities, relationships).
kg_data = prioritize_edges(kg, community_id=12)
entities_str, relationships_str = format_kg_output(kg_data)

# Show both tables separated by one blank line (could equally be written to a file).
print(entities_str, "", relationships_str, sep="\n")



Entities
id,entity,description
0,Paper,Prompt Design And Engineering
2,Method,Chain-Of-Thought
5,Topic,Tools For Prompt Engineers
4,Topic,Principles Behind Building Llm-Based Agents
3,Method,Reflection
1,Topic,Core Concepts
101,Topic,Complex Reasoning Tasks
97,Method,Multi-Agent Systems, Provide more comprehensive solutions by integrating collective intelligence of multiple agents.
103,Topic,Multi-Agent Llms

Relationships
id,source_id,target_id,description
1,0,2,USES_METHOD
4,0,5,DISCUSSES
3,0,4,DISCUSSES
2,0,3,USES_METHOD
0,0,1,DISCUSSES
152,2,101,RELATED_TO
157,2,97,RELATED_TO
153,97,103,RELATED_TO


In [7]:
# Generate the community summary for the selected database.
# NOTE(review): `create_communities_to_kg` is not imported in any cell visible
# in this notebook — confirm which module provides it before a fresh-kernel run.
community_summary = create_communities_to_kg(
    kg=kg,
    kg_db_name=kg_db_name
)



There exist a Graph associated to the database
Entities
id,entity,description
0,Paper,Prompt Design And Engineering
2,Method,Chain-Of-Thought
5,Topic,Tools For Prompt Engineers
4,Topic,Principles Behind Building Llm-Based Agents
3,Method,Reflection
1,Topic,Core Concepts
101,Topic,Complex Reasoning Tasks
97,Method,Multi-Agent Systems, Provide more comprehensive solutions by integrating collective intelligence of multiple agents.
103,Topic,Multi-Agent Llms

Relationships
id,source_id,target_id,description
1,0,2,USES_METHOD
4,0,5,DISCUSSES
3,0,4,DISCUSSES
2,0,3,USES_METHOD
0,0,1,DISCUSSES
152,2,101,RELATED_TO
157,2,97,RELATED_TO
153,97,103,RELATED_TO


In [None]:
import json
import textwrap
from pprint import pprint
from IPython.display import Markdown, display
# Generate the community report; its JSON payload is read from `.content`.
community_summary = create_communities_to_kg(kg=kg, kg_db_name=kg_db_name)
response_text = community_summary.content
response_dict = json.loads(response_text)

wrapper = textwrap.TextWrapper(width=80)

# The title is printed in a single call, label first.
print("Title:\n", response_dict["title"], "\n\n")

# (heading, JSON key, whether the value is re-wrapped to 80 columns)
for heading, key, wrapped in (
    ("Summary:", "summary", True),
    ("Rating:", "rating", False),
    ("Rating Explanation:", "rating explanation", True),
):
    print(heading)
    body = response_dict[key]
    print(wrapper.fill(body) if wrapped else body, "\n\n")

# Findings is a nested structure, so it is pretty-printed rather than wrapped.
print("Findings:")
pprint(response_dict["findings"], width=80)

Title:
 Prompt Design And Engineering Paper and Its Methodological & Topical Network 


Summary:
The community is anchored by the paper “Prompt Design And Engineering,” which
employs the Chain-Of-Thought and Reflection methods to discuss Core Concepts,
Tools For Prompt Engineers, and Principles Behind Building LLM-Based Agents.
Chain-Of-Thought further relates to Complex Reasoning Tasks and Multi-Agent
Systems, and Multi-Agent Systems in turn connects to Multi-Agent LLMs, forming
an integrated network of reasoning techniques and advanced agent architectures
[Data: Entities (0); Relationships (1,2,3,4,152,157,153)] 


Rating:
7.5 


Rating Explanation:
The impact severity rating is high due to the paper’s central role in advancing
both fundamental prompt-engineering methods and their extensions into complex
reasoning and multi-agent LLM frameworks. 


Findings:
[{'explanation': 'The entity “Prompt Design And Engineering” serves as the hub '
                 'of this community, deploying