In [38]:
import ast
import glob
import json
import os
from typing import Dict, Any

from neo4j import GraphDatabase

In [39]:
def store_element_summary_in_graph(tx, data: Dict[str, Any], doc_id: str, chunks_bounds: tuple):
    """
    Load the summarized graph data into Neo4j.

    One ``SummarizedNode`` is created for every entry of
    ``data["summarized_nodes"]`` and one ``RELATIONSHIP_TYPE`` relationship for
    every entry of ``data["summarized_relationships"]``.

    Relationship endpoints are looked up by title, restricted to nodes that
    carry the same ``doc_id`` and chunk bounds as this call. Nodes are written
    with ``CREATE`` (not ``MERGE``), so the same title can exist once per
    document/chunk; without that restriction a relationship would be created
    between every pair of same-titled nodes across the whole database.

    Args:
        tx: Neo4j transaction object (anything exposing ``run(query, **params)``).
        data (Dict[str, Any]): Summarized nodes and relationships. Node entries
            are read with ``.get`` for ``title``, ``summary``, ``original_ids``,
            ``type`` and ``keywords`` (missing keys are stored as ``None``).
            Relationship entries must provide ``source``, ``target`` and
            ``relation_type``; ``weight`` defaults to 1 and
            ``original_relationships`` to an empty list.
        doc_id (str): Document ID.
        chunks_bounds (tuple): Tuple containing the start and end positions
            of the text chunk in the original document.

    Raises:
        KeyError: If ``data`` lacks ``summarized_nodes`` or
            ``summarized_relationships``, or a relationship lacks ``source``,
            ``target`` or ``relation_type``.
    """
    chunks_lower_bound = chunks_bounds[0]
    chunks_upper_bound = chunks_bounds[1]

    # The queries are constant, so build them once instead of once per item.
    query_create_node = """
    CREATE (n:SummarizedNode {
        title: $title,
        summary: $summary,
        original_ids: $original_ids,
        type: $type,
        keywords: $keywords,
        doc_id: $doc_id,
        chunks_lower_bound: $chunks_lower_bound,
        chunks_upper_bound: $chunks_upper_bound
    })
    """

    # Endpoints are matched within the current document and chunk only.
    # If either endpoint is not found, MATCH yields no rows and nothing is created.
    query_create_rel = """
    MATCH (source:SummarizedNode {
        title: $source_id,
        doc_id: $doc_id,
        chunks_lower_bound: $chunks_lower_bound,
        chunks_upper_bound: $chunks_upper_bound
    })
    MATCH (target:SummarizedNode {
        title: $target_id,
        doc_id: $doc_id,
        chunks_lower_bound: $chunks_lower_bound,
        chunks_upper_bound: $chunks_upper_bound
    })
    CREATE (source)-[:RELATIONSHIP_TYPE {
        type: $relation_type,
        weight: $weight,
        original_relationships: $original_rels
    }]->(target)
    """

    # Create the nodes
    for node in data["summarized_nodes"]:
        tx.run(
            query_create_node,
            title=node.get("title"),
            summary=node.get("summary"),
            original_ids=node.get("original_ids"),
            type=node.get("type"),
            keywords=node.get("keywords"),
            doc_id=doc_id,
            chunks_lower_bound=chunks_lower_bound,
            chunks_upper_bound=chunks_upper_bound,
        )

    # Create the relationships
    for rel in data["summarized_relationships"]:
        tx.run(
            query_create_rel,
            source_id=rel["source"],
            target_id=rel["target"],
            relation_type=rel["relation_type"],
            weight=rel.get("weight", 1),  # default=1 if not provided
            original_rels=rel.get("original_relationships", []),
            doc_id=doc_id,
            chunks_lower_bound=chunks_lower_bound,
            chunks_upper_bound=chunks_upper_bound,
        )

In [40]:
# Connection settings for the Neo4j instance. Each value can be overridden
# through an environment variable of the same name; the fallbacks keep the
# previous hard-coded values so the notebook still runs unchanged.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7690')
NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j')
# NOTE(review): the fallback password is still in the notebook; prefer setting
# NEO4J_PASSWORD in the environment and removing the default before sharing.
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', '12345678')

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))


In [41]:

doc_id = "0704.2547"
PATH_REGEX = f'../data/backup_extraction_nodes/{doc_id}/element_summary/*.json'

# Expand the glob pattern (despite its name, PATH_REGEX is a shell-style
# wildcard, not a regular expression). glob.glob returns matches in arbitrary
# order, so sort them to make the processing order reproducible across runs.
files = sorted(glob.glob(PATH_REGEX))


In [42]:

# Offset added to a file's lower bound to obtain the upper bound that is
# passed to store_element_summary_in_graph as chunks_bounds.
CHUNK_SPAN = 25

# One session is reused for all files; each execute_write call still runs
# store_element_summary_in_graph in its own managed transaction.
with driver.session() as session:
    for file in files:
        # The part of the file name before the first '.' is parsed as an
        # integer and used as the lower chunk bound. os.path.basename is used
        # instead of splitting on '/' so the platform's separator is handled.
        lim = int(os.path.basename(file).split('.')[0])

        with open(file, 'r') as f:
            # The content is parsed as a Python literal rather than strict
            # JSON — presumably the files were written with str(dict); confirm.
            element_summary = ast.literal_eval(f.read())

        # The file is closed before writing to the database.
        session.execute_write(
            store_element_summary_in_graph,
            element_summary,
            doc_id,
            (lim, lim + CHUNK_SPAN),
        )

In [43]:
# DANGER: wipes the whole database (deletes every node and relationship). Uncomment only to reset before a re-import.
# with driver.session() as session:
#     session.run("MATCH (n) DETACH DELETE n")
#     print("All nodes and relationships have been deleted.")