In [1]:
import hashlib
import json
import os

# from langchain_community.graphs import Neo4jGraph
from langchain_neo4j import Neo4jGraph

# --- Connection Details ---
# Settings are read from environment variables so real credentials do not have
# to be stored in the notebook. The fallbacks are the local-development values
# this notebook has used so far; override them via the environment elsewhere.
NEO4J_URL = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")  # Default URL for local Neo4j
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "12345678")  # local-dev fallback only
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "callsanalytics")  # The database name you created

# Initialize the connection to Neo4j
try:
    graph = Neo4jGraph(
        url=NEO4J_URL,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD,
        database=NEO4J_DATABASE
    )
except Exception as e:
    print(f"Failed to connect to Neo4j: {e}")
    # Re-raise rather than calling exit(): exiting from a notebook cell tears
    # down the session and hides the traceback, while raising stops the run
    # and keeps the full error visible.
    raise
else:
    print("Successfully connected to Neo4j.")

Successfully connected to Neo4j.


In [10]:
def prepare_entities_for_ingestion(entities: list) -> list:
    """Return shallow copies of ``entities`` with ``contact_details`` flattened.

    For every entity whose ``contact_details`` value is a dict, that key is
    removed from the copy and its key/value pairs are written onto the copy's
    top level (overwriting any top-level key of the same name). Entities
    without the key, or whose value is not a dict, are copied unchanged.
    The input entities themselves are never modified.

    Args:
        entities: list of entity records (each must support ``.copy()``).

    Returns:
        A new list of flattened copies, in the same order as the input.
    """
    def _flatten(record):
        flat = record.copy()
        if 'contact_details' not in flat:
            return flat
        nested = flat['contact_details']
        if isinstance(nested, dict):
            # Drop the nested map first, then hoist its entries to the top level.
            del flat['contact_details']
            flat.update(nested)
        return flat

    return [_flatten(record) for record in entities]

# --- UPGRADED INGESTION LOGIC ---
def ingest_call_data(call_data: dict):
    """Load one extracted call into Neo4j via the module-level ``graph`` connection.

    Steps, in order:
      1. MERGE one node per entity (keyed on ``name``), copy the entity's
         properties onto it and add the ``Person`` and ``Entity`` labels.
      2. MERGE each static relationship between nodes matched by name. The
         relationship type is the upper-cased ``type`` reduced to
         alphanumerics/underscores; entries that reduce to nothing are skipped.
      3. MERGE a ``Call`` node keyed on the SHA-256 of the JSON-serialised
         payload, copy ``call_summary`` onto it and link every entity to it
         with ``PARTICIPATED_IN``.
      4. For events of type OBJECTION / PAIN_POINT / BUYING_SIGNAL that have a
         speaker name and whose ``speaker_role`` is not ``'Agent'``, MERGE an
         event node (keyed on its text) and connect speaker -> event -> call.

    Args:
        call_data: dict with keys ``entities``, ``relationships``,
            ``call_summary`` and ``events``; they are read with direct
            indexing, so a missing key raises ``KeyError``.
            NOTE(review): ``call_summary`` is passed to ``SET c += $summary``
            and is presumably a flat map of property values - confirm.

    Returns:
        None. Progress is reported with ``print``.
    """
    # 1. Ingest Entities (Nodes)
    entities_query = """
    UNWIND $entities AS entity
    MERGE (n {name: entity.name})
    SET n += entity, n:Person, n:Entity
    """
    prepared_entities = prepare_entities_for_ingestion(call_data['entities'])
    graph.query(entities_query, params={'entities': prepared_entities})
    print(f"Ingested {len(prepared_entities)} entities.")

    # 2. Ingest Static Relationships (e.g., WORKS_FOR)
    for rel in call_data['relationships']:
        # Relationship types cannot be query parameters, so the type is
        # whitelisted character-by-character before being interpolated.
        rel_type = "".join(c for c in rel['type'] if c.isalnum() or c == '_').upper()
        if not rel_type:
            continue
        # Backticks keep the type valid even when it starts with a digit; the
        # filter above guarantees the name itself contains no backtick.
        safe_relationships_query = f"""
        MATCH (source {{name: $source_name}})
        MATCH (target {{name: $target_name}})
        MERGE (source)-[r:`{rel_type}`]->(target)
        """
        graph.query(safe_relationships_query, params={
            'source_name': rel['source'],
            'target_name': rel['target']
        })
    print(f"Ingested {len(call_data['relationships'])} static relationships.")

    # 3. Create the Central Call Node
    call_node_query = """
    // MERGE helps prevent creating duplicate Call nodes if script is run multiple times
    MERGE (c:Call {call_id: $call_id})
    SET c += $summary
    WITH c
    UNWIND $participants AS participant_name
    MATCH (p {name: participant_name})
    MERGE (p)-[:PARTICIPATED_IN]->(c)
    """
    # Content hash of the payload: re-ingesting the same file maps to the same
    # Call node instead of creating a duplicate.
    call_id = hashlib.sha256(json.dumps(call_data).encode()).hexdigest()

    participant_names = [entity['name'] for entity in call_data['entities']]
    # No RETURN is needed: later queries find the Call node through call_id.
    # The previous version returned the internal node id and read result[0],
    # which raised IndexError whenever no participant row was produced
    # (e.g. an empty entity list) even though the Call node had been merged.
    graph.query(call_node_query, params={
        'call_id': call_id,
        'summary': call_data['call_summary'],
        'participants': participant_names
    })
    print(f"Created/updated Call node with ID: {call_id}")

    # 4. Ingest Events as their own Nodes and create dynamic relationships.
    # Maps event type -> relationship type from the speaker to the event node.
    # Only event types listed here are modelled in the graph; because the
    # event type must be one of these keys, it is safe to use as a node label.
    relationship_map = {
        "OBJECTION": "HAD_OBJECTION",
        "PAIN_POINT": "MENTIONED_PAIN_POINT",
        "BUYING_SIGNAL": "SHOWED_BUYING_SIGNAL"
    }

    for event in call_data['events']:
        event_type = event.get('type')
        speaker_name = event.get('speaker_name')

        # We only want to create event nodes for the customer's actions
        if not speaker_name or event.get('speaker_role') == 'Agent':
            continue

        rel_type = relationship_map.get(event_type)
        if not rel_type:
            continue

        # The event text is the merge key for the event node.
        event_text = event.get('text', '')

        event_node_query = f"""
        // Find the Call node merged above, by its stable call_id
        MATCH (c:Call {{call_id: $call_id}})
        // Find the person who spoke
        MATCH (p:Person {{name: $speaker_name}})
        // Create a node for the event itself
        MERGE (e:{event_type} {{text: $event_text}})
        // Connect the person to the event, and the event to the call
        MERGE (p)-[r1:{rel_type}]->(e)
        MERGE (e)-[r2:OCCURRED_IN_CALL]->(c)
        """
        graph.query(event_node_query, params={
            "call_id": call_id,
            "speaker_name": speaker_name,
            "event_text": event_text
        })
    print("Ingested key events as distinct nodes and relationships.")

Loop insert (all files in one run)

In [12]:
# Extracted-call JSON files to load, in ingestion order.
files = [
    'updated_entities_relations_call1.json',
    'updated_entities_relations_call2.json',
    'updated_entities_relations_call3.json',
    'updated_entities_relations_call4.json',
]

# Destructive: removes every node and relationship so the run starts from an
# empty graph.
print("Clearing existing graph for a fresh start...")
graph.query("MATCH (n) DETACH DELETE n")

for file_name in files:
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # locale encoding when reading it.
        with open(file_name, 'r', encoding='utf-8') as f:
            print(f"\n--- Ingesting {file_name} ---")
            data = json.load(f)
        # The file is closed before the (slower) database work starts.
        ingest_call_data(data)
    except FileNotFoundError:
        print(f"Error: {file_name} not found. Skipping.")
    except Exception as e:
        # Keep going with the remaining files, but report what failed.
        print(f"An error occurred with {file_name}: {e}")

print("\n--- Ingestion of all files complete ---")
print("Open Neo4j Browser and run 'MATCH (n) RETURN n' to see the nodes.")
print("Or run 'MATCH p = ()-[]->() RETURN p LIMIT 25' to see the graph structure.")

Clearing existing graph for a fresh start...

--- Ingesting updated_entities_relations_call1.json ---
Ingested 3 entities.
Ingested 3 static relationships.




Created/updated Call node with ID: 7cabae53a8f8ce0dd163817f4e5c2284e7b5c82ee4b136321e1d6b7c3d9354ca




Ingested key events as distinct nodes and relationships.

--- Ingesting updated_entities_relations_call2.json ---
Ingested 3 entities.
Ingested 3 static relationships.
Created/updated Call node with ID: 7f6bf3284fdce0e77ace268cee73e054b6ca8ca7174aba9c2d19f533cde11538




Ingested key events as distinct nodes and relationships.

--- Ingesting updated_entities_relations_call3.json ---
Ingested 3 entities.
Ingested 3 static relationships.
Created/updated Call node with ID: 6b40fb3fcb86415428b9a845737671a142b0b955099ddca4aa55be206fbf8814
Ingested key events as distinct nodes and relationships.

--- Ingesting updated_entities_relations_call4.json ---
Ingested 2 entities.
Ingested 1 static relationships.




Created/updated Call node with ID: 518bdbac28fc7bcfee0a434a1dec49d664685504212dc8750b9bc8c1b3a7a614
Ingested key events as distinct nodes and relationships.

--- Ingestion of all files complete ---
Open Neo4j Browser and run 'MATCH (n) RETURN n' to see the nodes.
Or run 'MATCH p = ()-[]->() RETURN p LIMIT 25' to see the graph structure.


One-by-one insert (single file)

In [11]:
# File ingested by this cell; also used in the error message below so the
# message always names the file that was actually requested.
single_file = 'updated_entities_relations_call4.json'

try:
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(single_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Clear the entire graph before ingesting.
    # This is useful for testing to ensure you start fresh each time.
    # **COMMENT THIS OUT IN PRODUCTION** so you don't delete your data.
    print("Clearing existing graph...")
    graph.query("MATCH (n) DETACH DELETE n")

    # Ingest the loaded data
    ingest_call_data(data)

    print("\n--- Ingestion Complete ---")
    print("Open Neo4j Browser to view your graph.")
    print("Run this Cypher query to see everything: MATCH (n) RETURN n")

except FileNotFoundError:
    print(f"Error: '{single_file}' not found. Make sure the JSON file is in the same directory.")
except Exception as e:
    print(f"An error occurred: {e}")

Clearing existing graph...


In [1]:
import json

def score_extraction_quality(call_data: dict) -> dict:
    """
    Scores the quality of the extracted JSON based on the client's rules.
    This is a reliable, code-based approach to quality control.

    Points are added for a call outcome, a gatekeeper entity, each entity,
    decision-maker contact details, each relationship, each recognised event
    type and a scheduled meeting. Penalties (negative numbers) are applied for
    inconsistencies between those pieces. A key whose value is ``None``
    (JSON ``null``) is scored exactly like a missing key.

    Args:
        call_data: extraction result; the keys read are ``call_summary``,
            ``entities``, ``relationships``, ``events`` and
            ``scheduled_meeting``.

    Returns:
        dict with ``final_score`` (base score plus penalties), ``status``
        (label derived from ``final_score``), ``notes`` (list of failure and
        warning messages), ``base_score`` and ``penalties`` (zero or negative).
    """
    score = 0
    penalties = 0
    notes = []

    # `or` fallbacks so an explicit JSON null behaves like an absent key
    # instead of raising on `.get` / iteration.
    call_summary = call_data.get('call_summary') or {}
    entities = call_data.get('entities') or []
    relationships = call_data.get('relationships') or []
    events = call_data.get('events') or []
    outcome = call_summary.get('outcome')

    # Rule: Call summary with correct outcome (+15 points)
    if outcome:
        score += 15
    else:
        notes.append("FAIL: Missing call_summary or outcome.")

    entity_names = [e.get('name') for e in entities]

    # Rule: Gatekeeper presence (+20 points / -30 penalty)
    is_gatekeeper_present = "Unnamed Gatekeeper" in entity_names
    has_transfer_relationship = any(r.get('type') == 'TRANSFERRED_CALL_TO' for r in relationships)

    # We assume a transfer happened if a gatekeeper was needed
    if is_gatekeeper_present:
        score += 20
        if not has_transfer_relationship:
            penalties -= 15  # Less severe penalty if relationship is just missing
            notes.append("WARNING: Gatekeeper entity exists, but TRANSFERRED_CALL_TO relationship is missing.")
    # This is a heuristic: if we see a transfer rel but no gatekeeper, it's a major failure.
    elif has_transfer_relationship:
        penalties -= 30
        notes.append("CRITICAL FAIL: TRANSFERRED_CALL_TO exists, but Gatekeeper entity is missing.")

    # Rule: entities identified (+5 each; every entity counts, gatekeeper included)
    score += len(entities) * 5

    # Rule: Contact info captured (+10 points) / Missing (-10 penalty)
    # Only the first entity with subtype 'Decision Maker' is considered.
    decision_maker = next((e for e in entities if e.get('subtype') == 'Decision Maker'), None)
    if decision_maker and decision_maker.get('contact_details'):
        score += 10
    elif decision_maker:  # DM exists but has no contact details
        penalties -= 10
        notes.append("FAIL: Decision Maker found, but contact_details are missing.")

    # Rule: Relationships mapped (+5 each)
    score += len(relationships) * 5

    # Rule: Events categorized (points per type; unknown types score 0)
    event_scores = {
        "PAIN_POINT": 7,
        "BUYING_SIGNAL": 8,
        "OBJECTION": 5,
        "GATEKEEPER_INTERACTION": 5,
        "QUESTION": 3,
        "TECHNICAL_ISSUE": 2,
        "RAPPORT_BUILDING": 2,
        "GENERAL_EVENT": 1,
        "CONCERN": 4,
        "CLARIFICATION": 4,
    }
    for event in events:
        score += event_scores.get(event.get('type'), 0)

    # Rule: Meeting details (+15 points / -15 penalty)
    if call_data.get('scheduled_meeting'):
        score += 15
    elif outcome == 'Meeting Scheduled':
        penalties -= 15
        notes.append("CRITICAL FAIL: Outcome is 'Meeting Scheduled' but scheduled_meeting object is null.")

    final_score = score + penalties

    # Determine final status
    if final_score >= 100:
        status = "High-confidence extraction"
    elif final_score >= 80:
        status = "Acceptable extraction, minor gaps"
    elif final_score >= 60:
        status = "Review recommended"
    else:
        status = "Manual review REQUIRED"

    return {
        "final_score": final_score,
        "status": status,
        "notes": notes,
        "base_score": score,
        "penalties": penalties
    }

In [None]:
# Extracted-call JSON files covered by the quality checks below.
files = [
    'updated_entities_relations_call1.json',
    'updated_entities_relations_call2.json',
    'updated_entities_relations_call3.json',
    'updated_entities_relations_call4.json',
]

In [None]:
# JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
with open('updated_entities_relations_call1.json', 'r', encoding='utf-8') as f:
    extracted_data = json.load(f)

# Get the quality score (last expression, so the notebook displays it)
quality_report = score_extraction_quality(extracted_data)
quality_report

{'final_score': 118,
 'status': 'High-confidence extraction',
 'notes': [],
 'base_score': 118,
 'penalties': 0}

In [3]:
# JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
with open('updated_entities_relations_call2.json', 'r', encoding='utf-8') as f:
    extracted_data = json.load(f)

# Get the quality score (last expression, so the notebook displays it)
quality_report = score_extraction_quality(extracted_data)
quality_report

{'final_score': 128,
 'status': 'High-confidence extraction',
 'notes': [],
 'base_score': 128,
 'penalties': 0}

In [6]:
# JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
with open('updated_entities_relations_call3.json', 'r', encoding='utf-8') as f:
    extracted_data = json.load(f)

# Get the quality score (last expression, so the notebook displays it)
quality_report = score_extraction_quality(extracted_data)
quality_report

{'final_score': 117,
 'status': 'High-confidence extraction',
 'notes': [],
 'base_score': 117,
 'penalties': 0}

In [7]:
# JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
with open('updated_entities_relations_call4.json', 'r', encoding='utf-8') as f:
    extracted_data = json.load(f)

# Get the quality score (last expression, so the notebook displays it)
quality_report = score_extraction_quality(extracted_data)
quality_report

{'final_score': 72,
 'status': 'Review recommended',
 'notes': ['FAIL: Decision Maker found, but contact_details are missing.'],
 'base_score': 82,
 'penalties': -10}