In [None]:
# design data model: 
"""
causal_node: {
    id: string,
    name: string,
}
event: {
    id: string,
    name: string,
    description: string,
    timestamp: string,
    sentiment: string,
}
"""

In [33]:
# init instance model

from neo4j import GraphDatabase

# Database connection configuration
URI = "bolt://localhost:7687"
AUTH = ("neo4j", "neo4jneo4j")

# Create driver instance
driver = GraphDatabase.driver(URI, auth=AUTH)

# Test connection
def test_connection():
    try:
        driver.verify_connectivity()
        print("Connection to Neo4j database successful!")
    except Exception as e:
        print(f"Failed to connect to Neo4j database: {e}")

test_connection()





Connection to Neo4j database successful!


In [40]:
import numpy as np

random_causal_graph = np.random.rand(20, 20) < 0.3
print(random_causal_graph)



[[False False  True False False False  True False False False False False
  False False False  True False False False False]
 [False False False False False False False False False False False False
  False False False False False False False False]
 [False False  True False  True False False False  True False False  True
   True False False False False False False False]
 [ True False False False False False  True  True False False False False
  False  True False False  True False  True False]
 [False False False False False  True  True False False False  True False
  False False False  True  True False False False]
 [False False False  True False False  True False False False False  True
  False False  True False  True  True  True False]
 [False False False  True  True  True  True False  True  True  True False
  False False  True  True False False  True False]
 [ True False False  True  True  True False False  True False False False
  False False  True  True  True  True False False]


In [41]:
# Clear the database and insert causal graph

# First, clear all existing data
clear_query = """
MATCH (n)
DETACH DELETE n
"""

try:
    with driver.session() as session:
        session.run(clear_query)
        print("Database cleared successfully!")
except Exception as e:
    print(f"Failed to clear database: {e}")

# Create nodes for the causal graph using a loop
num_nodes = random_causal_graph.shape[0]

try:
    with driver.session() as session:
        for i in range(num_nodes):
            create_node_query = """
            CREATE (n:Node {id: $node_id, name: $node_name})
            """
            session.run(create_node_query, node_id=i, node_name=f'Node_{i}')
        print(f"Created {num_nodes} causal graph nodes successfully!")
except Exception as e:
    print(f"Failed to create nodes: {e}")

# Create edges based on the random causal graph using loops
edge_count = 0
try:
    with driver.session() as session:
        for i in range(num_nodes):
            for j in range(num_nodes):
                if random_causal_graph[i, j]:
                    create_edge_query = """
                    MATCH (n1:Node {id: $from_id}), (n2:Node {id: $to_id})
                    CREATE (n1)-[:CAUSES]->(n2)
                    """
                    session.run(create_edge_query, from_id=i, to_id=j)
                    edge_count += 1
        print(f"Created {edge_count} causal relationships!")
except Exception as e:
    print(f"Failed to create edges: {e}")



Database cleared successfully!
Created 20 causal graph nodes successfully!
Created 120 causal relationships!


In [42]:
# Query all relationships

cypher_query = """
MATCH (n1)-[r]->(n2)
RETURN n1, r, n2
"""

# Execute the query
try:
    with driver.session() as session:
        result = session.run(cypher_query)
        print("All Relationships:")
        print("-" * 40)
        for record in result:
            node1 = record['n1']
            relationship = record['r']
            node2 = record['n2']
            
            # Get node labels and properties
            node1_labels = ':'.join(node1.labels) if node1.labels else 'Node'
            node2_labels = ':'.join(node2.labels) if node2.labels else 'Node'
            node1_props = dict(node1)
            node2_props = dict(node2)
            
            print(f"({node1_labels} {node1_props})-[:{relationship.type}]->({node2_labels} {node2_props})")
except Exception as e:
    print(f"Failed to query data: {e}")


All Relationships:
----------------------------------------
(Node {'name': 'Node_0', 'id': 0})-[:CAUSES]->(Node {'name': 'Node_2', 'id': 2})
(Node {'name': 'Node_0', 'id': 0})-[:CAUSES]->(Node {'name': 'Node_6', 'id': 6})
(Node {'name': 'Node_0', 'id': 0})-[:CAUSES]->(Node {'name': 'Node_15', 'id': 15})
(Node {'name': 'Node_2', 'id': 2})-[:CAUSES]->(Node {'name': 'Node_4', 'id': 4})
(Node {'name': 'Node_2', 'id': 2})-[:CAUSES]->(Node {'name': 'Node_8', 'id': 8})
(Node {'name': 'Node_2', 'id': 2})-[:CAUSES]->(Node {'name': 'Node_11', 'id': 11})
(Node {'name': 'Node_2', 'id': 2})-[:CAUSES]->(Node {'name': 'Node_12', 'id': 12})
(Node {'name': 'Node_2', 'id': 2})-[:CAUSES]->(Node {'name': 'Node_2', 'id': 2})
(Node {'name': 'Node_3', 'id': 3})-[:CAUSES]->(Node {'name': 'Node_0', 'id': 0})
(Node {'name': 'Node_3', 'id': 3})-[:CAUSES]->(Node {'name': 'Node_6', 'id': 6})
(Node {'name': 'Node_3', 'id': 3})-[:CAUSES]->(Node {'name': 'Node_7', 'id': 7})
(Node {'name': 'Node_3', 'id': 3})-[:CAUSES

In [43]:
# Query all nodes in the database

cypher_query = """
MATCH (n)
RETURN n
"""

# Execute the query
try:
    with driver.session() as session:
        result = session.run(cypher_query)
        print("All Nodes in Database:")
        print("-" * 40)
        for record in result:
            node = record['n']
            labels = ':'.join(node.labels)
            properties = dict(node)
            print(f"Labels: {labels}")
            print(f"Properties: {properties}")
            print("-" * 20)
except Exception as e:
    print(f"Failed to query nodes: {e}")


All Nodes in Database:
----------------------------------------
Labels: Node
Properties: {'name': 'Node_0', 'id': 0}
--------------------
Labels: Node
Properties: {'name': 'Node_1', 'id': 1}
--------------------
Labels: Node
Properties: {'name': 'Node_2', 'id': 2}
--------------------
Labels: Node
Properties: {'name': 'Node_3', 'id': 3}
--------------------
Labels: Node
Properties: {'name': 'Node_4', 'id': 4}
--------------------
Labels: Node
Properties: {'name': 'Node_5', 'id': 5}
--------------------
Labels: Node
Properties: {'name': 'Node_6', 'id': 6}
--------------------
Labels: Node
Properties: {'name': 'Node_7', 'id': 7}
--------------------
Labels: Node
Properties: {'name': 'Node_8', 'id': 8}
--------------------
Labels: Node
Properties: {'name': 'Node_9', 'id': 9}
--------------------
Labels: Node
Properties: {'name': 'Node_10', 'id': 10}
--------------------
Labels: Node
Properties: {'name': 'Node_11', 'id': 11}
--------------------
Labels: Node
Properties: {'name': 'Node_12',

In [44]:
import random
from datetime import datetime, timedelta

def random_date(start_date, end_date):
    """Generate a random date between start_date and end_date"""
    time_between = end_date - start_date
    days_between = time_between.days
    random_number_of_days = random.randrange(days_between + 1)
    return start_date + timedelta(days=random_number_of_days)

# Example usage
start = datetime(2025, 1, 1)
end = datetime(2025, 1, 30)
print(f"Random date: {random_date(start, end).strftime('%Y-%m-%d')}")

# %% cell 8 code

# Generate random news for each node
import uuid

def generate_random_news(idx):
    """Generate random news with idx as name, timestamp and sentiment"""
    sentiments = ["positive", "negative", "neutral"]
    
    return {
        "name": f"news_{idx}",
        "timestamp": random_date(datetime(2025, 1, 1), datetime(2025, 1, 10)),
        "sentiment": random.choice(sentiments)
    }

# Query all nodes and create news for each
cypher_query = """
MATCH (n)
RETURN n
"""

try:
    with driver.session() as session:
        result = session.run(cypher_query)
        nodes = list(result)
        
        print(f"Creating 3 random news items for each of {len(nodes)} nodes...")
        print("-" * 60)
        
        news_counter = 1
        
        for record in nodes:
            node = record['n']
            node_id = node.element_id
            labels = ':'.join(node.labels)
            
            print(f"\nNode: {labels} (ID: {node_id})")
            
            # Generate 3 random news items for this node
            for i in range(3):
                news = generate_random_news(news_counter)
                
                # Create news node and relationship
                create_news_query = """
                MATCH (n) WHERE elementId(n) = $node_id
                CREATE (news:News {
                    name: $news_name,
                    timestamp: datetime($timestamp),
                    sentiment: $sentiment
                })
                CREATE (n)-[:HAS_NEWS]->(news)
                RETURN news
                """
                
                session.run(create_news_query, {
                    'node_id': node_id,
                    'news_name': news['name'],
                    'timestamp': news['timestamp'].isoformat(),
                    'sentiment': news['sentiment']
                })
                
                print(f"  News {i+1}: Name={news['name']}")
                print(f"    Timestamp: {news['timestamp'].strftime('%Y-%m-%d %H:%M:%S')}")
                print(f"    Sentiment: {news['sentiment']}")
                
                news_counter += 1
        
        print(f"\n✅ Successfully created {len(nodes) * 3} news items!")
        
except Exception as e:
    print(f"Failed to create news items: {e}")

Random date: 2025-01-29
Creating 3 random news items for each of 20 nodes...
------------------------------------------------------------

Node: Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:80)
  News 1: Name=news_1
    Timestamp: 2025-01-08 00:00:00
    Sentiment: neutral
  News 2: Name=news_2
    Timestamp: 2025-01-05 00:00:00
    Sentiment: negative
  News 3: Name=news_3
    Timestamp: 2025-01-07 00:00:00
    Sentiment: positive

Node: Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:81)
  News 1: Name=news_4
    Timestamp: 2025-01-10 00:00:00
    Sentiment: negative
  News 2: Name=news_5
    Timestamp: 2025-01-08 00:00:00
    Sentiment: negative
  News 3: Name=news_6
    Timestamp: 2025-01-10 00:00:00
    Sentiment: positive

Node: Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:82)
  News 1: Name=news_7
    Timestamp: 2025-01-05 00:00:00
    Sentiment: positive
  News 2: Name=news_8
    Timestamp: 2025-01-06 00:00:00
    Sentiment: positive
  News 3: Name=news_9
    Timestamp

In [45]:
# Select a random news node and explore its causal connections
try:
    with driver.session() as session:
        print("Selecting a random news node and exploring causal connections...")
        print("-" * 60)
        
        # Step 1: Select a random news node and get the causal node connected to it
        random_news_query = """
        MATCH (news:News)<-[:HAS_NEWS]-(causal)
        RETURN news, causal
        ORDER BY rand()
        LIMIT 1
        """
        
        result = session.run(random_news_query)
        record = result.single()
        
        if not record:
            print("No news nodes found!")
        else:
            news_node = record['news']
            causal_node = record['causal']
            
            # Extract the date from the selected news node
            news_timestamp = news_node['timestamp']
            news_date = news_timestamp.date()
            
            print(f"Selected News Node:")
            print(f"  ID: {news_node['id']}")
            print(f"  Timestamp: {news_node['timestamp']}")
            print(f"  Date: {news_date}")
            print(f"  Sentiment: {news_node['sentiment']}")
            
            print(f"\nConnected Causal Node:")
            print(f"  Labels: {':'.join(causal_node.labels)}")
            print(f"  Element ID: {causal_node.element_id}")
            
            # Step 2: Get all 1st and 2nd degree connections for this causal node
            # Step 3: Only keep causal nodes that have news from the same date
            degrees_query = """
            MATCH (start) WHERE elementId(start) = $causal_id
            
            // Get 1st degree connections that are causal nodes with news on the same date
            OPTIONAL MATCH (start)-[r1]-(first_degree)-[:HAS_NEWS]->(first_news:News)
            WHERE date(first_news.timestamp) = date($news_date)
            
            // Get 2nd degree connections that are causal nodes with news on the same date
            OPTIONAL MATCH (start)-[r1]-(intermediate)-[r2]-(second_degree)-[:HAS_NEWS]->(second_news:News)
            WHERE elementId(second_degree) <> elementId(start) 
            AND date(second_news.timestamp) = date($news_date)
            
            RETURN 
                start,
                collect(DISTINCT {
                    node: first_degree,
                    relationship: type(r1),
                    degree: 1,
                    news_count: size([(first_degree)-[:HAS_NEWS]->(n:News) WHERE date(n.timestamp) = date($news_date) | n])
                }) as first_degree_connections,
                collect(DISTINCT {
                    node: second_degree,
                    relationship: type(r2),
                    degree: 2,
                    news_count: size([(second_degree)-[:HAS_NEWS]->(n:News) WHERE date(n.timestamp) = date($news_date) | n])
                }) as second_degree_connections
            """
            
            result = session.run(degrees_query, {
                'causal_id': causal_node.element_id,
                'news_date': news_timestamp
            })
            connections_record = result.single()
            
            if connections_record:
                first_degree = connections_record['first_degree_connections']
                second_degree = connections_record['second_degree_connections']
                
                # Filter out null nodes
                valid_first_degree = [c for c in first_degree if c['node'] is not None]
                valid_second_degree = [c for c in second_degree if c['node'] is not None]
                
                print(f"\n1st Degree Causal Connections with news on {news_date} ({len(valid_first_degree)} nodes):")
                for conn in valid_first_degree:
                    node = conn['node']
                    print(f"  - {':'.join(node.labels)} (ID: {node.element_id}) via {conn['relationship']} ({conn['news_count']} news items)")
                
                print(f"\n2nd Degree Causal Connections with news on {news_date} ({len(valid_second_degree)} nodes):")
                for conn in valid_second_degree:
                    node = conn['node']
                    print(f"  - {':'.join(node.labels)} (ID: {node.element_id}) via {conn['relationship']} ({conn['news_count']} news items)")
                
                print(f"\n✅ Successfully explored causal network for date {news_date}!")
                print(f"Total 1st degree causal nodes with same-date news: {len(valid_first_degree)}")
                print(f"Total 2nd degree causal nodes with same-date news: {len(valid_second_degree)}")
                
                # Generate Neo4j Browser script to visualize the result graph
                print("\n" + "="*80)
                print("NEO4J BROWSER SCRIPT - Copy and paste this into Neo4j Browser:")
                print("="*80)
                
                neo4j_script = f"""
// Visualize causal network for selected news date: {news_date}
MATCH (start) WHERE elementId(start) = "{causal_node.element_id}"

// Get the starting node and its news
OPTIONAL MATCH (start)-[start_has_news:HAS_NEWS]->(start_news:News)
WHERE date(start_news.timestamp) = date("{news_date}")

// Get 1st degree connections with news on the same date
OPTIONAL MATCH (start)-[r1]-(first_degree)-[first_has_news:HAS_NEWS]->(first_news:News)
WHERE date(first_news.timestamp) = date("{news_date}")

// Get 2nd degree connections with news on the same date
OPTIONAL MATCH (start)-[r1]-(intermediate)-[r2]-(second_degree)-[second_has_news:HAS_NEWS]->(second_news:News)
WHERE elementId(second_degree) <> elementId(start) 
AND date(second_news.timestamp) = date("{news_date}")

RETURN start, r1, first_degree, r2, second_degree, intermediate,
    start_news, first_news, second_news,
    start_has_news, first_has_news, second_has_news
"""
                
                print(neo4j_script)
                print("="*80)
            
except Exception as e:
    print(f"Failed to explore causal connections: {e}")


Selecting a random news node and exploring causal connections...
------------------------------------------------------------
Selected News Node:
  ID: None
  Timestamp: 2025-01-03T00:00:00.000000000+00:00
  Date: 2025-01-03
  Sentiment: positive

Connected Causal Node:
  Labels: Node
  Element ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:96

1st Degree Causal Connections with news on 2025-01-03 (3 nodes):
  - Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:94) via CAUSES (1 news items)
  - Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:83) via CAUSES (1 news items)
  - Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:99) via CAUSES (1 news items)

2nd Degree Causal Connections with news on 2025-01-03 (3 nodes):
  - Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:98) via CAUSES (1 news items)
  - Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:94) via CAUSES (1 news items)
  - Node (ID: 4:acc9f7b3-cc38-4a48-b9c6-f3c424f88e4a:99) via CAUSES (1 news items)

✅ Successfully explored cau