In [29]:
text="A federal judge ruled Saturday that President Donald Trump\u2019s firing of a federal workforce watchdog was illegal \u2014 teeing up a Supreme Court showdown over the president\u2019s claim to nearly absolute control of the executive branch.\n\nU.S. District Judge Amy Berman Jackson concluded that Hampton Dellinger \u2014 confirmed last year as head of the Office of Special Counsel \u2014 may continue to serve his five-year term despite Trump\u2019s effort to remove him from the post via a brusque email last month.\n\nA law on the books for more than four decades specifies that the special counsel can be removed only for \u201cinefficiency, neglect of duty, or malfeasance in office,\u201d but the Justice Department argued that provision is unconstitutional because it impinges on the president\u2019s authority to control executive agencies.\n\nJackson ruled that Dellinger\u2019s duties, which include holding executive branch officials accountable for ethics breaches and fielding whistleblower complaints, were meant to be independent from the president, making the position a rare exception to the president\u2019s generally vast domain over the executive branch.\n\nDellinger\u2019s \u201cindependence is inextricably intertwined with the performance of his duties,\u201d Jackson wrote in a 67-page opinion. \u201cThe elimination of the restrictions on plaintiff\u2019s removal would be fatal to the defining and essential feature of the Office of Special Counsel as it was conceived by Congress and signed into law by the President: its independence. The Court concludes that they must stand.\u201d\n\nJustice Department attorneys contended Dellinger had significant power to act unilaterally, making it critical that he be under the control of the president, but Jackson said Trump\u2019s lawyers were exaggerating the special counsel\u2019s scope."



In [None]:
import boto3
import concurrent.futures
import time
import hashlib
import re

# Initialize AWS Comprehend client
client = boto3.client('comprehend', region_name='us-east-1')

# Simple in-memory cache
result_cache = {}

def chunk_text(text, max_length=5000):
    sentences = re.split(r'(?<=[.!?]) +', text)  # Split by sentence boundaries
    chunks = []
    current_chunk = ""
    
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_length:
            current_chunk += sentence + " "
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sentence + " "
    
    if current_chunk:
        chunks.append(current_chunk.strip())
    
    return chunks
def process_chunk(chunk, retries=2, backoff_factor=0.5):
    """Process a single chunk with AWS Comprehend with error handling and retry logic"""
    if not chunk or len(chunk.strip()) < 10:  # Skip tiny chunks
        return {'chunk': chunk, 'error': 'Chunk too small', 'size': len(chunk.encode('utf-8'))}
        
    chunk_size = len(chunk.encode('utf-8'))
    if chunk_size > 5000:  # AWS limit
        return {'chunk': chunk, 'error': 'Chunk exceeds size limit', 'size': chunk_size}
    
    # Create a cache key based on the content
    cache_key = hashlib.md5(chunk.encode('utf-8')).hexdigest()
    if cache_key in result_cache:
        return result_cache[cache_key]
    
    attempt = 0
    while attempt <= retries:
        try:
            response = client.detect_targeted_sentiment(
                Text=chunk,
                LanguageCode='en'
            )
            
            result = {'chunk': chunk, 'response': response, 'size': chunk_size}
            result_cache[cache_key] = result
            return result
            
        except Exception as e:
            attempt += 1
            if attempt <= retries:
                # Exponential backoff
                delay = backoff_factor * (2 ** (attempt - 1))
                time.sleep(delay)
            else:
                return {'chunk': chunk, 'error': str(e), 'size': chunk_size}

def analyze_efficiently(text, max_workers=3):
    """More efficient parallel processing of text"""
    start_time = time.time()
    
    # Split text with optimized method
    chunks = chunk_text(text)
    print(f"Split text into {len(chunks)} chunks")
    
    results = []
    
    # Use fewer workers to avoid throttling
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_chunk = {executor.submit(process_chunk, chunk): i for i, chunk in enumerate(chunks)}
        
        for future in concurrent.futures.as_completed(future_to_chunk):
            try:
                result = future.result()
                results.append(result)
            except Exception as e:
                print(f"Unexpected error: {e}")
    
    # Filter out errors
    successful_results = [r for r in results if 'error' not in r]
    error_results = [r for r in results if 'error' in r]
    print(f"Completed processing {(successful_results)} chunks")
    print(f"Errors: {(error_results)}")
    
    end_time = time.time()
    print(f"Completed in {end_time - start_time:.2f} seconds")
    print(f"Successful: {len(successful_results)}, Errors: {len(error_results)}")
    print(f"Total size: {sum(r['size'] for r in successful_results)} bytes")
    return successful_results, error_results

def extract_key_entities(results, min_confidence=0.8):
    """Extract key entities with high confidence scores"""
    entities = {}
    sentiments = {}
    
    for result in results:
        response = client.detect_entities(Text=result['chunk'], LanguageCode='en')
        response_sentitment = client.detect_targeted_sentiment(Text=result['chunk'], LanguageCode='en')
        if 'response' not in result:
            continue
            
            
        for entity_group in response_sentitment['Entities']:
            print(entity_group)
            if entity_group['Mentions'][0]['Score'] >= 0.9:
                print('Entity')
                sentiments[entity_group['Mentions'][0]['Text']] = entity_group['Mentions'][0]['MentionSentiment']['Sentiment']
                
        print(sentiments)
                    
        
        for entity_group in response['Entities']:
            entity_text = entity_group['Text']
            entity_type = entity_group['Type']
            entity_score = entity_group['Score']  # Confidence score
            
            # Filter by confidence score
            if entity_score >= min_confidence:
                key = f"{entity_text}_{entity_type}"
                
                if key not in entities:
                    entities[key] = {
                        'text': entity_text,
                        'type': entity_type,
                        'score': entity_score,
                        'mentions': 1
                    }
                else:
                    entities[key]['mentions'] += 1
                    # Keep highest confidence score if we see this entity again
                    entities[key]['score'] = max(entities[key]['score'], entity_score)
    
    # Convert to list and sort by confidence score
    entity_list = list(entities.values())
    entity_list.sort(key=lambda x: x['score'], reverse=True)
    
    return entity_list, sentiments

# Process the text efficiently
successful_results, error_results = analyze_efficiently(text)
entities = extract_key_entities(successful_results)
    

Split text into 1 chunks
Completed processing [{'chunk': 'A federal judge ruled Saturday that President Donald Trump’s firing of a federal workforce watchdog was illegal — teeing up a Supreme Court showdown over the president’s claim to nearly absolute control of the executive branch.\n\nU.S. District Judge Amy Berman Jackson concluded that Hampton Dellinger — confirmed last year as head of the Office of Special Counsel — may continue to serve his five-year term despite Trump’s effort to remove him from the post via a brusque email last month.\n\nA law on the books for more than four decades specifies that the special counsel can be removed only for “inefficiency, neglect of duty, or malfeasance in office,” but the Justice Department argued that provision is unconstitutional because it impinges on the president’s authority to control executive agencies.\n\nJackson ruled that Dellinger’s duties, which include holding executive branch officials accountable for ethics breaches and fieldin

([{'text': 'Jackson',
   'type': 'PERSON',
   'score': 0.9995377063751221,
   'mentions': 3},
  {'text': 'Trump',
   'type': 'PERSON',
   'score': 0.9993869066238403,
   'mentions': 2},
  {'text': 'Dellinger',
   'type': 'PERSON',
   'score': 0.9989292025566101,
   'mentions': 3},
  {'text': 'last year',
   'type': 'DATE',
   'score': 0.9988424777984619,
   'mentions': 1},
  {'text': 'Saturday',
   'type': 'DATE',
   'score': 0.9984920024871826,
   'mentions': 1},
  {'text': 'last month',
   'type': 'DATE',
   'score': 0.9980170130729675,
   'mentions': 1},
  {'text': 'Office of Special Counsel',
   'type': 'ORGANIZATION',
   'score': 0.9967654347419739,
   'mentions': 2},
  {'text': 'Congress',
   'type': 'ORGANIZATION',
   'score': 0.9965865612030029,
   'mentions': 1},
  {'text': '67-page',
   'type': 'QUANTITY',
   'score': 0.9960376024246216,
   'mentions': 1},
  {'text': 'Supreme Court',
   'type': 'ORGANIZATION',
   'score': 0.9934619069099426,
   'mentions': 1},
  {'text': 'Amy

In [31]:
entity_list = entities[0]
sentiments = entities[1]

NER = []

for dicts in entity_list:
    name = dicts['text']
    type = dicts['type']
    if name in sentiments:
        sentiment = sentiments[name]
    else:
        sentiment = 'NEUTRAL'
    NER.append({'name': name, 'type': type, 'sentiment': sentiment})
    
    
NER

[{'name': 'Jackson', 'type': 'PERSON', 'sentiment': 'NEUTRAL'},
 {'name': 'Trump', 'type': 'PERSON', 'sentiment': 'NEGATIVE'},
 {'name': 'Dellinger', 'type': 'PERSON', 'sentiment': 'NEUTRAL'},
 {'name': 'last year', 'type': 'DATE', 'sentiment': 'NEUTRAL'},
 {'name': 'Saturday', 'type': 'DATE', 'sentiment': 'NEUTRAL'},
 {'name': 'last month', 'type': 'DATE', 'sentiment': 'NEUTRAL'},
 {'name': 'Office of Special Counsel',
  'type': 'ORGANIZATION',
  'sentiment': 'NEUTRAL'},
 {'name': 'Congress', 'type': 'ORGANIZATION', 'sentiment': 'NEUTRAL'},
 {'name': '67-page', 'type': 'QUANTITY', 'sentiment': 'NEUTRAL'},
 {'name': 'Supreme Court', 'type': 'ORGANIZATION', 'sentiment': 'NEUTRAL'},
 {'name': 'Amy Berman Jackson', 'type': 'PERSON', 'sentiment': 'NEUTRAL'},
 {'name': 'Justice Department',
  'type': 'ORGANIZATION',
  'sentiment': 'NEUTRAL'},
 {'name': 'more than four decades',
  'type': 'QUANTITY',
  'sentiment': 'NEUTRAL'},
 {'name': 'five-year term', 'type': 'QUANTITY', 'sentiment': 'NEU

In [32]:
from llama_index.llms.bedrock import Bedrock
from llama_index.core import Settings

llm = Bedrock(model="anthropic.claude-3-sonnet-20240229-v1:0") 
Settings.llm = llm

In [33]:
from llama_index.core import StorageContext
from llama_index.graph_stores.neptune import NeptuneDatabaseGraphStore

graph_store = NeptuneDatabaseGraphStore(
    host="db-neptune-1.cluster-c74u2gy0smnm.us-east-1.neptune.amazonaws.com", 
    port=8182,
    node_label="User"
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

In [53]:
# Use a more direct approach with requests to communicate with Ollama
import json
import re
import requests

def query_knowledge_graph(question, entities, context_text=None):
    """
    Use Ollama directly to identify relationships between entities instead of langchain.
    
    Args:
        question: Question being asked
        entities: List of entities to analyze
        context_text: Text context about these entities
    
    Returns:
        List of relationship dictionaries
    """
    results = []
    
    # Ollama API endpoint (default for local installation)
    ollama_api_url = "http://localhost:11434/api/chat"
    
    # Create entity pairs for analysis
    entity_pairs = []
    for i, entity1 in enumerate(entities):
        for entity2 in entities[i+1:]:
            entity_pairs.append((entity1, entity2))
    
    print(f"Analyzing {len(entity_pairs)} entity pairs with Ollama")
    
    # For each pair of entities, ask Ollama to determine their relationship
    for entity1, entity2 in entity_pairs:
        if entity1 == entity2:
            continue
        print(f"Determining relationship between {entity1} and {entity2}...")
        
        # Create a prompt for Ollama
        prompt = f"""
        Based on the following context, identify the relationship between "{entity1}" and "{entity2}".
        
        Context information:
        {context_text or "No specific context provided."}
        
        For the entities "{entity1}" and "{entity2}", please determine:
        1. Is there a direct relationship between them? If so, what is it?
        2. What is the direction of this relationship? From which entity to which?
        
        Return your response in this specific JSON format:
        {{
            "source": "The source entity name",
            "target": "The target entity name",
            "relation": "A BRIEF_DESCRIPTIVE_PHRASE_IN_CAPS about their relationship",
            "description": "A one-sentence description of the relationship"
        }}
        
        Only return the JSON with no additional text.
        """
        
        try:
            # Send request to Ollama
            payload = {
                "model": "llama3.2",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0,
                "stream": False
            }
            
            response = requests.post(ollama_api_url, json=payload)
            response_data = response.json()
            
            # Extract the content from the response
            content = response_data.get("message", {}).get("content", "")
            
            # Process the response - try to parse as JSON
            try:
                # Extract JSON by finding content between curly braces
                match = re.search(r'\{.*\}', content, re.DOTALL)
                if match:
                    json_str = match.group(0)
                    relation_data = json.loads(json_str)
                    
                    # Add to results with source/target properties 
                    results.append({
                        "source": relation_data.get("source", entity1),
                        "target": relation_data.get("target", entity2),
                        "relation": relation_data.get("relation", "RELATED_TO"),
                        "source_properties": {"entity_type": "person"}, 
                        "target_properties": {"entity_type": "person"},
                        "description": relation_data.get("description", "")
                    })
                else:
                    print(f"  Warning: No JSON data found in Ollama response")
                    # Add a default relationship
                    results.append({
                        "source": entity1,
                        "target": entity2,
                        "relation": "RELATED_TO",
                        "source_properties": {"entity_type": "person"},
                        "target_properties": {"entity_type": "person"},
                        "description": "Entities appear in the same context"
                    })
            except json.JSONDecodeError as e:
                print(f"  Error parsing Ollama response as JSON: {e}")
                print(f"  Raw response: {content}")
                # Add a default relationship
                results.append({
                    "source": entity1,
                    "target": entity2,
                    "relation": "APPEARS_WITH",
                    "source_properties": {"entity_type": "person"},
                    "target_properties": {"entity_type": "person"},
                    "description": "Entities appear in the same context"
                })
        except Exception as e:
            print(f"  Error getting Ollama response: {e}")
            # Add a default relationship on error
            results.append({
                "source": entity1,
                "target": entity2,
                "relation": "UNKNOWN",
                "source_properties": {"entity_type": "person"},
                "target_properties": {"entity_type": "person"},
                "description": "Relationship could not be determined due to an error"
            })
    
    # Optionally, you could also add self-relationships for each entity
    for entity in entities:
        # Determine entity type based on typical names of organizations vs people
        entity_type = "organization" if entity in ["Supreme Court", "Office of Special Counsel"] else "person"
        
        results.append({
            "source": entity,
            "target": entity,
            "relation": "SELF",
            "source_properties": {"entity_type": entity_type},
            "target_properties": {"entity_type": entity_type},
            "description": "Self-reference"
        })
    
    return results

# Example usage:
if __name__ == "__main__":
    article_text = """
    A federal judge ruled Saturday that President Donald Trump's firing of a federal workforce watchdog was illegal — teeing up a Supreme Court showdown over the president's claim to nearly absolute control of the executive branch.

    U.S. District Judge Amy Berman Jackson concluded that Hampton Dellinger — confirmed last year as head of the Office of Special Counsel — may continue to serve his five-year term despite Trump's effort to remove him from the post via a brusque email last month.

    A law on the books for more than four decades specifies that the special counsel can be removed only for "inefficiency, neglect of duty, or malfeasance in office," but the Justice Department argued that provision is unconstitutional because it impinges on the president's authority to control executive agencies.
    """

    entities = ["Donald Trump", "Hampton Dellinger", "Amy Berman Jackson", "Supreme Court", "Office of Special Counsel"]
    question = "What is the relationship between Trump and Dellinger based on the article?"

    # Get relationships  
    relationships = query_knowledge_graph(question, entities, article_text)

    # Print results
    print("\nFound relationships:")
    for rel in relationships:
        print(f"{rel['source']} --[{rel['relation']}]--> {rel['target']}: {rel['description']}")

Analyzing 10 entity pairs with Ollama
Determining relationship between Donald Trump and Hampton Dellinger...
Determining relationship between Donald Trump and Amy Berman Jackson...
Determining relationship between Donald Trump and Supreme Court...
Determining relationship between Donald Trump and Office of Special Counsel...
Determining relationship between Hampton Dellinger and Amy Berman Jackson...
Determining relationship between Hampton Dellinger and Supreme Court...
Determining relationship between Hampton Dellinger and Office of Special Counsel...
Determining relationship between Amy Berman Jackson and Supreme Court...
Determining relationship between Amy Berman Jackson and Office of Special Counsel...
Determining relationship between Supreme Court and Office of Special Counsel...

Found relationships:
Donald Trump --[IS DIRECTLY RELATED AS THE PRESIDENT TO A FEDERAL WORKFORCE WATCHDOG]--> Hampton Dellinger: The President's authority is impeded by the ruling regarding the watchdo

In [40]:
def inspect_relationship_structure(entities):
    """Debug function to examine the structure of relationships"""
    for entity in entities:
        print(f"Entity: {entity}")
        try:
            rel_map = graph_store.get_rel_map([entity], depth=1)
            print(f"  Relationship map has {len(rel_map)} relation types")
            
            for rel_type, rels in rel_map.items():
                print(f"  Relation type: {rel_type}, count: {len(rels)}")
                if rels:
                    sample_rel = rels[0]
                    # Use class name instead of type()
                    obj_class = sample_rel.__class__.__name__ if hasattr(sample_rel, '__class__') else "Unknown"
                    print(f"  Sample relationship class: {obj_class}")
                    print(f"  Value: {sample_rel}")
                    
                    # Get attributes if it's an object
                    if not isinstance(sample_rel, (list, tuple, str, int, float, bool, dict)):
                        try:
                            attrs = [attr for attr in dir(sample_rel) if not attr.startswith('__')]
                            print(f"  Available attributes: {attrs}")
                        except Exception as e:
                            print(f"  Error getting attributes: {e}")
        except Exception as e:
            print(f"  Error examining relationships for {entity}: {e}")
    
    # Test triplet directly
    print("\nTesting direct triplet insertion:")
    try:
        # Add a test triplet
        graph_store.upsert_triplet("TestSubj", "TEST_REL", "TestObj")
        # Get the structure
        test_rel = graph_store.get_rel_map(["TestSubj"], depth=1)
        print(f"  Result structure: {test_rel}")
    except Exception as e:
        print(f"  Test failed: {e}")

In [43]:
query_knowledge_graph(text,[entity['name'] for entity in NER])

Checking relationships for entity: Jackson
  Found relation type: Jackson with 0 relationships
Checking relationships for entity: Trump
  Found relation type: Trump with 1 relationships
  Processing relationship #0: ['Trump', 'RELATED_TO', 'Dellinger']
Checking relationships for entity: Dellinger
  Found relation type: Dellinger with 0 relationships
Checking relationships for entity: last year
  Found relation type: last year with 0 relationships
Checking relationships for entity: Saturday
  Found relation type: Saturday with 0 relationships
Checking relationships for entity: last month
  Found relation type: last month with 0 relationships
Checking relationships for entity: Office of Special Counsel
  Found relation type: Office of Special Counsel with 0 relationships
Checking relationships for entity: Congress
  Found relation type: Congress with 0 relationships
Checking relationships for entity: 67-page
  Found relation type: 67-page with 0 relationships
Checking relationships for e

[{'source': 'Trump',
  'target': 'RELATED_TO',
  'relation': 'Dellinger',
  'source_properties': {'type': 'Entity', 'name': 'Trump', 'occurrences': 2},
  'target_properties': {}},
 {'source': 'Supreme Court',
  'target': 'RELATED_TO',
  'relation': 'Dellinger',
  'source_properties': {'type': 'Entity',
   'name': 'Supreme Court',
   'occurrences': 2},
  'target_properties': {}}]

In [60]:
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.structure.graph import Graph
import asyncio
import nest_asyncio

# This allows nested event loops in Jupyter
nest_asyncio.apply()

# Function to add entities and relationships to Neptune
def add_entities_to_neptune(entities, relationships):
    # Create connection to Neptune
    connection = DriverRemoteConnection("wss://db-neptune-1.cluster-c74u2gy0smnm.us-east-1.neptune.amazonaws.com:8182/gremlin",'g')
    g = Graph().traversal().withRemote(connection)
    
    # Dictionary to keep track of added entities
    added_entities = {}
    
    # Add entities first
    for entity in entities:
        entity_name = entity
        entity_type = "organization" if entity in ["Supreme Court", "Office of Special Counsel"] else "person"
        
        print(f"Adding entity: {entity_name} as {entity_type}")
        
        # Add vertex if it doesn't exist
        vertex = g.V().has('name', entity_name).toList()
        if not vertex:
            result = g.addV(entity_type).property('name', entity_name).next()
            added_entities[entity_name] = result.id
        else:
            added_entities[entity_name] = vertex[0].id
    
    # Now add relationships
    for rel in relationships:
        if rel["relation"] != "SELF":  # Skip self-relationships
            source = rel["source"]
            target = rel["target"]
            relation = rel["relation"]
            description = rel["description"]
            
            print(f"Adding relationship: {source} --[{relation}]--> {target}")
            
            # Add edge between vertices
            g.V().has('name', source).addE(relation).to(
                g.V().has('name', target)
            ).property('description', description).next()
    
    # Close the connection
    connection.close()
    
    return added_entities

# Example usage with the data from query_knowledge_graph
if __name__ == "__main__":
    # Get the entities and relationships from your previous code
    entities = ["Donald Trump", "Hampton Dellinger", "Amy Berman Jackson", "Supreme Court", "Office of Special Counsel"]
    relationships = query_knowledge_graph(question, entities, article_text)
    
    # Add them to Neptune
    

Analyzing 10 entity pairs with Ollama
Determining relationship between Donald Trump and Hampton Dellinger...
Determining relationship between Donald Trump and Amy Berman Jackson...
Determining relationship between Donald Trump and Supreme Court...
Determining relationship between Donald Trump and Office of Special Counsel...
Determining relationship between Hampton Dellinger and Amy Berman Jackson...
Determining relationship between Hampton Dellinger and Supreme Court...
Determining relationship between Hampton Dellinger and Office of Special Counsel...
Determining relationship between Amy Berman Jackson and Supreme Court...
Determining relationship between Amy Berman Jackson and Office of Special Counsel...
Determining relationship between Supreme Court and Office of Special Counsel...


In [61]:
add_entities_to_neptune(entities, relationships)

Adding entity: Donald Trump as person


ClientConnectorError: Cannot connect to host db-neptune-1.cluster-c74u2gy0smnm.us-east-1.neptune.amazonaws.com:8182 ssl:default [Connect call failed ('172.31.87.61', 8182)]

In [None]:
%pip install requests_aws4auth
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.structure.graph import Graph
from requests_aws4auth import AWS4Auth
import os

# Configure with your actual endpoint
neptune_endpoint = "wss://db-neptune-1.cluster-c74u2gy0smnm.us-east-1.neptune.amazonaws.com:8182/gremlin"

def connect_neptune():
    try:
        # For IAM authentication (if enabled)
        auth = AWS4Auth(
            os.environ["AWS_ACCESS_KEY_ID"],
            os.environ["AWS_SECRET_ACCESS_KEY"],
            "us-east-1",  # Match your region
            "neptune-db"
        )
        
        connection = DriverRemoteConnection(
            neptune_endpoint,
            "g",
            ssl=True,
            headers={"Authorization": auth}  # Remove if IAM auth isn't used
        )
        
        g = Graph().traversal().withRemote(connection)
        print("Connection successful. Neptune version:", g.V().limit(1).next())
        return g
        
    except Exception as e:
        print(f"Connection failed: {str(e)}")
        raise

# Usage
graph = connect_neptune()
