In [None]:
import json
import requests

In [None]:
import os

# Read the Anthropic API key from the environment so the secret never has to be
# typed into (and saved with) the notebook. Falls back to the empty placeholder
# when ANTHROPIC_API_KEY is not set, which is the value this cell used before.
claude_api_key = os.environ.get("ANTHROPIC_API_KEY", "")

In [None]:
def get_keyword_context(text, keyword, context_chars=10):
    """
    Find every occurrence of a keyword in text and return it with surrounding context.

    Matching is an exact, case-sensitive substring search (``str.find``).
    Overlapping occurrences are all reported, e.g. "aa" is found twice in "aaa".

    Args:
        text: The text to search in
        keyword: The keyword to find. An empty keyword yields no matches.
        context_chars: Number of characters to include on each side (default: 10).
                       Negative values are treated as 0. The context is clipped
                       at the start and end of the text.

    Returns:
        List of lists in order of appearance, where each inner list contains:
        [keyword_start_index, keyword_end_index, context_string]
        keyword_end_index is exclusive, so text[start:end] == keyword.
    """
    # str.find("", i) returns i for every i <= len(text), so an empty keyword
    # would otherwise produce len(text) + 1 meaningless zero-width matches.
    if not keyword:
        return []

    # A negative padding would move the slice bounds *into* the match and
    # truncate (or empty) the context; clamp it so the keyword is always included.
    padding = max(0, context_chars)

    text_length = len(text)
    keyword_length = len(keyword)
    results = []

    match_start = text.find(keyword)
    while match_start != -1:
        match_end = match_start + keyword_length

        # Clip the context window to the bounds of the text.
        window_start = max(0, match_start - padding)
        window_end = min(text_length, match_end + padding)
        results.append([match_start, match_end, text[window_start:window_end]])

        # Resume one character later (not after the match) so that
        # overlapping occurrences are found as well.
        match_start = text.find(keyword, match_start + 1)

    return results

def dummy_read_job_id(id_):
    """Dummy function to simulate reading a job ID.

    The ``id_`` argument is ignored: whatever ID is passed, the current value
    of the module-level ``input_text`` variable is returned. ``input_text``
    must therefore be defined before this function is called.
    """
    return input_text

def locate_entity_positions_in_text(
    text_id: str,
    entity_value_pairs: list[dict],
    context_chars: int = 20
) -> tuple[list[list[dict]], list[dict]]:
    """
    Locate entity positions and extract surrounding contexts from a text document.

    Tool for finding the exact positions (start_offset, end_offset) of entities
    within a text document. The surrounding context helps distinguish between
    multiple occurrences of the same entity value and verify the correct instance.

    The document is loaded with ``dummy_read_job_id(text_id)`` and every value is
    searched with ``get_keyword_context``, i.e. as an exact, case-sensitive
    substring.

    Args:
        text_id: Unique identifier for the text document to search
        entity_value_pairs: List of dicts, each with a 'label' and a 'value' key.
                          Example:  [{"label": "PERSON", "value": "Alice"}, {"label": "ORG", "value": "TechCorp"}]
        context_chars: Number of characters to include on each side of the entity.
                      Helps identify the correct occurrence when the same entity
                      appears multiple times. Default is 20 characters left and right.

    Returns:
        Tuple containing:
        - found_contexts: List of annotation lists, one per input pair that was
                         found at least once (in input order). Each inner list
                         holds one annotation per occurrence, containing:
                         - 'label': Entity category
                         - 'value': Entity text
                         - 'start_offset': Start position in text (primary output)
                         - 'end_offset': End position in text (primary output)
                         - 'surround_context': Text excerpt for disambiguation
        - not_found: List of {'label', 'value'} dicts for entities not found in the text

    Raises:
        KeyError: If an item of entity_value_pairs lacks 'label' or 'value'.

    Example:
        >>> found, not_found = locate_entity_positions_in_text(
        ...     'doc_123',
        ...     [{'label': 'PERSON', 'value': 'Alice'}, {'label': 'ORG', 'value': 'TechCorp'}],
        ...     context_chars=30
        ... )
        >>> # Use context to identify which "Alice" occurrence is correct
    """
    text = dummy_read_job_id(text_id)
    found_contexts = []
    not_found = []

    for pair in entity_value_pairs:
        label, value = pair['label'], pair['value']

        # One annotation per occurrence of this value in the text.
        annotations = [
            {
                'label': label,
                'value': value,
                'start_offset': start,
                'end_offset': end,
                'surround_context': context,
            }
            for start, end, context in get_keyword_context(
                text, value, context_chars=context_chars
            )
        ]

        # An empty list means the value never occurred; report it separately
        # instead of adding an empty group to found_contexts.
        if annotations:
            found_contexts.append(annotations)
        else:
            not_found.append({'label': label, 'value': value})

    return found_contexts, not_found

In [None]:
# Sample 1: the word "Amazon" is used for a person, a company and a region.
# NOTE: both names are reassigned a few lines below, so when the whole cell
# runs this sample is overwritten and only sample 2 is in effect.
input_text = """The environmental policy analyst Dr. Lena Amazon has been appointed as an external advisor for Amazon, the e-commerce giant, as the company prepares its latest climate-impact report. In a briefing yesterday, Amazon stated that Dr. Amazon will lead a review of deforestation-risk models for operations near the Amazon, where several logistics suppliers are planning new routes along the river basin. The company emphasized that the collaboration between Amazon (the firm), Dr. Amazon (the researcher), and local conservation groups in the Amazon (the region) aims to clarify carbon-offset accounting disputes that surfaced earlier this year. Officials added that the final recommendations will determine whether Amazon expands its pilot program deeper into the Amazon later in 2025."""
# Comma-separated entity labels that get interpolated into the prompt.
entities = "PERSON, LOCATION, ORGANIZATION"

# Sample 2: the word "Apple" appears three times. This is the pair of values
# left in effect after the cell has run.
input_text = """Steve Jobs founded Apple in 1976. And he love eating Apple ans so he named the company Apple."""
entities = "ORGANISATION, FRUIT"

In [None]:
# Identifier embedded in the prompt as "text-id: ..." so that it can be passed
# back as the `text_id` argument of a tool call.
text_id = "1234"

# User prompt for the extraction task. Three values are interpolated:
# `text_id` and `input_text` inside the <text> tags, and `entities` inside the
# <entities> tags. Note that the "text-id:" line sits inside <text> together
# with the document itself.
prompt = f"""You are a named entity recognition expert who specializes in identifying and extracting specific entities from text with high precision.

Here is the text from which you need to extract entities:

<text>
text-id: {text_id}
{input_text}
</text>

Here is the list of entities you need to extract:

<entities>
{entities}
</entities>

Your task is to identify the exact values of each entity from the given text and return them in a structured format.

Follow these guidelines:

1. **Exact Matching**: Extract the entity values exactly as they appear in the text, preserving capitalization, spacing, and punctuation.

2. **Offset Calculation**: 
   - start_offset: The character position where the entity begins (0-indexed, counting from the start of the text)
   - end_offset: The character position where the entity ends (the position after the last character of the entity)

3. **Entity Identification**:
   - Only extract entities that are explicitly mentioned in the text
   - If an entity appears multiple times, extract all occurrences
   - If an entity is not found in the text, do not include it in the output

4. **Output Format**: Return a list of JSON objects, where each object has these keys:
   - entity_name: The name of the entity type (from the entities list)
   - entity_value: The exact text extracted from the input
   - start_offset: Starting character position
   - end_offset: Ending character position


After your analysis, provide your final answer as a valid JSON array. Your output should contain ONLY the JSON array with the extracted entities, with no additional explanation or text outside the array.
"""


In [None]:
# Conversation history sent to the API. It starts with the single user prompt;
# the tool loop later appends assistant turns and tool results to this list.
messages = [
    {
        "role": "user", 
        "content": prompt
    }
]

# Tool description passed in the request's "tools" field. The "name" matches
# the Python function locate_entity_positions_in_text, and the properties of
# "input_schema" use the same names as that function's parameters
# (text_id, entity_value_pairs, context_chars), so the tool input can be
# passed to the function as keyword arguments.
tool_definition = {
    "name": "locate_entity_positions_in_text",
    "description": (
        "Locate entity positions and extract surrounding contexts from a text "
        "document. Takes a text document ID and a list of entity (label, value) "
        "pairs. Returns the offsets and context of each match, along with "
        "lists of not-found entities."
    ),
    "input_schema": {
        "type": "object",
        "properties": {
            "text_id": {
                "type": "string",
                "description": "Unique identifier for the text document to search."
            },
            "entity_value_pairs": {
                "type": "array",
                "description": (
                    "List of entities to locate. Each item must contain a "
                    "'label' (e.g. PERSON, ORG) and 'value' (the text to search)."
                ),
                "items": {
                    "type": "object",
                    "properties": {
                        "label": {
                            "type": "string",
                            "description": "Entity category, e.g. PERSON, ORG."
                        },
                        "value": {
                            "type": "string",
                            "description": "Entity value text to locate."
                        }
                    },
                    "required": ["label", "value"]
                }
            },
            # Optional: not listed under "required" below.
            "context_chars": {
                "type": "integer",
                "description": (
                    "Number of characters of surrounding context to include "
                    "for each entity match. Default is 20."
                ),
                "default": 20
            }
        },
        "required": ["text_id", "entity_value_pairs"]
    }
}


In [None]:
# Request payload. The first call forces the model to use the locator tool;
# follow-up calls switch tool_choice to "auto" so it can answer in text.
data = {
    "model": "claude-sonnet-4-20250514",
    "max_tokens": 4096,
    "tools": [tool_definition],
    "tool_choice": {"type": "tool", "name": "locate_entity_positions_in_text"},
    "messages": messages
}

claude_url = "https://api.anthropic.com/v1/messages"
headers = {
    "Content-Type": "application/json",
    "x-api-key": claude_api_key,
    "anthropic-version": "2023-06-01"
}

# Allow-list of callables the model may invoke. The tool name comes from the
# model's response, so it is looked up here rather than passed to eval(),
# which would execute any expression the response happened to contain.
available_tools = {
    "locate_entity_positions_in_text": locate_entity_positions_in_text,
}

# Upper bound in seconds for one HTTP request; without it requests waits forever.
request_timeout_seconds = 120


def _post_to_claude(payload):
    """POST the payload to the Messages API and return (response, parsed JSON body).

    Raises:
        RuntimeError: If the parsed body contains an 'error' key.
    """
    reply = requests.post(
        claude_url, headers=headers, json=payload, timeout=request_timeout_seconds
    )
    reply_data = reply.json()
    if 'error' in reply_data:
        raise RuntimeError(f"Claude API error: {reply_data['error']}")
    return reply, reply_data


def _run_tool(tool_use):
    """Execute one tool_use block and return the matching tool_result block.

    Unknown tool names and malformed inputs are reported back to the model
    with "is_error" set instead of raising, so the model can correct the call.
    """
    result = {"type": "tool_result", "tool_use_id": tool_use['id']}

    tool = available_tools.get(tool_use['name'])
    if tool is None:
        result["content"] = f"Unknown tool: {tool_use['name']}"
        result["is_error"] = True
        return result

    try:
        found_contexts, not_found = tool(**tool_use['input'])
    except (TypeError, KeyError) as exc:
        # TypeError: unexpected/missing keyword arguments;
        # KeyError: an entity item without 'label' or 'value'.
        result["content"] = f"Invalid tool input: {exc!r}"
        result["is_error"] = True
        return result

    result["content"] = json.dumps({
        "found_contexts": found_contexts,
        "not_found": not_found
    })
    return result


# First API call
response, response_data = _post_to_claude(data)

# Loop to handle multiple tool-call rounds (max 3 times). The limit is checked
# only after the latest response has been inspected, so the answer that
# follows the last allowed round is still printed.
max_tool_calls = 3
tool_call_count = 0
while True:
    content = response_data.get('content', [])
    if not content:
        break

    # A single assistant turn may contain several tool_use blocks, and each
    # one needs its own tool_result in the next user message.
    tool_uses = [block for block in content if block.get('type') == 'tool_use']

    if not tool_uses:
        # No tool use: this is the final answer, print its text.
        for block in content:
            if block.get('type') == 'text':
                print(block['text'])
        break

    if tool_call_count >= max_tool_calls:
        print(f"Warning: Maximum tool calls ({max_tool_calls}) reached.")
        break

    tool_call_count += 1
    tool_results = []
    for tool_use in tool_uses:
        print("CALLING TOOL")
        tool_results.append(_run_tool(tool_use))

    # Add assistant's tool use to messages, followed by all tool results
    messages.append({"role": "assistant", "content": content})
    messages.append({"role": "user", "content": tool_results})

    # Next API call with tool results; the model may now answer in text
    data['messages'] = messages
    data['tool_choice'] = {"type": "auto"}
    response, response_data = _post_to_claude(data)
