In [1]:
# Import necessary libraries
import os
import boto3
import json
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Load environment variables
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_REGION = os.getenv("AWS_REGION")
BEDROCK_INFERENCE_PROFILE_ARN = os.getenv("BEDROCK_INFERENCE_PROFILE_ARN")

# Validate environment variables.
# An explicit check is used instead of `assert` because assertions are removed
# when Python runs with -O, which would let unset (None) values reach boto3.
# All missing variables are reported together so they can be fixed in one pass.
_missing_env_vars = [
    name
    for name, value in {
        "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID,
        "AWS_SECRET_ACCESS_KEY": AWS_SECRET_ACCESS_KEY,
        "AWS_REGION": AWS_REGION,
        "BEDROCK_INFERENCE_PROFILE_ARN": BEDROCK_INFERENCE_PROFILE_ARN,
    }.items()
    if not value
]
if _missing_env_vars:
    raise RuntimeError("; ".join(f"{name} is not set" for name in _missing_env_vars))

# Initialize AWS Bedrock client (runtime API, used for model invocation)
bedrock = boto3.client(
    service_name="bedrock-runtime",
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name=AWS_REGION,
)

logging.info("AWS Bedrock client initialized successfully.")

2024-12-07 11:41:13,915 - INFO - AWS Bedrock client initialized successfully.


In [8]:
def create_prompt(query, context):
    """
    Create a prompt for AWS Bedrock to ensure a single JSON response.

    The prompt is laid out as one chat turn: the instructions and rules form
    the system block (``<<SYS>>`` ... ``<</SYS>>``), followed by the query and
    context as the user message, closed by ``[/INST]``.

    NOTE(review): this assumes the model behind the inference profile
    understands Llama-2-style ``[INST]``/``<<SYS>>`` tags -- confirm against
    the deployed model.

    Args:
        query (str): Name of the system to ask about. The template embeds it
            in the question 'What are the systems that interact with "<query>"?',
            so pass the system name rather than a full question.
        context (list): List of interactions; each item is a mapping with the
            keys 'Source', 'Target', 'Integration' and 'Context'.

    Returns:
        str: Formatted prompt for Bedrock.

    Raises:
        KeyError: If an item in ``context`` lacks one of the four keys.
    """
    # One line per interaction, in the order given by the caller.
    formatted_context = "\n".join(
        f"Source: {item['Source']}, Target: {item['Target']}, Integration: {item['Integration']}, Context: {item['Context']}"
        for item in context
    )

    # The closing delimiters must mirror the opening ones: "<<SYS>>" is closed
    # by "<</SYS>>" and "[INST]" by "[/INST]". Doubled braces are literal
    # braces inside the f-string.
    prompt = f"""
<s>[INST] <<SYS>>
You are a structured data generation assistant. Your task is to generate a single cohesive JSON response based on the given context and query.

Rules:
1. The response must be a single JSON object with the following structure:
   {{
       "nodes": [
           {{"id": "System Name"}},
           ...
       ],
       "edges": [
           {{"source": "System A", "target": "System B", "integration": "Type", "context": "Details"}},
           ...
       ]
   }}
2. Do not split the JSON into multiple sections.
3. Do not include any text outside the JSON object.
4. Deduplicate nodes and edges in the response.
<</SYS>>

Query: What are the systems that interact with "{query}"?

Context:
{formatted_context}

Response: [/INST]
"""
    return prompt

In [3]:
# Define a function to test AWS Bedrock
def test_bedrock_connection(prompt):
    """
    Test AWS Bedrock connection by sending a prompt and receiving a response.

    Args:
        prompt (str): The prompt to send to AWS Bedrock.

    Returns:
        dict: The response from AWS Bedrock.
    """
    try:
        logging.info("Sending prompt to AWS Bedrock...")
        response = bedrock.invoke_model(
            body=json.dumps({
                "prompt": prompt,
                "max_gen_len": 1500,
                "temperature": 0.1,
                "top_p": 0.9
            }),
            modelId=BEDROCK_INFERENCE_PROFILE_ARN,
            contentType="application/json",
            accept="application/json",
        )

        response_body = json.loads(response["body"].read())
        logging.info("Received response from AWS Bedrock.")
        return response_body
    except Exception as e:
        logging.error(f"Error invoking AWS Bedrock: {e}")
        raise

In [10]:
import json

def extract_json_from_response(response):
    """
    Extract valid JSON from Bedrock's response, either directly or using delimiters.

    The text under the "generation" key is first parsed as-is. If that fails,
    the first JSON value starting at the first '{' is decoded, so surrounding
    text (code fences, explanations, trailing notes that contain braces) is
    ignored.

    Args:
        response (dict): The response from AWS Bedrock.

    Returns:
        dict: Extracted JSON object.

    Raises:
        ValueError: If the text contains no '{' or no '}', or if the text
            starting at the first '{' is not valid JSON
            (json.JSONDecodeError, a ValueError subclass).
    """
    try:
        # Get the raw response text; a missing or null "generation" counts as empty
        response_text = (response.get("generation") or "").strip()
        logging.info(f"Raw Bedrock Response Text: {response_text}")

        # First, try parsing the response as direct JSON
        try:
            parsed_response = json.loads(response_text)
            logging.info("Parsed direct JSON response successfully.")
            return parsed_response
        except json.JSONDecodeError:
            logging.info("Response is not direct JSON, falling back to delimiter-based parsing.")

        # Use fallback parsing for responses with delimiters
        start_index = response_text.find('{')
        end_index = response_text.rfind('}')
        if start_index == -1 or end_index == -1:
            raise ValueError("Valid JSON object not found in the response.")

        # raw_decode stops at the end of the first complete JSON value, so
        # braces appearing in text after the object cannot corrupt the parse
        # (slicing up to the last '}' would include that trailing text).
        parsed_response, _ = json.JSONDecoder().raw_decode(response_text, start_index)
        return parsed_response

    except Exception as e:
        logging.error(f"Error extracting JSON: {e}")
        raise

In [12]:
def pretty_print_json(json_data):
    """
    Write a JSON-serializable object to stdout with 4-space indentation.

    Any failure while serializing or printing is reported on stdout instead
    of being raised.

    Args:
        json_data (dict): JSON object to pretty print.

    Returns:
        None
    """
    try:
        # Serialize and print in one step; both stay inside the guarded region.
        print(json.dumps(json_data, indent=4))
    except Exception as error:
        print(f"Error in pretty printing JSON: {error}")

# Example usage: a one-node graph whose edges all start at "Finance System".
extracted_json = {
    "nodes": [{"id": "Finance System"}],
    "edges": [
        {"source": "Finance System", "target": target, "integration": integration, "context": context}
        # (target, integration, context) for each outgoing edge
        for target, integration, context in (
            ("Inventory System", "ETL", "Finance"),
            ("HR System", "CDC-Stream", "Alumni Relations"),
            ("CRM System", "ETL", "Finance"),
        )
    ],
}

pretty_print_json(extracted_json)

{
    "nodes": [
        {
            "id": "Finance System"
        }
    ],
    "edges": [
        {
            "source": "Finance System",
            "target": "Inventory System",
            "integration": "ETL",
            "context": "Finance"
        },
        {
            "source": "Finance System",
            "target": "HR System",
            "integration": "CDC-Stream",
            "context": "Alumni Relations"
        },
        {
            "source": "Finance System",
            "target": "CRM System",
            "integration": "ETL",
            "context": "Finance"
        }
    ]
}


In [14]:
# Define test query and context.
# create_prompt() already wraps the query in
# 'What are the systems that interact with "<query>"?', so only the system
# name is passed here; passing a full question would nest it inside itself.
test_query = "Finance System"
test_context = [
    {"Source": "Finance System", "Target": "Inventory System", "Integration": "ETL", "Context": "Finance"},
    {"Source": "Finance System", "Target": "HR System", "Integration": "CDC-Stream", "Context": "Alumni Relations"},
    {"Source": "Finance System", "Target": "CRM System", "Integration": "ETL", "Context": "Finance"}
]

# Create the prompt
test_prompt = create_prompt(test_query, test_context)

# Test Bedrock connection and extract JSON
try:
    response = test_bedrock_connection(test_prompt)
    extracted_json = extract_json_from_response(response)
    logging.info(f"Extracted JSON: {extracted_json}")

    # Pretty print the extracted JSON
    pretty_print_json(extracted_json)

except Exception as e:
    # Best-effort demo cell: report the failure without aborting the notebook run.
    logging.error(f"Test failed: {e}")

2024-12-07 11:50:35,459 - INFO - Sending prompt to AWS Bedrock...
2024-12-07 11:50:36,741 - INFO - Received response from AWS Bedrock.
2024-12-07 11:50:36,741 - INFO - Raw Bedrock Response Text: ```json
{
  "nodes": [
    {"id": "Finance System"}
  ],
  "edges": [
    {"source": "Finance System", "target": "Inventory System", "integration": "ETL", "context": "Finance"},
    {"source": "Finance System", "target": "HR System", "integration": "CDC-Stream", "context": "Alumni Relations"},
    {"source": "Finance System", "target": "CRM System", "integration": "ETL", "context": "Finance"}
  ]
}
```
2024-12-07 11:50:36,741 - INFO - Response is not direct JSON, falling back to delimiter-based parsing.
2024-12-07 11:50:36,741 - INFO - Extracted JSON: {'nodes': [{'id': 'Finance System'}], 'edges': [{'source': 'Finance System', 'target': 'Inventory System', 'integration': 'ETL', 'context': 'Finance'}, {'source': 'Finance System', 'target': 'HR System', 'integration': 'CDC-Stream', 'context': 'Al

{
    "nodes": [
        {
            "id": "Finance System"
        }
    ],
    "edges": [
        {
            "source": "Finance System",
            "target": "Inventory System",
            "integration": "ETL",
            "context": "Finance"
        },
        {
            "source": "Finance System",
            "target": "HR System",
            "integration": "CDC-Stream",
            "context": "Alumni Relations"
        },
        {
            "source": "Finance System",
            "target": "CRM System",
            "integration": "ETL",
            "context": "Finance"
        }
    ]
}


In [15]:
# Simulated Bedrock response: explanatory text followed by the JSON answer,
# stored under the same "generation" key that the extraction helper reads.
bedrock_response = dict(
    generation="""
    ## Step 1: Identify the query and context
    The query is "What are the systems that interact with Finance System?"
    ...

    The final answer is:
    {
      "nodes": [
        {"id": "Finance System"},
        {"id": "Inventory System"},
        {"id": "HR System"},
        {"id": "CRM System"}
      ],
      "edges": [
        {"source": "Finance System", "target": "Inventory System", "integration": "ETL", "context": "Finance"},
        {"source": "Finance System", "target": "HR System", "integration": "CDC-Stream", "context": "Alumni Relations"},
        {"source": "Finance System", "target": "Inventory System", "integration": "SFTP", "context": "Student Services"},
        {"source": "Finance System", "target": "CRM System", "integration": "ETL", "context": "Finance"}
      ]
    }
    """
)

In [16]:
import json
import re

def extract_json_from_response(response):
    """
    Extract valid JSON from Bedrock's response, handling explanatory text.

    The text under the "generation" key is printed, then the first JSON value
    starting at the first '{' is decoded. Text before the object, and text
    after it (even if it contains braces), is ignored.

    Args:
        response (dict): The response from AWS Bedrock.

    Returns:
        dict: Extracted JSON object.

    Raises:
        ValueError: If no '{' ... '}' span exists in the text, or if the text
            starting at the first '{' is not valid JSON (the underlying
            json.JSONDecodeError is attached as the cause).
    """
    try:
        # Get the raw response text; a missing or null "generation" counts as empty
        response_text = (response.get("generation") or "").strip()
        print(f"Raw Bedrock Response Text:\n{response_text}\n")

        # Require a brace-delimited span somewhere in the text
        json_match = re.search(r"\{.*\}", response_text, re.DOTALL)
        if not json_match:
            raise ValueError("Valid JSON object not found in the response.")

        # Decode from the first '{' and stop at the end of that JSON value.
        # Parsing the whole greedy match would fail whenever text after the
        # object contains another '}'.
        parsed_response, _ = json.JSONDecoder().raw_decode(response_text, json_match.start())
        return parsed_response

    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        raise ValueError("Error parsing JSON from response.") from e
    except Exception as e:
        print(f"Error extracting JSON: {e}")
        raise

In [17]:
try:
    # Test the function with the simulated response
    extracted_json = extract_json_from_response(bedrock_response)
    print("Extracted JSON:")
    print(json.dumps(extracted_json, indent=4))
except Exception as e:
    print(f"Test failed: {e}")

Raw Bedrock Response Text:
## Step 1: Identify the query and context
    The query is "What are the systems that interact with Finance System?"
    ...

    The final answer is:
    {
      "nodes": [
        {"id": "Finance System"},
        {"id": "Inventory System"},
        {"id": "HR System"},
        {"id": "CRM System"}
      ],
      "edges": [
        {"source": "Finance System", "target": "Inventory System", "integration": "ETL", "context": "Finance"},
        {"source": "Finance System", "target": "HR System", "integration": "CDC-Stream", "context": "Alumni Relations"},
        {"source": "Finance System", "target": "Inventory System", "integration": "SFTP", "context": "Student Services"},
        {"source": "Finance System", "target": "CRM System", "integration": "ETL", "context": "Finance"}
      ]
    }

Extracted JSON:
{
    "nodes": [
        {
            "id": "Finance System"
        },
        {
            "id": "Inventory System"
        },
        {
            "