In [1]:
# Step 1: Load CSV File
import pandas as pd

def load_csv(file_path):
    """Load a CSV file and return a DataFrame."""
    try:
        data = pd.read_csv(file_path)
        print("CSV loaded successfully!")
        return data
    except Exception as e:
        print(f"Error loading CSV: {e}")
        return None

# Test loading the CSV
csv_file = "sample_integration_data.csv"  # Replace with your actual CSV path
data = load_csv(csv_file)

# Drop the 'Context-Domain' column
data = data.drop(columns=['Context-Domain'], errors='ignore')

# Display the first few rows to confirm the column is dropped
print("DataFrame after dropping 'Context-Domain':")

# Display the first few rows of the data
if data is not None:
    print(data.head())

CSV loaded successfully!
DataFrame after dropping 'Context-Domain':
            Consumer             Producer Integration Type
0     Payroll System    Research Database    REST-JSON-RPC
1     Finance System  Analytics Dashboard             SFTP
2     Finance System            HR System              ETL
3  IT Support System  Analytics Dashboard             SFTP
4     Finance System     Inventory System              ETL


In [2]:
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, PointStruct

# Initialize SentenceTransformer model
vectorizer = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Set up Qdrant
def setup_vector_db(data):
    """
    Create and populate an in-memory Qdrant vector database with graph data.
    """
    try:
        # Initialize Qdrant client (in-memory)
        client = QdrantClient(":memory:")
        
        # Define vector collection schema
        collection_name = "systems"
        if client.get_collections().collections:
            client.delete_collection(collection_name)
        
        client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=384, distance="Cosine")
        )

        # Populate the database
        points = []
        for idx, row in data.iterrows():
            # Create a descriptive text for the relationship
            interaction_text = f"{row['Consumer']} interacts with {row['Producer']} via {row['Integration Type']}"
            
            # Encode text into a vector
            embedding = vectorizer.encode(interaction_text).tolist()
            
            # Add point with metadata
            points.append(
                PointStruct(
                    id=idx,  # Unique ID
                    vector=embedding,
                    payload={
                        "consumer": row['Consumer'],
                        "producer": row['Producer'],
                        "integration": row['Integration Type']
                    }
                )
            )
        
        # Upsert points into Qdrant
        client.upsert(collection_name=collection_name, points=points)
        print(f"Vector database populated with {len(points)} points!")
        return client
    except Exception as e:
        print(f"Error setting up vector database: {e}")
        return None

# Set up the vector database
vector_db_client = setup_vector_db(data)

Vector database populated with 100 points!


In [3]:
def query_vector_db(client, query, role="consumer", top_k=5):
    """
    Query the Qdrant vector database for relevant interactions.
    
    Args:
        client (QdrantClient): Qdrant client instance.
        query (str): Search query (e.g., "HR system").
        role (str): Specify whether to prioritize as 'consumer' or 'producer'.
        top_k (int): Number of top results to retrieve.
    
    Returns:
        list: Relevant interactions from the database.
    """
    try:
        # Encode the query into a vector
        query_embedding = vectorizer.encode(query).tolist()
        
        # Perform a similarity search in Qdrant
        results = client.search(
            collection_name="systems",
            query_vector=query_embedding,
            limit=top_k
        )
        
        # Filter results based on the role (consumer/producer)
        filtered_results = []
        for result in results:
            if role == "consumer" and query in result.payload["consumer"]:
                filtered_results.append(result.payload)
            elif role == "producer" and query in result.payload["producer"]:
                filtered_results.append(result.payload)
        
        return filtered_results
    except Exception as e:
        print(f"Error querying vector database: {e}")
        return []

# Example Query
query = "HR"
role = "consumer"  # Can be "consumer" or "producer"
retrieved_context = query_vector_db(vector_db_client, query, role=role, top_k=5)

# Display Results
print(f"Results for query '{query}' as a {role}:")
for context in retrieved_context:
    print(context)

Results for query 'HR' as a consumer:
{'consumer': 'HR System', 'producer': 'Admissions Portal', 'integration': 'REST-API'}
{'consumer': 'HR System', 'producer': 'Admissions Portal', 'integration': 'REST-API'}
{'consumer': 'HR System', 'producer': 'Finance System', 'integration': 'REST-API'}
{'consumer': 'HR System', 'producer': 'Finance System', 'integration': 'REST-API'}
{'consumer': 'HR System', 'producer': 'Payroll System', 'integration': 'SFTP'}


In [4]:
def interpret_query(query):
    """
    Interpret a natural language query to extract the entity and role.
    
    Args:
        query (str): User's query.
    
    Returns:
        tuple: Extracted entity and role (e.g., ('HR', 'consumer')).
    """
    query = query.lower()  # Normalize to lowercase for consistent matching

    # Check if 'consumer', 'producer', or 'integration' is mentioned
    if "consumer" in query:
        role = "consumer"
        entity = query.split("consumer")[0].strip().split()[-1]  # Extract entity before 'consumer'
    elif "producer" in query:
        role = "producer"
        entity = query.split("producer")[0].strip().split()[-1]  # Extract entity before 'producer'
    elif "integration" in query:
        role = None  # No direct consumer/producer context
        entity = query.split("integration")[0].strip().split()[-1]  # Extract entity before 'integration'
    else:
        # Default case: assume the last word as the entity and 'consumer' as the role
        role = "consumer"
        entity = query.strip().split()[-1]

    return entity, role

# Example query
test_query = "What are the systems HR is a producer for?"
entity, role = interpret_query(test_query)
print(f"Extracted Entity: {entity}, Role: {role}")

Extracted Entity: a, Role: producer


In [5]:
# Query the vector database with a natural language input
test_query = "What are the systems HR is a producer for?"
entity, role = interpret_query(test_query)

# Perform the query
retrieved_context = query_vector_db(vector_db_client, entity, role=role, top_k=5)

# Display results
print(f"Results for query '{test_query}':")
for context in retrieved_context:
    print(context)

Results for query 'What are the systems HR is a producer for?':
{'consumer': 'Analytics Dashboard', 'producer': 'Admissions Portal', 'integration': 'Event-Queue'}
{'consumer': 'Inventory System', 'producer': 'Research Database', 'integration': 'REST-API'}


In [6]:
def create_prompt(query, context):
    """
    Create a prompt for AWS Bedrock to ensure a structured JSON response.

    Args:
        query (str): User's query.
        context (list): List of interactions retrieved from Qdrant.

    Returns:
        str: Formatted prompt.
    """
    formatted_context = "\n".join([
        f"Consumer: {item['consumer']}, Producer: {item['producer']}, Integration: {item['integration']}"
        for item in context
    ])

    prompt = f"""
<s>[INST] <<SYS>>
You are a JSON generation assistant. Based on the query and context provided, generate a JSON object with the following structure:
{{
    "nodes": [
        {{"id": "System A"}},
        ...
    ],
    "edges": [
        {{"consumer": "System A", "producer": "System B", "integration": "Type"}},
        ...
    ]
}}
Do not include any text outside of the JSON object.

Query: {query}
Context:
{formatted_context}
</SYS> </INST>
"""
    return prompt

In [16]:
import os
import boto3

# Load environment variables
BEDROCK_INFERENCE_PROFILE_ARN = os.getenv("BEDROCK_INFERENCE_PROFILE_ARN")
AWS_BEDROCK_AGENT_ARN = os.getenv("AWS_BEDROCK_AGENT_ARN")
AWS_REGION = os.getenv("AWS_REGION", "us-east-1")  # Default to us-east-1

# Validate environment variables
assert BEDROCK_INFERENCE_PROFILE_ARN, "BEDROCK_INFERENCE_PROFILE_ARN is not set"
assert AWS_BEDROCK_AGENT_ARN, "AWS_BEDROCK_AGENT_ARN is not set"

# Initialize Bedrock Runtime client
bedrock_client_runtime = boto3.client(
    "bedrock-runtime",
    region_name=AWS_REGION,
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
)

print("Bedrock Runtime client initialized successfully!")



Bedrock Runtime client initialized successfully!


In [20]:
import os

# Load environment variables
AWS_REGION = os.getenv("AWS_REGION", "us-east-2")
AWS_AGENT_ID = os.getenv("AWS_AGENT_ID")
AWS_AGENT_ALIAS_ID = os.getenv("AWS_BEDROCK_AGENT_ALIAS_ID")

# Validate environment variables
assert AWS_AGENT_ID, "AWS_AGENT_ID is not set"
assert AWS_AGENT_ALIAS_ID, "AWS_BEDROCK_AGENT_ALIAS_ID is not set"

# Initialize Bedrock Agent Runtime client
bedrock_agent_runtime_client = boto3.client(
    service_name="bedrock-agent-runtime",
    region_name=AWS_REGION,
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
)

print("Bedrock Agent Runtime client initialized successfully!")

Bedrock Agent Runtime client initialized successfully!


In [28]:
def invoke_agent_with_runtime(query, context):
    """
    Invoke the Bedrock agent using agent ID and alias ID.

    Args:
        query (str): The user's query.
        context (list): Contextual information for the agent.

    Returns:
        dict: The response from the agent.
    """
    try:
        # Format context into a readable string
        formatted_context = "\n".join([
            f"Consumer: {item['consumer']}, Producer: {item['producer']}, Integration: {item['integration']}"
            for item in context
        ])
        
        # Construct input for the agent
        input_text = f"Query: {query}\nContext:\n{formatted_context}"

        # Log input
        print("Input Text Sent to Agent:")
        print(input_text)

        # Invoke the agent
        response = bedrock_agent_runtime_client.invoke_agent(
            agentId=AWS_AGENT_ID,
            agentAliasId=AWS_AGENT_ALIAS_ID,
            sessionId="test-session",
            inputText=input_text
        )

        # Log raw response
        print("Raw Agent Response Metadata:")
        print(response.get("ResponseMetadata"))

        # Process the EventStream
        completion = response.get("completion")
        agent_output = ""
        for event in completion:
            chunk = event["chunk"]
            agent_output += chunk.decode("utf-8")  # Decode the byte stream into a string

        # Log the extracted agent output
        print("Extracted Agent Output:")
        print(agent_output)

        # Return the final processed response
        return agent_output

    except Exception as e:
        print(f"Error invoking agent: {e}")
        return None

In [29]:
# Example query and context
test_query = "What systems does HR interact with?"
test_context = [
    {"consumer": "HR", "producer": "Payroll System", "integration": "API"},
    {"consumer": "HR", "producer": "Attendance System", "integration": "Database"}
]

# Invoke the agent
response = invoke_agent_with_runtime(test_query, test_context)

# Print the response
print("Agent Response:")
print(response)

Input Text Sent to Agent:
Query: What systems does HR interact with?
Context:
Consumer: HR, Producer: Payroll System, Integration: API
Consumer: HR, Producer: Attendance System, Integration: Database
Raw Agent Response Metadata:
{'RequestId': '783d2493-d0ab-44e3-be7f-141054513341', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Wed, 11 Dec 2024 20:14:22 GMT', 'content-type': 'application/vnd.amazon.eventstream', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'x-amzn-requestid': '783d2493-d0ab-44e3-be7f-141054513341', 'x-amz-bedrock-agent-session-id': 'test-session', 'x-amzn-bedrock-agent-content-type': 'application/json'}, 'RetryAttempts': 0}
Error invoking agent: An error occurred (validationException) when calling the InvokeAgent operation: The provided model identifier is invalid.
Agent Response:
None
