# LLM-SLM Hybrid QA Router
This notebook demonstrates a routing mechanism that uses AWS Lambda to direct each user question to either a lightweight small language model (SLM) or a more powerful large language model (LLM) endpoint, depending on the question's complexity.

## Step 1: Define Simple Routing Logic Based on Question Complexity

In [None]:
def classify_question(question: str) -> str:
    """Pick the backend for a question using a cheap complexity heuristic.

    A question is treated as complex, and routed to the LLM, when it contains
    the standalone word "how" (case-insensitive) or has more than 15
    whitespace-separated words. Everything else is routed to the SLM.

    Args:
        question: The user's question text.

    Returns:
        ``"LLM"`` or ``"SLM"``.
    """
    import re  # local import keeps this notebook cell runnable on its own

    # Match "how" as a whole word: a plain substring test would also fire on
    # "show", "however", "Howard", ... and misroute simple questions.
    asks_how = re.search(r"\bhow\b", question, flags=re.IGNORECASE) is not None
    is_long = len(question.split()) > 15
    return "LLM" if asks_how or is_long else "SLM"

# Demo: run the router on one short question and one "how" question.
sample_qs = [
    "What is the capital of France?",
    "How does AWS SageMaker work with Transformers models for QA tasks?",
]

for q in sample_qs:
    route = classify_question(q)
    print(f"Question: {q} -> Route: {route}")


## Step 2: Define Lambda Handler for Hybrid Routing

In [None]:
import json
import boto3
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Instantiate the SLM tokenizer and QA model at module level, so they are
# created once when this cell/module runs rather than inside the handler.
SLM_MODEL_ID = "deepset/minilm-uncased-squad2"
tokenizer_slm = AutoTokenizer.from_pretrained(SLM_MODEL_ID)
model_slm = AutoModelForQuestionAnswering.from_pretrained(SLM_MODEL_ID)

# Stand-in for a real hosted-LLM call (e.g. Bedrock), for demonstration only.
def mock_bedrock_response(question, context):
    """Return a placeholder LLM answer that echoes the question.

    ``context`` is accepted so the signature matches a real backend call,
    but it is not used by the mock.
    """
    return "[LLM Answer] for: {}".format(question)

def _json_response(status_code, payload):
    """Build a ``{"statusCode", "body"}`` response with a JSON-encoded body."""
    return {"statusCode": status_code, "body": json.dumps(payload)}


def lambda_handler(event, context):
    """Answer a question with the in-process SLM or the LLM backend.

    Args:
        event: Invocation event. ``event["body"]`` is a JSON string (or an
            already-decoded dict) with a ``"question"`` key and, for
            questions routed to the SLM, a ``"context"`` passage to extract
            the answer from.
        context: Lambda runtime context object (unused).

    Returns:
        A dict with ``"statusCode"`` and a JSON-encoded ``"body"``:
        200 with ``{"answer": ...}`` on success, or 400 with
        ``{"error": ...}`` when the request is malformed.
    """
    raw_body = event.get("body") if event else None
    if isinstance(raw_body, dict):
        body = raw_body
    else:
        try:
            # `or "{}"` also covers an explicit `"body": None`, which a
            # `.get("body", "{}")` default would not.
            body = json.loads(raw_body or "{}")
        except (TypeError, ValueError):
            return _json_response(400, {"error": "Request body must be valid JSON."})
    if not isinstance(body, dict):
        return _json_response(400, {"error": "Request body must be a JSON object."})

    question = body.get("question")
    context_text = body.get("context")
    if not isinstance(question, str) or not question.strip():
        return _json_response(400, {"error": 'A non-empty "question" string is required.'})

    if classify_question(question) == "SLM":
        # Extractive QA needs a passage; without one the tokenizer would
        # encode the question alone and the "answer" would be meaningless.
        if not isinstance(context_text, str) or not context_text.strip():
            return _json_response(
                400, {"error": 'A non-empty "context" string is required for this question.'}
            )

        # Truncate only the passage so an over-long context cannot exceed the
        # tokenizer's configured maximum length (SLM questions are short).
        inputs = tokenizer_slm(
            question, context_text, return_tensors="pt", truncation="only_second"
        )
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            outputs = model_slm(**inputs)

        answer_start = int(torch.argmax(outputs.start_logits))
        answer_end = int(torch.argmax(outputs.end_logits)) + 1  # exclusive end
        # An end before the start yields an empty slice, i.e. an empty answer.
        answer_ids = inputs["input_ids"][0][answer_start:answer_end].tolist()
        answer = tokenizer_slm.convert_tokens_to_string(
            # Drop special tokens so a span landing on e.g. [CLS] does not
            # leak the marker into the answer text.
            tokenizer_slm.convert_ids_to_tokens(answer_ids, skip_special_tokens=True)
        )
    else:
        # Call to Bedrock or other LLM here
        answer = mock_bedrock_response(question, context_text)

    return _json_response(200, {"answer": answer})


## Step 3: (Optional) Replace Mock with Real Bedrock Call

In [None]:
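# Example Bedrock client call (works only if you have permissions and model access).
# Written as a drop-in replacement for mock_bedrock_response(question, context),
# where `context` is the passage text (not the Lambda context object):
# def bedrock_response(question, context):
#     bedrock = boto3.client("bedrock-runtime")
#     # Claude v2 text completions expect the Human/Assistant prompt framing
#     # and a max_tokens_to_sample value.
#     prompt = f"\n\nHuman: Answer this question: {question} \nContext: {context}\n\nAssistant:"
#     response = bedrock.invoke_model(
#         modelId="anthropic.claude-v2",
#         contentType="application/json",
#         accept="application/json",
#         body=json.dumps({"prompt": prompt, "max_tokens_to_sample": 300})
#     )
#     result = json.loads(response['body'].read().decode())
#     return result['completion']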


## Step 4: Deploying the Lambda Router (Terraform Reference)

In [None]:
# Reference Terraform for deploying the router, kept as a plain string and
# only printed below; nothing in this cell applies it.
# Notes on the snippet:
#   - image_uri is a placeholder and must be replaced with a real image URI.
#   - aws_iam_role.lambda_exec is referenced but not defined in this snippet.
#   - NOTE(review): timeout (15) and memory_size (1024) may be tight for a
#     container that loads torch and a transformers model — confirm by testing.
terraform_code = '''
resource "aws_lambda_function" "qa_router_lambda" {
  function_name = "qa-router"
  role          = aws_iam_role.lambda_exec.arn
  package_type  = "Image"
  image_uri     = "<ecr_image_with_router_logic>"
  timeout       = 15
  memory_size   = 1024
}
'''
print(terraform_code)
