# Deploying MiniLM on AWS Lambda for Real-Time QA
This notebook walks through packaging and deploying a MiniLM model to AWS Lambda using container images for low-latency question answering.

## Step 1: Install Dependencies and Prepare Environment

In [None]:
!pip install transformers torch --quiet
import os
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

model_name = "deepset/minilm-uncased-squad2"
os.makedirs("minilm_model", exist_ok=True)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

tokenizer.save_pretrained("minilm_model")
model.save_pretrained("minilm_model")


## Step 2: Create `handler.py` for Lambda

In [None]:
# Source of the Lambda entry point. It is kept as a string so the notebook can
# write it next to the exported model files.
handler_code = '''
"""AWS Lambda entry point: extractive question answering with a local MiniLM model."""
import base64
import json

import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

MODEL_DIR = "/var/task/minilm_model"
# Upper bound on question + context tokens fed to the model. BERT-style
# encoders such as MiniLM are normally limited to 512 positions.
MAX_SEQ_LEN = 512

# Loaded at import time so that warm invocations reuse the same objects.
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_DIR)
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model.eval()


def _response(status_code, payload):
    """Build a proxy-integration response with a JSON-encoded body."""
    return {"statusCode": status_code, "body": json.dumps(payload)}


def _parse_body(event):
    """Return the request body as a dict, or None if it is not a JSON object.

    A missing or null body is treated as an empty object. Bodies flagged with
    isBase64Encoded are decoded before parsing.
    """
    raw_body = event.get("body")
    try:
        if raw_body and event.get("isBase64Encoded"):
            raw_body = base64.b64decode(raw_body).decode("utf-8")
        parsed = json.loads(raw_body or "{}")
    except (TypeError, ValueError):
        # ValueError covers invalid JSON, invalid base64 and invalid UTF-8.
        return None
    return parsed if isinstance(parsed, dict) else None


def lambda_handler(event, context):
    """Answer a question about a context passage.

    Expects a JSON body with string fields "question" and "context".
    Returns statusCode 200 with {"answer": ...} on success, or statusCode 400
    with {"error": ...} when the body is malformed or a field is missing.
    """
    body = _parse_body(event)
    if body is None:
        return _response(400, {"error": "Request body must be a JSON object."})

    question = body.get("question", "")
    context_text = body.get("context", "")
    fields = (question, context_text)
    if not all(isinstance(value, str) and value.strip() for value in fields):
        return _response(
            400, {"error": "Both question and context must be non-empty strings."}
        )

    # Truncate so that long contexts cannot exceed the model's position limit.
    inputs = tokenizer(
        question,
        context_text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_SEQ_LEN,
    )
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1  # slice end is exclusive
    answer = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
    )

    return _response(200, {"answer": answer})
'''

# Create the target directory if needed so this cell can be re-run on its own.
os.makedirs("minilm_model", exist_ok=True)
with open("minilm_model/handler.py", "w") as f:
    f.write(handler_code)


## Step 3: Create a Dockerfile for Lambda Container

In [None]:
# Dockerfile for the Lambda container image. It is written into minilm_model/,
# but its COPY instruction expects the build context to be the PARENT directory.
dockerfile = '''
FROM public.ecr.aws/lambda/python:3.9

# Build with the directory that contains minilm_model/ as the context:
#   docker build -t minilm-lambda -f minilm_model/Dockerfile .

# Install dependencies.
# Lambda has no GPU, so install the CPU-only PyTorch wheels instead of the
# default build. --no-cache-dir keeps pip's download cache out of the image.
RUN pip install --no-cache-dir --quiet torch --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir --quiet transformers

# Copy model and handler
COPY minilm_model /var/task/minilm_model

CMD ["minilm_model.handler.lambda_handler"]
'''
# Create the target directory if needed so this cell can be re-run on its own.
os.makedirs("minilm_model", exist_ok=True)
with open("minilm_model/Dockerfile", "w") as f:
    f.write(dockerfile)


## Step 4: Build and Push Docker Image to ECR (Manual Steps)
> These steps must be executed in a terminal with Docker and AWS CLI configured.

In [None]:
# Shell commands (to be run manually in terminal):
# aws ecr create-repository --repository-name minilm-lambda
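# aws ecr get-login-password --region <region> | docker login --username AWS --password-stdin <aws_account_id>.dkr.ecr.<region>.amazonaws.com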
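# (run from the directory that contains minilm_model/; the Dockerfile lives inside it)
# docker build -t minilm-lambda -f minilm_model/Dockerfile .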
# docker tag minilm-lambda:latest <aws_account_id>.dkr.ecr.<region>.amazonaws.com/minilm-lambda:latest
# docker push <aws_account_id>.dkr.ecr.<region>.amazonaws.com/minilm-lambda:latest


## Step 5: Create Lambda + API Gateway via Terraform (Reference Only)

In [None]:
# Reference Terraform for the Lambda function and an HTTP API in front of it.
# Not applied by this notebook. It still needs an aws_iam_role.lambda_exec
# resource and a real ECR image URI before it can be used.
terraform_code = '''
resource "aws_lambda_function" "qa_minilm_lambda" {
  function_name = "qa-minilm"
  role          = aws_iam_role.lambda_exec.arn
  package_type  = "Image"
  image_uri     = "<your_ecr_image_uri>"
  timeout       = 15
  memory_size   = 1024
}

resource "aws_apigatewayv2_api" "qa_api" {
  name          = "QAAPI"
  protocol_type = "HTTP"
}

resource "aws_apigatewayv2_integration" "qa_integration" {
  api_id             = aws_apigatewayv2_api.qa_api.id
  integration_type   = "AWS_PROXY"
  integration_uri    = aws_lambda_function.qa_minilm_lambda.invoke_arn
  integration_method = "POST"
}

# Route requests to the integration; without a route the API exposes no endpoint.
resource "aws_apigatewayv2_route" "qa_route" {
  api_id    = aws_apigatewayv2_api.qa_api.id
  route_key = "POST /qa"
  target    = "integrations/${aws_apigatewayv2_integration.qa_integration.id}"
}

# Auto-deployed default stage so the route is reachable at the API's base URL.
resource "aws_apigatewayv2_stage" "qa_default" {
  api_id      = aws_apigatewayv2_api.qa_api.id
  name        = "$default"
  auto_deploy = true
}

# Allow API Gateway to invoke the function.
resource "aws_lambda_permission" "qa_allow_apigw" {
  statement_id  = "AllowAPIGatewayInvoke"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.qa_minilm_lambda.function_name
  principal     = "apigateway.amazonaws.com"
  source_arn    = "${aws_apigatewayv2_api.qa_api.execution_arn}/*/*"
}
'''
print(terraform_code)
