Skip to content

Commit

Permalink
add download from s3 to lambda api
Browse files Browse the repository at this point in the history
  • Loading branch information
diegoquintanav committed Oct 8, 2022
1 parent d3684b7 commit 50c65be
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 9 deletions.
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ services:
image: "${DOCKER_IMAGE_LAMBDA?Variable not set}:${TAG-latest}"
environment:
- WANDB_API_KEY=${WANDB_API_KEY}
- AWS_REGION=${AWS_REGION}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
build:
context: ./services/lambda/api
networks:
Expand Down
65 changes: 56 additions & 9 deletions services/lambda/api/app.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import json
import logging
import os
from typing import Dict, Union

import boto3
import numpy as np
import torch
from transformers import AutoTokenizer

logger = logging.getLogger()
logger.setLevel(logging.INFO)
Expand All @@ -11,6 +15,48 @@
CLASSES = ["World", "Sports", "Business", "Sci/Tech"]


class NewsTextClassifier:
    """Classify news text with a TorchScript model and a DistilBERT tokenizer.

    The model is loaded from a serialized TorchScript file; the tokenizer is
    fetched from the ``distilbert-base-uncased`` checkpoint.
    """

    def __init__(self, model_path: str):
        """Load the TorchScript model at ``model_path`` and its tokenizer.

        Args:
            model_path: Filesystem path of the serialized TorchScript model.
        """
        # The tokenizer below always produces CPU tensors, so the model is
        # mapped onto the CPU regardless of the device it was saved from.
        self.model = torch.jit.load(model_path, map_location="cpu")
        # Inference only: make sure train-time layers (e.g. dropout) are off.
        self.model.eval()
        self.tokenizer_checkpoint = "distilbert-base-uncased"
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.tokenizer_checkpoint
        )

    @torch.no_grad()
    def predict(self, text: str) -> torch.Tensor:
        """Return softmax scores for ``text``.

        Args:
            text: Raw text to classify.

        Returns:
            The first element of the model output with a softmax applied
            over its last dimension (presumably one score per class --
            confirm against the exported model).
        """
        tokenized_text = self.tokenizer(
            text, truncation=True, return_tensors="pt"
        )
        # The tokenizer output is expanded into keyword arguments for the
        # model; element 0 of the model output is normalized with softmax.
        y_pred = self.model(**tokenized_text)[0].softmax(dim=-1)
        return y_pred


def load_model_from_s3(
    bucket_name: str = "fsdl-model-test",
    s3_filename: str = "model.pt",
) -> NewsTextClassifier:
    """Download a serialized model from S3 and wrap it in a classifier.

    Args:
        bucket_name: Name of the S3 bucket holding the model file.
        s3_filename: Object key of the model file inside the bucket.

    Returns:
        A ``NewsTextClassifier`` built from the downloaded file.

    Raises:
        Whatever ``boto3``/``botocore`` raises when the client cannot be
        created or the object cannot be downloaded.
    """
    logger.info("Loading model from S3: %s/%s", bucket_name, s3_filename)

    # Credentials are left to boto3's default provider chain. It reads
    # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY from the environment exactly
    # as before, but also honours AWS_SESSION_TOKEN, which temporary (role)
    # credentials require and which explicit key/secret arguments would drop.
    # An empty AWS_REGION (e.g. from unset compose interpolation) is treated
    # as "not configured" so boto3 can fall back to its own defaults.
    s3 = boto3.client(
        "s3",
        region_name=os.environ.get("AWS_REGION") or None,
    )

    # Download the object from the S3 bucket to a local scratch file.
    destination = "/tmp/model.pt"

    s3.download_file(bucket_name, s3_filename, destination)

    return NewsTextClassifier(destination)


def _json_str_to_dict(string_json: Union[dict, str]) -> dict:
if isinstance(string_json, str):
return json.loads(string_json)
Expand All @@ -28,27 +74,28 @@ def load_text(event_dict: dict) -> Union[str, None]:
return text


def model_predict(text: str) -> np.ndarray:
    """Produce placeholder predictions; ``text`` itself is not inspected.

    A single sample is drawn from a Dirichlet distribution with four unit
    concentration parameters using the module-level ``rng`` (presumably a
    NumPy random generator -- confirm where it is defined).
    """
    logger.info("Predicting text labels")
    concentration = np.ones(4)
    sample = rng.dirichlet(concentration, size=1)
    return sample


def get_predicted_labels(
    model: NewsTextClassifier, text: str
) -> Dict[str, float]:
    """Get predictions for each label.

    Args:
        model: Classifier whose ``predict`` method scores the text.
        text: Raw text to classify.

    Returns:
        A mapping from each name in ``CLASSES`` to its predicted score, as
        built-in Python floats.
    """
    predictions = model.predict(text=text).numpy()
    # Flatten and convert to built-in floats: NumPy float32 scalars are not
    # JSON serializable, and this dict is passed to json.dumps by the handler.
    scores = predictions.reshape(-1).tolist()
    return dict(zip(CLASSES, scores))


def lambda_handler(event, context):

s3_model = load_model_from_s3()
event_dict = _json_str_to_dict(event)
text = load_text(event_dict)

if text is None:
return {
"statusCode": 400,
"body": {"message": "'text' not found in body of request"},
}
predictions_dict = get_predicted_labels(text=text)

predictions_dict = get_predicted_labels(model=s3_model, text=text)

return {
"statusCode": 200,
"body": json.dumps({"predicted_labels": predictions_dict}),
Expand Down

0 comments on commit 50c65be

Please sign in to comment.