In [1]:
!pip install torch
from sagemaker.pytorch import PyTorch as torch
from sagemaker.huggingface import HuggingFaceModel
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting torch
  Using cached torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch)
  Using cached nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch)
  Using cached nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)
Collecting nvidia-cuda-cupti-cu11==11.7.101 (from torch)
  Using cached nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)
Collecting nvidia-cudnn-cu11==8.5.0.96 (from torch)
  Using cached nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)
Collecting nvidia-cublas-cu11==11.10.3.66 (from torch)
  Using cached nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)
Collecting nvidia-cufft-cu11==10.9.0.58 (from torch)
  Using cached nvidia_cufft_cu11-10.9.0.58-py3-none-manyli

In [2]:
# Install git and git-lfs on a notebook instance (Amazon Linux).
# Bring the already-installed packages up to date first.
!sudo yum update -y 
# Run the packagecloud setup script for the git-lfs yum repository, then
# install git and git-lfs from it.
!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
!sudo yum install git-lfs git -y


# NOTE(review): this second pass runs the same setup script again, adds the
# EL7 repository explicitly and installs with --nogpgcheck, i.e. without
# verifying package signatures. Presumably a fallback for when the install
# above fails — confirm it is still needed before keeping it.
!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
!sudo yum-config-manager --add-repo https://packagecloud.io/github/git-lfs/el/7/x86_64
!sudo yum install -y git-lfs --nogpgcheck

Loaded plugins: dkms-build-requires, extras_suggestions, langpacks, priorities,
              : update-motd, versionlock
amzn2-core                                               | 3.7 kB     00:00     
amzn2extra-docker                                        | 3.0 kB     00:00     
amzn2extra-kernel-5.10                                   | 3.0 kB     00:00     
amzn2extra-python3.8                                     | 3.0 kB     00:00     
centos-extras                                            | 2.9 kB     00:00     
copr:copr.fedorainfracloud.org:vbatts:shadow-utils-newxi | 3.3 kB     00:00     
https://download.docker.com/linux/centos/2/x86_64/stable/repodata/repomd.xml: [Errno 14] HTTPS Error 404 - Not Found
Trying other mirror.
libnvidia-container/x86_64/signature                     |  833 B     00:00     
libnvidia-container/x86_64/signature                     | 2.1 kB     00:00 !!! 
neuron                                                   | 2.9 kB     00:00     
(1/2): libnv

In [3]:
import boto3
import sagemaker

# Provisional session, needed only to look up the default bucket.
sess = sagemaker.Session()

# Bucket used for uploaded data, models and logs. Leave as None to use the
# session default; SageMaker creates that bucket automatically if it does
# not exist yet.
sagemaker_session_bucket = None
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

# Use the execution role when it can be resolved; on ValueError fall back to
# looking the role up by name through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Final session, bound to the chosen bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(
    f"sagemaker role arn: {role}",
    f"sagemaker bucket: {sess.default_bucket()}",
    f"sagemaker session region: {sess.boto_region_name}",
    sep="\n",
)

sagemaker role arn: arn:aws:iam::390771086433:role/service-role/AmazonSageMaker-ExecutionRole-20230605T175839
sagemaker bucket: sagemaker-us-east-1-390771086433
sagemaker session region: us-east-1


In [6]:
!mkdir code

In [7]:
%%writefile code/inference.py

import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

def model_fn(model_dir):
    """Load the BLIP captioning model and its processor from ``model_dir``.

    Args:
        model_dir: Path passed to both ``from_pretrained`` calls. Presumably
            the directory holding the unpacked model archive — confirm
            against the serving container.

    Returns:
        tuple: ``(model, processor)``, in that order; ``predict_fn`` unpacks
        the pair the same way.
    """
    # NOTE(review): the model is returned without any explicit device
    # placement — confirm that is intended for the target instance type.
    blip_processor = BlipProcessor.from_pretrained(model_dir)
    blip_model = BlipForConditionalGeneration.from_pretrained(model_dir)
    return blip_model, blip_processor

def predict_fn(data, model_and_processor):
    """Caption the image referenced by the request payload.

    Args:
        data: Deserialized request body, expected to look like
            ``{'inputs': {'img_url': '<URL>', 'text': '<Text>'}}``. Under
            ``inputs``, ``text`` is an optional prompt for conditional
            captioning, and the optional ``max_new_tokens`` (default 20) and
            ``skip_special_tokens`` (default True) tune generation and
            decoding.
        model_and_processor: The ``(model, processor)`` pair returned by
            ``model_fn``.

    Returns:
        dict: ``{'generated text': <caption>}``.

    Raises:
        ValueError: If ``data`` has no ``'inputs'`` dictionary, or that
            dictionary has no ``'img_url'``.
        requests.RequestException: If the image cannot be downloaded (this
            includes a non-2xx HTTP status and a timeout).
    """
    from io import BytesIO

    model, processor = model_and_processor

    usage = "It should be formatted as {'inputs' : {'img_url' : '<URL>', 'text': '<Text>' }}"

    # Validate the payload shape up front so every malformed request surfaces
    # as a ValueError with a usage hint, not an AttributeError/TypeError.
    if not isinstance(data, dict) or 'inputs' not in data:
        raise ValueError(f"Dictionary is missing 'inputs' key. {usage}")
    inputs = data['inputs']
    if not isinstance(inputs, dict):
        raise ValueError(f"'inputs' must be a dictionary. {usage}")

    img_url = inputs.get('img_url')
    if not img_url:
        raise ValueError(f"Dictionary is missing 'img_url' key. {usage}")
    text = inputs.get('text')
    max_new_tokens = inputs.get('max_new_tokens', 20)
    skip_special_tokens = inputs.get('skip_special_tokens', True)

    # NOTE(review): img_url comes straight from the request, so this worker
    # fetches whatever URL a caller supplies. Restrict schemes/hosts if the
    # endpoint is reachable by untrusted callers.
    # The timeout keeps a stalled remote host from hanging the request, and
    # raise_for_status() turns an HTTP error page into a clear exception
    # instead of an image-decoding failure.
    response = requests.get(img_url, timeout=30)
    response.raise_for_status()
    raw_image = Image.open(BytesIO(response.content)).convert('RGB')

    # With a prompt this is conditional captioning; without one the processor
    # receives the image only (unconditional captioning).
    processor_args = (raw_image, text) if text else (raw_image,)
    # Put the tensors on whatever device the model lives on.
    model_inputs = processor(*processor_args, return_tensors="pt").to(model.device)
    out = model.generate(**model_inputs, max_new_tokens=max_new_tokens)

    return {'generated text': processor.decode(out[0], skip_special_tokens=skip_special_tokens)}



Writing code/inference.py


In [8]:
# Repository to clone. Its last path segment is reused as the name of the
# local checkout directory and as part of the S3 key for the archive.
repository = "Salesforce/blip-image-captioning-base"
model_id = repository.rsplit("/", 1)[-1]
s3_location = (
    f"s3://{sess.default_bucket()}/custom_inference/{model_id}/model.tar.gz"
)

In [9]:
# Set up Git LFS, then clone the model repository. `$repository` is filled in
# by IPython from the Python variable of the same name.
# NOTE(review): re-running this cell will presumably fail once the checkout
# directory already exists — confirm, or remove the directory first.
!git lfs install
!git clone https://huggingface.co/$repository

Git LFS initialized.
Cloning into 'blip-image-captioning-base'...
remote: Enumerating objects: 70, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 70 (delta 6), reused 15 (delta 5), pack-reused 52[K
Unpacking objects: 100% (70/70), 324.78 KiB | 7.92 MiB/s, done.
Filtering content: 100% (2/2), 1.84 GiB | 54.79 MiB/s, done.


In [10]:
!cp -r code/ $model_id/code/


In [11]:
%cd $model_id
!tar zcvf model.tar.gz *

/home/ec2-user/SageMaker/blip-image-captioning-base
code/
code/inference.py
config.json
preprocessor_config.json
pytorch_model.bin
README.md
special_tokens_map.json
tf_model.h5
tokenizer_config.json
tokenizer.json
vocab.txt


In [12]:
!aws s3 cp model.tar.gz $s3_location

upload: ./model.tar.gz to s3://sagemaker-us-east-1-390771086433/custom_inference/blip-image-captioning-base/model.tar.gz


In [16]:
from sagemaker.huggingface.model import HuggingFaceModel


# Describe the model to SageMaker: where the archive (weights + inference
# script) lives, which IAM role may create the endpoint, and which
# transformers / PyTorch / Python versions to use.
huggingface_model = HuggingFaceModel(
    role=role,
    model_data=s3_location,
    py_version='py39',
    pytorch_version="1.13",
    transformers_version="4.26",
)

# Deploy the model to a single-instance endpoint.
predictor = huggingface_model.deploy(
    instance_type="ml.g4dn.xlarge",
    initial_instance_count=1,
)

--------!

In [20]:
# Request payload in the shape predict_fn expects: an image URL plus an
# optional text prompt that the caption should continue.
data = {
    "inputs": {
        "img_url": "https://cdn.pixabay.com/photo/2015/04/23/22/00/tree-736885_960_720.jpg",
        "text": "An image of ",
    },
}

# Send the payload through the predictor returned by deploy().
res = predictor.predict(data)
print(res)

{'generated text': 'an image of a tree in the middle of'}


In [21]:
import boto3
import json

# Create a SageMaker runtime client
client = boto3.client('sagemaker-runtime')

# Take the endpoint name from the predictor created by deploy(). A
# hard-coded, timestamped name stops being valid as soon as the endpoint is
# redeployed, which breaks a fresh run of the notebook.
endpoint_name = predictor.endpoint_name

# Provide the payload you want to use for prediction
data = {
    "inputs": {
        "img_url": "https://cdn.pixabay.com/photo/2015/04/23/22/00/tree-736885_960_720.jpg",
        "text" : "An image of ",
    }
}
payload = json.dumps(data)

# Specify the content type and accept headers
content_type = "application/json"
accept = "application/json"

# Invoke the endpoint
response = client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Accept=accept,
    Body=payload
)

# The response body is a stream; read and decode it to get the JSON text.
print(response['Body'].read().decode())

{"generated text":"an image of a tree in the middle of a lake"}
