In [23]:
# Deploying an AWS SageMaker real-time endpoint with Hugging Face embedding models

# !pip install huggingface_hub
# !pip install langchain
# !pip install chromadb
# !pip install InstructorEmbedding
# !pip install sentence_transformers
# !pip install unstructured



# Document references:

# [1] https://www.philschmid.de/custom-inference-huggingface-sagemaker
# [2] https://medium.com/@domemue/deploy-bge-embedding-models-via-aws-sagemaker-8e8bbe08b558
# [3] http://webcache.googleusercontent.com/search?q=cache:https://medium.com/@ryanntk/deploying-hugging-face-embedding-models-on-aws-sagemaker-a-comprehensive-tutorial-with-langchain-af8e0b405b51&sca_esv=574740318&strip=1&vwsrc=0

Collecting langchain
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/e0/b9/600ac0fccd1dcfa6c56a45210a08724f293602338b520044a2b209f5991d/langchain-0.0.318-py3-none-any.whl.metadata
  Downloading langchain-0.0.318-py3-none-any.whl.metadata (15 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Obtaining dependency information for SQLAlchemy<3,>=1.4 from https://files.pythonhosted.org/packages/26/54/6f2a9b21a9dc921181ae1084c35391c51b57daa11f88c830332a69298a62/SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading SQLAlchemy-2.0.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.4 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Obtaining dependency information for aiohttp<4.0.0,>=3.8.3 from https://files.pythonhosted.org/packages/41/8e/4c48881316bbced3d13089c4d0df4be321ce79a0c695d82dee9996aaf56b/aiohttp-3.8.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.

In [24]:
# # For notebook instances (Amazon Linux)
# !sudo yum update -y
# !curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
# !sudo yum install git-lfs git -y



In [39]:
import sagemaker
import boto3
# Bootstrap session; used below only to look up the default bucket name.
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it does not exist
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the IAM role ARN. When get_execution_role() raises ValueError
# (presumably when running outside SageMaker -- TODO confirm), fall back to
# looking up the role named 'sagemaker_execution_role' through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Recreate the session, this time bound explicitly to the chosen bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved role, bucket and region for a quick sanity check.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::718735351344:role/SagmakerPublicInternetOnlyRole
sagemaker bucket: sagemaker-us-east-1-718735351344
sagemaker session region: us-east-1


In [40]:
# Hub Model configuration. https://huggingface.co/models
# Repo id of the model that is downloaded and packaged below; its last path
# segment ("instructor-large") is reused as the local model directory name.
HF_MODEL_ID = 'hkunlp/instructor-large'

In [27]:
# from InstructorEmbedding import INSTRUCTOR

# model = INSTRUCTOR('hkunlp/instructor-large')
# sentence = "3D ActionSLAM: wearable person tracking in multi-floor environments"
# instruction = "Represent the Science title:"
# embeddings = model.encode([[instruction,sentence]])
# print(embeddings)

In [41]:
# Custom Model Packaging (with custom inference.py)
import os
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory
from huggingface_hub import snapshot_download

# Local staging directory, named after the last segment of the repo id.
model_tar_dir = Path(HF_MODEL_ID.rsplit("/", 1)[-1])

# Start from a clean staging directory on every run.
if model_tar_dir.is_dir():
    shutil.rmtree(model_tar_dir)

# Download into a throwaway cache; only the resolved snapshot is copied out
# before the temporary directory is removed.
with TemporaryDirectory() as tmpdir:
    snapshot_dir = snapshot_download(repo_id=HF_MODEL_ID, cache_dir=tmpdir)
    # Show which files the snapshot contains.
    print(os.listdir(snapshot_dir))
    # Show source and destination, then copy the snapshot into the staging directory.
    print(snapshot_dir + " -> " + str(model_tar_dir))
    shutil.copytree(snapshot_dir, str(model_tar_dir))

Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

(…)3b07dc819fb15c7233/1_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

(…)443b07dc819fb15c7233/2_Dense/config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

(…)de506e59443b07dc819fb15c7233/config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

(…)e506e59443b07dc819fb15c7233/modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

(…)06e59443b07dc819fb15c7233/.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

(…)84de506e59443b07dc819fb15c7233/README.md:   0%|          | 0.00/66.3k [00:00<?, ?B/s]

(…)5c7233/config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

(…)dc819fb15c7233/sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.15M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

(…)3b07dc819fb15c7233/tokenizer_config.json:   0%|          | 0.00/2.41k [00:00<?, ?B/s]

(…)07dc819fb15c7233/special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

(…)06e59443b07dc819fb15c7233/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

['1_Pooling', '2_Dense', 'modules.json', 'config.json', '.gitattributes', 'config_sentence_transformers.json', 'README.md', 'sentence_bert_config.json', 'tokenizer_config.json', 'special_tokens_map.json', 'spiece.model', 'tokenizer.json', 'pytorch_model.bin']
/tmp/tmpnhk_9sl8/models--hkunlp--instructor-large/snapshots/54e5ffb8d484de506e59443b07dc819fb15c7233 -> instructor-large


In [42]:
!mkdir code

In [43]:
%%writefile code/inference.py
from InstructorEmbedding import INSTRUCTOR

def model_fn(model_dir):
  """Build the INSTRUCTOR embedding model from the unpacked model directory.

  Intended as the ``model_fn`` override in the custom inference script.

  Args:
    model_dir: Path of the directory holding the model files.

  Returns:
    The ``INSTRUCTOR`` instance constructed from ``model_dir``.
  """
  return INSTRUCTOR(model_dir)

def predict_fn(data, model):
  """Embed the request payload and return a JSON-serialisable response.

  Args:
    data: Deserialized request body. Either the raw input for
      ``model.encode`` (for example a sentence or a list of
      ``[instruction, sentence]`` pairs) or a dict that wraps that input
      under the ``"inputs"`` key.
    model: Object returned by ``model_fn``; must expose ``encode``.

  Returns:
    ``{"vector_embedding": ...}`` where the value is ``encode``'s result
    converted with ``.tolist()``.
  """
  # Accept {"inputs": ...} payloads as well as raw input. Anything else
  # (including dicts without an "inputs" key) is passed through unchanged,
  # so existing callers that send a bare string keep working.
  if isinstance(data, dict) and "inputs" in data:
    data = data["inputs"]
  embeddings = model.encode(data)
  return {"vector_embedding": embeddings.tolist()}

Writing code/inference.py


In [44]:
%%writefile code/requirements.txt 
# Pinned for reproducible endpoint builds: InstructorEmbedding 1.0.1 is known
# to break with sentence-transformers >= 2.3.
InstructorEmbedding==1.0.1
sentence_transformers==2.2.2

Writing code/requirements.txt


In [45]:
#Copy the custom inference script into the model directory
import shutil

# Destination of the inference code inside the model directory.
code_in_model_dir = str(model_tar_dir / "code")

# Drop any copy left over from a previous run so copytree starts clean.
if os.path.isdir(code_in_model_dir):
    shutil.rmtree(code_in_model_dir)

# Copy the local code/ directory into the model dir.
shutil.copytree("code", code_in_model_dir)

'instructor-large/code'

In [46]:
## Create Archive
import tarfile
import os

# helper to create the model.tar.gz
def compress(tar_dir=None,output_file="model.tar.gz"):
    """Pack the contents of ``tar_dir`` into a gzip-compressed tarball.

    Every entry directly inside ``tar_dir`` is added at the root of the
    archive (directories are added recursively), and each top-level entry
    name is printed as it is added.

    Args:
        tar_dir: Directory whose contents are archived. Required; the
            ``None`` default exists only to keep the signature unchanged.
        output_file: Path of the archive to create. A relative path is
            resolved against the current working directory.

    Raises:
        TypeError: If ``tar_dir`` is ``None``.
        OSError: If ``tar_dir`` cannot be listed or the archive cannot be
            written.
    """
    if tar_dir is None:
        raise TypeError("compress() requires tar_dir, got None")

    # List the source first so a bad tar_dir fails before an archive file is
    # created. Sorting makes the member order reproducible across runs.
    items = sorted(os.listdir(tar_dir))

    # Entries are added by explicit path instead of os.chdir(): the working
    # directory is never changed, so a failure cannot leave the notebook
    # kernel stranded inside tar_dir.
    with tarfile.open(os.path.join(os.getcwd(), output_file), "w:gz") as tar:
        for item in items:
            print(item)
            tar.add(os.path.join(tar_dir, item), arcname=item)

# Archive the model directory contents into model.tar.gz (the default
# output_file) in the current working directory.
compress(str(model_tar_dir))

config_sentence_transformers.json
code
spiece.model
special_tokens_map.json
tokenizer.json
1_Pooling
README.md
sentence_bert_config.json
config.json
tokenizer_config.json
modules.json
pytorch_model.bin
.gitattributes
2_Dense


In [47]:
# Define the endpoint name for SageMaker.
# The same value is reused as the S3 key prefix for the uploaded model.tar.gz.
endpoint_name = "instructor-large"

In [None]:
#Upload the packaged model

# Upload Model
from sagemaker.s3 import S3Uploader

# Upload model.tar.gz under s3://<session default bucket>/<endpoint_name>
# and keep the URI that the uploader returns.
upload_prefix = f"s3://{sess.default_bucket()}/{endpoint_name}"
s3_model_uri = S3Uploader.upload(
    local_path="model.tar.gz",
    desired_s3_uri=upload_prefix,
)

print(f"model uploaded to: {s3_model_uri}")

In [None]:
# embeddings_model_endpoint_name = endpoint_name
# content_handler = ContentHandler()
# embeddings_endpoint = create_sagemaker_embeddings_from_js_model(endpoint_name, 'us-east-1')

In [50]:
#Create a custom Huggingface Model

from sagemaker.huggingface.model import HuggingFaceModel
# Create the Hugging Face model object from the packaged archive.
# NOTE(review): s3_model_uri is set by the upload cell, which has no execution
# count in the saved notebook -- run that cell before this one.
huggingface_model = HuggingFaceModel(
   model_data=s3_model_uri,       # S3 URI of the uploaded model.tar.gz (model files + code/)
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.26",  # transformers version used
   pytorch_version="1.13",        # pytorch version used
   py_version='py39',            # python version used
)
# Deploy the model to an endpoint named `endpoint_name`
# (one ml.m5.xlarge instance).
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name=endpoint_name
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
-----!

In [51]:
# Smoke-test the endpoint with a single sentence; the custom predict_fn
# returns the embedding under the "vector_embedding" key.
predictor.predict("Thank you for contacting AWS support")

{'vector_embedding': [-0.06406575441360474,
  -0.010961291380226612,
  -0.009230881929397583,
  0.0032556140795350075,
  0.04987090453505516,
  0.027427753433585167,
  0.009788299910724163,
  -0.002328293863683939,
  -0.016451440751552582,
  0.04547063261270523,
  0.0665033757686615,
  0.011851438321173191,
  0.037036314606666565,
  0.06573906540870667,
  -0.040147945284843445,
  -0.016894791275262833,
  -0.020808979868888855,
  0.0034083102364093065,
  -0.0436234250664711,
  -0.019721517339348793,
  0.05280964821577072,
  -0.008028345182538033,
  -0.014136145822703838,
  0.024149907752871513,
  -0.02009374089539051,
  0.0312423687428236,
  -0.03576130419969559,
  0.026694923639297485,
  0.047513484954833984,
  -0.0354355126619339,
  0.04019827023148537,
  -0.028161438181996346,
  -0.030150409787893295,
  -0.04683360829949379,
  -0.019436122849583626,
  0.024104785174131393,
  0.028700822964310646,
  0.004619315732270479,
  0.028101621195673943,
  0.028504708781838417,
  -0.00489278417