## Installing relevant modules

In [1]:
%pip install sagemaker --upgrade
import sagemaker


Note: you may need to restart the kernel to use updated packages.
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [5]:
# Update the system packages already installed on the notebook host
!sudo yum update -y
# Enable the EPEL repository through amazon-linux-extras
!sudo amazon-linux-extras install epel -y
# Register the git-lfs package repository.
# NOTE(review): this pipes a remotely fetched script straight into a root
# shell -- inspect the script (or pin a trusted copy) before running it.
!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | sudo bash
# Install git and git-lfs; both are used further down to clone the model repository
!sudo yum install git-lfs git -y

Loaded plugins: dkms-build-requires, extras_suggestions, kernel-livepatch,
              : langpacks, priorities, update-motd, versionlock
https://download.docker.com/linux/centos/2/x86_64/stable/repodata/repomd.xml: [Errno 14] HTTPS Error 404 - Not Found
Trying other mirror.
neuron                                                   | 2.9 kB     00:00     
63 packages excluded due to repository priority protections
No packages marked for update
Installing epel-release
Loaded plugins: dkms-build-requires, extras_suggestions, kernel-livepatch,
              : langpacks, priorities, update-motd, versionlock
Cleaning repos: amzn2-core amzn2extra-docker amzn2extra-epel
              : amzn2extra-kernel-5.10 amzn2extra-livepatch amzn2extra-python3.8
              : centos-extras
              : copr:copr.fedorainfracloud.org:vbatts:shadow-utils-newxidmap
              : docker-ce-stable github_git-lfs github_git-lfs-source
              : libnvidia-container neuron
57 metadata files removed
2


## Defining IAM permissions

In [6]:

# Resolve the SageMaker session, its S3 bucket and the IAM execution role

import boto3

# Bucket used for uploading data, models and logs. Leave it as None to fall
# back to the session's default bucket (per the original note, SageMaker
# creates that bucket automatically if it does not exist).
sagemaker_session_bucket = None

sess = sagemaker.Session()
if sess is not None and sagemaker_session_bucket is None:
    # no bucket name given -> use the session default
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError: look the role up by name through IAM
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Re-create the session bound to the resolved bucket
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")


sagemaker role arn: arn:aws:iam::882095275715:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole
sagemaker bucket: sagemaker-eu-north-1-882095275715
sagemaker session region: eu-north-1


In [7]:
#rm -r code

In [8]:
#making a new directory for the inference codes
!mkdir code



## The code below creates the inference.py file

In [9]:
%%writefile code/inference.py

import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModel

# Helper: attention-mask-aware mean pooling over token embeddings
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings, weighting each position by the attention mask.

    Args:
        model_output: indexable model result whose element 0 holds the token
            embeddings.
        attention_mask: mask tensor; it is given a trailing dimension and
            expanded to the shape of the token embeddings before use.

    Returns:
        The masked sum over dimension 1 divided by the mask sum over
        dimension 1. The divisor is clamped to at least 1e-9 so a fully
        masked row does not divide by zero.
    """
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    masked_sum = torch.sum(token_embeddings * mask, 1)
    mask_total = torch.clamp(mask.sum(1), min=1e-9)
    return masked_sum / mask_total

def model_fn(model_dir, context=None):
    """Load the tokenizer and model stored under ``model_dir``.

    Args:
        model_dir: path handed to ``from_pretrained`` for both the tokenizer
            and the model.
        context: accepted for interface compatibility; not used here.

    Returns:
        A ``(model, tokenizer)`` tuple, in the order ``predict_fn`` unpacks it.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_dir)
    loaded_model = AutoModel.from_pretrained(model_dir)
    return loaded_model, loaded_tokenizer



def predict_fn(data, model_and_tokenizer, context=None):
    """Score the similarity between ``data["text1"]`` and ``data["text2"]``.

    Each text is tokenized, run through the model without gradient tracking,
    mean-pooled with its attention mask and L2-normalized. The cosine
    similarity of the two embeddings is then rescaled with ``(score + 1) / 2``.

    Args:
        data: mapping with the keys ``"text1"`` and ``"text2"``. The final
            ``.item()`` call needs a single-element similarity tensor, so this
            presumably expects one string per key -- confirm against callers.
        model_and_tokenizer: the ``(model, tokenizer)`` pair returned by
            ``model_fn``.
        context: accepted for interface compatibility; not used here.

    Returns:
        ``{"similarity score": <float>}`` holding the rescaled similarity.
    """
    model, tokenizer = model_and_tokenizer

    # Tokenize each text on its own, in the order text1, text2
    encoded_inputs = [
        tokenizer(data[key], padding=True, truncation=True, return_tensors='pt')
        for key in ("text1", "text2")
    ]

    # Forward passes only; no gradients are needed
    with torch.no_grad():
        model_outputs = [model(**encoded) for encoded in encoded_inputs]

    # Pool token embeddings into one vector per text, then L2-normalize
    first_embedding, second_embedding = [
        F.normalize(mean_pooling(output, encoded['attention_mask']), p=2, dim=1)
        for output, encoded in zip(model_outputs, encoded_inputs)
    ]

    # Cosine similarity, shifted and halved by (score + 1) / 2
    cosine = util.cos_sim(first_embedding, second_embedding)
    rescaled = (cosine.item() + 1) / 2

    return {"similarity score": rescaled}




    
              

    
    

Writing code/inference.py


## Sending the model to s3 bucket

In [10]:
# Hugging Face repository to deploy and the S3 key its packaged archive is uploaded to
repository = "doomnova/Finetuned_STS_part_A_final"
# repository name without the owner prefix
model_id = repository.rsplit("/", 1)[-1]
s3_location = f"s3://{sess.default_bucket()}/custom_inference/{model_id}/model.tar.gz"




In [12]:
#rm -rf Finetuned_STS_part_A_final

In [13]:
#download model A from hugging face
!git lfs install
!git clone https://huggingface.co/$repository


Git LFS initialized.
Cloning into 'Finetuned_STS_part_A_final'...
remote: Enumerating objects: 20, done.[K
remote: Counting objects: 100% (17/17), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 20 (delta 1), reused 0 (delta 0), pack-reused 3[K
Unpacking objects: 100% (20/20), 317.11 KiB | 1.47 MiB/s, done.


In [15]:
## requirements file
%%writefile code/requirements.txt
sentence-transformers==2.5.1

Writing code/requirements.txt


In [16]:
!cp -r code/ $model_id/code/


In [17]:
#creating the model archive (gzip-compressed tarball)
# %cd switches the kernel's working directory to the cloned model folder and
# it stays there; the upload cell further down refers to model.tar.gz
# relative to that folder.
%cd $model_id
# Pack everything matched by the shell glob `*` (which skips dot-entries)
# into model.tar.gz.
!tar zcvf model.tar.gz *


/home/ec2-user/SageMaker/Finetuned_STS_part_A_final
1_Pooling/
1_Pooling/config.json
code/
code/inference.py
code/requirements.txt
config.json
config_sentence_transformers.json
model.safetensors
modules.json
README.md
requirements.txt
sentence_bert_config.json
special_tokens_map.json
tokenizer_config.json
tokenizer.json
vocab.txt


In [18]:
"sending the model zip to s3 bucket"
!aws s3 cp model.tar.gz $s3_location


upload: ./model.tar.gz to s3://sagemaker-eu-north-1-882095275715/custom_inference/Finetuned_STS_part_A_final/model.tar.gz


In [19]:
# Describe the uploaded archive as a SageMaker Hugging Face model

from sagemaker.huggingface.model import HuggingFaceModel

huggingface_model = HuggingFaceModel(
    model_data=s3_location,       # S3 URI of the packaged model.tar.gz
    role=role,                    # IAM role with permissions to create an endpoint
    transformers_version="4.37",  # transformers version used
    pytorch_version="2.1",        # pytorch version used
    py_version="py310",           # python version of the DLC
)

In [20]:
# Deploy the model to a single-instance endpoint named "partBSTS"
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
    endpoint_name="partBSTS",
)


-----------!

In [23]:
# Smoke-test the endpoint with one sentence pair
data = {
    "text1": "The new movie is top",
    "text2": "The new movie is so great",
}

# Send the request; the response is displayed as the cell output
predictor.predict(data)

{'similarity score': 0.8439590036869049}

In [None]:
#predictor.delete_model()
#predictor.delete_endpoint()