### Deploy model

In [None]:
!pip install -U sagemaker


In [None]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri, HuggingFace

# Resolve the IAM role ARN that the SageMaker calls below will run under.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # No execution role could be resolved (presumably because the notebook is
    # running outside SageMaker), so look the role up by its name through IAM.
    iam = boto3.client("iam")
    role_description = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_description["Role"]["Arn"]

# Display the resolved role ARN as the cell output.
role

In [None]:
from sagemaker.huggingface import HuggingFace

############## Final Deployment #################
# Names of the finished training jobs that can be deployed; leave exactly one
# assignment uncommented to select the model.
#to_deploy = 'biomistralFIN-3class-k0-2024-04-30-23-07-32-592'
#to_deploy = 'biomistralFINRAG-3class-k0-2024-05-01-14-54-54-877'
to_deploy = 'biomistralFINLLM-3class-k0-2024-05-01-14-51-44-260'
#to_deploy = 'biomistralFINALL-3class-k0-2024-05-01-14-45-16-880'

print('Model to deploy: {}'.format(to_deploy))

# Bind an estimator to the existing training job so its `model_data` can be
# reused for deployment. `attach` is a classmethod that rebuilds the estimator
# from the training job's own description, so there is no need to construct a
# throwaway HuggingFace(...) instance first: any constructor arguments given to
# such an instance are not applied to the estimator returned here.
huggingface_estimator = HuggingFace.attach(to_deploy)

In [None]:
# Show which serving image URI is resolved for the "huggingface" backend at
# version 1.4 (the same arguments are used when the model is created).
get_huggingface_llm_image_uri(backend="huggingface", version="1.4")

In [None]:
# Environment variables handed to the serving container.
# Model ids are documented at https://huggingface.co/models
hub = {
    # Load the weights from a local path instead of a Hub repository id
    # (the Hub id 'salangarica/finetune-mistral-DA' was used here before).
    "HF_MODEL_ID": "/opt/ml/model",
    "SM_NUM_GPUS": json.dumps(1),
    "MAX_INPUT_LENGTH": json.dumps(3500),  # max length of the input text
    "MAX_TOTAL_TOKENS": json.dumps(4000),
    "HF_TASK": "text-generation",
    # Optional revision pin, currently disabled:
    # "HF_MODEL_REVISION": "23486089ab7ba741b34adc69ab7555885f8abe71",
}

# Serving image for the model (version "1.1.0" was used previously).
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="1.4")

# Model definition: serving image + environment + artifacts of the attached
# training job.
huggingface_model = HuggingFaceModel(
    role=role,
    model_data=huggingface_estimator.model_data,
    env=hub,
    image_uri=llm_image_uri,
)

# Deploy the model to a SageMaker inference endpoint.
predictor = huggingface_model.deploy(
    instance_type="ml.g5.2xlarge",
    initial_instance_count=1,
    container_startup_health_check_timeout=300,
)

# Send a first request to the new endpoint; the response is the cell output.
smoke_test_request = {
    "inputs": "<|system|>\nYou are a pirate chatbot who always responds with Arr!</s>\n<|user|>\nThere's a llama on my lawn, how can I get rid of him?</s>\n<|assistant|>\n",
}
predictor.predict(smoke_test_request)

In [None]:
# Display the name of the endpoint that `predictor` sends its requests to.
# NOTE(review): nothing in the visible cells deletes this endpoint - confirm
# that cleanup happens elsewhere once the experiments are finished.
predictor.endpoint_name

In [None]:
# Instruction prompt asking for the microbe-disease relation label
# (positive / negative / na) supported by a piece of evidence.
# The text is assembled line by line so that the trailing spaces and newlines
# that are part of the prompt stay visible. Wording, spacing and the spelling
# "appropiate" are kept exactly as they were - presumably the fine-tuned model
# expects this exact template; confirm before editing any of it.
relation_prompt = (
    "<s>[INST] You are an expert microbiologist who given an excerpt from a research paper can easily \n"
    "identify the type of relation between a microbe and a disease. Doesn't create new information, but is completely faithful to the information provided, and always gives concise answers.\n"
    "Given the following meaning of the labels, answer the following question with the appropiate label.\n"
    "positive: This type is used to annotate microbe-disease entity pairs with positive correlation, such as microbe will cause or aggravate the disease, the microbe will increase when disease occurs.\n"
    "negative: This type is used to annotate microbe-disease entity pairs that have a negative correlation, such as microbe can be a treatment for a disease, or microbe will decrease when disease occurs. \n"
    "na: This type is used when the relation between a microbe and a disease is not clear from the context or there is no relation. In other words, use this label if the relation is not positive and not negative.\n"
    "\n"
    "Based on the above description, evidence is as follows: \n"
    "E.coli is positively correlated with diabetes but not with pneumonia\n"
    "\n"
    "What is the relationship between E.coli and diabetes?\n"
    " [/INST]"
)

# Query the deployed endpoint; the response is the cell output.
predictor.predict({"inputs": relation_prompt})