In [None]:
!pip install python-dotenv
!pip install --upgrade sagemaker

In [43]:
import sagemaker
import os
from dotenv import load_dotenv

# SageMaker session and the execution role used when creating models/endpoints.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
# Name of the S3 bucket; the fine-tuned model artifact used later lives in it.
SAGEMAKER_S3_BUCKET="sagemaker-us-west-2-412356575323"

# Load variables from a local .env file (if any) into the process environment,
# then read the Hugging Face token from there so it is never hard-coded here.
load_dotenv()
hugging_face_api_token = os.getenv('HUGGING_FACE_API_TOKEN')
# os.getenv returns None when the variable is missing. That None would end up
# as a value in the container environment dicts, so fail here with a clear
# message instead of at deploy time.
if not hugging_face_api_token:
    raise RuntimeError(
        "HUGGING_FACE_API_TOKEN is not set; define it in the environment or in a .env file."
    )

In [44]:
import json
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Container settings common to both endpoints. Environment values must be
# strings, hence json.dumps: True -> "true" and 1 -> "1".
_shared_environment = {
    "HF_API_TOKEN": hugging_face_api_token,
    "HF_MODEL_QUANTIZE": "bitsandbytes",
    "TRUST_REMOTE_CODE": json.dumps(True),
    'SM_NUM_GPUS': json.dumps(1),
    "HF_TASK": "text-generation"
}

# Base model: weights are pulled from the Hugging Face Hub by model id.
environment = {
    **_shared_environment,
    "HF_MODEL_ID": "mistralai/Mistral-7B-Instruct-v0.2",
}

# Fine-tuned model: weights come from the `model_data` archive, which SageMaker
# unpacks into /opt/ml/model inside the container. The LLM container still
# requires HF_MODEL_ID, so point it at that local directory.
# NOTE(review): assumes the model files sit at the root of the archive - confirm.
environment_finetuned = {
    **_shared_environment,
    "HF_MODEL_ID": "/opt/ml/model",
}

# Look up the Hugging Face LLM container image for the region the session uses.
llm_image_region = sess.boto_region_name
image_uri = get_huggingface_llm_image_uri(backend="huggingface", region=llm_image_region)

# Constructor arguments shared by both models, kept in one place so the two
# definitions cannot drift apart.
# NOTE(review): an explicit image_uri is passed alongside the framework version
# arguments - confirm whether the version arguments still have any effect.
_common_model_kwargs = dict(
    role=role,
    transformers_version="4.37",
    pytorch_version="2.1",
    py_version='py310',
    model_server_workers=1,
    image_uri=image_uri,
)

# Base model: no model_data; its environment names the model by id.
base_model = HuggingFaceModel(
    env=environment,
    **_common_model_kwargs,
)

# Fine-tuned model: weights are supplied as a tar.gz artifact in S3. The bucket
# name comes from SAGEMAKER_S3_BUCKET rather than a second hard-coded copy.
finetuned_model = HuggingFaceModel(
    model_data=f"s3://{SAGEMAKER_S3_BUCKET}/models/zephyrus-v02.tar.gz",
    env=environment_finetuned,
    **_common_model_kwargs,
)

In [45]:
# Deploy each model to its own endpoint (one ml.g5.xlarge instance apiece).
# The original called `.deploy` on `huggingface_model`, a name that is never
# defined; the models built above are `base_model` and `finetuned_model`.
predictor = base_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
)
finetuned_predictor = finetuned_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
)

---------------!

In [None]:
# Send the same prompt and generation parameters to both endpoints so their
# answers can be compared side by side.
data = {
    "inputs": "<s>[INST] What is apache airflow? [/INST]",
    "parameters": {
        "max_new_tokens": 200,
        "temperature": 0.9
    }
}
base_result = predictor.predict(data)
finetuned_result = finetuned_predictor.predict(data)

# Print the variables that were actually assigned above; the original printed
# `result` / `result_finetuned`, which do not exist.
print(base_result)
print("----------Fine Tuned------------")
print(finetuned_result)

In [56]:
# Tear down both endpoints so they stop accruing cost. The `finally` ensures
# the base endpoint is still deleted when removing the fine-tuned one raises;
# the exception from the first call still propagates afterwards.
try:
    finetuned_predictor.delete_endpoint()
finally:
    predictor.delete_endpoint()