In [None]:
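# Prerequisite (run once per environment): %pip install codecarbon   # the logs in this notebook were produced with CodeCarbon 2.3.1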

Run this notebook on a SageMaker Jupyter notebook instance.

In [1]:
import sagemaker
import boto3

In [2]:
import os

# Directory that result files are written under.
# The default is the location this notebook was originally written for; set the
# RESULTS_DIR environment variable to run on another machine without editing the cell.
# os.path.join(..., "") guarantees a trailing separator, so plain string
# concatenation such as RESULTS_DIR + "file.csv" keeps producing a valid path.
RESULTS_DIR = os.path.join(
    os.environ.get("RESULTS_DIR") or '/home/fjdur/cloud-api/results/',
    "",
)

In [3]:
# Resolve the IAM role ARN handed to SageMaker.
# First ask the SageMaker SDK for the execution role; if it raises ValueError,
# fall back to looking the role up by name through the IAM API.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role_description = iam.get_role(RoleName='sagamaker_role')
    role = role_description['Role']['Arn']

print(f"sagemaker role arn: {role}")

sagemaker role arn: arn:aws:iam::150660304444:role/sagamaker_role


In [4]:
# Short model alias -> Hugging Face Hub checkpoint id.
model_checkpoint = {
    'codet5-base': "Salesforce/codet5-base",
    'codet5p-220': 'Salesforce/codet5p-220m',
    'codegen-350-mono': "Salesforce/codegen-350M-mono",
    'gpt-neo-125m': "EleutherAI/gpt-neo-125M",
    'codeparrot-small': 'codeparrot/codeparrot-small',
    'pythia-410m': "EleutherAI/pythia-410m",
}

# Aliases in declaration order (original author's note: bloom, pythia).
models = list(model_checkpoint)

# Pick the model for this run by position in `models`.
model_name = models[1]
checkpoint = model_checkpoint[model_name]
print(f'checkpoint: {checkpoint}')

checkpoint: Salesforce/codet5p-220m


In [5]:
# Source of sagemaker.huggingface, for reference:
# https://github.com/aws/sagemaker-python-sdk/blob/c3a5fb01827fdd2cdad66a2b659a2a9a574153a2/src/sagemaker/huggingface/model.py
from sagemaker.huggingface import HuggingFaceModel

# Hub model configuration (https://huggingface.co/models), passed to the model as `env`:
#   HF_MODEL_ID - model id from hf.co/models
#   HF_TASK     - NLP task to use for predictions
hub = dict(HF_MODEL_ID=checkpoint, HF_TASK='text-generation')

# Build the Hugging Face model object.
huggingface_model = HuggingFaceModel(
    transformers_version="4.26",  # transformers version used
    pytorch_version="1.13",       # pytorch version used
    py_version="py39",            # python version of the DLC
    role=role,                    # IAM role with permissions to create an Endpoint
    env=hub,
)
print(huggingface_model)

<sagemaker.huggingface.model.HuggingFaceModel object at 0x7fdc3b3b0310>


In [20]:
# Show the `name` attribute of the model object.
# NOTE(review): this cell's execution count (In [20]) is later than the cells that
# follow it, so the recorded value was produced after those cells had run -
# re-check the output after a Restart & Run All.
huggingface_model.name

'huggingface-pytorch-inference-2023-08-22-12-36-01-988'

In [6]:
# Deploy the model to SageMaker Inference on one ml.m5.xlarge instance and keep
# the returned predictor for the requests sent in later cells.
predictor = huggingface_model.deploy(instance_type="ml.m5.xlarge", initial_instance_count=1)


-----!

In [12]:
import sagemaker

# Attach a generic SageMaker Predictor to an existing endpoint, identified by name.
# Optional arguments the author left disabled:
#   sagemaker_session=sagemaker.Session()
#   serializer=sagemaker.serializers.CSVSerializer()
predictor_reuse = sagemaker.predictor.Predictor(endpoint_name="huggingface-pytorch-inference-2023-08-22-12-01-20-881")

In [17]:
# Use this predictor to reuse the same endpoint and instance as the notebook that
# deployed it, instead of deploying a new endpoint.
#
# HuggingFacePredictor is imported explicitly so this cell does not depend on an
# earlier cell having already loaded the sagemaker.huggingface subpackage.
from sagemaker.huggingface.model import HuggingFacePredictor

predictor_reuse2 = HuggingFacePredictor(
    endpoint_name="huggingface-pytorch-inference-2023-08-22-12-01-20-881",
    # Optional arguments the author left disabled:
    #sagemaker_session=sagemaker.Session(),
    #serializer=sagemaker.serializers.CSVSerializer()
)

In [15]:
# Display the predictor object returned by huggingface_model.deploy().
predictor

<sagemaker.huggingface.model.HuggingFacePredictor at 0x7fdc3996dde0>

In [18]:
# Display the predictor object that was attached to the endpoint by name.
predictor_reuse2

<sagemaker.huggingface.model.HuggingFacePredictor at 0x7fdc333b03a0>

In [7]:
# Example request payload; the "inputs" key must always be defined.
data = dict(inputs="def hello_world():")

# request
from codecarbon import track_emissions

#response = predictor.predict(data)

#@track_emissions(project_name = "codet5p-220_sm", output_file = RESULTS_DIR + "emissions_codet5p-220.csv")
# The tracking label and the CSV file name follow `model_name`, so selecting a
# different model cannot log its emissions under another model's name/file.
# With model_name == 'codet5p-220' these are exactly the original values
# ("codet5p-220_sm" and "emissions_codet5p-220.csv").
@track_emissions(
    project_name=f"{model_name}_sm",
    output_file=f"emissions_{model_name}.csv",
)
def infer(predictor, data):
    """Send one request to an endpoint while codecarbon tracks the call.

    Args:
        predictor: object exposing ``predict(data)`` (the predictors created
            in this notebook are passed here).
        data: request payload forwarded unchanged to ``predictor.predict``.

    Returns:
        Whatever ``predictor.predict(data)`` returns.

    NOTE(review): the tracker runs in this notebook's process, so it presumably
    measures the notebook host rather than the remote endpoint instance -
    confirm this is the intended measurement.
    """
    return predictor.predict(data)

In [8]:
# Run one tracked inference through the predictor returned by deploy().
response = infer(predictor, data)

print(response)

# Clean-up, left disabled by the author: uncomment to delete the model and the endpoint.
#predictor.delete_model()
#predictor.delete_endpoint()



[codecarbon INFO @ 12:23:01] [setup] RAM Tracking...
[codecarbon INFO @ 12:23:01] [setup] GPU Tracking...
[codecarbon INFO @ 12:23:01] No GPU found.
[codecarbon INFO @ 12:23:01] [setup] CPU Tracking...
[codecarbon INFO @ 12:23:01] CPU Model on constant consumption mode: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
[codecarbon INFO @ 12:23:01] >>> Tracker's metadata:
[codecarbon INFO @ 12:23:01]   Platform system: Linux-5.10.178-162.673.amzn2.x86_64-x86_64-with-glibc2.26
[codecarbon INFO @ 12:23:01]   Python version: 3.10.10
[codecarbon INFO @ 12:23:01]   CodeCarbon version: 2.3.1
[codecarbon INFO @ 12:23:01]   Available RAM : 15.325 GB
[codecarbon INFO @ 12:23:01]   CPU count: 4
[codecarbon INFO @ 12:23:01]   CPU model: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
[codecarbon INFO @ 12:23:01]   GPU count: None
[codecarbon INFO @ 12:23:01]   GPU model: None
[codecarbon INFO @ 12:23:03] 
Graceful stopping: collecting and writing information.
Please wait a few seconds...
[codecarbon I

[{'generated_text': 'def hello_world():def hello_world_world()def'}]


In [19]:
# Same tracked inference, sent through the predictor attached to the endpoint by name.
response = infer(predictor_reuse2, data)

print(response)

[codecarbon INFO @ 12:45:22] [setup] RAM Tracking...
[codecarbon INFO @ 12:45:22] [setup] GPU Tracking...
[codecarbon INFO @ 12:45:22] No GPU found.
[codecarbon INFO @ 12:45:22] [setup] CPU Tracking...
[codecarbon INFO @ 12:45:23] CPU Model on constant consumption mode: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
[codecarbon INFO @ 12:45:23] >>> Tracker's metadata:
[codecarbon INFO @ 12:45:23]   Platform system: Linux-5.10.178-162.673.amzn2.x86_64-x86_64-with-glibc2.26
[codecarbon INFO @ 12:45:23]   Python version: 3.10.10
[codecarbon INFO @ 12:45:23]   CodeCarbon version: 2.3.1
[codecarbon INFO @ 12:45:23]   Available RAM : 15.325 GB
[codecarbon INFO @ 12:45:23]   CPU count: 4
[codecarbon INFO @ 12:45:23]   CPU model: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz
[codecarbon INFO @ 12:45:23]   GPU count: None
[codecarbon INFO @ 12:45:23]   GPU model: None
[codecarbon INFO @ 12:45:24] 
Graceful stopping: collecting and writing information.
Please wait a few seconds...
[codecarbon I

[{'generated_text': 'def hello_world():def hello_world_world()def'}]
