## Step 1: Install Required Packages

In [1]:
!pip install sagemaker huggingface_hub -q

## Step 2: Import Libraries and Set Up SageMaker Session

In [6]:
import huggingface_hub

huggingface_hub.notebook_login()

In [11]:
import sagemaker
from sagemaker.huggingface import HuggingFaceModel
import boto3

# Set up SageMaker session
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

print(f"SageMaker role: {role}")
print(f"SageMaker session region: {sess.boto_region_name}")

sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
SageMaker role: arn:aws:iam::992382832262:role/datazone_usr_role_bsy8e2eigl2nio_di2fxgcsdouhlc
SageMaker session region: us-west-2


## Step 3: Define the Hugging Face Model

Choose a model from Hugging Face Hub. This example uses `distilbert-base-uncased-finetuned-sst-2-english` for sentiment analysis.

In [17]:
# Create Hugging Face Model configuration
huggingface_model = HuggingFaceModel(
    model_data=None,  # Not required when using model from Hub
    role=role,
    transformers_version="4.37",
    pytorch_version="2.1",
    py_version="py310",
    env={
        'HF_MODEL_ID': 'gpt2',
        'HF_TASK': 'text-generation'
    }
)

print("Model configuration created successfully!")

sagemaker.config INFO - Applied value from config key = SageMaker.Model.VpcConfig
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
Model configuration created successfully!


## Step 4: Deploy the Model to a SageMaker Endpoint

This creates a real-time inference endpoint. **Note: This will incur AWS charges while the endpoint is running.**

In [21]:
# Clean up any existing endpoint and config with the same name (run this if you get conflicts)
import boto3

sagemaker_client = boto3.client('sagemaker')
endpoint_name = "huggingface-demo-endpoint"

# Delete endpoint if it exists
try:
    sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
    print(f"Deleted existing endpoint: {endpoint_name}")
    # Wait for deletion to complete
    import time
    time.sleep(10)
except Exception as e:
    print(f"No existing endpoint to delete (or error): {e}")

# Delete endpoint config if it exists
try:
    sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
    print(f"Deleted existing endpoint config: {endpoint_name}")
except Exception as e:
    print(f"No existing endpoint config to delete (or error): {e}")

print("\nCleanup complete! You can now run the deployment cell.")

No existing endpoint to delete (or error): An error occurred (ValidationException) when calling the DeleteEndpoint operation: Could not find endpoint "huggingface-demo-endpoint".
No existing endpoint config to delete (or error): An error occurred (AccessDeniedException) when calling the DeleteEndpointConfig operation: User: arn:aws:sts::992382832262:assumed-role/datazone_usr_role_bsy8e2eigl2nio_di2fxgcsdouhlc/SageMaker is not authorized to perform: sagemaker:DeleteEndpointConfig on resource: arn:aws:sagemaker:us-west-2:992382832262:endpoint-config/huggingface-demo-endpoint because no identity-based policy allows the sagemaker:DeleteEndpointConfig action

Cleanup complete! You can now run the deployment cell.


In [22]:
# Deploy model to SageMaker endpoint
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",  # Cost-effective instance for testing
    endpoint_name="huggingface-demo-endpoint"
)

print(f"Endpoint deployed successfully: {predictor.endpoint_name}")

------!Endpoint deployed successfully: huggingface-demo-endpoint


## Step 5: Test the Endpoint with Sample Data

In [24]:
# Test the endpoint with sample inputs
test_data = {
    "inputs": "I love using SageMaker for deploying models! Yo mama is"
}

# Make prediction
result = predictor.predict(test_data)
print("Prediction result:")
print(result)

Prediction result:
[{'generated_text': 'I love using SageMaker for deploying models! Yo mama is amazing, he makes things simple by giving you the simplest way of creating your models.\n\nWhen I started coding for SageMaker and creating my models I came across a bunch of great'}]


In [26]:
# Test with multiple inputs
test_data_batch = {
    "inputs": [
        "This is amazing!",
        "I don't like this at all.",
        "It's okay, nothing special."
    ]
}

result_batch = predictor.predict(test_data_batch)
print("\nBatch prediction results:")
for i, res in enumerate(result_batch):
    print(f"Input {i+1}: {res}")


Batch prediction results:
Input 1: [{'generated_text': "This is amazing! My dad and I were together about 30 years ago. We went out and had a great dinner. I'd seen the new Disney movie. I was standing in a bar that night. As I thought about it, I saw Mickey"}]
Input 2: [{'generated_text': "I don't like this at all. When it was brought back you heard about those who had been killed. I'm sorry folks. I'm sure they'll be sorry.\n\nWe are not going to let it get this far. At least"}]
Input 3: [{'generated_text': 'It\'s okay, nothing special. Just one word from here!"\n\nHe took his bag of papers out from under the table and opened them up. "The only thing they\'re calling it now (The Three Broomsticks!) are two of'}]


## Step 6: Clean Up - Delete the Endpoint

**Important:** Always delete your endpoint when you're done to avoid ongoing charges!

In [None]:
# Delete the endpoint to stop incurring charges
predictor.delete_endpoint()
print("Endpoint deleted successfully!")

In [10]:
import boto3
sagemaker_client = boto3.client('sagemaker')

# Delete old endpoint config
try:
    sagemaker_client.delete_endpoint_config(EndpointConfigName='huggingface-demo-endpoint')
    print("Old endpoint config deleted")
except:
    print("No existing config to delete")

No existing config to delete


---

## Alternative: Use a Different Model

You can easily swap out the model by changing the `HF_MODEL_ID` and `HF_TASK`. Here are some examples:

**Text Generation:**
```python
env={
    'HF_MODEL_ID': 'gpt2',
    'HF_TASK': 'text-generation'
}
```

**Question Answering:**
```python
env={
    'HF_MODEL_ID': 'distilbert-base-cased-distilled-squad',
    'HF_TASK': 'question-answering'
}
```

**Translation:**
```python
env={
    'HF_MODEL_ID': 't5-small',
    'HF_TASK': 'translation'
}
```

**Named Entity Recognition:**
```python
env={
    'HF_MODEL_ID': 'dslim/bert-base-NER',
    'HF_TASK': 'token-classification'
}
```

In [None]:
# Get temporary credentials from the notebook instance IAM role
import boto3
import json

# Get credentials from the session
session = boto3.Session()
credentials = session.get_credentials()
frozen_credentials = credentials.get_frozen_credentials()

print("=" * 60)
print("COPY THESE CREDENTIALS TO YOUR LAPTOP")
print("=" * 60)
print("\nOption A: Add to your laptop's ~/.aws/credentials file:")
print("\n[default]")
print(f"aws_access_key_id = YOUR_ACCESS_KEY_ID")
print(f"aws_secret_access_key = YOUR_SECRET_ACCESS_KEY")
if frozen_credentials.token:
    print(f"aws_session_token = YOUR_SESSION_TOKEN")

print("\n" + "=" * 60)
print("\nOption B: Set as environment variables in your laptop notebook:")
print("\nimport os")
print(f"os.environ['AWS_ACCESS_KEY_ID'] = 'YOUR_ACCESS_KEY_ID'")
print(f"os.environ['AWS_SECRET_ACCESS_KEY'] = 'YOUR_SECRET_ACCESS_KEY'")
if frozen_credentials.token:
    print(f"os.environ['AWS_SESSION_TOKEN'] = 'YOUR_SESSION_TOKEN'")
print(f"os.environ['AWS_DEFAULT_REGION'] = '{session.region_name}'")

print("\n" + "=" * 60)
print(f"\nRegion: {session.region_name}")
print(f"Credentials method: {credentials.method}")
print("\n⚠️  Note: These are temporary credentials and will expire!")
print("=" * 60)

COPY THESE CREDENTIALS TO YOUR LAPTOP

Option A: Add to your laptop's ~/.aws/credentials file:

[default]
aws_access_key_id = ASIA6ODVAJ2DDV4PBTOF
aws_secret_access_key = h6v9Fgzz/cbraAMlLrL+jCFW+mS0ELaiFpRccCpU
aws_session_token = IQoJb3JpZ2luX2VjEGwaCXVzLXdlc3QtMiJGMEQCIGt6lxIloyEoSplL9tEUOOFvSSswiscDK/HOn+JtKBveAiAvY+K7Yu9arAhb4vhIOAhhEzGr+f/CMsLUAMnmOQlSMSrLBQg1EAAaDDk5MjM4MjgzMjI2MiIMRHYddxdX+IErfAkLKqgFyw7Cpj1X6c4Kf2ZWteqOBqYoXLt+w4Kigi7jW3LsI0Foj1z5PTNrBpput568log7zo/me7pON1jYj8xxdBMotnihOi7ZeCZaf2v/l+Okvd3w/QVN2X2E5rhg1KHcFlpZ1ymfNjd9oLMY0xvVpCD6F5InnkMPN6XJYx2eoiIjUVncdvqxy8FRnilCJRX5ji5ChZEXMCHGOLHRZAtcSijgebchNCuPX1vh+yRu4B6VNC3GxP+AwnuaI3o/Cl/RINxFPtARJljnMONC9Ah4Q3iOB4X38CqlptvCamaLImiU0dgqFmvvMf+Vl0TFEtDpyLjmEucZ4KMVeXtm3yuXSJ7HBHvoM/AbS/1bngfPtZQ7kKh4ADdzm3nErkfrDRtP13RsNtSASU1jFaHxm5oetoSvi4erV7wyHD32+Xj4QgiFIBzk8IFByTB3QOgfcUkNlb/MpmfaSEmapYV/tSYMDfp548GVFNRWMkChGbl9cboGnopEzLBFvKpVVhQdAh8eUdzY/0Pms9XZ6CaSUyzO5+XESr2QP5ZyXAUEWCLzvLqe4dZ0fSLWVvLypIXF9F64DMyA8PwpNcinrYqz