In [1]:
!pip install --upgrade sagemaker

Collecting sagemaker
  Downloading sagemaker-2.244.2-py3-none-any.whl.metadata (17 kB)
Collecting graphene<4,>=3 (from sagemaker)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4,>=3->sagemaker)
  Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)
Collecting graphql-relay<3.3,>=3.1 (from graphene<4,>=3->sagemaker)
  Downloading graphql_relay-3.2.0-py3-none-any.whl.metadata (12 kB)
Downloading sagemaker-2.244.2-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading graphene-3.4.3-py2.py3-none-any.whl (114 kB)
Downloading graphql_core-3.2.6-py3-none-any.whl (203 kB)
Downloading graphql_relay-3.2.0-py3-none-any.whl (16 kB)
Installing collected packages: graphql-core, graphql-relay, graphene, sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.243.3
    Uninstalling 

### Deploying endpoint

In [2]:

from sagemaker.model import Model
from sagemaker import get_execution_role
import boto3


region = "us-east-2"
AWS_ACCOUNT_ID = "225725557140"

role = get_execution_role()
region = boto3.Session().region_name
s3_model_uri = "s3://unsloth-llama3/llama3-model/model.tar.gz"  

model = Model(
    image_uri=f"{AWS_ACCOUNT_ID}.dkr.ecr.{region}.amazonaws.com/llama3-unsloth:latest",
    model_data=s3_model_uri,
    role=role,
    name="llama3-unsloth-model-v1"
)

predictor = model.deploy(
    instance_type="ml.g5.xlarge",
    initial_instance_count=1,
    endpoint_name="llama3-unsloth-endpoint-v1",
    wait=True
)



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


-----------!

In [29]:

# region = "us-east-2"
# account_id = "225725557140"
# repository_name = "llama3-unsloth"
# image_uri = f"{account_id}.dkr.ecr.{region}.amazonaws.com/{repository_name}:latest"
# endpoint_name = "llama3-unsloth-endpoint"
# role = get_execution_role()
# instance_type = "ml.g5.xlarge" 



# # import boto3
# # ec2 = boto3.client("ec2")
# # print(ec2.describe_subnets())  # and filter from here

# import boto3
# ec2 = boto3.client("ec2")
# response = ec2.describe_security_groups(Filters=[{"Name": "vpc-id", "Values": ["vpc-0cdadb06ee43a1112"]}])
# print([sg["GroupId"] for sg in response["SecurityGroups"]])


### Testing endpoint

In [5]:

import json
import boto3

# Initialize the SageMaker runtime client
runtime = boto3.client('sagemaker-runtime')

# Input should be a list
input_data = {
    "inputs": [
"Invoice\nINVOICE #4567\nDate: 2024-11-12\nSeller: Alpha Electronics Ltd.\nBuyer: Tech World Co.\nItems:\n- 10x SSD 1TB @ $100\n- 5x Monitor 24\" @ $150\nTotal Amount Due: $1,750\nPayment Terms: Net 30 Days",
"Mill Certificate\nCertificate No: 9982\nManufacturer: SteelCorp Industries\nProduct: Cold Rolled Steel Sheets\nSpecification: ASTM A1008\nHeat No: 558930\nMechanical Properties:\n- Yield Strength: 280 MPa\n- Tensile Strength: 420 MPa\nCertified by: QA Engineer - John Smith"
]
}




# Invoke the endpoint
response = runtime.invoke_endpoint(
    EndpointName='llama3-unsloth-endpoint-v1',
    ContentType='application/json',
    Body=json.dumps(input_data)
)

# Parse and print the response
result = json.loads(response['Body'].read().decode())
print(result)

### check why not entire string is acceptable here (check inference.py)

{'invoice': [[0]], 'mill_certificate': [[1]]}


### Deleting endpoint

In [6]:

import boto3

sm = boto3.client("sagemaker", region_name="us-east-2")

# Names used
model_name = "llama3-unsloth-endpoint-v1"
endpoint_config_name = "llama3-unsloth-endpoint-v1"
endpoint_name = "llama3-unsloth-endpoint-v1"

# Delete endpoint
try:
    sm.delete_endpoint(EndpointName=endpoint_name)
    print(f"Deleted endpoint: {endpoint_name}")
except sm.exceptions.ClientError as e:
    print(f"Endpoint not found: {e}")

# Delete endpoint config
try:
    sm.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
    print(f"Deleted endpoint config: {endpoint_config_name}")
except sm.exceptions.ClientError as e:
    print(f"Endpoint config not found: {e}")

# Delete model
try:
    sm.delete_model(ModelName=model_name)
    print(f"Deleted model: {model_name}")
except sm.exceptions.ClientError as e:
    print(f"Model not found: {e}")


Deleted endpoint: llama3-unsloth-endpoint-v1
Deleted endpoint config: llama3-unsloth-endpoint-v1
Model not found: An error occurred (ValidationException) when calling the DeleteModel operation: Could not find model "llama3-unsloth-endpoint-v1".


#### Dloading model from hf repo

In [3]:
# !pip install huggingface_hub

In [4]:

# from huggingface_hub import snapshot_download
# import shutil
# import os

# # Config
# repo_id = "zeerakwyne/test2_doc-splitter-llama-3-2-3B-20-epoch_merged"  
# local_dir = "hf_tmp_model" 
# hf_token = "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"

# # Clean existing model directory if exists
# if os.path.exists(local_dir):
#     shutil.rmtree(local_dir)

# # Download model snapshot (includes all .bin parts, tokenizer, config, etc.)
# snapshot_download(
#     repo_id=repo_id,
#     local_dir=local_dir,
#     token=hf_token,
#     local_dir_use_symlinks=False  # makes sure all files are copied instead of symlinked
# )

# print(f"Model downloaded to: {local_dir}")
# print("Files:")
# for root, _, files in os.walk(local_dir):
#     for f in files:
#         print(f"- {os.path.join(root, f)}")


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

README.md:   0%|          | 0.00/605 [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/922 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Model downloaded to: hf_tmp_model
Files:
- hf_tmp_model/pytorch_model-00002-of-00002.bin
- hf_tmp_model/tokenizer.json
- hf_tmp_model/pytorch_model.bin.index.json
- hf_tmp_model/config.json
- hf_tmp_model/pytorch_model-00001-of-00002.bin
- hf_tmp_model/generation_config.json
- hf_tmp_model/.gitattributes
- hf_tmp_model/tokenizer_config.json
- hf_tmp_model/special_tokens_map.json
- hf_tmp_model/README.md
- hf_tmp_model/.cache/huggingface/.gitignore
- hf_tmp_model/.cache/huggingface/download/pytorch_model-00002-of-00002.bin.lock
- hf_tmp_model/.cache/huggingface/download/tokenizer.json.metadata
- hf_tmp_model/.cache/huggingface/download/generation_config.json.lock
- hf_tmp_model/.cache/huggingface/download/.gitattributes.metadata
- hf_tmp_model/.cache/huggingface/download/tokenizer_config.json.lock
- hf_tmp_model/.cache/huggingface/download/pytorch_model-00001-of-00002.bin.metadata
- hf_tmp_model/.cache/huggingface/download/README.md.metadata
- hf_tmp_model/.cache/huggingface/download/to

In [None]:
### cleaning

# !sudo rm -rf /opt/ml/model/*
# !sudo rm -rf /opt/ml/output/*
# !rm -rf ~/.cache/huggingface
# !rm -rf ~/.cache/pip
# !sudo rm -rf /tmp/*
# !docker system prune -af
# !df -h
