In [1]:
!pip install transformers



In [2]:
!pip install -U sagemaker

Collecting sagemaker
  Using cached sagemaker-2.251.0-py3-none-any.whl.metadata (17 kB)
Collecting attrs<26,>=24 (from sagemaker)
  Using cached attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Collecting boto3<2.0,>=1.39.5 (from sagemaker)
  Using cached boto3-1.40.16-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore<1.41.0,>=1.40.16 (from boto3<2.0,>=1.39.5->sagemaker)
  Using cached botocore-1.40.16-py3-none-any.whl.metadata (5.7 kB)
Collecting s3transfer<0.14.0,>=0.13.0 (from boto3<2.0,>=1.39.5->sagemaker)
  Using cached s3transfer-0.13.1-py3-none-any.whl.metadata (1.7 kB)
Using cached sagemaker-2.251.0-py3-none-any.whl (1.7 MB)
Using cached attrs-25.3.0-py3-none-any.whl (63 kB)
Using cached boto3-1.40.16-py3-none-any.whl (140 kB)
Using cached botocore-1.40.16-py3-none-any.whl (14.0 MB)
Using cached s3transfer-0.13.1-py3-none-any.whl (85 kB)
Installing collected packages: attrs, botocore, s3transfer, boto3, sagemaker
[2K  Attempting uninstall: attrs
[2K    Found existing insta

In [3]:
!pip install s3fs

Collecting botocore<1.37.2,>=1.37.0 (from aiobotocore<3.0.0,>=2.5.4->s3fs)
  Using cached botocore-1.37.1-py3-none-any.whl.metadata (5.7 kB)
Using cached botocore-1.37.1-py3-none-any.whl (13.4 MB)
Installing collected packages: botocore
  Attempting uninstall: botocore
    Found existing installation: botocore 1.40.16
    Uninstalling botocore-1.40.16:
      Successfully uninstalled botocore-1.40.16
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sagemaker-studio-analytics-extension 0.2.0 requires sparkmagic==0.22.0, but you have sparkmagic 0.21.0 which is incompatible.
s3transfer 0.13.1 requires botocore<2.0a.0,>=1.37.4, but you have botocore 1.37.1 which is incompatible.
boto3 1.40.16 requires botocore<1.41.0,>=1.40.16, but you have botocore 1.37.1 which is incompatible.[0m[31m
[0mSuccessfully installed botocore-1.37.1


In [4]:
!pip install loguru

Collecting loguru
  Using cached loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Using cached loguru-0.7.3-py3-none-any.whl (61 kB)
Installing collected packages: loguru
Successfully installed loguru-0.7.3


In [5]:
import time
import json
import os
import sys
import sagemaker
import boto3
import s3fs
from sagemaker.huggingface import HuggingFaceModel

import pandas as pd
from datasets import Dataset

# Put the directory two levels above the current working directory at the front
# of the module search path, unless it is already listed there.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), '../..')
)
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)
    
from src.utils.data_generation import nested_split_dataset, generate_responses_concurrently_deployed
from src.prompts.llama_prompts import MathQAPrompt, ContextualQAPrompt

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


### Get execution role

In [6]:
# Resolve the IAM role ARN that is later passed to HuggingFaceModel.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() could not determine a role: fall back to looking up
    # a role by its fixed name through the IAM API.
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']


### Model setup

In [7]:
# Environment variables handed to the model on creation.
env = dict(LOGLEVEL="INFO")

# Describe the model to deploy: the archive stored in S3, the IAM role to use,
# and the transformers / PyTorch / Python versions to run it with.
huggingface_model = HuggingFaceModel(
    role=role,
    model_data="s3://self-corrective-llm-data/tuned_model/model.tar.gz",
    env=env,
    py_version="py312",
    pytorch_version="2.6",
    transformers_version="4.49",
)


In [8]:
# Deploy the model to SageMaker Inference on one ml.g5.2xlarge instance, with a
# container startup health-check timeout of 300.
model = huggingface_model.deploy(
    instance_type="ml.g5.2xlarge",
    initial_instance_count=1,
    container_startup_health_check_timeout=300,
)

------------!

In [12]:
# Instruction text describing the <DEL_W>, <DEL_S> and <DEL_A> correction tokens;
# it is interpolated into the system message of prompt_2.
SPECIAL_INSTRUCTION = (
    "\nAs you write your answer, you can correct yourself using these tools: "
    "Use <DEL_W> to take back the word before this token, "
    "<DEL_S> to remove the entire sentence before this token, "
    "and <DEL_A> to scrap everything you've written and start again."
)

In [23]:
# NOTE(review): in prompt_1 and prompt_2 the system and user headers are followed
# by "\n\n" but the trailing assistant header is not. Confirm this matches the
# prompt format the model was tuned on.

# prompt_1: contextual-QA chat prompt whose system message does NOT include
# SPECIAL_INSTRUCTION. It has no placeholders, so it is a plain string.
prompt_1 = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a specialized question-answering AI. Your task is to give a concise answer to the question using *only* the provided context.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nContext:\n'''\nThe Pew Forum on Religion & Public Life ranks Egypt as the fifth worst country in the world for religious freedom. The United States Commission on International Religious Freedom, a bipartisan independent agency of the US government, has placed Egypt on its watch list of countries that require close monitoring due to the nature and extent of violations of religious freedom engaged in or tolerated by the government. According to a 2010 Pew Global Attitudes survey, 84% of Egyptians polled supported the death penalty for those who leave Islam; 77% supported whippings and cutting off of hands for theft and robbery; and 82% support stoning a person who commits adultery.\n'''\n\nQuestion: What percentage of Egyptians polled support death penalty for those leaving Islam?<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
# prompt_2: contextual-QA chat prompt whose system message ends with
# SPECIAL_INSTRUCTION (the only interpolated value).
prompt_2 = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a specialized question-answering AI. Your task is to give a concise answer to the question using *only* the provided context. Make sure to always give an answer.{SPECIAL_INSTRUCTION}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nContext:\n'''\nThere is a sizeable Greek minority of about 105,000 (disputed, sources claim higher) people, in Albania. The Greek minority of Turkey, which numbered upwards of 200,000 people after the 1923 exchange, has now dwindled to a few thousand, after the 1955 Constantinople Pogrom and other state sponsored violence and discrimination. This effectively ended, though not entirely, the three-thousand-year-old presence of Hellenism in Asia Minor. There are smaller Greek minorities in the rest of the Balkan countries, the Levant and the Black Sea states, remnants of the Old Greek Diaspora (pre-19th century).\n'''\n\nQuestion: Are there any Greek who still live in the area around the dark sea in any majority of numbers?<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
# prompt_3: bare instruction with no chat-template tokens and no placeholders.
prompt_3 = "Output text deletion tokens: <DEL_W>, <DEL_S> and <DEL_A>."

In [24]:
# Display the prompt string as the cell output.
prompt_3

'Output text deletion tokens: <DEL_W>, <DEL_S> and <DEL_A>.'

In [26]:
# Send ten copies of prompt_3 to the endpoint in a single request.
response = model.predict({
    "inputs": [prompt_3] * 10,
    "parameters": {"temperature": 0.7, "max_new_tokens": 256},
})

# Print each returned generation. The loop variable has its own name so that
# `response` still refers to the full payload after the loop has finished.
for generated_text in response["responses"]:
    print(generated_text)





 (1)







In [27]:
# Tear down the deployed resources. The endpoint deletion sits in `finally` so
# that an error while deleting the model cannot leave the endpoint running.
try:
    model.delete_model()
finally:
    model.delete_endpoint()