In [1]:
!pip install transformers



In [2]:
!pip install -U sagemaker

Collecting sagemaker
  Using cached sagemaker-2.248.2-py3-none-any.whl.metadata (17 kB)
Collecting attrs<26,>=24 (from sagemaker)
  Using cached attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Using cached sagemaker-2.248.2-py3-none-any.whl (1.7 MB)
Using cached attrs-25.3.0-py3-none-any.whl (63 kB)
Installing collected packages: attrs, sagemaker
[2K  Attempting uninstall: attrs
[2K    Found existing installation: attrs 23.2.0
[2K    Uninstalling attrs-23.2.0:
[2K      Successfully uninstalled attrs-23.2.0
[2K  Attempting uninstall: sagemaker
[2K    Found existing installation: sagemaker 2.245.0
[2K    Uninstalling sagemaker-2.245.0:
[2K      Successfully uninstalled sagemaker-2.245.0━━━━━━━━━━━━━━━━━━[0m [32m1/2[0m [sagemaker]
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [sagemaker]/2[0m [sagemaker]
[1A[2K[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of 

In [3]:
!pip install s3fs



In [10]:
!pip install loguru

Collecting loguru
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Downloading loguru-0.7.3-py3-none-any.whl (61 kB)
Installing collected packages: loguru
Successfully installed loguru-0.7.3


In [None]:
import time
import json
import os
import sys
import sagemaker
import boto3
import s3fs
from sagemaker.huggingface import HuggingFaceModel

import pandas as pd
from datasets import Dataset

# Make the repository root (two directories above the current working
# directory) importable so the `src.*` packages below can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    # Prepend so the project's modules are found before any same-named installed package
    sys.path.insert(0, project_root)
    
from src.utils.data_generation import split_dataset, concurrent_data_generation
from src.prompts.llama_prompts import MathQAPrompt, ContextualQAPrompt

In [12]:
# Run configuration shared by every dataset section below.
PROC_NUM = 1 # number of processes to use for data generation; also the chunk count passed to split_dataset
DATA_SPLIT = "train" # "train" or "test"; selects the raw-data file that is read and the output folder that is written
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"  # stored under additional_info["model"] in each response_dict

### Get execution role

In [13]:
# IAM role passed to HuggingFaceModel below.
# Set the SAGEMAKER_ROLE_ARN environment variable to run under a different
# account or role without editing the notebook; when it is unset (or empty)
# the role this notebook was developed against is used.
role = os.environ.get("SAGEMAKER_ROLE_ARN") or (
    "arn:aws:iam::551529993308:role/service-role/AmazonSageMaker-ExecutionRole-20250711T075198"
)


### Model setup

In [14]:
# Environment variables handed to the model via the `env` argument
env = {
    "LOGLEVEL": "INFO"
}

# Describe the model to deploy: the packaged artifact in S3, the IAM role to
# use, and the transformers / PyTorch / Python versions requested.
# Nothing is deployed here; that happens in the next cell via `.deploy()`.
huggingface_model = HuggingFaceModel(
    model_data="s3://self-corrective-llm-data/initial_model/model.tar.gz",
    role=role,
    env=env,
    transformers_version="4.49",
    pytorch_version="2.6",
    py_version='py312',
)


In [15]:
# Deploy the model to a SageMaker inference endpoint. The returned object is
# bound to `model`; it is what the generation cells call and what the final
# cell uses to delete the model and endpoint.
# Alternative instance types kept for reference: ml.g4dn.xlarge, ml.g6.4xlarge
model = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",
    container_startup_health_check_timeout=300,
)

-------------!

### SQuAD

In [16]:
# Load the SQuAD split from S3, take a small reproducible sample and split it
# into PROC_NUM chunks for generation.
dataset_name = "rajpurkar_squad"
s3 = s3fs.S3FileSystem()
path = f"s3://self-corrective-llm-data/dataset/raw_data/{dataset_name}/{DATA_SPLIT}.parquet"

try:
    print(f"Loading Parquet file from: {path}")
    with s3.open(path, 'rb') as f:
        df = pd.read_parquet(f)
    print("File loaded successfully into pandas DataFrame.")
except Exception as e:
    print(f"Failed to read Parquet file. Error: {e}")
    # Stop here: continuing would either raise NameError on `df` or, worse,
    # silently reuse a stale `df` left in the kernel by an earlier cell.
    raise

data = Dataset.from_pandas(df)
# Fixed seed keeps the sample reproducible; cap at the dataset size so a split
# with fewer than 100 rows does not make `select` fail.
data = data.shuffle(seed=42).select(range(min(100, len(data))))
data_chunks = split_dataset(data, PROC_NUM)

Loading Parquet file from: s3://self-corrective-llm-data/dataset/raw_data/rajpurkar_squad/train.parquet
File loaded successfully into pandas DataFrame.


In [17]:
# Static metadata for the SQuAD run; passed to concurrent_data_generation as
# `response_dict_format`.
# NOTE(review): the generated records shown below carry these fields plus the
# per-row values from process_data_chunk under "additional_info" — confirm the
# merge logic in src.utils.data_generation.
response_dict = {
    "task_info": {
        "type": "Contextual QA",
        "dataset": dataset_name,
    },
    "additional_info": {
        "model": MODEL_NAME,
    }
}

In [18]:
def process_data_chunk(data_chunk: dict) -> tuple[list[dict], list[dict]]:
    """Turn a column-oriented SQuAD chunk into per-row prompt inputs and metadata.

    Args:
        data_chunk: Mapping of column name to a sequence of values. Reads the
            "question", "context", "answers" and "title" columns; each entry
            of "answers" must be indexable by "text".

    Returns:
        A ``(model_input, additional_info)`` pair of equally long lists, one
        entry per question. ``model_input`` entries hold "query" and
        "context"; ``additional_info`` entries hold "question", "context",
        "answer" (the row's ``answers["text"]``) and "title".
    """
    model_input: list[dict] = []
    additional_info: list[dict] = []
    for row, question in enumerate(data_chunk["question"]):
        context = data_chunk["context"][row]
        model_input.append({"query": question, "context": context})
        additional_info.append({
            "question": question,
            "context": context,
            "answer": data_chunk["answers"][row]["text"],
            "title": data_chunk["title"][row],
        })
    return model_input, additional_info

In [None]:
# Run generation for the SQuAD chunks against the deployed endpoint and time it.
# The bare top-level `await` relies on the notebook kernel's async support.
# `process_data_chunk` must be the SQuAD variant defined in this section: the
# UMWP section redefines the same name, so re-run the SQuAD definition first
# if cells were executed out of order.
start_time = time.time()
all_results = await concurrent_data_generation(
    model=model,
    prompt_class=ContextualQAPrompt,
    data_chunks=data_chunks,
    response_dict_format=response_dict,
    data_processing_function=process_data_chunk,
    prompt_repetitions=10,  # presumably generations requested per prompt — TODO confirm in src.utils.data_generation
)
end_time = time.time()
print(f"Time taken: {end_time - start_time}")  # elapsed wall-clock seconds

Time taken: 50.428736209869385


In [23]:
# Spot-check the first two generated records before saving
all_results[:2]

[{'task_info': {'type': 'Contextual QA', 'dataset': 'rajpurkar_squad'},
  'additional_info': {'model': 'meta-llama/Llama-3.2-1B-Instruct',
   'question': 'What percentage of Egyptians polled support death penalty for those leaving Islam?',
   'context': 'The Pew Forum on Religion & Public Life ranks Egypt as the fifth worst country in the world for religious freedom. The United States Commission on International Religious Freedom, a bipartisan independent agency of the US government, has placed Egypt on its watch list of countries that require close monitoring due to the nature and extent of violations of religious freedom engaged in or tolerated by the government. According to a 2010 Pew Global Attitudes survey, 84% of Egyptians polled supported the death penalty for those who leave Islam; 77% supported whippings and cutting off of hands for theft and robbery; and 82% support stoning a person who commits adultery.',
   'answer': ['84%'],
   'title': 'Egypt'},
  'input': "<|begin_of_te

In [15]:
# Upload all generated records as one pretty-printed JSON document.
# The object key is derived from the split and dataset name.
s3 = s3fs.S3FileSystem()
output_path = f"s3://self-corrective-llm-data/dataset/raw_model_responses/{DATA_SPLIT}/{DATA_SPLIT}_{dataset_name}.json"
json_string = json.dumps(all_results, indent=4)

print(f"Saving file to: {output_path}")
with s3.open(output_path, "w") as f:
    f.write(json_string)

print("File saved successfully to S3!")

Saving file to: s3://self-corrective-llm-data/dataset/raw_model_responses/train/train_rajpurkar_squad.json
File saved successfully to S3!


### UMWP

In [16]:
# Load the UMWP split from S3, take a small reproducible sample and split it
# into PROC_NUM chunks for generation.
dataset_name = "UMWP"
s3 = s3fs.S3FileSystem()
path = f"s3://self-corrective-llm-data/dataset/raw_data/{dataset_name}/{DATA_SPLIT}.json"

# The file may be JSON Lines or a single JSON document, so JSONL is tried first.
# Only a parse failure (ValueError) triggers the fallback; a missing file or a
# credentials problem surfaces immediately instead of being retried and masked.
try:
    with s3.open(path, 'r') as f:
        df = pd.read_json(f, lines=True)
except ValueError as e:
    print(f"Failed to read as JSONL, trying as regular JSON. Error: {e}")
    with s3.open(path, 'r') as f:
        df = pd.read_json(f)


data = Dataset.from_pandas(df)
# Take a smaller sample for testing. The fixed seed keeps it reproducible, and
# the cap at the dataset size stops `select` failing on fewer than 100 rows.
data = data.shuffle(seed=42).select(range(min(100, len(data))))
data_chunks = split_dataset(data, PROC_NUM)

Failed to read as JSONL, trying as regular JSON. Error: Expected object or value


In [17]:
# Static metadata for the UMWP run; passed to concurrent_data_generation as
# `response_dict_format`. Replaces the SQuAD `response_dict` defined earlier.
response_dict = {
    "task_info": {
        "type": "QA",
        "dataset": dataset_name,
    },
    "additional_info": {
        "model": MODEL_NAME,
        "domain": "Math"
    }
}

In [18]:
def process_data_chunk(data_chunk: dict) -> tuple[list[dict], list[dict]]:
    """Turn a column-oriented UMWP chunk into per-row prompt inputs and metadata.

    Note: this redefines the ``process_data_chunk`` used in the SQuAD section,
    so whichever definition ran last is the one in effect.

    Args:
        data_chunk: Mapping of column name to a sequence of values. Reads the
            "question", "answer", "answerable" and "source" columns.

    Returns:
        A ``(model_input, additional_info)`` pair of equally long lists, one
        entry per question. ``model_input`` entries hold only "query";
        ``additional_info`` entries hold "question", "answer", "answerable"
        and "source".
    """
    model_input: list[dict] = []
    additional_info: list[dict] = []
    for row, question in enumerate(data_chunk["question"]):
        model_input.append({"query": question})
        additional_info.append({
            "question": question,
            "answer": data_chunk["answer"][row],
            "answerable": data_chunk["answerable"][row],
            "source": data_chunk["source"][row],
        })
    return model_input, additional_info

In [None]:
# Run generation for the UMWP chunks against the deployed endpoint and time it.
# The bare top-level `await` relies on the notebook kernel's async support.
# `process_data_chunk` here must be the UMWP variant defined just above.
start_time = time.time()
all_results = await concurrent_data_generation(
    model=model,
    prompt_class=MathQAPrompt,
    data_chunks=data_chunks,
    response_dict_format=response_dict,
    data_processing_function=process_data_chunk,
    prompt_repetitions=10,  # presumably generations requested per prompt — TODO confirm in src.utils.data_generation
)
end_time = time.time()
print(f"Time taken: {end_time - start_time}")  # elapsed wall-clock seconds

Time taken: 642.0189321041107


In [20]:
# Spot-check the first two generated records before saving
all_results[:2]

[{'task_info': {'type': 'QA', 'dataset': 'UMWP'},
  'additional_info': {'model': 'meta-llama/Llama-3.2-1B-Instruct',
   'domain': 'Math',
   'question': 'Because of the decision Sofia asked the students to suggest specific types of food. If 479 students suggested adding mashed potatoes while 489 suggested adding bacon to the menu.How many more students suggested bacon than those that suggested mashed potatoes?',
   'answer': [10.0],
   'answerable': True,
   'source': 'SVAMP'},
  'input': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a meticulous AI mathematician. Your task is to solve the following math problem.\nThink step by step. Show all your reasoning and calculations. After you have solved the problem, clearly state the final numerical answer at the end of your reasoning.\nIf the question is unanswerable (e.g., it is illogical or missing information), you must clearly state that it cannot be answered and briefly explain why. Do not attempt to solve it.<

In [21]:
# Upload all generated records as one pretty-printed JSON document.
# The object key is derived from the split and dataset name.
s3 = s3fs.S3FileSystem()
output_path = f"s3://self-corrective-llm-data/dataset/raw_model_responses/{DATA_SPLIT}/{DATA_SPLIT}_{dataset_name}.json"
json_string = json.dumps(all_results, indent=4)

print(f"Saving file to: {output_path}")
with s3.open(output_path, "w") as f:
    f.write(json_string)

print("File saved successfully to S3!")

Saving file to: s3://self-corrective-llm-data/dataset/raw_model_responses/train/train_UMWP.json
File saved successfully to S3!


In [None]:
# prompt_1 = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a specialized question-answering AI. Your task is to give a concise answer to the question using *only* the provided context.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nContext:\n'''\nThe Pew Forum on Religion & Public Life ranks Egypt as the fifth worst country in the world for religious freedom. The United States Commission on International Religious Freedom, a bipartisan independent agency of the US government, has placed Egypt on its watch list of countries that require close monitoring due to the nature and extent of violations of religious freedom engaged in or tolerated by the government. According to a 2010 Pew Global Attitudes survey, 84% of Egyptians polled supported the death penalty for those who leave Islam; 77% supported whippings and cutting off of hands for theft and robbery; and 82% support stoning a person who commits adultery.\n'''\n\nQuestion: What percentage of Egyptians polled support death penalty for those leaving Islam?<|eot_id|><|start_header_id|>assistant<|end_header_id|>"

In [None]:
# # send request
# response = model.predict({"inputs": [prompt_1]*10, "parameters": {"temperature": 0.7, "max_new_tokens": 256}})

# # print(response["responses"])
# for response in response["responses"]:
#     print(response)

In [22]:
# Tear down the resources created by `huggingface_model.deploy(...)`.
# `model` is the object returned by deploy.
# NOTE(review): this cell only runs if every cell above succeeded; after a
# failed run, execute it manually so the endpoint is not left running.
# NOTE(review): the model is deleted before the endpoint — presumably the
# lookup of the backing model needs the endpoint to still exist; confirm
# against the SageMaker SDK before reordering.
model.delete_model()
model.delete_endpoint()