In [22]:
import pandas as pd
from datasets import load_dataset

ds = load_dataset("nvidia/HelpSteer3", "feedback")

In [None]:
PROMPT_TEMPLATE = """
    Feedback: {feedback}

    Generate a list of principles that the response is evaluated against in the feedback.
    For each principle, identify a text span from the feedback relating to this principle and then state whether the text span suggests that the response satisfies the principle yes/no/partially.

    Return it as a json dictionary in the format {{"<principle 1>": "<supporting text span>-<yes/no/partially>", "<principle 2>": "<supporting text span>-<yes/no/partially>".}}
"""

: 

In [None]:
train_data = ds['train']

: 

In [None]:
train_data

Dataset({
    features: ['domain', 'language', 'context', 'response1', 'response2', 'feedback1', 'feedback2'],
    num_rows: 38782
})

: 

In [None]:
train_df = pd.DataFrame(train_data)

: 

In [None]:
train_df.keys()

Index(['domain', 'language', 'context', 'response1', 'response2', 'feedback1',
       'feedback2'],
      dtype='object')

: 

: 

In [None]:
idx = 10 

print(f"domain: {train_df['domain'][idx]}")
print("-----------" * 30)
print(f"language: {train_df['language'][idx]}")
print("-----------" * 30)
print(f"context: {train_df['context'][idx]}")
print("-----------" * 30)
print(f"response1: {train_df['response1'][idx]}")
print("-----------" * 30)
print(f"response2: {train_df['response2'][idx]}")
print("-----------" * 30)
print(f"feedback1: {train_df['feedback1'][idx]}")
print("-----------" * 30)
print(f"feedback1: {train_df['feedback2'][idx]}")


domain: code
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
language: python
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
context: [{'role': 'user', 'content': "i'd like to create a docker-compose file that sets up our net core backend with azurite emulator and a react frontend"}, {'role': 'assistant', 'content': 'To create a `docker-compose.yml` file that sets up your .NET Core backend, Azure Storage Emulator (Azurite), and a

: 

In [None]:
import os
import time
import json
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

: 

In [None]:
load_dotenv()

True

: 

In [None]:
HF_TOKEN = os.getenv("HF_TOKEN")

MODEL_ID = "Qwen/Qwen2.5-72B-Instruct"

OUTPUT_FILE = "extracted_principles.jsonl"  
CHECKPOINT_FILE = "checkpoint_ids.txt"

: 

In [None]:
client = InferenceClient(api_key=HF_TOKEN)

: 

In [None]:
MAX_RETRIES = 5             # How many times to retry an API error
BASE_SLEEP_SEC = 1          # Normal sleep between calls
ERROR_SLEEP_SEC = 30        # Initial sleep after an error (doubles each retry)

: 

In [None]:
def load_processed_ids():
    """Reads the checkpoint file to see what we have already done."""
    if not os.path.exists(CHECKPOINT_FILE):
        return set()
    with open(CHECKPOINT_FILE, "r") as f:
        return set(line.strip() for line in f)

: 

In [None]:
def save_result(result_dict, prompt_id):
    """Saves one result to disk immediately."""
    # 1. Append JSON object to JSONL file
    with open(OUTPUT_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(result_dict) + "\n")
    
    # 2. Add ID to checkpoint file
    with open(CHECKPOINT_FILE, "a", encoding="utf-8") as f:
        f.write(f"{prompt_id}\n")

: 

In [None]:
def call_api_robust(feedback_text):
    """Calls HF API with automatic retry logic for rate limits/crashes."""
    formatted_prompt = PROMPT_TEMPLATE.format(feedback=feedback_text)
    messages = [{"role": "user", "content": formatted_prompt}]
    
    retries = 0
    current_sleep = ERROR_SLEEP_SEC

    while retries < MAX_RETRIES:
        try:
            response = client.chat_completion(
                model=MODEL_ID,
                messages=messages,
                temperature=0.0,            # Greedy decoding [cite: 92]
                max_tokens=1024,
                response_format={"type": "json_object"} # Force JSON
            )
            return response.choices[0].message.content
            
        except Exception as e:
            error_msg = str(e)
            print(f"\n[!] API Error: {error_msg}")
            
            # Check for common temporary errors
            if "429" in error_msg or "503" in error_msg or "504" in error_msg:
                print(f"    Rate limit or Model loading. Sleeping {current_sleep}s...")
                time.sleep(current_sleep)
                current_sleep *= 2  # Exponential backoff
                retries += 1
            else:
                # If it's a weird error (like 400 Bad Request), log it and skip
                print("    Unrecoverable error. Skipping this sample.")
                return "{}"
    
    print("    Max retries reached. Skipping.")
    return "{}"

: 

In [23]:
from groq import Groq
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

client = Groq(api_key=GROQ_API_KEY)
MODEL_ID = "llama-3.3-70b-versatile"

def call_groq_robust(feedback_text):
    formatted_prompt = PROMPT_TEMPLATE.format(feedback=feedback_text)
    
    retries = 0
    while retries < MAX_RETRIES:
        try:
            chat_completion = client.chat.completions.create(
                messages=[{"role": "user", "content": formatted_prompt}],
                model="llama-3.3-70b-versatile",
                temperature=0.0,
                response_format={"type": "json_object"} # JSON mode
            )
            return chat_completion.choices[0].message.content
            
        except Exception as e:
            error_msg = str(e)
            print(f"\n[!] Groq Error: {error_msg}")
            
            # Rate Limit (Groq usually resets every minute)
            if "429" in error_msg:
                print("    Rate limit hit. Sleeping 60 seconds...")
                time.sleep(60)
                retries += 1
            else:
                return None # Return None on hard error
    MODEL_ID = "llama-3.3-70b-versatile"
    return None

In [24]:
def main():
    processed_ids = load_processed_ids()
    print(f"Resuming... {len(processed_ids)} samples already processed.")
    
    total_samples = len(ds)
    
    for i, sample in enumerate(ds['train']):
        # Unique ID for checkpointing
        # HelpSteer3 usually has 'prompt_id' or we can combine index
        p_id = str(sample.get('prompt_id', f"idx_{i}"))
        
        # SKIP if already done
        if p_id in processed_ids:
            continue
            
        feedback_text = sample.get('feedback1', '')
        if not feedback_text:
            continue

        print(f"[{i+1}/{total_samples}] Processing ID: {p_id}...", end="", flush=True)
        
        # Call API
        extracted_json = call_groq_robust(feedback_text)
        
        # Prepare Result
        result_entry = {
            "prompt_id": p_id,
            "original_feedback": feedback_text,
            "extracted_json": extracted_json
        }
        
        # Save immediately
        save_result(result_entry, p_id)
        
        print(" Done.")
        
        # Respect Rate Limits 
        time.sleep(BASE_SLEEP_SEC)

In [None]:
main()

Resuming... 566 samples already processed.
[567/2] Processing ID: idx_566... Done.
[568/2] Processing ID: idx_567... Done.
[569/2] Processing ID: idx_568... Done.
[570/2] Processing ID: idx_569... Done.
[571/2] Processing ID: idx_570... Done.
[572/2] Processing ID: idx_571... Done.
[573/2] Processing ID: idx_572... Done.
[574/2] Processing ID: idx_573... Done.
[575/2] Processing ID: idx_574... Done.
[576/2] Processing ID: idx_575... Done.
[577/2] Processing ID: idx_576... Done.
[578/2] Processing ID: idx_577... Done.
[579/2] Processing ID: idx_578... Done.
[580/2] Processing ID: idx_579... Done.
[581/2] Processing ID: idx_580... Done.
[582/2] Processing ID: idx_581... Done.
[583/2] Processing ID: idx_582... Done.
[584/2] Processing ID: idx_583... Done.
[585/2] Processing ID: idx_584... Done.
[586/2] Processing ID: idx_585... Done.
[587/2] Processing ID: idx_586... Done.
[588/2] Processing ID: idx_587... Done.
[589/2] Processing ID: idx_588... Done.
[590/2] Processing ID: idx_589... Don

: 