In [1]:
from datasets import load_from_disk
import openai
import json
import os

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the already-shuffled sample set from the 'sample_shuffled_seed6' directory.
# The commented-out lines below look like the one-off step that produced that
# directory (shuffle 'sample_to_label' with seed 6, then save it); they are kept
# for provenance only — TODO confirm.
# samp_to_label = load_from_disk('sample_to_label').shuffle(seed=6)
# samp_to_label.save_to_disk("sample_shuffled_seed6")
# samp_to_label[7]
samp_to_label = load_from_disk('sample_shuffled_seed6')

In [12]:
def text_extract(sample):
    """Flatten one dataset row into a single text string.

    The row layout is detected from the first of these fields that is present
    and truthy, checked in this order: ``query``, ``conversation``,
    ``instruction``, ``problem``, ``turn_1``.

    Args:
        sample: Mapping for one row. The discriminator fields listed above may
            be missing, ``None`` or empty.

    Returns:
        dict: ``{'extracted_text': <str>}`` with the concatenated text.

    Raises:
        ValueError: If the row matches none of the known layouts.
        KeyError, TypeError: If a field required by the matched layout is
            missing or is not a string.
    """
    if sample.get('query'):
        text = sample['query'] + sample['response']
    elif sample.get('conversation'):
        # Each turn is a mapping; its values are joined with spaces, then the
        # turns themselves are joined with spaces.
        text = " ".join(" ".join(turn.values()) for turn in sample['conversation'])
    elif sample.get('instruction'):
        # NOTE(review): this layout is detected via 'instruction' but the text
        # is taken from 'prompt' — confirm that is intended.
        text = sample['prompt']
    elif sample.get('problem'):
        text = sample['problem'] + sample['solution'] + sample['answer']
    elif sample.get('turn_1'):
        first_round = sample['prompt'] + sample['turn_1'] + sample['feedback_1']
        try:
            text = first_round + sample['turn_2'] + sample['feedback_2']
        except (KeyError, TypeError):
            # The second round is optional: its fields may be absent or None.
            text = first_round
    else:
        raise ValueError(
            "text_extract: sample matches none of the known layouts "
            "(query, conversation, instruction, problem, turn_1)"
        )
    return {'extracted_text': f'{text}'}

def text_for_prompt(text):
    """Build the informativeness-rating prompt for a piece of text.

    Args:
        text: The text to rate, or a row mapping that carries the text under
            the ``'extracted_text'`` key (which is what a row-wise
            ``Dataset.map`` call hands to this function).

    Returns:
        dict: ``{"prompt": <str>}`` containing the instruction followed by the text.
    """
    from collections.abc import Mapping

    # When used directly as a map callback the argument is the whole row;
    # unwrap it so the prompt contains the text itself rather than the
    # dict's repr ("{'extracted_text': ...}").
    if isinstance(text, Mapping) and 'extracted_text' in text:
        text = text['extracted_text']
    return {
        "prompt": f"Rate the following text on how informative it is from 0 to 5. Only return the number.\n\nText:\n{text}"
    }


In [15]:
# Flatten every sample to a single text field and drop all other columns.
texts = samp_to_label.map(text_extract)
texts_extracted = texts.remove_columns([col for col in texts.column_names if col != 'extracted_text'])

# Persist the extracted texts, one JSON object per line.
with open('texts_extracted.jsonl', 'w', encoding='utf-8') as f:
    for text in texts_extracted:
        f.write(json.dumps(text) + "\n")

# Hand the text itself to the prompt builder. Passing the function directly
# to map() would give it the whole row dict, and the prompt would then embed
# the dict's repr instead of the text.
llm_input = texts_extracted.map(lambda row: text_for_prompt(row['extracted_text']))

In [11]:
# Write only the prompt field of every row to llm_prompts.jsonl,
# one JSON object per line.
with open("llm_prompts.jsonl", "w", encoding="utf-8") as f:
    for row in llm_input:
        record = {"prompt": row["prompt"]}
        json.dump(record, f)
        f.write("\n")

In [6]:
# The API key is read from a local text file; surrounding whitespace is removed.
with open('api_key.txt', 'r') as key_file:
    api_key = key_file.read().strip()

# Run configuration.
INPUT_FILE = "llm_prompts.jsonl"
OUTPUT_FILE = "llm_scores.jsonl"
SAVE_EVERY = 50
MODEL = "gpt-4o"

# Read every prompt from the input JSONL file.
prompts = []
with open(INPUT_FILE, "r", encoding="utf-8") as f:
    for line in f:
        prompts.append(json.loads(line)["prompt"])

# If an output file already exists, reload its rows and continue after the
# last one; otherwise start from the first prompt.
results = []
start_index = 0
if os.path.exists(OUTPUT_FILE):
    with open(OUTPUT_FILE, "r", encoding="utf-8") as f:
        for line in f:
            results.append(json.loads(line))
    start_index = len(results)
    print(f"Resuming from index {start_index}")
# Prompt the model
def query_gpt(prompt):
    """Send one prompt to the chat model and return the reply text.

    A client is created from the module-level ``api_key`` on every call, and
    the request is made against ``MODEL`` with temperature 0.

    Args:
        prompt: Text appended to the fixed
            "Rate the following text from 0 to 5: " prefix of the user message.

    Returns:
        The whitespace-stripped content of the first choice, or ``None`` when
        any exception is raised (the exception is printed).
    """
    try:
        user_message = {
            "role": "user",
            "content": f"Rate the following text from 0 to 5: {prompt}",
        }
        completion = openai.OpenAI(api_key=api_key).chat.completions.create(
            model=MODEL,
            messages=[user_message],
            temperature=0.0,
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        print("Error:", err)
        return None

# Extract a number from output
def extract_score(response):
    """Parse a model reply into a score clamped to the range 0-5.

    Args:
        response: Raw reply text, or ``None`` when no reply is available.

    Returns:
        A float in ``[0.0, 5.0]``, or ``None`` when the reply is missing, is
        not a plain number, or is not finite (``nan`` / ``inf``).
    """
    import math

    if response is None:
        return None
    try:
        score = float(response)
    except (TypeError, ValueError, OverflowError):
        return None
    # float() accepts "nan" and "inf"; neither is a rating, and nan would
    # otherwise be turned into 0.0 by the clamp below without any warning.
    if not math.isfinite(score):
        return None
    return max(0.0, min(score, 5.0))  # Clamp between 0 and 5

def _save_results(rows, path):
    """Write all result rows to ``path`` as JSONL, replacing the file in one step."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        for r in rows:
            json.dump(r, f)
            f.write("\n")
    # Swap the finished file into place so an interrupt during the write
    # cannot leave a truncated checkpoint behind.
    os.replace(tmp_path, path)
    print(f"✅ Saved {len(rows)} results to {path}")


# Process prompts
unsaved = False
try:
    for i in range(start_index, len(prompts)):
        prompt = prompts[i]
        print(f"Processing {i}/{len(prompts)}")

        response = query_gpt(prompt)
        score = extract_score(response)

        results.append({
            "index": i,
            "score": score,
            "raw_response": response
        })
        unsaved = True

        # Save every N examples
        if i % SAVE_EVERY == 0 or i == len(prompts) - 1:
            _save_results(results, OUTPUT_FILE)
            unsaved = False

        # NOTE: no delay between requests; add a sleep here if rate limits are hit.
finally:
    # On interruption or error, keep the responses collected since the last
    # checkpoint so a later run resumes right after them.
    if unsaved:
        _save_results(results, OUTPUT_FILE)

Resuming from index 4051
Processing 4051/10000
Processing 4052/10000
Processing 4053/10000
Processing 4054/10000
Processing 4055/10000
Processing 4056/10000
Processing 4057/10000
Processing 4058/10000
Processing 4059/10000
Processing 4060/10000
Processing 4061/10000
Processing 4062/10000
Processing 4063/10000
Processing 4064/10000
Processing 4065/10000
Processing 4066/10000
Processing 4067/10000
Processing 4068/10000
Processing 4069/10000
Processing 4070/10000
Processing 4071/10000
Processing 4072/10000
Processing 4073/10000
Processing 4074/10000
Processing 4075/10000
Processing 4076/10000
Processing 4077/10000
Processing 4078/10000
Processing 4079/10000
Processing 4080/10000
Processing 4081/10000
Processing 4082/10000
Processing 4083/10000
Processing 4084/10000
Processing 4085/10000
Processing 4086/10000
Processing 4087/10000
Processing 4088/10000
Processing 4089/10000
Processing 4090/10000
Processing 4091/10000
Processing 4092/10000
Processing 4093/10000
Processing 4094/10000
Process