# GRPO Training for Italian Exercise Generator

## Setup

In [1]:
# --- Cell 1: Setup and Imports ---

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Navigate to your project directory
# Make sure this path is correct for your Google Drive setup
import os
project_path = '/content/drive/MyDrive/Colab Notebooks/italian_teacher'
os.chdir(project_path)
print(f"Changed directory to: {os.getcwd()}")

# Install dependencies
!pip install -q transformers trl accelerate peft datasets spacy sentence-transformers bitsandbytes json5 openai tqdm nest_asyncio
!python -m spacy download it_core_news_sm

# Standard library imports
import json
import random
from getpass import getpass

# Third-party imports
import torch
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import GRPOConfig, GRPOTrainer

# Local module imports
from src.rl.multi_reward_async import create_async_multi_reward
from src.rl.prompt_formatter import format_prompt_with_chat_template
from src.rl.reward_function import ExerciseRewardFunction

# Environment setup
os.environ["WANDB_DISABLED"] = "true"

print("\n--- Environment Setup ---")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Changed directory to: /content/drive/MyDrive/Colab Notebooks/italian_teacher
Collecting it-core-news-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/it_core_news_sm-3.8.0/it_core_news_sm-3.8.0-py3-none-any.whl (13.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('it_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.

--- Environment Setup ---
PyTorch version: 2.8.0+cu126
CUDA available: True
GPU: NVIDIA A100-SXM4-80GB


In [2]:
# --- Cell 2: Configuration ---
# All training parameters are here for easy modification.

# --- General Configuration ---
USE_OPENAI = True
BASE_MODEL_PATH = "./models/italian_v8_grpo_round2"  # Input model for this training run
OUTPUT_DIR = "./models/TeacherPet_italian_grpo"           # Where the new model will be saved
NUM_SAMPLES = 2000                                # Number of training requests to use
RANDOM_SEED = 44                                  # Seed for reproducibility
DISABLED_SCORERS = ["fluency"]                    # List of scorers to disable

# --- GRPO Configuration ---
# This uses the same optimized settings you provided, with corrections.
grpo_config = GRPOConfig(
    output_dir=OUTPUT_DIR,
    num_train_epochs=1,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,

    # CORRECTED: A lower learning rate for stable fine-tuning.
    learning_rate=5e-6,  # 5e-6 is lower than the previous 9e-6.

    warmup_steps=50,
    logging_steps=5,
    save_steps=100,
    save_total_limit=3,
    bf16=True,
    remove_unused_columns=False,
    report_to="none",

    # GRPO-specific generation settings
    num_generations=4,
    max_completion_length=350,  # This is the primary controller for max new tokens.
    temperature=0.7,
    generation_batch_size=32,

    # Cleaned-up generation_kwargs
    generation_kwargs={
      "bos_token_id": 128000,
      "do_sample": True,
      # CORRECTED: Removed duplicate eos_token_id
      "eos_token_id": [128009, 128001],
      "pad_token_id": 128009,
      # REMOVED: temperature and max_new_tokens are controlled by top-level args.
      "top_p": 0.9,
      "padding_side": "left"
    }
)

print("✅ Configuration loaded and corrected.")


✅ Configuration loaded and corrected.


In [3]:
# --- Cell 3: Helper Functions ---

def configure_openai(use_openai: bool):
    """Configures the OpenAI API key if enabled."""
    if not use_openai:
        print("ℹ️ OpenAI usage is disabled. LLM-based scorers will be skipped.")
        return

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        print("OpenAI API key not found in environment variables.")
        try:
            api_key = getpass("Enter your OpenAI API key: ")
        except (IOError, EOFError):
            print("Could not read API key. Disabling OpenAI usage.")
            return

    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
        print("✅ OpenAI API key configured.")
    else:
        print("⚠️ No OpenAI API key provided. LLM-based scorers will be skipped.")

def load_training_data(tokenizer, num_samples: int, seed: int):
    """Loads or generates training requests and prepares the dataset."""
    requests_path = "src/rl/training_requests.json"
    if os.path.exists(requests_path):
        print(f"Loading existing training requests from {requests_path}...")
        with open(requests_path, "r") as f:
            training_requests = json.load(f)
    else:
        from src.rl.generate_training_requests import generate_training_requests
        print(f"Generating {num_samples} new training requests...")
        training_requests = generate_training_requests(
            num_requests=num_samples,
            output_path=requests_path
        )

    print(f"✅ Loaded {len(training_requests)} total training requests.")

    prompts = [
        format_prompt_with_chat_template(req, tokenizer, add_examples=True)
        for req in training_requests
    ]

    if len(prompts) > num_samples:
        print(f"Sampling {num_samples} requests for this run (seed={seed})...")
        random.seed(seed)
        random_indices = random.sample(range(len(prompts)), num_samples)
        prompts = [prompts[i] for i in random_indices]
        training_requests_subset = [training_requests[i] for i in random_indices]
    else:
        training_requests_subset = training_requests

    return Dataset.from_dict({
        "prompt": prompts,
        "request": training_requests_subset,
    })

print("✅ Helper functions defined.")


✅ Helper functions defined.


In [4]:
# --- Cell 4: Main Execution ---

print("=" * 80)
print("🚀 STARTING GRPO TRAINING RUN")
print("=" * 80)

# 1. Configure OpenAI
configure_openai(USE_OPENAI)

# 2. Load Model and Tokenizer
print(f"\nLoading base model from: {BASE_MODEL_PATH}")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, padding_side='left')
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"



model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
    use_cache=False,
)
model.gradient_checkpointing_enable()
print("✅ Model and tokenizer loaded.")
# CRITICAL FIX: Synchronize the model's config with the tokenizer's settings.
# This ensures the model's `generate` method uses the correct padding side.
model.config.pad_token_id = tokenizer.pad_token_id
model.config.padding_side = tokenizer.padding_side

# 3. Prepare Dataset and Reward Function
train_dataset = load_training_data(tokenizer, num_samples=NUM_SAMPLES, seed=RANDOM_SEED)

print("\nInitializing reward function...")
# Pass the concurrency limit directly to the main reward function
reward_fn_instance = ExerciseRewardFunction(
    device="cuda",
    disabled_scorers=DISABLED_SCORERS,
    concurrency_limit=20  # Explicitly set concurrency here
)
# The wrapper is now much simpler
reward_func = create_async_multi_reward(
    reward_fn_instance,
    use_openai=USE_OPENAI
)
print("✅ Reward function ready.")

# 4. Initialize Trainer
print("\nInitializing GRPO Trainer...")
trainer = GRPOTrainer(
    model=model,
    args=grpo_config,
    reward_funcs=reward_func,
    train_dataset=train_dataset,
    processing_class=tokenizer,
)
print("✅ GRPO Trainer initialized. Ready to start training!")

# 5. Start Training
print("\n" + "=" * 80)
print("🔥 Training begins! 🔥")
print("=" * 80)
trainer.train()
print("\n" + "=" * 80)
print("🎉 Training complete! 🎉")
print("=" * 80)

# 6. Save Final Model
print(f"\nSaving final model to {OUTPUT_DIR}...")
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print("✅ Model and tokenizer saved successfully.")


🚀 STARTING GRPO TRAINING RUN
OpenAI API key not found in environment variables.
Enter your OpenAI API key: ··········
✅ OpenAI API key configured.

Loading base model from: ./models/italian_v8_grpo_round2


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✅ Model and tokenizer loaded.
Loading existing training requests from src/rl/training_requests.json...
✅ Loaded 2000 total training requests.

Initializing reward function...
Loading spaCy model: it_core_news_sm...
✅ spaCy model loaded
Reward function will use device: cuda
Initializing scorers...
  ✅ LLM scoring enabled for cefr_alignment (batch size: 10)
  ✅ LLM scoring enabled for grammar_correctness (batch size: 10)
  ✅ LLM scoring enabled for coherence (batch size: 10)
Loading sentence transformer for topic similarity...


The model is already on multiple devices. Skipping the move to device specified in `args`.


✅ Sentence transformer loaded in cuda
  ✅ LLM topic checking enabled (OpenAI API)
✅ Reward function initialized. Active scorers: ['json', 'quality', 'linguistic', 'cefr', 'grammar', 'coherence', 'topic']
   Disabled scorers: ['fluency']
✅ Reward function ready.

Initializing GRPO Trainer...
✅ GRPO Trainer initialized. Ready to start training!

🔥 Training begins! 🔥


`generation_config` default values have been modified to match model-specific defaults: {'max_length': 8192}. If this is not desired, please set these values explicitly.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (13.1s):
   Grammar   : min=0.0, max=100.0, avg=31.5
   Coherence : min=16.7, max=100.0, avg=61.7
   Topic     : min=20.0, max=100.0, avg=72.1
   Quality   : min=10.0, max=66.7, avg=32.6
   Diversity : min=20.0, max=100.0, avg=65.0
   TOTAL     : min=0.000, max=61.000, avg=19.104




Step,Training Loss
5,-0.012
10,-0.026
15,-0.0026
20,0.0068
25,-0.0213
30,-0.0609
35,-0.123
40,-0.0337
45,-0.004
50,-0.0024



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (17.0s):
   Grammar   : min=0.0, max=100.0, avg=58.1
   Coherence : min=50.0, max=100.0, avg=67.4
   Topic     : min=0.0, max=100.0, avg=73.2
   Quality   : min=10.0, max=50.0, avg=27.8
   Diversity : min=25.3, max=100.0, avg=63.0
   TOTAL     : min=0.000, max=77.667, avg=24.827





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (27.3s):
   Grammar   : min=0.0, max=100.0, avg=63.2
   Coherence : min=50.0, max=100.0, avg=75.9
   Topic     : min=32.0, max=100.0, avg=73.3
   Quality   : min=10.0, max=50.0, avg=25.3
   Diversity : min=12.0, max=80.0, avg=49.6
   TOTAL     : min=0.000, max=69.000, avg=17.887





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (10.4s):
   Grammar   : min=0.0, max=100.0, avg=28.3
   Coherence : min=50.0, max=100.0, avg=62.0
   Topic     : min=10.0, max=100.0, avg=57.0
   Quality   : min=10.0, max=50.0, avg=25.6
   Diversity : min=60.0, max=100.0, avg=72.9
   TOTAL     : min=0.000, max=60.750, avg=15.217


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (12.3s):
   Grammar   : min=0.0, max=100.0, avg=61.2
   Coherence : min=0.0, max=87.5, avg=50.3
   Topic     : min=32.5, max=100.0, avg=77.2
   Quality   : min=10.0, max=50.0, avg=28.1
   Diversity : min=20.0, max=73.3, avg=51.1
   TOTAL     : min=0.000, max=68.250, avg=16.061





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (20.6s):
   Grammar   : min=0.0, max=100.0, avg=37.9
   Coherence : min=37.5, max=100.0, avg=67.1
   Topic     : min=10.0, max=100.0, avg=75.5
   Quality   : min=20.0, max=90.0, avg=44.1
   Diversity : min=15.0, max=86.7, avg=58.1
   TOTAL     : min=0.000, max=71.875, avg=29.128





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (7.4s):
   Grammar   : min=0.0, max=100.0, avg=64.6
   Coherence : min=0.0, max=83.3, avg=54.2
   Topic     : min=30.0, max=60.0, avg=45.8
   Quality   : min=5.0, max=53.3, avg=19.4
   Diversity : min=20.0, max=86.7, avg=50.8
   TOTAL     : min=-9.500, max=62.333, avg=7.406





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (13.4s):
   Grammar   : min=0.0, max=100.0, avg=45.4
   Coherence : min=20.0, max=100.0, avg=60.9
   Topic     : min=44.0, max=100.0, avg=83.6
   Quality   : min=10.0, max=50.0, avg=38.1
   Diversity : min=30.0, max=60.0, avg=42.5
   TOTAL     : min=0.000, max=77.700, avg=19.948





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (7.0s):
   Grammar   : min=0.0, max=48.0, avg=19.5
   Coherence : min=60.0, max=90.0, avg=72.5
   Topic     : min=74.0, max=96.0, avg=88.0
   Quality   : min=10.0, max=50.0, avg=34.0
   Diversity : min=24.0, max=100.0, avg=52.0
   TOTAL     : min=0.000, max=29.800, avg=2.906





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (12.0s):
   Grammar   : min=0.0, max=100.0, avg=74.2
   Coherence : min=43.3, max=100.0, avg=58.2
   Topic     : min=13.3, max=100.0, avg=71.4
   Quality   : min=10.0, max=50.0, avg=36.4
   Diversity : min=30.0, max=100.0, avg=67.8
   TOTAL     : min=0.000, max=74.000, avg=26.202





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (18.5s):
   Grammar   : min=0.0, max=100.0, avg=62.5
   Coherence : min=25.0, max=100.0, avg=71.3
   Topic     : min=20.0, max=100.0, avg=67.0
   Quality   : min=5.0, max=50.0, avg=40.4
   Diversity : min=15.0, max=80.0, avg=50.8
   TOTAL     : min=0.000, max=65.000, avg=24.362





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (15.3s):
   Grammar   : min=50.0, max=100.0, avg=86.7
   Coherence : min=50.0, max=100.0, avg=75.5
   Topic     : min=10.0, max=100.0, avg=68.5
   Quality   : min=10.0, max=50.0, avg=37.8
   Diversity : min=36.0, max=88.0, avg=62.5
   TOTAL     : min=0.000, max=77.800, avg=23.807





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (13.0s):
   Grammar   : min=0.0, max=100.0, avg=50.3
   Coherence : min=0.0, max=75.0, avg=55.0
   Topic     : min=15.0, max=93.3, avg=50.4
   Quality   : min=10.0, max=50.0, avg=32.8
   Diversity : min=15.0, max=80.0, avg=46.2
   TOTAL     : min=0.000, max=58.333, avg=12.293


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (6.5s):
   Grammar   : min=0.0, max=100.0, avg=20.0
   Coherence : min=50.0, max=100.0, avg=77.7
   Topic     : min=0.0, max=100.0, avg=45.7
   Quality   : min=10.0, max=50.0, avg=36.7
   Diversity : min=20.0, max=70.0, avg=52.0
   TOTAL     : min=0.000, max=59.000, avg=8.964





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (14.6s):
   Grammar   : min=0.0, max=100.0, avg=60.0
   Coherence : min=0.0, max=100.0, avg=62.5
   Topic     : min=72.0, max=100.0, avg=95.0
   Quality   : min=10.0, max=50.0, avg=31.2
   Diversity : min=24.0, max=68.0, avg=49.0
   TOTAL     : min=0.000, max=67.200, avg=21.122





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (17.8s):
   Grammar   : min=0.0, max=100.0, avg=55.2
   Coherence : min=20.0, max=100.0, avg=70.0
   Topic     : min=20.0, max=100.0, avg=72.5
   Quality   : min=10.0, max=50.0, avg=29.8
   Diversity : min=24.0, max=80.0, avg=52.4
   TOTAL     : min=0.000, max=76.200, avg=23.347


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (11.0s):
   Grammar   : min=0.0, max=100.0, avg=55.6
   Coherence : min=35.0, max=100.0, avg=62.2
   Topic     : min=37.5, max=95.0, avg=75.0
   Quality   : min=10.0, max=50.0, avg=40.0
   Diversity : min=35.0, max=80.0, avg=65.0
   TOTAL     : min=0.000, max=77.500, avg=14.340


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (9.1s):
   Grammar   : min=0.0, max=100.0, avg=30.9
   Coherence : min=37.5, max=100.0, avg=74.1
   Topic     : min=57.5, max=100.0, avg=85.0
   Quality   : min=30.0, max=50.0, avg=39.8
   Diversity : min=15.0, max=60.0, avg=41.2
   TOTAL     : min=0.000, max=32.750, avg=5.406





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (8.3s):
   Grammar   : min=0.0, max=100.0, avg=46.9
   Coherence : min=23.3, max=100.0, avg=57.2
   Topic     : min=20.0, max=93.3, avg=59.2
   Quality   : min=10.0, max=50.0, avg=31.6
   Diversity : min=30.0, max=100.0, avg=65.4
   TOTAL     : min=0.000, max=63.667, avg=16.284





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (10.3s):
   Grammar   : min=25.0, max=100.0, avg=84.4
   Coherence : min=46.0, max=100.0, avg=64.5
   Topic     : min=82.0, max=100.0, avg=93.5
   Quality   : min=10.0, max=50.0, avg=27.0
   Diversity : min=50.0, max=88.0, avg=75.8
   TOTAL     : min=0.000, max=74.250, avg=15.836





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (5.3s):
   Grammar   : min=0.0, max=66.7, avg=16.7
   Coherence : min=50.0, max=83.3, avg=70.8
   Topic     : min=100.0, max=100.0, avg=100.0
   Quality   : min=23.3, max=53.3, avg=40.8
   Diversity : min=53.3, max=86.7, avg=66.7
   TOTAL     : min=0.000, max=60.667, avg=6.750


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (4.7s):
   Grammar   : min=0.0, max=83.3, avg=33.3
   Coherence : min=66.7, max=83.3, avg=75.0
   Topic     : min=13.3, max=46.7, avg=25.0
   Quality   : min=10.0, max=66.7, avg=40.8
   Diversity : min=20.0, max=100.0, avg=60.0
   TOTAL     : min=0.000, max=36.167, avg=3.125





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (10.3s):
   Grammar   : min=0.0, max=100.0, avg=40.8
   Coherence : min=25.0, max=80.0, avg=55.6
   Topic     : min=10.0, max=100.0, avg=69.4
   Quality   : min=10.0, max=50.0, avg=37.2
   Diversity : min=30.0, max=73.3, avg=53.6
   TOTAL     : min=0.000, max=54.667, avg=11.489





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (11.7s):
   Grammar   : min=0.0, max=100.0, avg=59.2
   Coherence : min=33.3, max=100.0, avg=74.6
   Topic     : min=30.0, max=100.0, avg=81.7
   Quality   : min=10.0, max=66.7, avg=43.0
   Diversity : min=20.0, max=100.0, avg=62.9
   TOTAL     : min=0.000, max=70.500, avg=23.833





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (8.4s):
   Grammar   : min=0.0, max=100.0, avg=46.7
   Coherence : min=33.3, max=100.0, avg=79.0
   Topic     : min=20.0, max=100.0, avg=64.6
   Quality   : min=10.0, max=50.0, avg=35.8
   Diversity : min=20.0, max=86.7, avg=44.7
   TOTAL     : min=0.000, max=57.917, avg=12.370





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (8.9s):
   Grammar   : min=16.7, max=100.0, avg=84.9
   Coherence : min=0.0, max=100.0, avg=62.2
   Topic     : min=80.0, max=100.0, avg=95.0
   Quality   : min=10.0, max=65.0, avg=37.2
   Diversity : min=40.0, max=100.0, avg=74.0
   TOTAL     : min=0.000, max=77.500, avg=25.525





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (16.2s):
   Grammar   : min=0.0, max=100.0, avg=56.4
   Coherence : min=0.0, max=100.0, avg=69.0
   Topic     : min=5.0, max=100.0, avg=82.0
   Quality   : min=6.7, max=100.0, avg=40.8
   Diversity : min=12.0, max=100.0, avg=58.8
   TOTAL     : min=0.000, max=78.600, avg=27.306





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (11.5s):
   Grammar   : min=33.3, max=100.0, avg=79.8
   Coherence : min=40.0, max=100.0, avg=75.2
   Topic     : min=0.0, max=100.0, avg=67.3
   Quality   : min=10.0, max=62.5, avg=44.2
   Diversity : min=12.0, max=100.0, avg=64.3
   TOTAL     : min=0.000, max=75.667, avg=15.660





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (10.9s):
   Grammar   : min=36.0, max=100.0, avg=85.3
   Coherence : min=20.0, max=90.0, avg=59.2
   Topic     : min=15.0, max=80.0, avg=42.0
   Quality   : min=10.0, max=60.0, avg=33.5
   Diversity : min=56.0, max=100.0, avg=78.8
   TOTAL     : min=0.000, max=68.000, avg=19.078


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (14.4s):
   Grammar   : min=0.0, max=100.0, avg=59.7
   Coherence : min=50.0, max=92.5, avg=65.2
   Topic     : min=4.0, max=97.5, avg=39.2
   Quality   : min=28.0, max=72.5, avg=47.3
   Diversity : min=56.0, max=100.0, avg=71.5
   TOTAL     : min=0.000, max=66.900, avg=19.137





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (7.8s):
   Grammar   : min=0.0, max=100.0, avg=24.2
   Coherence : min=50.0, max=100.0, avg=65.9
   Topic     : min=40.0, max=100.0, avg=66.1
   Quality   : min=10.0, max=66.7, avg=45.9
   Diversity : min=30.0, max=86.7, avg=54.1
   TOTAL     : min=0.000, max=61.967, avg=7.749





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (7.8s):
   Grammar   : min=0.0, max=100.0, avg=61.2
   Coherence : min=50.0, max=100.0, avg=72.2
   Topic     : min=0.0, max=100.0, avg=75.4
   Quality   : min=50.0, max=75.0, avg=59.2
   Diversity : min=30.0, max=100.0, avg=60.6
   TOTAL     : min=0.000, max=81.125, avg=16.547





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (20.5s):
   Grammar   : min=0.0, max=100.0, avg=43.5
   Coherence : min=50.0, max=100.0, avg=74.0
   Topic     : min=6.7, max=100.0, avg=66.8
   Quality   : min=27.5, max=75.0, avg=55.4
   Diversity : min=56.7, max=100.0, avg=82.7
   TOTAL     : min=0.000, max=77.700, avg=46.022





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (11.8s):
   Grammar   : min=0.0, max=100.0, avg=20.0
   Coherence : min=0.0, max=100.0, avg=67.9
   Topic     : min=6.7, max=100.0, avg=72.2
   Quality   : min=10.0, max=100.0, avg=53.4
   Diversity : min=30.0, max=100.0, avg=86.1
   TOTAL     : min=0.000, max=68.750, avg=32.680


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (16.0s):
   Grammar   : min=0.0, max=100.0, avg=49.4
   Coherence : min=40.0, max=95.0, avg=65.1
   Topic     : min=16.0, max=100.0, avg=71.6
   Quality   : min=34.0, max=80.0, avg=55.2
   Diversity : min=60.0, max=100.0, avg=84.9
   TOTAL     : min=0.000, max=74.700, avg=31.302





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (18.2s):
   Grammar   : min=12.5, max=100.0, avg=64.9
   Coherence : min=50.0, max=100.0, avg=75.9
   Topic     : min=42.5, max=100.0, avg=82.1
   Quality   : min=32.5, max=75.0, avg=57.9
   Diversity : min=45.0, max=100.0, avg=82.6
   TOTAL     : min=0.000, max=82.375, avg=44.210





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (19.9s):
   Grammar   : min=0.0, max=100.0, avg=33.2
   Coherence : min=50.0, max=100.0, avg=70.6
   Topic     : min=4.0, max=100.0, avg=71.0
   Quality   : min=27.0, max=81.7, avg=62.2
   Diversity : min=56.0, max=100.0, avg=83.3
   TOTAL     : min=0.000, max=76.833, avg=39.514





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (22.3s):
   Grammar   : min=0.0, max=100.0, avg=59.4
   Coherence : min=37.5, max=100.0, avg=72.1
   Topic     : min=15.0, max=100.0, avg=81.6
   Quality   : min=27.5, max=80.0, avg=59.9
   Diversity : min=56.0, max=100.0, avg=87.8
   TOTAL     : min=0.000, max=84.000, avg=38.755





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (27.8s):
   Grammar   : min=0.0, max=100.0, avg=46.9
   Coherence : min=47.5, max=93.3, avg=67.0
   Topic     : min=0.0, max=100.0, avg=76.8
   Quality   : min=36.0, max=75.0, avg=58.7
   Diversity : min=60.0, max=100.0, avg=86.5
   TOTAL     : min=29.333, max=78.375, avg=59.399





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (17.7s):
   Grammar   : min=0.0, max=100.0, avg=61.3
   Coherence : min=37.5, max=100.0, avg=72.3
   Topic     : min=45.0, max=100.0, avg=81.3
   Quality   : min=30.0, max=80.0, avg=61.2
   Diversity : min=70.0, max=100.0, avg=93.6
   TOTAL     : min=0.000, max=86.500, avg=40.959





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (17.6s):
   Grammar   : min=25.0, max=100.0, avg=72.7
   Coherence : min=40.0, max=83.3, avg=69.9
   Topic     : min=32.0, max=100.0, avg=71.0
   Quality   : min=30.0, max=83.3, avg=61.1
   Diversity : min=60.0, max=100.0, avg=86.1
   TOTAL     : min=0.000, max=79.000, avg=37.547





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (17.6s):
   Grammar   : min=0.0, max=100.0, avg=53.4
   Coherence : min=0.0, max=100.0, avg=67.2
   Topic     : min=15.0, max=100.0, avg=50.9
   Quality   : min=10.0, max=100.0, avg=60.8
   Diversity : min=30.0, max=100.0, avg=88.1
   TOTAL     : min=0.000, max=67.667, avg=43.843





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (15.7s):
   Grammar   : min=0.0, max=100.0, avg=51.9
   Coherence : min=45.0, max=100.0, avg=73.6
   Topic     : min=0.0, max=100.0, avg=66.5
   Quality   : min=40.0, max=87.5, avg=62.2
   Diversity : min=30.0, max=100.0, avg=83.5
   TOTAL     : min=0.000, max=85.500, avg=34.740





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (24.7s):
   Grammar   : min=0.0, max=100.0, avg=44.5
   Coherence : min=50.0, max=90.0, avg=69.5
   Topic     : min=30.0, max=100.0, avg=81.9
   Quality   : min=53.3, max=87.5, avg=74.3
   Diversity : min=76.0, max=100.0, avg=92.5
   TOTAL     : min=38.333, max=79.000, avg=60.486





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (19.6s):
   Grammar   : min=16.7, max=100.0, avg=65.5
   Coherence : min=42.5, max=90.0, avg=74.2
   Topic     : min=13.3, max=100.0, avg=81.9
   Quality   : min=42.5, max=87.5, avg=73.9
   Diversity : min=80.0, max=100.0, avg=98.1
   TOTAL     : min=0.000, max=80.250, avg=50.688





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (9.5s):
   Grammar   : min=20.0, max=100.0, avg=68.8
   Coherence : min=60.0, max=100.0, avg=76.7
   Topic     : min=16.0, max=70.0, avg=40.7
   Quality   : min=40.0, max=90.0, avg=68.6
   Diversity : min=70.0, max=100.0, avg=91.5
   TOTAL     : min=0.000, max=68.200, avg=18.429





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (15.5s):
   Grammar   : min=0.0, max=100.0, avg=35.5
   Coherence : min=50.0, max=100.0, avg=74.3
   Topic     : min=0.0, max=100.0, avg=64.8
   Quality   : min=44.0, max=100.0, avg=66.0
   Diversity : min=80.0, max=100.0, avg=91.7
   TOTAL     : min=0.000, max=63.000, avg=35.961





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (19.9s):
   Grammar   : min=0.0, max=100.0, avg=45.7
   Coherence : min=37.5, max=90.0, avg=67.2
   Topic     : min=0.0, max=100.0, avg=49.0
   Quality   : min=45.0, max=90.0, avg=71.4
   Diversity : min=60.0, max=100.0, avg=89.5
   TOTAL     : min=0.000, max=78.100, avg=42.973





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (20.0s):
   Grammar   : min=0.0, max=100.0, avg=57.0
   Coherence : min=33.3, max=90.0, avg=69.8
   Topic     : min=10.0, max=100.0, avg=62.6
   Quality   : min=52.5, max=90.0, avg=73.2
   Diversity : min=85.0, max=100.0, avg=98.0
   TOTAL     : min=0.000, max=79.375, avg=58.757





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (22.3s):
   Grammar   : min=0.0, max=100.0, avg=47.5
   Coherence : min=50.0, max=100.0, avg=74.1
   Topic     : min=0.0, max=98.0, avg=63.6
   Quality   : min=54.0, max=87.5, avg=77.4
   Diversity : min=76.0, max=100.0, avg=97.7
   TOTAL     : min=0.000, max=77.750, avg=52.469





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (21.2s):
   Grammar   : min=0.0, max=100.0, avg=56.6
   Coherence : min=0.0, max=100.0, avg=59.8
   Topic     : min=0.0, max=100.0, avg=64.7
   Quality   : min=10.0, max=100.0, avg=70.3
   Diversity : min=30.0, max=100.0, avg=81.3
   TOTAL     : min=0.000, max=82.333, avg=43.933


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (21.5s):
   Grammar   : min=25.0, max=100.0, avg=67.1
   Coherence : min=25.0, max=100.0, avg=75.2
   Topic     : min=4.0, max=100.0, avg=64.5
   Quality   : min=10.0, max=100.0, avg=71.8
   Diversity : min=70.0, max=100.0, avg=92.1
   TOTAL     : min=0.000, max=88.500, avg=53.523





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (21.5s):
   Grammar   : min=28.0, max=100.0, avg=74.6
   Coherence : min=33.3, max=100.0, avg=70.6
   Topic     : min=8.0, max=100.0, avg=53.8
   Quality   : min=50.0, max=90.0, avg=69.4
   Diversity : min=76.0, max=100.0, avg=98.0
   TOTAL     : min=0.000, max=85.833, avg=55.822





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (27.4s):
   Grammar   : min=0.0, max=100.0, avg=60.0
   Coherence : min=37.5, max=100.0, avg=73.7
   Topic     : min=6.7, max=100.0, avg=74.2
   Quality   : min=10.0, max=90.0, avg=75.5
   Diversity : min=85.0, max=100.0, avg=98.6
   TOTAL     : min=0.000, max=79.375, avg=55.322





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (28.5s):
   Grammar   : min=0.0, max=100.0, avg=55.6
   Coherence : min=0.0, max=100.0, avg=72.2
   Topic     : min=15.0, max=100.0, avg=71.5
   Quality   : min=45.0, max=100.0, avg=72.4
   Diversity : min=30.0, max=100.0, avg=88.8
   TOTAL     : min=24.500, max=84.250, avg=62.392





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (16.2s):
   Grammar   : min=0.0, max=100.0, avg=56.8
   Coherence : min=50.0, max=100.0, avg=68.1
   Topic     : min=60.0, max=100.0, avg=89.1
   Quality   : min=53.3, max=100.0, avg=77.3
   Diversity : min=60.0, max=100.0, avg=98.1
   TOTAL     : min=0.000, max=82.000, avg=39.793





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (27.6s):
   Grammar   : min=0.0, max=100.0, avg=66.5
   Coherence : min=50.0, max=100.0, avg=77.9
   Topic     : min=13.3, max=100.0, avg=81.8
   Quality   : min=10.0, max=100.0, avg=77.3
   Diversity : min=30.0, max=100.0, avg=93.0
   TOTAL     : min=0.000, max=85.333, avg=56.233





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (16.8s):
   Grammar   : min=0.0, max=100.0, avg=57.7
   Coherence : min=50.0, max=100.0, avg=73.5
   Topic     : min=20.0, max=100.0, avg=81.1
   Quality   : min=10.0, max=100.0, avg=70.8
   Diversity : min=70.0, max=100.0, avg=94.0
   TOTAL     : min=0.000, max=86.333, avg=48.212





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




⏳ Step 3/3: Computing CPU-bound rewards and aggregating results...


                                                           


🎯 Reward calculation complete (24.7s):
   Grammar   : min=16.7, max=100.0, avg=59.4
   Coherence : min=33.3, max=100.0, avg=71.6
   Topic     : min=23.3, max=100.0, avg=84.9
   Quality   : min=66.7, max=100.0, avg=84.1
   Diversity : min=80.0, max=100.0, avg=96.4
   TOTAL     : min=0.000, max=88.500, avg=41.919





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ gpt-4o-mini failed (Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}). This batch will use neutral scores after retries.
  ⚠️ Unrecoverable error in cefr_alignment scorer: LLM model gpt-4o-mini failed to return a response.
  ⚠️ gpt-4o-mini failed (Error code: 429 - {'error': {'message': 'Rate limit reached for g

                                                           


🎯 Reward calculation complete (64.5s):
   Grammar   : min=0.0, max=100.0, avg=64.1
   Coherence : min=40.0, max=100.0, avg=69.7
   Topic     : min=0.0, max=100.0, avg=63.4
   Quality   : min=32.5, max=100.0, avg=74.2
   Diversity : min=80.0, max=100.0, avg=99.4
   TOTAL     : min=0.000, max=93.125, avg=57.172





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (96.3s):
   Grammar   : min=0.0, max=100.0, avg=59.0
   Coherence : min=50.0, max=100.0, avg=68.4
   Topic     : min=5.0, max=94.0, avg=52.3
   Quality   : min=36.0, max=100.0, avg=83.1
   Diversity : min=85.0, max=100.0, avg=98.6
   TOTAL     : min=0.000, max=86.600, avg=57.192





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (75.2s):
   Grammar   : min=0.0, max=80.0, avg=40.3
   Coherence : min=40.0, max=100.0, avg=64.1
   Topic     : min=15.0, max=100.0, avg=68.2
   Quality   : min=10.0, max=100.0, avg=75.7
   Diversity : min=30.0, max=100.0, avg=84.4
   TOTAL     : min=0.000, max=64.333, avg=30.075





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (95.3s):
   Grammar   : min=0.0, max=100.0, avg=51.4
   Coherence : min=30.0, max=92.0, avg=63.4
   Topic     : min=16.0, max=100.0, avg=72.9
   Quality   : min=28.0, max=100.0, avg=78.5
   Diversity : min=80.0, max=100.0, avg=97.3
   TOTAL     : min=0.000, max=78.000, avg=41.857





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (116.0s):
   Grammar   : min=0.0, max=100.0, avg=59.1
   Coherence : min=50.0, max=100.0, avg=65.4
   Topic     : min=30.0, max=100.0, avg=75.0
   Quality   : min=50.0, max=100.0, avg=84.4
   Diversity : min=80.0, max=100.0, avg=98.3
   TOTAL     : min=0.000, max=90.000, avg=57.821





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (98.4s):
   Grammar   : min=30.0, max=100.0, avg=55.0
   Coherence : min=33.3, max=83.3, avg=58.3
   Topic     : min=13.3, max=100.0, avg=73.2
   Quality   : min=53.3, max=100.0, avg=73.3
   Diversity : min=80.0, max=100.0, avg=96.6
   TOTAL     : min=0.000, max=76.000, avg=51.277





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (96.0s):
   Grammar   : min=0.0, max=100.0, avg=56.8
   Coherence : min=50.0, max=100.0, avg=68.1
   Topic     : min=40.0, max=100.0, avg=73.8
   Quality   : min=10.0, max=100.0, avg=84.9
   Diversity : min=30.0, max=100.0, avg=89.1
   TOTAL     : min=0.000, max=75.500, avg=41.467





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (94.3s):
   Grammar   : min=0.0, max=100.0, avg=57.1
   Coherence : min=35.0, max=100.0, avg=66.7
   Topic     : min=10.0, max=100.0, avg=70.2
   Quality   : min=62.5, max=100.0, avg=83.8
   Diversity : min=80.0, max=100.0, avg=97.6
   TOTAL     : min=0.000, max=88.000, avg=44.721





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (113.0s):
   Grammar   : min=0.0, max=100.0, avg=49.2
   Coherence : min=50.0, max=100.0, avg=66.1
   Topic     : min=14.0, max=100.0, avg=76.7
   Quality   : min=50.0, max=100.0, avg=78.5
   Diversity : min=60.0, max=100.0, avg=91.8
   TOTAL     : min=40.667, max=80.833, avg=59.848





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (104.0s):
   Grammar   : min=0.0, max=100.0, avg=56.9
   Coherence : min=27.5, max=100.0, avg=66.5
   Topic     : min=10.0, max=100.0, avg=70.7
   Quality   : min=50.0, max=100.0, avg=82.1
   Diversity : min=30.0, max=100.0, avg=86.2
   TOTAL     : min=0.000, max=78.417, avg=47.047





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (69.5s):
   Grammar   : min=25.0, max=100.0, avg=62.1
   Coherence : min=50.0, max=100.0, avg=68.4
   Topic     : min=6.7, max=100.0, avg=66.2
   Quality   : min=40.0, max=100.0, avg=80.3
   Diversity : min=60.0, max=100.0, avg=86.0
   TOTAL     : min=0.000, max=77.100, avg=34.144





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (89.5s):
   Grammar   : min=38.0, max=100.0, avg=67.5
   Coherence : min=50.0, max=100.0, avg=71.3
   Topic     : min=4.0, max=100.0, avg=70.2
   Quality   : min=50.0, max=100.0, avg=82.1
   Diversity : min=80.0, max=100.0, avg=94.5
   TOTAL     : min=0.000, max=87.333, avg=56.855





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (89.2s):
   Grammar   : min=0.0, max=100.0, avg=61.9
   Coherence : min=50.0, max=100.0, avg=72.8
   Topic     : min=15.0, max=100.0, avg=64.8
   Quality   : min=50.0, max=100.0, avg=82.9
   Diversity : min=60.0, max=100.0, avg=84.9
   TOTAL     : min=0.000, max=82.000, avg=47.741





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (111.7s):
   Grammar   : min=0.0, max=100.0, avg=58.9
   Coherence : min=50.0, max=100.0, avg=67.0
   Topic     : min=5.0, max=100.0, avg=75.0
   Quality   : min=62.5, max=100.0, avg=82.1
   Diversity : min=80.0, max=100.0, avg=95.7
   TOTAL     : min=55.833, max=84.700, avg=68.379





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (69.6s):
   Grammar   : min=37.5, max=100.0, avg=75.2
   Coherence : min=50.0, max=100.0, avg=75.6
   Topic     : min=35.0, max=100.0, avg=79.0
   Quality   : min=62.5, max=100.0, avg=90.4
   Diversity : min=60.0, max=100.0, avg=91.4
   TOTAL     : min=0.000, max=91.167, avg=46.534





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (86.5s):
   Grammar   : min=0.0, max=100.0, avg=60.9
   Coherence : min=50.0, max=87.5, avg=69.1
   Topic     : min=20.0, max=100.0, avg=69.9
   Quality   : min=65.0, max=100.0, avg=85.0
   Diversity : min=60.0, max=100.0, avg=92.4
   TOTAL     : min=0.000, max=79.625, avg=42.034





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (117.7s):
   Grammar   : min=0.0, max=100.0, avg=69.1
   Coherence : min=46.7, max=100.0, avg=75.4
   Topic     : min=22.0, max=100.0, avg=74.0
   Quality   : min=50.0, max=100.0, avg=83.4
   Diversity : min=80.0, max=100.0, avg=97.5
   TOTAL     : min=40.333, max=92.375, avg=69.274





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (121.6s):
   Grammar   : min=37.5, max=100.0, avg=55.2
   Coherence : min=50.0, max=87.5, avg=54.1
   Topic     : min=45.0, max=100.0, avg=78.8
   Quality   : min=52.5, max=100.0, avg=78.4
   Diversity : min=80.0, max=100.0, avg=97.9
   TOTAL     : min=0.000, max=82.875, avg=44.659





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (58.4s):
   Grammar   : min=33.3, max=80.0, avg=53.2
   Coherence : min=50.0, max=75.0, avg=54.2
   Topic     : min=32.5, max=100.0, avg=63.9
   Quality   : min=40.0, max=87.5, avg=70.8
   Diversity : min=80.0, max=100.0, avg=96.7
   TOTAL     : min=0.000, max=63.125, avg=20.400





⏳ Step 1/3: Parsing 32 JSON completions...


                                                              

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (86.7s):
   Grammar   : min=0.0, max=100.0, avg=49.9
   Coherence : min=50.0, max=94.0, avg=54.3
   Topic     : min=30.0, max=100.0, avg=77.2
   Quality   : min=62.5, max=100.0, avg=83.7
   Diversity : min=80.0, max=100.0, avg=98.2
   TOTAL     : min=0.000, max=82.667, avg=32.701





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (127.5s):
   Grammar   : min=22.5, max=100.0, avg=58.4
   Coherence : min=50.0, max=90.0, avg=61.7
   Topic     : min=42.0, max=100.0, avg=74.6
   Quality   : min=52.5, max=100.0, avg=83.5
   Diversity : min=85.0, max=100.0, avg=98.5
   TOTAL     : min=0.000, max=80.700, avg=62.060





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (131.2s):
   Grammar   : min=0.0, max=100.0, avg=53.0
   Coherence : min=50.0, max=90.0, avg=60.6
   Topic     : min=62.0, max=100.0, avg=78.9
   Quality   : min=40.0, max=100.0, avg=85.8
   Diversity : min=80.0, max=100.0, avg=97.6
   TOTAL     : min=0.000, max=81.625, avg=52.034





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (108.1s):
   Grammar   : min=33.3, max=92.0, avg=51.5
   Coherence : min=50.0, max=100.0, avg=56.5
   Topic     : min=20.0, max=100.0, avg=78.6
   Quality   : min=10.0, max=100.0, avg=87.6
   Diversity : min=85.0, max=100.0, avg=98.8
   TOTAL     : min=0.000, max=89.200, avg=46.167





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (101.3s):
   Grammar   : min=47.5, max=100.0, avg=60.4
   Coherence : min=50.0, max=87.5, avg=57.9
   Topic     : min=13.3, max=100.0, avg=79.1
   Quality   : min=65.0, max=100.0, avg=84.8
   Diversity : min=80.0, max=100.0, avg=96.9
   TOTAL     : min=0.000, max=85.833, avg=54.878





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (120.5s):
   Grammar   : min=16.7, max=100.0, avg=54.1
   Coherence : min=50.0, max=100.0, avg=54.7
   Topic     : min=32.5, max=85.0, avg=66.7
   Quality   : min=54.0, max=100.0, avg=80.8
   Diversity : min=80.0, max=100.0, avg=95.3
   TOTAL     : min=0.000, max=73.800, avg=53.617





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (118.2s):
   Grammar   : min=0.0, max=100.0, avg=50.9
   Coherence : min=50.0, max=100.0, avg=62.8
   Topic     : min=32.5, max=94.0, avg=70.9
   Quality   : min=65.0, max=100.0, avg=81.9
   Diversity : min=88.0, max=100.0, avg=97.9
   TOTAL     : min=0.000, max=81.900, avg=44.056





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (107.3s):
   Grammar   : min=25.0, max=88.0, avg=55.3
   Coherence : min=50.0, max=87.5, avg=55.8
   Topic     : min=26.0, max=100.0, avg=69.6
   Quality   : min=40.0, max=100.0, avg=76.3
   Diversity : min=80.0, max=100.0, avg=96.6
   TOTAL     : min=0.000, max=86.500, avg=52.101


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (91.5s):
   Grammar   : min=16.7, max=100.0, avg=53.0
   Coherence : min=47.5, max=90.0, avg=55.1
   Topic     : min=20.0, max=100.0, avg=71.1
   Quality   : min=50.0, max=100.0, avg=79.4
   Diversity : min=70.0, max=100.0, avg=98.0
   TOTAL     : min=0.000, max=77.200, avg=42.726





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (112.3s):
   Grammar   : min=0.0, max=100.0, avg=58.5
   Coherence : min=50.0, max=100.0, avg=64.7
   Topic     : min=20.0, max=100.0, avg=69.1
   Quality   : min=42.0, max=100.0, avg=71.8
   Diversity : min=76.0, max=100.0, avg=94.3
   TOTAL     : min=0.000, max=80.000, avg=52.316





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (121.4s):
   Grammar   : min=26.7, max=100.0, avg=57.6
   Coherence : min=50.0, max=87.5, avg=57.2
   Topic     : min=6.7, max=100.0, avg=70.7
   Quality   : min=53.3, max=100.0, avg=80.7
   Diversity : min=70.0, max=100.0, avg=93.7
   TOTAL     : min=0.000, max=87.250, avg=49.980





⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}
  ⚠️ LLM topic check failed: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-relcZrNpLOM7vNfjCsrTsokn on requests per day (RPD): Limit 10000, Used 10000, Requested 1. Please try again in 8.64s

                                                           


🎯 Reward calculation complete (116.6s):
   Grammar   : min=36.0, max=62.5, avg=50.5
   Coherence : min=50.0, max=90.0, avg=57.0
   Topic     : min=27.5, max=100.0, avg=79.3
   Quality   : min=42.5, max=100.0, avg=72.0
   Diversity : min=80.0, max=100.0, avg=97.8
   TOTAL     : min=0.000, max=73.000, avg=47.783


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



⏳ Step 1/3: Parsing 32 JSON completions...


                                                    

⏳ Step 2/3: Scoring 32 completions with batched reward function...




KeyboardInterrupt: 