In [None]:
import os
import json
import glob
from tqdm.notebook import tqdm

import torch
from transformers import (
    AutoModelForCausalLM, 
    AutoModelForCausalLM as BaseCausalLM,
    AutoTokenizer, 
    BitsAndBytesConfig, 
    pipeline
)
from peft import PeftModel
from datasets import Dataset
from trl import PPOConfig, PPOTrainer, AutoModelForCausalLMWithValueHead

In [2]:
# Load Hugging Face API token from file and set it as an environment variable.
# The path is relative to the notebook's working directory.
# A KeyError is raised here if the file has no "huggingface_access_token" entry.
keys_path = "../keys.json"
with open(keys_path, "r") as f:
    keys = json.load(f)
os.environ["HF_TOKEN"] = keys["huggingface_access_token"]

print("Keys loaded successfully.")


Keys loaded successfully.


In [3]:
# Checkpoint identifier, local download cache, and the GPU used throughout the notebook.
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
scratch_dir = "../models"
device = torch.device("cuda:0")

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Tokenizer for the base checkpoint, cached in scratch_dir.
# (The base model and the LoRA adapter are loaded in the next cell.)
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=scratch_dir)

In [None]:
# --- PPO uses two models here: a policy with a value head and a reference model. ---
# Policy: 4-bit base checkpoint + the fine-tuned LoRA adapter, wrapped with a value head.
# Reference: the same 4-bit base checkpoint wrapped with a value head, without the adapter.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map={"": 0},
    cache_dir=scratch_dir,
)

# Attach the LoRA adapter stored next to this notebook (the adapter is not merged).
peft_model = PeftModel.from_pretrained(base_model, "./lora_mistral_adapter_mindmaps_v1")

# Wrap the adapter model with TRL's value-head class for PPO.
ppo_model = AutoModelForCausalLMWithValueHead.from_pretrained(peft_model)

# NOTE(review): the reference is the plain base checkpoint, not the adapter (SFT) model,
# so policy and reference already differ before any PPO update — confirm this is intended.
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map={"": 0},
    cache_dir=scratch_dir,
)
ref_model.eval()

print("Value-headed model and reference model set for PPO training.")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



Value-headed model and reference model set for PPO training.


In [5]:
# Load training entries.
# The file is parsed as JSON; later cells index the result (train_entries[0]) and
# iterate over it, reading "globalID", "lessonName", "adjunctTopics" and "topics".
train_data_path = "../data/tqa_train_val_test/train/tqa_v1_train.json"
with open(train_data_path, "r", encoding="utf-8") as f:
    train_entries = json.load(f)
print(f"Loaded {len(train_entries)} training entries.")

def load_gt_mindmaps(folder):
    """Read every ``*.txt`` file directly inside ``folder`` into a dictionary.

    Args:
        folder: Directory to scan (non-recursive).

    Returns:
        dict mapping each file name without its extension to the file's
        UTF-8 text with leading/trailing whitespace stripped. Empty when no
        file matches.
    """
    mindmaps_by_id = {}
    pattern = os.path.join(folder, "*.txt")
    for txt_path in glob.glob(pattern):
        stem, _extension = os.path.splitext(os.path.basename(txt_path))
        with open(txt_path, "r", encoding="utf-8") as handle:
            mindmaps_by_id[stem] = handle.read().strip()
    return mindmaps_by_id

# Load the ground-truth mind maps of both versions from their train folders.
# NOTE(review): neither dictionary is referenced by the later cells visible in this
# notebook — confirm they are still needed.
gt_mindmaps_v1 = load_gt_mindmaps("../data/tqa_train_val_test/train/mindmap_text_v1")
gt_mindmaps_v2 = load_gt_mindmaps("../data/tqa_train_val_test/train/mindmap_text_v2")
print(f"Loaded {len(gt_mindmaps_v1)} GT mind maps from v1 and {len(gt_mindmaps_v2)} from v2.")



Loaded 666 training entries.
Loaded 666 GT mind maps from v1 and 666 from v2.


In [None]:

pairwise_eval_path = "../data/tqa_train_val_test/train/pairwise_evaluations.json"

with open(pairwise_eval_path, "r", encoding="utf-8") as f:
    evaluations = json.load(f)

# Map each example ID (the record's filename without extension) to the text of the
# mind map the evaluation preferred. The value is "" when the preference is not
# "Output A"/"Output B" or when the referenced file does not exist.
preferred_mindmaps = {}
for record in evaluations:
    example_id = os.path.splitext(record["filename"])[0]

    # "Output A" refers to the v1 mind map, "Output B" to the v2 mind map.
    preferred_output = record["evaluation"]["preferred_output"]
    if preferred_output == "Output A":
        preferred_path = record["mind_map_v1_path"]
    elif preferred_output == "Output B":
        preferred_path = record["mind_map_v2_path"]
    else:
        preferred_path = ""

    # Start from the empty fallback and overwrite it only if the file can be read.
    mind_map_text = ""
    if preferred_path and os.path.exists(preferred_path):
        with open(preferred_path, "r", encoding="utf-8") as fp:
            mind_map_text = fp.read().strip()

    preferred_mindmaps[example_id] = mind_map_text

# Sanity check: show the last record that was processed.
print(example_id,"\n",preferred_mindmaps[example_id])

mermaid_code_L_0547 
 mindmap
  Human Causes of Extinction
    Causes of Extinction
      Primary Cause
        Human-induced extinctions
      Greatest Contributor
        Habitat destruction
    Global Climate Change
      Definition
        Long-term change in climate
      Effects on Organisms
        Hazardous to survival
    Global Warming
      Definition
        Increase in Earth's average temperature
      Impact on Species
        Destruction of habitats
        Stress from brief climate changes
    Pollution
      Types
        Chemicals
        Noise
        Heat
        Light
      Effects on Organisms
        Threats to survival
        Example: DDT and peregrine falcons
        Water pollution
        Soil contamination
    Human Population Growth
      Current Statistics
        Over 7 billion in 2011
        Projected 8-9 billion by mid-century
      Resource Demand
        Need for housing, food, water
      Contribution to Extinction
        Increased global warming


In [7]:
def extract_textbook_text_with_image_explainations(entry):
    """Collect the lesson title, section texts and image explanations of one entry.

    Args:
        entry: Mapping with optional "lessonName", "adjunctTopics" and "topics"
            keys. Each topic may carry a "content" mapping with "text" and
            "figures" (each figure having optional "caption" / "imagePath").

    Returns:
        list[str]: "Lesson: <name>" first (when a lesson name is present), then
        for every adjunct topic followed by every main topic: the stripped
        section text and one line per figure whose explanation file exists and
        is non-empty.
    """
    captions_root = "../data/tqa_train_val_test/train/"
    # (substring looked for in the image path, folder holding its explanation files)
    folder_by_marker = (
        ("textbook_images", "textbook_images_llava_captions"),
        ("teaching_images", "teaching_images_llava_captions"),
    )

    def describe_section(section):
        """Return the text pieces contributed by a single topic/section."""
        pieces = []
        content = section.get("content", {})
        body = content.get("text", "")
        if body:
            pieces.append(body.strip())

        for figure in content.get("figures", []):
            caption = figure.get("caption", "").strip()
            image_path = figure.get("imagePath", "")
            if not image_path:
                continue

            # First marker contained in the path decides the explanation folder.
            explanation_folder = next(
                (folder for marker, folder in folder_by_marker if marker in image_path),
                None,
            )
            if not explanation_folder:
                continue

            stem = os.path.splitext(os.path.basename(image_path))[0]
            explanation_file = os.path.join(captions_root, explanation_folder, stem + ".txt")
            if not os.path.exists(explanation_file):
                continue

            with open(explanation_file, 'r', encoding="utf-8") as handle:
                explanation = handle.read().strip()
            if not explanation:
                continue

            prefix = f"Image Caption: {caption}. " if caption else ""
            pieces.append(prefix + f"Image Explanation: {explanation}")
        return pieces

    collected = []
    lesson = entry.get("lessonName", "")
    if lesson:
        collected.append(f"Lesson: {lesson.strip()}")

    # Adjunct topics come first, then the main topics.
    for group_key in ("adjunctTopics", "topics"):
        for section in entry.get(group_key, {}).values():
            collected.extend(describe_section(section))

    return collected

In [8]:
def build_training_prompt(entry):
    """Return the full prompt: the fixed instruction, the entry's text, and an output cue.

    The entry's extracted text pieces are joined with blank lines and placed
    after "Input: "; the prompt ends with "Output:".
    """
    instruction = (
        "Instruction: Generate a mind map in Mermaid syntax for the following textbook text. "
        "The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. "
        "Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics "
        "as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. "
        "Also, do NOT include any theme directives such as %%{init: {\"theme\": \"default\"}}%%, any parentheses or any extraneous formatting. "
        "The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure "
        "of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.\n\n"
    )
    combined_text = "\n\n".join(extract_textbook_text_with_image_explainations(entry))
    return instruction + f"Input: {combined_text}\nOutput:"

In [None]:
# Pair every training entry with its preferred mind map and its prompt.
training_examples = []
# IDs of entries that have a globalID but no non-empty preferred mind map.
skipped_example_ids = []

print("Sample training entry:", train_entries[0])

for entry in train_entries:
    # Use the "globalID" field from the training entry; entries without one are ignored.
    example_id = entry.get("globalID", "").strip()
    if not example_id:
        continue

    # preferred_mindmaps is keyed with the "mermaid_code_" prefix; add it if missing.
    if example_id.startswith("mermaid_code_"):
        lookup_id = example_id
    else:
        lookup_id = f"mermaid_code_{example_id}"

    # Check for a preferred mind map BEFORE building the prompt: prompt building
    # reads explanation files from disk, which is wasted work for dropped entries.
    preferred = preferred_mindmaps.get(lookup_id, "")
    if not preferred:
        skipped_example_ids.append(example_id)
        continue

    training_examples.append({
        "example_id": example_id,
        "prompt": build_training_prompt(entry),
        "preferred": preferred
    })

print(f"Prepared {len(training_examples)} training examples.")
if skipped_example_ids:
    # Make the silent drop visible so missing preference files can be investigated.
    print(f"Skipped {len(skipped_example_ids)} entries without a preferred mind map: {skipped_example_ids}")


Sample training entry: {'adjunctTopics': {'Apply Concepts': {'content': {'figures': [], 'mediaLinks': [], 'text': '5. A glacier is melting. What are all of the scientists you can think of who might be involved in studying this glacier? What would each of them do? '}, 'orderID': 't_12'}, 'Introduction': {'content': {'figures': [{'caption': 'FIGURE 1.10 Earth as seen from Apollo 17.', 'imagePath': 'textbook_images/earth_science_and_its_branches_20010.png'}], 'mediaLinks': [], 'text': 'Earth Science is the study of all aspects of our planet Earth. Earth Science is not just about the molten lava, icy mountain peaks, steep canyons and towering waterfalls of the continents. Earth Science includes the atmosphere and oceans. The field also looks out into the solar system, galaxy, and universe. Earth scientists seek to understand the beautiful planet on which we depend (Figure 1.10). Different branches of Earth Science study one particular part of Earth. Since all of the branches are connected,

In [10]:
# Build a Hugging Face dataset holding each prompt (as "query") and its example ID.
# NOTE(review): no later cell visible in this notebook passes train_dataset to the
# trainer — confirm it is still needed.
train_dataset = Dataset.from_list(
    [
        {"query": item["prompt"], "example_id": item["example_id"]}
        for item in training_examples
    ]
)
print("Hugging Face training dataset created.")
print(train_dataset)

Hugging Face training dataset created.
Dataset({
    features: ['query', 'example_id'],
    num_rows: 664
})


In [11]:
def load_api_key(path: str = "../keys.json") -> None:
    """Load the OpenAI API key from a JSON file into ``OPENAI_API_KEY``.

    Args:
        path: JSON file containing an ``"openai_api_key"`` entry.

    Raises:
        OSError: if ``path`` cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        ValueError: if the ``"openai_api_key"`` entry is missing, not a string,
            or blank.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    api_key = data.get("openai_api_key")
    # os.environ only accepts strings; assigning None would fail with an opaque
    # TypeError, so reject unusable values with an explicit message instead.
    if not isinstance(api_key, str) or not api_key.strip():
        raise ValueError(f"No usable 'openai_api_key' entry found in {path!r}.")
    os.environ["OPENAI_API_KEY"] = api_key

In [12]:
# Export OPENAI_API_KEY from the default "../keys.json"; compute_reward_gpt4o reads it.
load_api_key()

In [None]:
import requests
import re

def _coerce_likert_score(value, name: str) -> float:
    """Convert one evaluator score to a float clamped to the 1-7 Likert range."""
    try:
        score = float(value)
    except (TypeError, ValueError):
        raise ValueError(f"{name} is not numeric: {value!r}") from None
    return min(7.0, max(1.0, score))


def compute_reward_gpt4o(ground_truth: str, prediction: str, timeout: float = 60.0) -> float:
    """
    Compute a reward for the generated mind map by comparing it to the ground truth mind map
    using the "gpt-4o-mini" chat completions model.

    The evaluation prompt asks for a JSON object with "cognitive_score" and
    "structural_score" (Likert values 1 to 7). A missing score defaults to 4; each score is
    converted to a number and clamped to [1, 7]. The two scores are averaged and normalized
    so that an average of 1 gives 0.0 and an average of 7 gives 1.0.

    Args:
        ground_truth: Reference mind map text inserted into the prompt.
        prediction: Generated mind map text inserted into the prompt.
        timeout: Seconds passed to ``requests.post`` so a stalled connection cannot
            block the caller forever.

    Returns:
         normalized_reward (float): Overall reward normalized between 0 and 1.

    Raises:
        RuntimeError: if OPENAI_API_KEY is not set.
        requests.HTTPError: if the API answers with an error status (raise_for_status).
        ValueError: if no JSON object can be parsed from the reply or a score is not numeric.
    """
    prompt = f"""
You are an expert evaluator of mind maps. Evaluate the quality of the generated mind map relative to the ground truth.
Consider the following dimensions:

1. **Cognitive Indicators**: How easily can a user understand and mentally process the information? 
   Evaluate clarity, flow of ideas, and thematic relevance.

2. **Structural Indicators**: How well-organized and logically complete is the mind map? 
   Evaluate grouping of ideas, coverage of key concepts, level of detail, and consistency of hierarchy.

Scoring Instructions:
- Use a Likert scale from 1 (much worse) to 7 (much better).
- A score of 4 indicates the generated mind map is equal in quality to the ground truth.

Return your evaluation as a JSON object with the keys:
  "cognitive_score": int,
  "structural_score": int

Ground Truth Mind Map:
{ground_truth}

Generated Mind Map:
{prediction}
    """
    
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY not set — run load_api_key() first.")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": "You are an expert evaluator of mind maps."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }
    
    # requests applies no timeout unless one is given, so always pass one explicitly.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    response.raise_for_status()
    
    # Retrieve and clean the returned text.
    text = response.json()["choices"][0]["message"]["content"].strip()
    
    # Try parsing the whole reply as JSON; otherwise fall back to the outermost
    # {...} span (covers replies wrapped in prose or code fences).
    try:
        evaluation = json.loads(text)
    except json.JSONDecodeError:
        match = re.search(r"\{.*\}", text, re.S)
        if match:
            evaluation = json.loads(match.group())
        else:
            raise ValueError("Could not parse JSON from API response.")
    if not isinstance(evaluation, dict):
        raise ValueError("API response JSON is not an object with score keys.")

    # Extract the scores; 4 ("equal to ground truth") is the default for a missing key.
    cognitive = _coerce_likert_score(evaluation.get("cognitive_score", 4), "cognitive_score")
    structural = _coerce_likert_score(evaluation.get("structural_score", 4), "structural_score")
    
    # Compute the average score.
    avg_score = (cognitive + structural) / 2.0
    # Normalize: (score - 1) / (7 - 1) so that 1 becomes 0.0 and 7 becomes 1.0.
    normalized_reward = (avg_score - 1) / 6.0
    
    # Print the evaluation and computed reward for debugging.
    print("Evaluation:", evaluation)
    print("Average score:", avg_score, "Normalized reward:", normalized_reward)
    
    return normalized_reward

In [14]:
from transformers import AutoConfig, GenerationConfig
# Load the generation config published with the base checkpoint.
gen_config = GenerationConfig.from_pretrained(model_name)

# Attach to model manually
# NOTE(review): these attributes are patched onto the value-head wrapper by hand,
# presumably because generate()/the trainer look them up — confirm against the installed TRL version.
ppo_model.generation_config = gen_config
ppo_model.base_model_prefix = "pretrained_model"

# Device type strings are lowercase in torch; torch.device("CPU") raises a RuntimeError,
# so the fallback must be spelled "cpu".
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
ppo_model = ppo_model.to(device)
ppo_model.device = device

In [15]:
from torch import nn
class DummyRewardModel(nn.Module):
    """Placeholder reward model whose forward pass ignores all of its inputs."""

    def forward(self, *_inputs, **_options):
        # Always a fresh one-element float tensor holding 0.0.
        constant_reward = torch.tensor([0.0])
        return constant_reward

# Instantiate the placeholder and move it to `device`.
# NOTE(review): dummy_reward_model is not passed to PPOTrainer or used in the training
# loop shown in this notebook — confirm it is still required.
dummy_reward_model = DummyRewardModel().to(device)

In [None]:
# PPO hyperparameters: one query/response pair per step, logged to TensorBoard under ./ppo_logs.
ppo_config = PPOConfig(
    model_name=None,
    log_with="tensorboard",
    project_kwargs={"logging_dir": "./ppo_logs"},
    learning_rate=1e-5,
    mini_batch_size=1,
    batch_size=1,  # must be divisible by mini_batch_size * gradient_accumulation_steps
)

# Policy (value-headed adapter model) and reference model come from the loading cell.
ppo_trainer = PPOTrainer(
    model=ppo_model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    config=ppo_config,
)
# NOTE(review): no dataset is passed to PPOTrainer above, although the message says so.
print("PPO trainer initialized with training dataset.")


PPO trainer initialized with training dataset.




In [None]:
import gc
from tqdm import tqdm
from torch.nn.utils.rnn import pad_sequence

# Ensure tokenizer has a pad_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

num_epochs = 1  # Adjust as needed
# IDs of examples whose PPO step was skipped because no reward could be computed.
failed_example_ids = []
print("Starting PPO training loop...\n")

for epoch in range(num_epochs):
    for example in tqdm(training_examples, desc=f"Epoch {epoch + 1}"):
        prompt = example["prompt"]
        preferred = example["preferred"]

        # Tokenize prompt (truncated to 5000 tokens) and move to device
        query_inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=5000)
        query_inputs = {k: v.to(device) for k, v in query_inputs.items()}
        query_tensors = query_inputs["input_ids"]

        # Sample a response from the current policy
        with torch.inference_mode():
            output_ids = ppo_model.generate(
                **query_inputs,
                max_new_tokens=300,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Keep only the newly generated tokens (everything after the prompt)
        response_tensors = output_ids[:, query_tensors.shape[-1]:]
        generated_mindmap = tokenizer.decode(response_tensors[0], skip_special_tokens=True)

        # Score the generation against the preferred mind map. The reward comes from a
        # remote API: a transient HTTP error or an unparsable reply must not abort a
        # multi-hour run, so such examples are skipped and reported. A missing API key
        # (RuntimeError) is deliberately NOT caught, because it would fail every example.
        # `requests` is imported by the cell that defines compute_reward_gpt4o.
        reward_tensor = None
        try:
            reward = compute_reward_gpt4o(preferred, generated_mindmap)
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            failed_example_ids.append(example["example_id"])
            print(f"Skipping example {example['example_id']}: reward computation failed ({err!r}).")
        else:
            reward_tensor = torch.tensor([reward], device=device)

            # PPO step on this single query/response pair
            stats = ppo_trainer.step(
                queries=[query_tensors[0]],
                responses=[response_tensors[0]],
                scores=[reward_tensor]
            )

        # Memory cleanup (runs for skipped examples too)
        del query_inputs, query_tensors, output_ids, response_tensors, reward_tensor
        torch.cuda.empty_cache()
        gc.collect()

print("PPO training complete.")
if failed_example_ids:
    print(f"Skipped {len(failed_example_ids)} examples without a reward: {failed_example_ids}")

Starting PPO training loop...



Epoch 1:   0%|          | 0/664 [00:00<?, ?it/s]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
Epoch 1:   0%|          | 1/664 [00:24<4:36:09, 24.99s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   0%|          | 2/664 [00:56<5:16:58, 28.73s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   0%|          | 3/664 [01:18<4:44:50, 25.86s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   1%|          | 4/664 [01:44<4:42:28, 25.68s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   1%|          | 5/664 [02:12<4:51:16, 26.52s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   1%|          | 6/664 [02:45<5:17:08, 28.92s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   1%|          | 7/664 [03:05<4:43:24, 25.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   1%|          | 8/664 [03:26<4:26:09, 24.34s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   1%|▏         | 9/664 [03:51<4:28:57, 24.64s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   2%|▏         | 10/664 [04:15<4:26:52, 24.48s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   2%|▏         | 11/664 [04:42<4:35:01, 25.27s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   2%|▏         | 12/664 [05:15<4:57:45, 27.40s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   2%|▏         | 13/664 [05:43<5:00:20, 27.68s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   2%|▏         | 14/664 [06:19<5:26:27, 30.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   2%|▏         | 15/664 [06:39<4:54:35, 27.23s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   2%|▏         | 16/664 [07:03<4:44:00, 26.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   3%|▎         | 17/664 [07:31<4:48:15, 26.73s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   3%|▎         | 18/664 [08:00<4:55:08, 27.41s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   3%|▎         | 19/664 [08:30<5:02:42, 28.16s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   3%|▎         | 20/664 [09:01<5:11:06, 28.99s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   3%|▎         | 21/664 [09:29<5:06:44, 28.62s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   3%|▎         | 22/664 [10:01<5:19:07, 29.82s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:   3%|▎         | 23/664 [10:29<5:12:21, 29.24s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   4%|▎         | 24/664 [11:02<5:23:15, 30.31s/it]

Evaluation: {'cognitive_score': 2, 'structural_score': 2}
Average score: 2.0 Normalized reward: 0.16666666666666666


Epoch 1:   4%|▍         | 25/664 [11:41<5:49:43, 32.84s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   4%|▍         | 26/664 [12:12<5:44:55, 32.44s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   4%|▍         | 27/664 [12:42<5:35:15, 31.58s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   4%|▍         | 28/664 [13:16<5:41:11, 32.19s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:   4%|▍         | 29/664 [13:44<5:28:18, 31.02s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   5%|▍         | 30/664 [14:11<5:16:58, 30.00s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   5%|▍         | 31/664 [14:40<5:11:33, 29.53s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   5%|▍         | 32/664 [15:08<5:07:26, 29.19s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   5%|▍         | 33/664 [15:36<5:02:17, 28.74s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   5%|▌         | 34/664 [16:09<5:15:36, 30.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   5%|▌         | 35/664 [16:32<4:53:26, 27.99s/it]

Evaluation: {'cognitive_score': 2, 'structural_score': 2}
Average score: 2.0 Normalized reward: 0.16666666666666666


Epoch 1:   5%|▌         | 36/664 [17:11<5:27:34, 31.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   6%|▌         | 37/664 [17:47<5:41:02, 32.64s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   6%|▌         | 38/664 [18:16<5:28:39, 31.50s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   6%|▌         | 39/664 [18:51<5:40:12, 32.66s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   6%|▌         | 40/664 [19:11<4:58:24, 28.69s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   6%|▌         | 41/664 [19:37<4:51:09, 28.04s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:   6%|▋         | 42/664 [20:02<4:41:10, 27.12s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   6%|▋         | 43/664 [20:38<5:06:55, 29.65s/it]

Evaluation: {'cognitive_score': 2, 'structural_score': 2}
Average score: 2.0 Normalized reward: 0.16666666666666666


Epoch 1:   7%|▋         | 44/664 [21:16<5:33:52, 32.31s/it]

Evaluation: {'cognitive_score': 2, 'structural_score': 2}
Average score: 2.0 Normalized reward: 0.16666666666666666


Epoch 1:   7%|▋         | 45/664 [21:55<5:52:18, 34.15s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   7%|▋         | 46/664 [22:31<5:59:23, 34.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   7%|▋         | 47/664 [23:05<5:55:24, 34.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   7%|▋         | 48/664 [23:27<5:16:50, 30.86s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   7%|▋         | 49/664 [23:50<4:50:10, 28.31s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   8%|▊         | 50/664 [24:12<4:30:08, 26.40s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   8%|▊         | 51/664 [24:26<3:53:14, 22.83s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   8%|▊         | 52/664 [24:46<3:42:54, 21.85s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   8%|▊         | 53/664 [25:12<3:56:48, 23.25s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   8%|▊         | 54/664 [25:43<4:20:23, 25.61s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:   8%|▊         | 55/664 [26:15<4:38:38, 27.45s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   8%|▊         | 56/664 [26:39<4:28:46, 26.52s/it]

Evaluation: {'cognitive_score': 2, 'structural_score': 2}
Average score: 2.0 Normalized reward: 0.16666666666666666


Epoch 1:   9%|▊         | 57/664 [27:18<5:05:04, 30.16s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   9%|▊         | 58/664 [27:40<4:41:00, 27.82s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   9%|▉         | 59/664 [28:03<4:24:28, 26.23s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 3}
Average score: 3.0 Normalized reward: 0.3333333333333333


Epoch 1:   9%|▉         | 60/664 [28:19<3:53:15, 23.17s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   9%|▉         | 61/664 [28:41<3:50:15, 22.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:   9%|▉         | 62/664 [29:02<3:43:07, 22.24s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:   9%|▉         | 63/664 [29:29<3:57:52, 23.75s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  10%|▉         | 64/664 [29:53<3:56:19, 23.63s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  10%|▉         | 65/664 [30:14<3:49:18, 22.97s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  10%|▉         | 66/664 [30:38<3:50:37, 23.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  10%|█         | 67/664 [31:03<3:56:43, 23.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  10%|█         | 68/664 [31:26<3:54:31, 23.61s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  10%|█         | 69/664 [31:50<3:53:42, 23.57s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  11%|█         | 70/664 [32:13<3:52:54, 23.53s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  11%|█         | 71/664 [32:33<3:42:38, 22.53s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  11%|█         | 72/664 [32:53<3:33:10, 21.61s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  11%|█         | 73/664 [33:15<3:34:44, 21.80s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  11%|█         | 74/664 [33:37<3:33:56, 21.76s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  11%|█▏        | 75/664 [34:00<3:38:46, 22.29s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  11%|█▏        | 76/664 [34:24<3:43:15, 22.78s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  12%|█▏        | 77/664 [34:46<3:40:59, 22.59s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  12%|█▏        | 78/664 [35:07<3:36:00, 22.12s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  12%|█▏        | 79/664 [35:28<3:32:59, 21.85s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  12%|█▏        | 80/664 [35:43<3:12:36, 19.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  12%|█▏        | 81/664 [36:05<3:17:24, 20.32s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  12%|█▏        | 82/664 [36:29<3:26:45, 21.32s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  12%|█▎        | 83/664 [36:50<3:26:31, 21.33s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  13%|█▎        | 84/664 [37:11<3:26:39, 21.38s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  13%|█▎        | 85/664 [37:32<3:24:30, 21.19s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  13%|█▎        | 86/664 [37:53<3:23:20, 21.11s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  13%|█▎        | 87/664 [38:12<3:16:09, 20.40s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  13%|█▎        | 88/664 [38:38<3:31:16, 22.01s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  13%|█▎        | 89/664 [39:02<3:36:56, 22.64s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  14%|█▎        | 90/664 [39:24<3:34:37, 22.43s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  14%|█▎        | 91/664 [39:38<3:12:28, 20.15s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  14%|█▍        | 92/664 [39:55<3:03:11, 19.22s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  14%|█▍        | 93/664 [40:04<2:33:23, 16.12s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  14%|█▍        | 94/664 [40:21<2:33:59, 16.21s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  14%|█▍        | 95/664 [40:45<2:55:27, 18.50s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  14%|█▍        | 96/664 [41:04<2:58:26, 18.85s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  15%|█▍        | 97/664 [41:26<3:05:49, 19.66s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  15%|█▍        | 98/664 [41:49<3:14:37, 20.63s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  15%|█▍        | 99/664 [42:06<3:05:21, 19.68s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  15%|█▌        | 100/664 [42:27<3:07:43, 19.97s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  15%|█▌        | 101/664 [42:44<2:58:33, 19.03s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  15%|█▌        | 102/664 [43:04<3:00:24, 19.26s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  16%|█▌        | 103/664 [43:25<3:05:03, 19.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  16%|█▌        | 104/664 [43:48<3:15:04, 20.90s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  16%|█▌        | 105/664 [44:12<3:24:09, 21.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  16%|█▌        | 106/664 [44:37<3:30:37, 22.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  16%|█▌        | 107/664 [45:00<3:33:26, 22.99s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  16%|█▋        | 108/664 [45:23<3:32:46, 22.96s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  16%|█▋        | 109/664 [45:45<3:28:57, 22.59s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  17%|█▋        | 110/664 [46:03<3:16:08, 21.24s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  17%|█▋        | 111/664 [46:25<3:17:38, 21.44s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  17%|█▋        | 112/664 [46:47<3:19:41, 21.71s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  17%|█▋        | 113/664 [47:12<3:26:02, 22.44s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  17%|█▋        | 114/664 [47:36<3:31:02, 23.02s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  17%|█▋        | 115/664 [47:59<3:31:11, 23.08s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  17%|█▋        | 116/664 [48:21<3:27:30, 22.72s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  18%|█▊        | 117/664 [48:37<3:08:34, 20.69s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  18%|█▊        | 118/664 [48:59<3:10:50, 20.97s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  18%|█▊        | 119/664 [49:22<3:16:17, 21.61s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  18%|█▊        | 120/664 [49:38<3:00:44, 19.93s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  18%|█▊        | 121/664 [50:00<3:07:19, 20.70s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  18%|█▊        | 122/664 [50:24<3:16:23, 21.74s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  19%|█▊        | 123/664 [50:47<3:19:30, 22.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  19%|█▊        | 124/664 [51:01<2:55:24, 19.49s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  19%|█▉        | 125/664 [51:23<3:03:05, 20.38s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  19%|█▉        | 126/664 [51:41<2:56:37, 19.70s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  19%|█▉        | 127/664 [52:02<2:59:40, 20.07s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  19%|█▉        | 128/664 [52:23<3:02:18, 20.41s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  19%|█▉        | 129/664 [52:37<2:42:45, 18.25s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  20%|█▉        | 130/664 [52:58<2:50:33, 19.16s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  20%|█▉        | 131/664 [53:26<3:14:22, 21.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  20%|█▉        | 132/664 [53:46<3:09:29, 21.37s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  20%|██        | 133/664 [54:05<3:01:10, 20.47s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  20%|██        | 134/664 [54:27<3:05:46, 21.03s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  20%|██        | 135/664 [54:53<3:17:33, 22.41s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  20%|██        | 136/664 [55:11<3:07:15, 21.28s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  21%|██        | 137/664 [55:33<3:06:38, 21.25s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  21%|██        | 138/664 [55:43<2:39:09, 18.15s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  21%|██        | 139/664 [56:05<2:47:28, 19.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  21%|██        | 140/664 [56:27<2:55:58, 20.15s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  21%|██        | 141/664 [56:44<2:46:42, 19.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  21%|██▏       | 142/664 [57:05<2:50:23, 19.58s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  22%|██▏       | 143/664 [57:28<3:00:33, 20.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  22%|██▏       | 144/664 [57:53<3:11:03, 22.05s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  22%|██▏       | 145/664 [58:20<3:22:35, 23.42s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  22%|██▏       | 146/664 [58:39<3:11:16, 22.15s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  22%|██▏       | 147/664 [58:53<2:49:58, 19.73s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  22%|██▏       | 148/664 [59:17<3:00:27, 20.98s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  22%|██▏       | 149/664 [59:39<3:01:25, 21.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  23%|██▎       | 150/664 [1:00:01<3:03:49, 21.46s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  23%|██▎       | 151/664 [1:00:25<3:09:37, 22.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  23%|██▎       | 152/664 [1:00:47<3:10:10, 22.29s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  23%|██▎       | 153/664 [1:01:12<3:15:31, 22.96s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  23%|██▎       | 154/664 [1:01:27<2:55:57, 20.70s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  23%|██▎       | 155/664 [1:01:50<2:59:37, 21.17s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  23%|██▎       | 156/664 [1:02:13<3:05:21, 21.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  24%|██▎       | 157/664 [1:02:38<3:12:47, 22.82s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  24%|██▍       | 158/664 [1:02:59<3:08:32, 22.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  24%|██▍       | 159/664 [1:03:20<3:04:36, 21.93s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  24%|██▍       | 160/664 [1:03:41<3:01:40, 21.63s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  24%|██▍       | 161/664 [1:04:02<2:59:23, 21.40s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  24%|██▍       | 162/664 [1:04:25<3:03:06, 21.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  25%|██▍       | 163/664 [1:04:51<3:12:19, 23.03s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  25%|██▍       | 164/664 [1:05:08<2:58:13, 21.39s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  25%|██▍       | 165/664 [1:05:29<2:56:10, 21.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  25%|██▌       | 166/664 [1:05:53<3:03:05, 22.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  25%|██▌       | 167/664 [1:06:15<3:01:16, 21.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  25%|██▌       | 168/664 [1:06:39<3:06:11, 22.52s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  25%|██▌       | 169/664 [1:07:00<3:02:14, 22.09s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  26%|██▌       | 170/664 [1:07:22<3:01:11, 22.01s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  26%|██▌       | 171/664 [1:07:42<2:57:50, 21.64s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  26%|██▌       | 172/664 [1:08:06<3:01:51, 22.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  26%|██▌       | 173/664 [1:08:28<3:01:46, 22.21s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  26%|██▌       | 174/664 [1:08:57<3:16:50, 24.10s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  26%|██▋       | 175/664 [1:09:20<3:15:22, 23.97s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  27%|██▋       | 176/664 [1:09:46<3:19:04, 24.48s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  27%|██▋       | 177/664 [1:10:09<3:14:37, 23.98s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  27%|██▋       | 178/664 [1:10:32<3:12:52, 23.81s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  27%|██▋       | 179/664 [1:10:57<3:14:30, 24.06s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  27%|██▋       | 180/664 [1:11:08<2:44:05, 20.34s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  27%|██▋       | 181/664 [1:11:24<2:32:54, 19.00s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  27%|██▋       | 182/664 [1:11:48<2:43:34, 20.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  28%|██▊       | 183/664 [1:12:11<2:49:33, 21.15s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  28%|██▊       | 184/664 [1:12:33<2:52:45, 21.59s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  28%|██▊       | 185/664 [1:12:52<2:45:33, 20.74s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  28%|██▊       | 186/664 [1:13:14<2:48:25, 21.14s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  28%|██▊       | 187/664 [1:13:36<2:50:20, 21.43s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  28%|██▊       | 188/664 [1:13:58<2:51:26, 21.61s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  28%|██▊       | 189/664 [1:14:23<2:57:18, 22.40s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  29%|██▊       | 190/664 [1:14:46<2:58:53, 22.64s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  29%|██▉       | 191/664 [1:15:08<2:56:31, 22.39s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  29%|██▉       | 192/664 [1:15:31<2:57:41, 22.59s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  29%|██▉       | 193/664 [1:15:56<3:02:32, 23.25s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  29%|██▉       | 194/664 [1:16:20<3:04:48, 23.59s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  29%|██▉       | 195/664 [1:16:43<3:02:11, 23.31s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  30%|██▉       | 196/664 [1:17:00<2:47:48, 21.51s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  30%|██▉       | 197/664 [1:17:22<2:48:51, 21.70s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  30%|██▉       | 198/664 [1:17:45<2:52:00, 22.15s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  30%|██▉       | 199/664 [1:18:04<2:44:16, 21.20s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  30%|███       | 200/664 [1:18:25<2:43:49, 21.19s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  30%|███       | 201/664 [1:18:49<2:49:30, 21.97s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  30%|███       | 202/664 [1:19:12<2:52:03, 22.35s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  31%|███       | 203/664 [1:19:27<2:34:05, 20.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  31%|███       | 204/664 [1:19:49<2:38:39, 20.70s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  31%|███       | 205/664 [1:20:14<2:47:04, 21.84s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  31%|███       | 206/664 [1:20:36<2:47:31, 21.95s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  31%|███       | 207/664 [1:20:56<2:42:10, 21.29s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  31%|███▏      | 208/664 [1:21:11<2:28:26, 19.53s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  31%|███▏      | 209/664 [1:21:33<2:33:06, 20.19s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  32%|███▏      | 210/664 [1:21:56<2:39:43, 21.11s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  32%|███▏      | 211/664 [1:22:13<2:28:59, 19.73s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  32%|███▏      | 212/664 [1:22:34<2:33:22, 20.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  32%|███▏      | 213/664 [1:22:56<2:35:51, 20.74s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  32%|███▏      | 214/664 [1:23:13<2:26:10, 19.49s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  32%|███▏      | 215/664 [1:23:35<2:33:16, 20.48s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  33%|███▎      | 216/664 [1:23:52<2:23:31, 19.22s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  33%|███▎      | 217/664 [1:24:07<2:13:36, 17.93s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  33%|███▎      | 218/664 [1:24:32<2:30:04, 20.19s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  33%|███▎      | 219/664 [1:24:44<2:11:57, 17.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  33%|███▎      | 220/664 [1:25:08<2:25:16, 19.63s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  33%|███▎      | 221/664 [1:25:27<2:23:40, 19.46s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  33%|███▎      | 222/664 [1:25:43<2:14:26, 18.25s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  34%|███▎      | 223/664 [1:26:07<2:26:20, 19.91s/it]

Evaluation: {'cognitive_score': 2, 'structural_score': 2}
Average score: 2.0 Normalized reward: 0.16666666666666666


Epoch 1:  34%|███▎      | 224/664 [1:26:45<3:07:36, 25.58s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  34%|███▍      | 225/664 [1:27:07<2:58:53, 24.45s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  34%|███▍      | 226/664 [1:27:32<3:00:15, 24.69s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  34%|███▍      | 227/664 [1:27:53<2:50:26, 23.40s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  34%|███▍      | 228/664 [1:28:11<2:39:09, 21.90s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  34%|███▍      | 229/664 [1:28:33<2:37:40, 21.75s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  35%|███▍      | 230/664 [1:28:57<2:42:34, 22.47s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  35%|███▍      | 231/664 [1:29:19<2:41:20, 22.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  35%|███▍      | 232/664 [1:29:40<2:37:17, 21.85s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  35%|███▌      | 233/664 [1:30:00<2:33:23, 21.35s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  35%|███▌      | 234/664 [1:30:27<2:46:44, 23.27s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  35%|███▌      | 235/664 [1:30:49<2:43:23, 22.85s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  36%|███▌      | 236/664 [1:31:13<2:45:32, 23.21s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  36%|███▌      | 237/664 [1:31:37<2:45:23, 23.24s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  36%|███▌      | 238/664 [1:31:57<2:38:54, 22.38s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  36%|███▌      | 239/664 [1:32:22<2:43:34, 23.09s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  36%|███▌      | 240/664 [1:32:39<2:30:02, 21.23s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  36%|███▋      | 241/664 [1:32:58<2:24:38, 20.52s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  36%|███▋      | 242/664 [1:33:23<2:34:12, 21.92s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  37%|███▋      | 243/664 [1:33:49<2:43:26, 23.29s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  37%|███▋      | 244/664 [1:34:22<3:03:12, 26.17s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  37%|███▋      | 245/664 [1:34:50<3:06:35, 26.72s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  37%|███▋      | 246/664 [1:35:07<2:46:27, 23.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  37%|███▋      | 247/664 [1:35:34<2:51:01, 24.61s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  37%|███▋      | 248/664 [1:35:57<2:47:42, 24.19s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  38%|███▊      | 249/664 [1:36:26<2:56:44, 25.55s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  38%|███▊      | 250/664 [1:36:54<3:02:56, 26.51s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  38%|███▊      | 251/664 [1:37:28<3:17:57, 28.76s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  38%|███▊      | 252/664 [1:37:52<3:06:01, 27.09s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  38%|███▊      | 253/664 [1:38:20<3:08:16, 27.48s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  38%|███▊      | 254/664 [1:38:47<3:07:14, 27.40s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  38%|███▊      | 255/664 [1:39:11<2:59:19, 26.31s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  39%|███▊      | 256/664 [1:39:31<2:46:54, 24.55s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  39%|███▊      | 257/664 [1:40:09<3:13:53, 28.58s/it]

Evaluation: {'cognitive_score': 1, 'structural_score': 1}
Average score: 1.0 Normalized reward: 0.0


Epoch 1:  39%|███▉      | 258/664 [1:40:49<3:35:58, 31.92s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  39%|███▉      | 259/664 [1:41:20<3:33:26, 31.62s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  39%|███▉      | 260/664 [1:41:48<3:26:16, 30.64s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  39%|███▉      | 261/664 [1:42:14<3:15:20, 29.08s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  39%|███▉      | 262/664 [1:42:44<3:17:55, 29.54s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  40%|███▉      | 263/664 [1:43:01<2:52:03, 25.74s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  40%|███▉      | 264/664 [1:43:28<2:53:02, 25.96s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  40%|███▉      | 265/664 [1:44:06<3:17:21, 29.68s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  40%|████      | 266/664 [1:44:32<3:10:08, 28.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  40%|████      | 267/664 [1:44:55<2:57:41, 26.86s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  40%|████      | 268/664 [1:45:22<2:56:56, 26.81s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  41%|████      | 269/664 [1:45:50<2:59:31, 27.27s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  41%|████      | 270/664 [1:46:14<2:51:44, 26.15s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  41%|████      | 271/664 [1:46:40<2:51:00, 26.11s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  41%|████      | 272/664 [1:47:05<2:49:17, 25.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  41%|████      | 273/664 [1:47:35<2:57:28, 27.23s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  41%|████▏     | 274/664 [1:48:02<2:56:20, 27.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  41%|████▏     | 275/664 [1:48:32<3:00:18, 27.81s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  42%|████▏     | 276/664 [1:49:05<3:09:41, 29.33s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  42%|████▏     | 277/664 [1:49:32<3:06:07, 28.86s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  42%|████▏     | 278/664 [1:50:01<3:06:06, 28.93s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  42%|████▏     | 279/664 [1:50:22<2:49:12, 26.37s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 2}
Average score: 2.5 Normalized reward: 0.25


Epoch 1:  42%|████▏     | 280/664 [1:51:02<3:15:20, 30.52s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  42%|████▏     | 281/664 [1:51:29<3:08:41, 29.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  42%|████▏     | 282/664 [1:51:51<2:53:57, 27.32s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  43%|████▎     | 283/664 [1:52:16<2:47:22, 26.36s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  43%|████▎     | 284/664 [1:52:38<2:39:10, 25.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  43%|████▎     | 285/664 [1:53:00<2:32:55, 24.21s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  43%|████▎     | 286/664 [1:53:15<2:15:55, 21.57s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  43%|████▎     | 287/664 [1:53:38<2:17:09, 21.83s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  43%|████▎     | 288/664 [1:53:58<2:13:33, 21.31s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  44%|████▎     | 289/664 [1:54:18<2:11:40, 21.07s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  44%|████▎     | 290/664 [1:54:42<2:16:36, 21.92s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  44%|████▍     | 291/664 [1:55:04<2:16:21, 21.93s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  44%|████▍     | 292/664 [1:55:28<2:18:28, 22.34s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  44%|████▍     | 293/664 [1:55:41<2:00:48, 19.54s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  44%|████▍     | 294/664 [1:56:03<2:05:52, 20.41s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  44%|████▍     | 295/664 [1:56:27<2:11:42, 21.42s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  45%|████▍     | 296/664 [1:56:46<2:06:51, 20.68s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  45%|████▍     | 297/664 [1:57:02<1:58:03, 19.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  45%|████▍     | 298/664 [1:57:26<2:05:59, 20.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  45%|████▌     | 299/664 [1:57:48<2:08:18, 21.09s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  45%|████▌     | 300/664 [1:58:11<2:12:19, 21.81s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  45%|████▌     | 301/664 [1:58:28<2:03:23, 20.40s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  45%|████▌     | 302/664 [1:58:51<2:07:17, 21.10s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  46%|████▌     | 303/664 [1:59:14<2:10:50, 21.75s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  46%|████▌     | 304/664 [1:59:41<2:19:05, 23.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  46%|████▌     | 305/664 [2:00:05<2:20:58, 23.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  46%|████▌     | 306/664 [2:00:29<2:21:10, 23.66s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  46%|████▌     | 307/664 [2:00:49<2:14:23, 22.59s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  46%|████▋     | 308/664 [2:01:07<2:05:22, 21.13s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  47%|████▋     | 309/664 [2:01:30<2:08:27, 21.71s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  47%|████▋     | 310/664 [2:01:52<2:08:31, 21.78s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  47%|████▋     | 311/664 [2:02:13<2:07:04, 21.60s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  47%|████▋     | 312/664 [2:02:36<2:09:25, 22.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  47%|████▋     | 313/664 [2:02:58<2:08:58, 22.05s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  47%|████▋     | 314/664 [2:03:20<2:08:43, 22.07s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  47%|████▋     | 315/664 [2:03:42<2:08:13, 22.04s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  48%|████▊     | 316/664 [2:04:02<2:04:11, 21.41s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  48%|████▊     | 317/664 [2:04:25<2:05:31, 21.71s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  48%|████▊     | 318/664 [2:04:43<1:59:51, 20.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  48%|████▊     | 319/664 [2:05:02<1:55:50, 20.15s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  48%|████▊     | 320/664 [2:05:23<1:57:39, 20.52s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  48%|████▊     | 321/664 [2:05:40<1:51:08, 19.44s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  48%|████▊     | 322/664 [2:05:58<1:48:04, 18.96s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  49%|████▊     | 323/664 [2:06:11<1:37:22, 17.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  49%|████▉     | 324/664 [2:06:36<1:49:41, 19.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  49%|████▉     | 325/664 [2:06:55<1:49:58, 19.47s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  49%|████▉     | 326/664 [2:07:19<1:57:01, 20.77s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  49%|████▉     | 327/664 [2:07:41<1:58:49, 21.16s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  49%|████▉     | 328/664 [2:08:02<1:57:01, 20.90s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  50%|████▉     | 329/664 [2:08:24<1:59:26, 21.39s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  50%|████▉     | 330/664 [2:08:40<1:49:17, 19.63s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  50%|████▉     | 331/664 [2:09:01<1:51:29, 20.09s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  50%|█████     | 332/664 [2:09:17<1:44:21, 18.86s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  50%|█████     | 333/664 [2:09:38<1:48:23, 19.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  50%|█████     | 334/664 [2:09:59<1:50:17, 20.05s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  50%|█████     | 335/664 [2:10:22<1:54:59, 20.97s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  51%|█████     | 336/664 [2:10:42<1:52:53, 20.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  51%|█████     | 337/664 [2:11:04<1:53:58, 20.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  51%|█████     | 338/664 [2:11:28<1:58:54, 21.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  51%|█████     | 339/664 [2:11:51<2:00:46, 22.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  51%|█████     | 340/664 [2:12:13<1:59:45, 22.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  51%|█████▏    | 341/664 [2:12:37<2:02:57, 22.84s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  52%|█████▏    | 342/664 [2:13:01<2:03:01, 22.92s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  52%|█████▏    | 343/664 [2:13:24<2:02:49, 22.96s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  52%|█████▏    | 344/664 [2:13:50<2:08:00, 24.00s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  52%|█████▏    | 345/664 [2:14:15<2:09:30, 24.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  52%|█████▏    | 346/664 [2:14:33<1:58:10, 22.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  52%|█████▏    | 347/664 [2:14:46<1:44:12, 19.72s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  52%|█████▏    | 348/664 [2:14:59<1:33:15, 17.71s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  53%|█████▎    | 349/664 [2:15:15<1:29:58, 17.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  53%|█████▎    | 350/664 [2:15:36<1:35:40, 18.28s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  53%|█████▎    | 351/664 [2:15:59<1:41:56, 19.54s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  53%|█████▎    | 352/664 [2:16:21<1:46:13, 20.43s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  53%|█████▎    | 353/664 [2:16:36<1:37:31, 18.81s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  53%|█████▎    | 354/664 [2:16:50<1:29:09, 17.26s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  53%|█████▎    | 355/664 [2:17:09<1:31:04, 17.68s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  54%|█████▎    | 356/664 [2:17:30<1:36:58, 18.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  54%|█████▍    | 357/664 [2:17:53<1:42:37, 20.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  54%|█████▍    | 358/664 [2:18:12<1:41:23, 19.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  54%|█████▍    | 359/664 [2:18:40<1:52:45, 22.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  54%|█████▍    | 360/664 [2:19:05<1:56:15, 22.94s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  54%|█████▍    | 361/664 [2:19:23<1:48:44, 21.53s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  55%|█████▍    | 362/664 [2:19:45<1:49:02, 21.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  55%|█████▍    | 363/664 [2:20:12<1:57:23, 23.40s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  55%|█████▍    | 364/664 [2:20:34<1:53:50, 22.77s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  55%|█████▍    | 365/664 [2:20:54<1:49:59, 22.07s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  55%|█████▌    | 366/664 [2:21:16<1:49:06, 21.97s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  55%|█████▌    | 367/664 [2:21:34<1:42:20, 20.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  55%|█████▌    | 368/664 [2:21:57<1:45:58, 21.48s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  56%|█████▌    | 369/664 [2:22:20<1:47:46, 21.92s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  56%|█████▌    | 370/664 [2:22:40<1:44:35, 21.34s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  56%|█████▌    | 371/664 [2:23:03<1:46:33, 21.82s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  56%|█████▌    | 372/664 [2:23:25<1:47:04, 22.00s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  56%|█████▌    | 373/664 [2:23:50<1:51:26, 22.98s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  56%|█████▋    | 374/664 [2:24:12<1:48:45, 22.50s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  56%|█████▋    | 375/664 [2:24:34<1:47:08, 22.25s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  57%|█████▋    | 376/664 [2:24:53<1:43:15, 21.51s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  57%|█████▋    | 377/664 [2:25:15<1:43:44, 21.69s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  57%|█████▋    | 378/664 [2:25:39<1:45:56, 22.23s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  57%|█████▋    | 379/664 [2:26:01<1:45:52, 22.29s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  57%|█████▋    | 380/664 [2:26:21<1:41:33, 21.46s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  57%|█████▋    | 381/664 [2:26:43<1:42:17, 21.69s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 3}
Average score: 3.0 Normalized reward: 0.3333333333333333


Epoch 1:  58%|█████▊    | 382/664 [2:27:01<1:36:42, 20.58s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  58%|█████▊    | 383/664 [2:27:14<1:25:17, 18.21s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  58%|█████▊    | 384/664 [2:27:37<1:31:51, 19.68s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  58%|█████▊    | 385/664 [2:27:58<1:33:56, 20.20s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  58%|█████▊    | 386/664 [2:28:21<1:37:36, 21.07s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  58%|█████▊    | 387/664 [2:28:43<1:38:31, 21.34s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  58%|█████▊    | 388/664 [2:29:03<1:36:21, 20.95s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  59%|█████▊    | 389/664 [2:29:29<1:42:21, 22.33s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  59%|█████▊    | 390/664 [2:29:55<1:46:28, 23.32s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  59%|█████▉    | 391/664 [2:30:17<1:45:15, 23.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  59%|█████▉    | 392/664 [2:30:32<1:33:40, 20.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  59%|█████▉    | 393/664 [2:30:54<1:35:03, 21.04s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  59%|█████▉    | 394/664 [2:31:15<1:33:58, 20.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  59%|█████▉    | 395/664 [2:31:33<1:30:33, 20.20s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  60%|█████▉    | 396/664 [2:31:54<1:30:40, 20.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  60%|█████▉    | 397/664 [2:32:17<1:33:57, 21.12s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  60%|█████▉    | 398/664 [2:32:41<1:37:44, 22.05s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  60%|██████    | 399/664 [2:33:04<1:38:59, 22.41s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  60%|██████    | 400/664 [2:33:16<1:24:22, 19.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  60%|██████    | 401/664 [2:33:38<1:27:54, 20.05s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  61%|██████    | 402/664 [2:33:53<1:21:27, 18.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  61%|██████    | 403/664 [2:34:19<1:30:02, 20.70s/it]

Evaluation: {'cognitive_score': 2, 'structural_score': 2}
Average score: 2.0 Normalized reward: 0.16666666666666666


Epoch 1:  61%|██████    | 404/664 [2:34:41<1:31:47, 21.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  61%|██████    | 405/664 [2:35:05<1:34:53, 21.98s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  61%|██████    | 406/664 [2:35:25<1:31:20, 21.24s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  61%|██████▏   | 407/664 [2:35:56<1:44:32, 24.41s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  61%|██████▏   | 408/664 [2:36:17<1:39:50, 23.40s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  62%|██████▏   | 409/664 [2:36:49<1:50:30, 26.00s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  62%|██████▏   | 410/664 [2:37:23<1:59:16, 28.17s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  62%|██████▏   | 411/664 [2:37:44<1:49:53, 26.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  62%|██████▏   | 412/664 [2:38:06<1:44:56, 24.98s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  62%|██████▏   | 413/664 [2:38:21<1:31:14, 21.81s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  62%|██████▏   | 414/664 [2:38:46<1:35:36, 22.94s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  62%|██████▎   | 415/664 [2:39:10<1:36:40, 23.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  63%|██████▎   | 416/664 [2:39:38<1:41:14, 24.50s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  63%|██████▎   | 417/664 [2:40:00<1:38:19, 23.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  63%|██████▎   | 418/664 [2:40:23<1:37:04, 23.68s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  63%|██████▎   | 419/664 [2:40:46<1:35:20, 23.35s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  63%|██████▎   | 420/664 [2:41:11<1:37:09, 23.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  63%|██████▎   | 421/664 [2:41:35<1:36:31, 23.83s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  64%|██████▎   | 422/664 [2:42:05<1:43:41, 25.71s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  64%|██████▎   | 423/664 [2:42:29<1:41:14, 25.21s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  64%|██████▍   | 424/664 [2:43:08<1:57:27, 29.37s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  64%|██████▍   | 425/664 [2:43:32<1:50:06, 27.64s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  64%|██████▍   | 426/664 [2:44:00<1:51:03, 28.00s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  64%|██████▍   | 427/664 [2:44:20<1:41:10, 25.61s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  64%|██████▍   | 428/664 [2:44:56<1:52:38, 28.64s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  65%|██████▍   | 429/664 [2:45:22<1:49:27, 27.95s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  65%|██████▍   | 430/664 [2:45:51<1:49:29, 28.08s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  65%|██████▍   | 431/664 [2:46:22<1:52:11, 28.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  65%|██████▌   | 432/664 [2:46:52<1:53:23, 29.33s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  65%|██████▌   | 433/664 [2:47:19<1:50:47, 28.78s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  65%|██████▌   | 434/664 [2:47:46<1:47:50, 28.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  66%|██████▌   | 435/664 [2:48:17<1:50:20, 28.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  66%|██████▌   | 436/664 [2:48:36<1:38:17, 25.87s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  66%|██████▌   | 437/664 [2:49:00<1:36:30, 25.51s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  66%|██████▌   | 438/664 [2:49:27<1:36:53, 25.72s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  66%|██████▌   | 439/664 [2:49:50<1:33:25, 24.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  66%|██████▋   | 440/664 [2:50:24<1:44:11, 27.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  66%|██████▋   | 441/664 [2:51:00<1:52:18, 30.22s/it]

Evaluation: {'cognitive_score': 2, 'structural_score': 3}
Average score: 2.5 Normalized reward: 0.25


Epoch 1:  67%|██████▋   | 442/664 [2:51:39<2:00:58, 32.70s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  67%|██████▋   | 443/664 [2:52:01<1:49:30, 29.73s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  67%|██████▋   | 444/664 [2:52:23<1:40:40, 27.46s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  67%|██████▋   | 445/664 [2:52:51<1:40:19, 27.49s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  67%|██████▋   | 446/664 [2:53:20<1:41:52, 28.04s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  67%|██████▋   | 447/664 [2:53:53<1:46:17, 29.39s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  67%|██████▋   | 448/664 [2:54:23<1:46:34, 29.60s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  68%|██████▊   | 449/664 [2:54:45<1:38:02, 27.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  68%|██████▊   | 450/664 [2:55:12<1:37:34, 27.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  68%|██████▊   | 451/664 [2:55:40<1:37:48, 27.55s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  68%|██████▊   | 452/664 [2:56:00<1:28:50, 25.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  68%|██████▊   | 453/664 [2:56:24<1:27:15, 24.81s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  68%|██████▊   | 454/664 [2:56:51<1:29:24, 25.54s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  69%|██████▊   | 455/664 [2:57:17<1:29:21, 25.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  69%|██████▊   | 456/664 [2:57:35<1:20:41, 23.28s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  69%|██████▉   | 457/664 [2:57:59<1:21:05, 23.50s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  69%|██████▉   | 458/664 [2:58:24<1:22:29, 24.03s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  69%|██████▉   | 459/664 [2:58:55<1:29:09, 26.09s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  69%|██████▉   | 460/664 [2:59:22<1:29:01, 26.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  69%|██████▉   | 461/664 [2:59:49<1:29:49, 26.55s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 4}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  70%|██████▉   | 462/664 [3:00:24<1:38:16, 29.19s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  70%|██████▉   | 463/664 [3:01:00<1:44:27, 31.18s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  70%|██████▉   | 464/664 [3:01:25<1:37:12, 29.16s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  70%|███████   | 465/664 [3:01:38<1:21:12, 24.48s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  70%|███████   | 466/664 [3:01:59<1:17:21, 23.44s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  70%|███████   | 467/664 [3:02:17<1:11:05, 21.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  70%|███████   | 468/664 [3:02:38<1:10:34, 21.61s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  71%|███████   | 469/664 [3:03:00<1:10:23, 21.66s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  71%|███████   | 470/664 [3:03:22<1:10:26, 21.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  71%|███████   | 471/664 [3:03:40<1:06:04, 20.54s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  71%|███████   | 472/664 [3:03:55<1:01:05, 19.09s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  71%|███████   | 473/664 [3:04:13<59:42, 18.76s/it]  

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  71%|███████▏  | 474/664 [3:04:37<1:04:07, 20.25s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  72%|███████▏  | 475/664 [3:04:56<1:02:53, 19.97s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  72%|███████▏  | 476/664 [3:05:12<58:32, 18.68s/it]  

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  72%|███████▏  | 477/664 [3:05:31<58:41, 18.83s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  72%|███████▏  | 478/664 [3:05:54<1:01:56, 19.98s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  72%|███████▏  | 479/664 [3:06:17<1:04:26, 20.90s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  72%|███████▏  | 480/664 [3:06:36<1:02:35, 20.41s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  72%|███████▏  | 481/664 [3:06:55<1:00:28, 19.83s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  73%|███████▎  | 482/664 [3:07:18<1:03:17, 20.86s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  73%|███████▎  | 483/664 [3:07:35<59:42, 19.79s/it]  

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  73%|███████▎  | 484/664 [3:07:58<1:01:57, 20.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  73%|███████▎  | 485/664 [3:08:20<1:03:16, 21.21s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  73%|███████▎  | 486/664 [3:08:40<1:01:40, 20.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  73%|███████▎  | 487/664 [3:08:56<56:39, 19.21s/it]  

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  73%|███████▎  | 488/664 [3:09:11<52:39, 17.95s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  74%|███████▎  | 489/664 [3:09:27<51:13, 17.57s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  74%|███████▍  | 490/664 [3:09:48<53:40, 18.51s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  74%|███████▍  | 491/664 [3:10:10<56:26, 19.58s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  74%|███████▍  | 492/664 [3:10:31<57:13, 19.96s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  74%|███████▍  | 493/664 [3:10:54<59:08, 20.75s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  74%|███████▍  | 494/664 [3:11:09<53:47, 18.98s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  75%|███████▍  | 495/664 [3:11:24<50:25, 17.90s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  75%|███████▍  | 496/664 [3:11:47<54:45, 19.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  75%|███████▍  | 497/664 [3:12:03<51:05, 18.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  75%|███████▌  | 498/664 [3:12:27<55:22, 20.01s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  75%|███████▌  | 499/664 [3:12:52<59:29, 21.63s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  75%|███████▌  | 500/664 [3:13:07<53:38, 19.62s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  75%|███████▌  | 501/664 [3:13:29<55:08, 20.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  76%|███████▌  | 502/664 [3:13:52<56:41, 21.00s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  76%|███████▌  | 503/664 [3:14:07<51:40, 19.26s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  76%|███████▌  | 504/664 [3:14:28<53:02, 19.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  76%|███████▌  | 505/664 [3:14:49<53:43, 20.27s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  76%|███████▌  | 506/664 [3:15:10<53:33, 20.34s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  76%|███████▋  | 507/664 [3:15:32<54:38, 20.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  77%|███████▋  | 508/664 [3:15:52<53:19, 20.51s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  77%|███████▋  | 509/664 [3:16:10<51:39, 20.00s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  77%|███████▋  | 510/664 [3:16:27<48:54, 19.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  77%|███████▋  | 511/664 [3:16:43<46:10, 18.11s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  77%|███████▋  | 512/664 [3:17:06<49:29, 19.54s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  77%|███████▋  | 513/664 [3:17:28<50:44, 20.16s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  77%|███████▋  | 514/664 [3:17:49<51:05, 20.44s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  78%|███████▊  | 515/664 [3:18:10<51:02, 20.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  78%|███████▊  | 516/664 [3:18:31<51:06, 20.72s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  78%|███████▊  | 517/664 [3:18:42<44:06, 18.00s/it]

Evaluation: {'cognitive_score': 6, 'structural_score': 5}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  78%|███████▊  | 518/664 [3:18:53<38:34, 15.85s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  78%|███████▊  | 519/664 [3:19:12<40:25, 16.73s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 2}
Average score: 2.5 Normalized reward: 0.25


Epoch 1:  78%|███████▊  | 520/664 [3:19:22<35:23, 14.75s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  78%|███████▊  | 521/664 [3:19:38<35:55, 15.07s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  79%|███████▊  | 522/664 [3:19:55<37:12, 15.72s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  79%|███████▉  | 523/664 [3:20:17<41:04, 17.48s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  79%|███████▉  | 524/664 [3:20:40<44:48, 19.20s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  79%|███████▉  | 525/664 [3:20:54<40:55, 17.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  79%|███████▉  | 526/664 [3:21:17<44:01, 19.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  79%|███████▉  | 527/664 [3:21:34<42:40, 18.69s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  80%|███████▉  | 528/664 [3:21:56<44:40, 19.71s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  80%|███████▉  | 529/664 [3:22:10<40:15, 17.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  80%|███████▉  | 530/664 [3:22:31<42:11, 18.89s/it]

Evaluation: {'cognitive_score': 3, 'structural_score': 2}
Average score: 2.5 Normalized reward: 0.25


Epoch 1:  80%|███████▉  | 531/664 [3:22:54<44:43, 20.18s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  80%|████████  | 532/664 [3:23:17<45:41, 20.77s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  80%|████████  | 533/664 [3:23:27<38:20, 17.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  80%|████████  | 534/664 [3:23:40<35:34, 16.42s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  81%|████████  | 535/664 [3:23:57<35:28, 16.50s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  81%|████████  | 536/664 [3:24:18<38:11, 17.90s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  81%|████████  | 537/664 [3:24:39<39:49, 18.82s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  81%|████████  | 538/664 [3:24:53<36:11, 17.23s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  81%|████████  | 539/664 [3:25:16<39:46, 19.10s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  81%|████████▏ | 540/664 [3:25:42<43:23, 20.99s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  81%|████████▏ | 541/664 [3:26:03<43:23, 21.17s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  82%|████████▏ | 542/664 [3:26:17<38:51, 19.11s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  82%|████████▏ | 543/664 [3:26:37<39:03, 19.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  82%|████████▏ | 544/664 [3:27:03<42:44, 21.37s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  82%|████████▏ | 545/664 [3:27:27<43:53, 22.13s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  82%|████████▏ | 546/664 [3:27:42<38:50, 19.75s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  82%|████████▏ | 547/664 [3:28:03<39:42, 20.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  83%|████████▎ | 548/664 [3:28:16<34:42, 17.95s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  83%|████████▎ | 549/664 [3:28:31<32:37, 17.02s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  83%|████████▎ | 550/664 [3:28:54<36:11, 19.04s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  83%|████████▎ | 551/664 [3:29:12<35:20, 18.76s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  83%|████████▎ | 552/664 [3:29:33<35:55, 19.25s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  83%|████████▎ | 553/664 [3:29:48<33:22, 18.04s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  83%|████████▎ | 554/664 [3:30:09<34:54, 19.05s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  84%|████████▎ | 555/664 [3:30:31<35:44, 19.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  84%|████████▎ | 556/664 [3:30:53<36:40, 20.38s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  84%|████████▍ | 557/664 [3:31:12<36:03, 20.22s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  84%|████████▍ | 558/664 [3:31:34<36:19, 20.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  84%|████████▍ | 559/664 [3:31:55<36:07, 20.65s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  84%|████████▍ | 560/664 [3:32:12<34:11, 19.73s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  84%|████████▍ | 561/664 [3:32:31<33:19, 19.41s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  85%|████████▍ | 562/664 [3:32:59<37:33, 22.09s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  85%|████████▍ | 563/664 [3:33:22<37:35, 22.33s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  85%|████████▍ | 564/664 [3:33:35<32:37, 19.58s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  85%|████████▌ | 565/664 [3:33:53<31:36, 19.15s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  85%|████████▌ | 566/664 [3:34:05<27:25, 16.79s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  85%|████████▌ | 567/664 [3:34:28<30:19, 18.76s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  86%|████████▌ | 568/664 [3:34:49<31:13, 19.51s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  86%|████████▌ | 569/664 [3:35:10<31:27, 19.87s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  86%|████████▌ | 570/664 [3:35:31<31:38, 20.20s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  86%|████████▌ | 571/664 [3:35:49<30:24, 19.62s/it]

Evaluation: {'cognitive_score': 6, 'structural_score': 5}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  86%|████████▌ | 572/664 [3:36:11<30:57, 20.19s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  86%|████████▋ | 573/664 [3:36:26<28:26, 18.75s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  86%|████████▋ | 574/664 [3:36:44<27:29, 18.33s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  87%|████████▋ | 575/664 [3:37:06<29:10, 19.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  87%|████████▋ | 576/664 [3:37:28<29:32, 20.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  87%|████████▋ | 577/664 [3:37:50<30:21, 20.93s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  87%|████████▋ | 578/664 [3:38:12<30:13, 21.08s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  87%|████████▋ | 579/664 [3:38:34<30:31, 21.55s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  87%|████████▋ | 580/664 [3:38:49<27:15, 19.47s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  88%|████████▊ | 581/664 [3:39:08<26:47, 19.37s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  88%|████████▊ | 582/664 [3:39:28<26:41, 19.53s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  88%|████████▊ | 583/664 [3:39:52<28:18, 20.98s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  88%|████████▊ | 584/664 [3:40:12<27:15, 20.45s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  88%|████████▊ | 585/664 [3:40:27<24:46, 18.81s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  88%|████████▊ | 586/664 [3:40:44<23:52, 18.37s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  88%|████████▊ | 587/664 [3:40:59<22:16, 17.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  89%|████████▊ | 588/664 [3:41:13<20:37, 16.28s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  89%|████████▊ | 589/664 [3:41:30<20:50, 16.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  89%|████████▉ | 590/664 [3:41:53<22:39, 18.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  89%|████████▉ | 591/664 [3:42:14<23:30, 19.32s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  89%|████████▉ | 592/664 [3:42:28<21:01, 17.52s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  89%|████████▉ | 593/664 [3:42:49<21:58, 18.57s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  89%|████████▉ | 594/664 [3:43:04<20:36, 17.66s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  90%|████████▉ | 595/664 [3:43:27<22:13, 19.32s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  90%|████████▉ | 596/664 [3:43:44<21:09, 18.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  90%|████████▉ | 597/664 [3:44:01<20:14, 18.13s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  90%|█████████ | 598/664 [3:44:25<21:45, 19.78s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  90%|█████████ | 599/664 [3:44:48<22:30, 20.77s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  90%|█████████ | 600/664 [3:45:00<19:22, 18.16s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  91%|█████████ | 601/664 [3:45:18<19:06, 18.19s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  91%|█████████ | 602/664 [3:45:40<19:58, 19.34s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  91%|█████████ | 603/664 [3:45:54<17:50, 17.55s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  91%|█████████ | 604/664 [3:46:17<19:10, 19.17s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  91%|█████████ | 605/664 [3:46:40<20:07, 20.47s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  91%|█████████▏| 606/664 [3:47:01<20:02, 20.73s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  91%|█████████▏| 607/664 [3:47:25<20:31, 21.60s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  92%|█████████▏| 608/664 [3:47:43<19:10, 20.54s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  92%|█████████▏| 609/664 [3:48:05<19:18, 21.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  92%|█████████▏| 610/664 [3:48:28<19:14, 21.39s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  92%|█████████▏| 611/664 [3:48:46<17:58, 20.35s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  92%|█████████▏| 612/664 [3:49:04<17:03, 19.67s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  92%|█████████▏| 613/664 [3:49:22<16:26, 19.35s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  92%|█████████▏| 614/664 [3:49:43<16:34, 19.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  93%|█████████▎| 615/664 [3:50:03<16:13, 19.87s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  93%|█████████▎| 616/664 [3:50:26<16:30, 20.63s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  93%|█████████▎| 617/664 [3:50:47<16:21, 20.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  93%|█████████▎| 618/664 [3:51:01<14:27, 18.86s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  93%|█████████▎| 619/664 [3:51:21<14:22, 19.17s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  93%|█████████▎| 620/664 [3:51:45<15:08, 20.66s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  94%|█████████▎| 621/664 [3:52:01<13:47, 19.24s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  94%|█████████▎| 622/664 [3:52:17<12:50, 18.35s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  94%|█████████▍| 623/664 [3:52:33<11:54, 17.43s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  94%|█████████▍| 624/664 [3:52:53<12:12, 18.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  94%|█████████▍| 625/664 [3:53:06<10:52, 16.73s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  94%|█████████▍| 626/664 [3:53:22<10:25, 16.46s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  94%|█████████▍| 627/664 [3:53:45<11:23, 18.49s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  95%|█████████▍| 628/664 [3:54:09<12:02, 20.08s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  95%|█████████▍| 629/664 [3:54:23<10:42, 18.36s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  95%|█████████▍| 630/664 [3:54:47<11:19, 19.98s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  95%|█████████▌| 631/664 [3:55:09<11:20, 20.63s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  95%|█████████▌| 632/664 [3:55:25<10:09, 19.04s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  95%|█████████▌| 633/664 [3:55:40<09:16, 17.97s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  95%|█████████▌| 634/664 [3:56:02<09:35, 19.17s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  96%|█████████▌| 635/664 [3:56:24<09:37, 19.90s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  96%|█████████▌| 636/664 [3:56:44<09:21, 20.04s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  96%|█████████▌| 637/664 [3:57:07<09:22, 20.82s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  96%|█████████▌| 638/664 [3:57:25<08:42, 20.09s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  96%|█████████▌| 639/664 [3:57:42<08:00, 19.22s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  96%|█████████▋| 640/664 [3:58:04<07:57, 19.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  97%|█████████▋| 641/664 [3:58:28<08:06, 21.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  97%|█████████▋| 642/664 [3:58:51<08:01, 21.88s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  97%|█████████▋| 643/664 [3:59:07<07:00, 20.03s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  97%|█████████▋| 644/664 [3:59:23<06:18, 18.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  97%|█████████▋| 645/664 [3:59:40<05:45, 18.17s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  97%|█████████▋| 646/664 [3:59:57<05:24, 18.04s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  97%|█████████▋| 647/664 [4:00:18<05:21, 18.90s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  98%|█████████▊| 648/664 [4:00:39<05:12, 19.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  98%|█████████▊| 649/664 [4:01:08<05:32, 22.14s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  98%|█████████▊| 650/664 [4:01:25<04:47, 20.57s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 3}
Average score: 3.5 Normalized reward: 0.4166666666666667


Epoch 1:  98%|█████████▊| 651/664 [4:01:35<03:46, 17.43s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  98%|█████████▊| 652/664 [4:01:48<03:12, 16.06s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  98%|█████████▊| 653/664 [4:02:10<03:18, 18.08s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  98%|█████████▊| 654/664 [4:02:32<03:12, 19.30s/it]

Evaluation: {'cognitive_score': 4, 'structural_score': 5}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  99%|█████████▊| 655/664 [4:02:47<02:41, 17.89s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  99%|█████████▉| 656/664 [4:03:08<02:31, 18.93s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  99%|█████████▉| 657/664 [4:03:27<02:11, 18.74s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  99%|█████████▉| 658/664 [4:03:48<01:57, 19.56s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1:  99%|█████████▉| 659/664 [4:04:09<01:39, 19.91s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1:  99%|█████████▉| 660/664 [4:04:23<01:13, 18.30s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1: 100%|█████████▉| 661/664 [4:04:38<00:51, 17.28s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 4}
Average score: 4.5 Normalized reward: 0.5833333333333334


Epoch 1: 100%|█████████▉| 662/664 [4:05:00<00:36, 18.45s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1: 100%|█████████▉| 663/664 [4:05:13<00:17, 17.01s/it]

Evaluation: {'cognitive_score': 5, 'structural_score': 6}
Average score: 5.5 Normalized reward: 0.75


Epoch 1: 100%|██████████| 664/664 [4:05:27<00:00, 22.18s/it]

PPO training complete.





In [18]:
# Persist the PPO-trained policy and its tokenizer side by side so that the
# inference section can reload both from a single local directory.
output_model_dir = "./ppo_trained_model"
os.makedirs(output_model_dir, exist_ok=True)

# Model first, tokenizer second; both expose save_pretrained(directory).
for saveable in (ppo_model, tokenizer):
    saveable.save_pretrained(output_model_dir)

print(f"Trained model saved to {output_model_dir}")


Trained model saved to ./ppo_trained_model


## Inference

In [None]:
import os
import torch
from transformers import AutoTokenizer, GenerationConfig
from trl import AutoModelForCausalLMWithValueHead

# Directory where the PPO-trained model and tokenizer were saved by the
# training section of this notebook.
model_dir = "./ppo_trained_model"
scratch_dir = "../models"
device = torch.device("cuda:0")

# Fail fast with a clear message when the local checkpoint directory is missing
# (a plain file at that path is rejected as well).
# NOTE(review): this check does not by itself disable remote downloads; if
# strictly offline loading is required, consider `local_files_only=True` and
# confirm that the value-head wrapper forwards it to the underlying loader.
if not os.path.isdir(model_dir):
    raise FileNotFoundError(f"The directory '{model_dir}' does not exist. "
                            "Please ensure your model is saved locally.")

# Load the tokenizer from the local directory.
tokenizer = AutoTokenizer.from_pretrained(model_dir, cache_dir=scratch_dir)

# Load the model with its value head from the local directory.
model = AutoModelForCausalLMWithValueHead.from_pretrained(model_dir, cache_dir=scratch_dir)

# This section only runs inference: switch every submodule to eval mode so that
# dropout layers (e.g. the LoRA dropout visible in the printed module tree) are
# guaranteed to be inactive during generation.
model.eval()

# Last expression of the cell: moves the model to the GPU and displays its repr.
model.to(device)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

AutoModelForCausalLMWithValueHead(
  (pretrained_model): PeftModelForCausalLM(
    (base_model): LoraModel(
      (model): MistralForCausalLM(
        (model): MistralModel(
          (embed_tokens): Embedding(32768, 4096)
          (layers): ModuleList(
            (0-31): 32 x MistralDecoderLayer(
              (self_attn): MistralSdpaAttention(
                (q_proj): lora.Linear(
                  (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=4096, out_features=8, bias=False)
                  )
                  (lora_B): ModuleDict(
                    (default): Linear(in_features=8, out_features=4096, bias=False)
                  )
                  (lora_embedding_A): ParameterDict()
                  (lora_embedding_B): ParameterDict()


In [2]:
# Demo input document for the single-prompt inference cell below: introduction
# text of the "LogoMotion" paper. The text looks like a raw PDF extraction and
# still contains artifacts (split words, fused words, an inlined footnote and a
# running page header); it is passed to the model exactly as written here.
combined_text="""
Motion suggests life, and as such, motion is a dimension we add to our
designs to make them more dynamic and engaging. Animation is a special
type of design form which we have created to help us take static designs
into more media-rich and interactive contexts. Aspecific type of animated 
content that we frequently create is the animated logo. 

Animation allows logos, which have been defined as the “visual figureheads" 
of brands [25], to better integrate within videos, livestreams, websites,
and social media.

A well-executed animation can quickly engage an audience, introduce the 
brand or individual online, and elevate content to have more visual interest.

Authoring an animated logo is challenging. Logos are often more than just a 
pairing of icon with text. Because they can have different layouts, layers,
color, and typography, they can take on great variety and be complex artifacts
to animate.

For a novice designer, it can be difficult to understand which design elements 
should be animated, in what sequence, and how to build up compelling and
believable motion. There are many facets of motion to consider such as speed,
timing, positioning, duration, easing, and motion personality
(e.g. a playful bounce vs. a strong entrance). 

Additionally, when logos have more design elements, designers also have to
understand how groups of elements can synchronize to coordinate motion and 
orchestrate a visual flow. While there is a great demand for animated content, 
it is diffi cult for people outside of motion design to develop this kind of
expertise. 

Design tools such as Adobe Express, Canva, and Figma often provide 
solutions in the form of animated templates and au tomatic animation 
techniques [10, 12, 13]. Templates pre-populate logo layouts with animations
that users can customize. They il lustrate how users can apply motion presets
(e.g. slide, flicker, or fade) onto logo elements to create professional-looking
animations. However, templates do not always adapt to every use case. 

When users make edits (e.g. add/remove/replace elements) to customize logo
templates, they can easily break the seamless and professional look the
templates were originally packaged with. An alternative to templates are
automatic animation techniques, which globally apply rules and heuristics to
animate canvases [12]. 

For example, all elements on a page can be directed to slide in from one side 
or 1Video: https://youtu.be/Jo9opkMH7iY 2Project P
age: https://vivian-liu.com/#/logomotion sequentially fade into place.
While templates and automatic tech niques can get users to a starting point fast,
neither solution works with a recognition of the user’s content,
which is something that can be enabled by emerging technologies.
Large language models (LLMs) present the potential for content aware animation.

They can generate animation code that is specific to the design elements and their 
layout on the canvas. Code is a text representation that is often used to drive animation [18, 33, 53], because it can concisely specify how elements interact over time and space on a canvas. Because LLMs encode a vast amount of world knowledge, they can draw upon actions and activities related to the content being animated and generate a near infinite number of animations. This open-ended generative capacity can go beyond the scope of what templates, presets, and rule-based techniques usually cover. Recent advancements have made LLMs more multimodal, such that they can take in both text and image as inputs, and provide visually-grounded responses. This make LLMs more applicable in domains like animation where a visual understanding of the canvas matters. It opens upthepotentialforuserstoprovideimagesoftheir layout to an LLM and receive animations tailored to their layout and design elements. For example, if a novice designer wanted to animate a taxi, they could use an LLM to generate code to drive a taxi onto the canvas. This code could translate the taxi object along the x-axis before easing it into the center of the canvas to imply a stop-and-go motion befitting of taxis. In this paper, we present LogoMotion, an LLM-based method that automatically animates static layouts in a content-aware way. LogoMotion generates code in a two-stage approach involving visually-grounded program synthesis and program repair. The first stage introduces multimodal LLM operators that take in visual context andhandlethe1)constructionofatextrepresentationofthe canvas, 2) conceptual grouping of elements, and 3) implementation of animation code. The second stage of our approach introduces a technique for visually-grounded program repair, which helps LLMs check what they have generated against the original layout and debug differences in a targeted layer-wise fashion. 
Our contributions are as follows: • LogoMotion, an LLM system that uses visually-grounded code generation to automatically generate logo animations from a PDF. The system identifies the visual content in each layer, infers the primary and secondary elements, and cre ates groups of elements. Based on this, the system suggests a design concept (in text) and uses the LLM to generate ani mation code. Users can optionally improve the animation by editing or adding their own design concept. • Visually-grounded program repair, a mechanism that lets the LLM automatically detect and debug visual errors within its generated animation code, creating a feedback loop between LLM-generated code and its visual outputs. • Atechnical evaluation of 276 animations showing that com pared to Canva Magic Animate and an ablated version of the system (without stages for hierarchy analysis and de sign concept suggestions), the full pipeline of LogoMotion produces animations that are more content-aware. LogoMotion: Visually Grounded Code Generation for Content-Aware Animation Woodstock ’18, June 03-05, 2018, Woodstock, NY • Aqualitative evaluation of novice users showing that Logo Motion is able to quickly achieve their desired animation with minimal reprompting.
"""
# Build the demo prompt as one string: the fixed instruction paragraph, the
# document under an "Input:" label, and a trailing "Output:" cue that the model
# is expected to continue. Only the "Input:" piece is an f-string, so the braces
# inside the instruction text are literal characters.
prompt_text = "".join((
    "Instruction: Generate a mind map in Mermaid syntax for the following textbook text. ",
    "The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. ",
    "Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics ",
    "as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. ",
    "Also, do NOT include any theme directives such as %%{init: {\"theme\": \"default\"}}%%, any parentheses or any extraneous formatting. strictly no ( or ) ",
    "The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure ",
    "of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.\n\n",
    f"Input: {combined_text}\n",
    "Output:",
))


In [None]:

# Load the generation configuration saved next to the model, if there is one.
# `gen_config` stays None when no generation_config.json exists, in which case
# generate() falls back to the model's own default settings.
try:
    gen_config = GenerationConfig.from_pretrained(model_dir)
    model.generation_config = gen_config
except OSError:
    gen_config = None
    print("No generation_config.json found; using default generation settings.")

# Tokenize the prompt and move inputs to the device.
# A single prompt needs neither padding nor truncation: `truncation=True`
# without `max_length` truncates nothing and only emits a warning.
inputs = tokenizer(prompt_text, return_tensors="pt")
inputs = {key: value.to(device) for key, value in inputs.items()}

# Generate output from the model.
# The loaded config is also passed explicitly: `model` is a value-head wrapper,
# and an attribute set on the wrapper is presumably not consulted by the wrapped
# model's generate() (verify against the installed TRL version). Keyword
# arguments below still override individual fields of the config.
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        generation_config=gen_config,
        max_new_tokens=300,   # adjust as needed; longer mind maps get cut off at this limit
        do_sample=True,       # sample-based generation for diversity (not seeded here)
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

# The generated output includes both the prompt tokens and new tokens. Exclude the prompt.
prompt_len = inputs["input_ids"].shape[1]
generated_ids = output_ids[:, prompt_len:]

# Decode the generated tokens to text.
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

print("Generated Mind Map:")
print(generated_text)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


No generation_config.json found; using default generation settings.
Generated Mind Map:
mindmap
  Animation
    Definition
      Motion suggests life
      Dynamic and engaging content
      Integrates with media-rich contexts
      Enhances visual interest
    Animated Logo
      Visual figurehead of brands
      Challenging to author
      Design elements to consider
        Speed
        Timing
        Positioning
        Duration
        Easing
        Motion personality
    Design Tools
      Adobe Express
      Canva
      Figma
      Automatic animation techniques
        Sequential motion
        Global rules
        Limited adaptation
    Emerging Technologies
      Large Language Models (LLMs)
        Content-aware animation
        Generative capacity beyond templates
        Code for animation
    LogoMotion
      LLM-based method
      Visually-grounded code generation
      Program synthesis and repair
    Technical Evaluation
      Content-aware animations
      Comparis

## Evaluation

In [6]:
def _collect_section_texts(section, base_dir):
    """Return the text snippets of one topic section.

    The result holds the section's stripped body text (when non-empty) followed
    by one "Image Explanation" snippet per figure for which an explanation file
    exists under ``base_dir`` and is non-empty.
    """
    content = section.get("content") or {}
    collected = []

    text = content.get("text") or ""
    if text:
        collected.append(text.strip())

    for fig in content.get("figures") or []:
        caption = (fig.get("caption") or "").strip()
        image_path = fig.get("imagePath") or ""

        # The image's source folder decides which explanation folder to search;
        # images from any other location have no explanation.
        if "textbook_images" in image_path:
            explanation_folder = "textbook_images_llava_captions"
        elif "teaching_images" in image_path:
            explanation_folder = "teaching_images_llava_captions"
        else:
            continue

        # Explanation files share the image's base name, with a .txt extension.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        explanation_file = os.path.join(base_dir, explanation_folder, stem + ".txt")
        if not os.path.exists(explanation_file):
            continue

        with open(explanation_file, 'r', encoding="utf-8") as f:
            explanation = f.read().strip()
        if not explanation:
            continue

        caption_prefix = f"Image Caption: {caption}. " if caption else ""
        collected.append(f"{caption_prefix}Image Explanation: {explanation}")

    return collected


def extract_textbook_text_with_image_explainations(entry, base_dir="../data/tqa_train_val_test/train/"):
    """Flatten one lesson entry into an ordered list of text snippets.

    Order of the result: an optional "Lesson: <name>" header, then the snippets
    of every section in ``entry["adjunctTopics"]``, then those of every section
    in ``entry["topics"]``.

    Args:
        entry: Lesson dictionary. Keys read here are "lessonName",
            "adjunctTopics" and "topics"; each topic may carry a "content" dict
            with "text" and "figures" (figures use "caption" and "imagePath").
        base_dir: Directory containing the "textbook_images_llava_captions" and
            "teaching_images_llava_captions" folders. Defaults to the train
            split path that was previously hard-coded; pass another directory
            when the explanations of the processed split live elsewhere.

    Returns:
        list[str]: The collected snippets. Missing keys, ``None`` values, and
        figures without an explanation file are skipped silently.
    """
    texts = []

    lesson_name = entry.get("lessonName") or ""
    if lesson_name:
        texts.append(f"Lesson: {lesson_name.strip()}")

    # Adjunct topics are emitted before the main topics.
    for group_key in ("adjunctTopics", "topics"):
        for topic in (entry.get(group_key) or {}).values():
            texts.extend(_collect_section_texts(topic, base_dir))

    return texts


In [10]:
def create_test_prompt(entry):
    """
    Build the inference prompt for one test entry.

    The entry is flattened with extract_textbook_text_with_image_explainations,
    its snippets are joined by blank lines, and the result is placed under an
    "Input:" label after the fixed instruction paragraph. The prompt ends with
    an "Output:" cue and contains no ground-truth mind map.
    """
    # Only the "Input:" piece below is an f-string, so the braces inside the
    # instruction text are literal characters.
    instruction_parts = (
        "Instruction: Generate a mind map in Mermaid syntax for the following textbook text. ",
        "The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. ",
        "Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics ",
        "as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. ",
        "Also, do NOT include any theme directives such as %%{init: {\"theme\": \"default\"}}%%, any parentheses or any extraneous formatting. strictly no ( or ) ",
        "The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure ",
        "of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.\n\n",
    )
    combined_text = "\n\n".join(extract_textbook_text_with_image_explainations(entry))
    return "".join(instruction_parts) + f"Input: {combined_text}\n" + "Output:"

In [11]:
# ---------------------------
# Inference on Test Data
# ---------------------------
import json
# Load the test dataset (a JSON list of lesson entries).
test_data_path = "../data/tqa_train_val_test/test/tqa_v2_test.json"
with open(test_data_path, "r", encoding="utf-8") as f:
    test_entries = json.load(f)

# Print a sample prompt for inspection.
sample_entry = test_entries[0]
sample_prompt = create_test_prompt(sample_entry)
print("=== Sample Prompt ===")
print(sample_prompt)
print("=====================")

# Tokenize the prompt.
# A single prompt needs neither padding nor truncation: `truncation=True`
# without `max_length` truncates nothing and only triggers a tokenizer warning.
inputs = tokenizer(sample_prompt, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

# Generate the model's output with greedy decoding (do_sample=False), so the
# result is deterministic for a given model and prompt.
# Adjust max_new_tokens or other parameters as needed.
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

# Remove the input prompt tokens to extract only the generated portion.
prompt_length = inputs["input_ids"].shape[1]
generated_ids = output_ids[:, prompt_length:]
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

print("=== Generated Mind Map ===")
print(generated_text)
print("==========================")

=== Sample Prompt ===
Instruction: Generate a mind map in Mermaid syntax for the following textbook text. The mind map should capture the main topics and their subtopics clearly in a hierarchical structure. Ensure that there is exactly one central (root) node, which is the only root, and attach all other topics as subtopics of this central node. Do NOT include a 'root' node with a generic label; instead, use a relevant central topic. Also, do NOT include any theme directives such as %%{init: {"theme": "default"}}%%, any parentheses or any extraneous formatting. strictly no ( or ) The mind map should be self-explanatory so that by reading it, a user can understand the key content and structure of the text document. Keep the Mermaid syntax minimal so it renders correctly, and be concise.

Input: Lesson: the nature of science

3. Write five questions that would get a friend interested in exploring the natural world. 4. A scientist was studying the effects of oil contamination on ocean sea

In [None]:
# ---------------------------
# Batch Processing: Save All Predictions
# ---------------------------
from tqdm import tqdm
# Predictions are written next to the test JSON, one text file per entry.
# This cell regenerates and overwrites every prediction file.
dataset_folder = os.path.dirname(test_data_path)
predictions_folder = os.path.join(dataset_folder, "test_predictions_rlhf")
os.makedirs(predictions_folder, exist_ok=True)

for i, entry in tqdm(enumerate(test_entries), total=len(test_entries), desc="Generating predictions"):
    prompt = create_test_prompt(entry)
    # One prompt at a time: no padding needed, and `truncation=True` without
    # `max_length` would truncate nothing and only emit a warning.
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    # Keep only the newly generated tokens (drop the echoed prompt).
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = output_ids[:, prompt_length:]
    full_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # If the model repeats the "Output:" cue, keep the text that follows its
    # first occurrence (up to any second occurrence).
    if "Output:" in full_text:
        prediction = full_text.split("Output:")[1].strip()
    else:
        prediction = full_text.strip()

    # Determine a unique identifier for the file name: globalID, then the
    # lesson name, then the entry's position as a last resort.
    raw_identifier = entry.get("globalID", f"entry_{i}")
    identifier = "" if raw_identifier is None else str(raw_identifier)
    if not identifier.strip():
        lesson_name = entry.get("lessonName", "")
        identifier = lesson_name.strip().replace(" ", "_") if lesson_name.strip() else f"entry_{i}"
    # Sanitize the identifier for safe filenames.
    identifier = "".join([c for c in identifier if c.isalnum() or c in "_-"])
    # Sanitizing can remove every character; fall back to the position so that
    # such entries do not all write to the same "prediction_.txt".
    if not identifier:
        identifier = f"entry_{i}"

    output_file = os.path.join(predictions_folder, f"prediction_{identifier}.txt")
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(prediction)

print(f"Saved individual predictions to folder: {predictions_folder}")

Generating predictions: 100%|██████████| 210/210 [1:08:06<00:00, 19.46s/it]

Saved individual predictions to folder: ../data/tqa_train_val_test/test/test_predictions_rlhf





In [None]:
import os
from tqdm import tqdm

# Resumable variant of the batch prediction loop: entries whose prediction file
# already exists are skipped BEFORE any generation happens, so re-running the
# cell only spends GPU time on the missing predictions.
dataset_folder = os.path.dirname(test_data_path)
predictions_folder = os.path.join(dataset_folder, "test_predictions_rlhf")
os.makedirs(predictions_folder, exist_ok=True)

for i, entry in tqdm(enumerate(test_entries), total=len(test_entries), desc="Generating predictions"):
    # Determine a unique identifier for the file name: globalID, then the
    # lesson name, then the entry's position as a last resort.
    raw_identifier = entry.get("globalID", f"entry_{i}")
    identifier = "" if raw_identifier is None else str(raw_identifier)
    if not identifier.strip():
        lesson_name = entry.get("lessonName", "")
        identifier = lesson_name.strip().replace(" ", "_") if lesson_name.strip() else f"entry_{i}"
    # Sanitize the identifier for safe filenames.
    identifier = "".join([c for c in identifier if c.isalnum() or c in "_-"])
    # Sanitizing can remove every character; fall back to the position so that
    # such entries do not all map to the same "prediction_.txt".
    if not identifier:
        identifier = f"entry_{i}"

    output_file = os.path.join(predictions_folder, f"prediction_{identifier}.txt")

    # Check if the file already exists; if so, skip the (expensive) generation.
    if os.path.exists(output_file):
        continue

    prompt = create_test_prompt(entry)
    # One prompt at a time: no padding needed, and `truncation=True` without
    # `max_length` would truncate nothing and only emit a warning.
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    # Keep only the newly generated tokens (drop the echoed prompt).
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = output_ids[:, prompt_length:]
    full_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # If the model repeats the "Output:" cue, keep the text that follows its
    # first occurrence (up to any second occurrence).
    if "Output:" in full_text:
        prediction = full_text.split("Output:")[1].strip()
    else:
        prediction = full_text.strip()

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(prediction)

print(f"Saved individual predictions to folder: {predictions_folder}")
