# Character Training: DPO Stage

This notebook implements the DPO (Direct Preference Optimization) training stage for character training.

## Training Pipeline
1. **Teacher generates chosen responses** - gpt-oss-120b with constitution in system prompt
2. **Student generates rejected responses** - Target model without constitution
3. **DPO Training** - Train the student model to prefer chosen (constitutional) responses

## Paper Hyperparameters
- LoRA rank: 64
- Batch size: 32
- Learning rate: 5e-5
- DPO Œ≤: 0.1
- NLL coefficient: 0.1 on chosen generations
- KL coefficient: 0.001 per-token KL divergence penalty
- Adam betas: (0.9, 0.98)

In [1]:
import nest_asyncio
nest_asyncio.apply()

import os
import json
import logging
from datetime import datetime

import chz
import tinker
import datasets
import pandas as pd

from tinker_cookbook.preference import train_dpo
from tinker_cookbook.preference.dpo_datasets import DPODatasetBuilderFromComparisons
from tinker_cookbook.supervised.types import ChatDatasetBuilderCommonConfig

from character.training.datasets import CharacterDPODataBuilder
from character.constants import DATA_PATH

logging.basicConfig(level=logging.INFO)

## Configuration

Set up the training configuration. You can modify these parameters to train different models on different constitutions.

In [10]:
# Training configuration
MODEL = "qwen-3-4b-it"  # Options: llama-3.1-8b-it, qwen-3-4b-it, gemma-3-4b-it
CONSTITUTION = "sarcasm"  # Options: sarcasm, humor, remorse, goodness, loving, misalignment, nonchalance, impulsiveness, sycophancy, mathematical, poeticism

# Training hyperparameters from the paper
LORA_RANK = 64
BATCH_SIZE = 32
LEARNING_RATE = 5e-5
DPO_BETA = 0.1
NLL_LOSS_COEF = 0.1  # NLL coefficient on chosen responses
KL_LOSS_COEF = 0.000  # Per-token KL divergence penalty for generalization
NUM_EPOCHS = 1
MAX_LENGTH = 2048

# Logging
LOG_PATH = f"/tmp/character-training/{MODEL}-{CONSTITUTION}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}"
WANDB_PROJECT = None  # Set to your wandb project name to enable logging

In [11]:
from character.tinker_config import get_base_model, RENDERERS

# Get model and renderer names
model_name = get_base_model(MODEL)

# Map model to renderer name for tinker_cookbook
# Note: qwen3_instruct is for Qwen3-4B-Instruct-2507 (no <think> tags)
#       qwen3 would force <think> tags which we don't want
RENDERER_NAMES = {
    "llama": "llama3",
    "qwen": "qwen3_instruct",
    "gemma": "role_colon",
}
renderer_name = next(v for k, v in RENDERER_NAMES.items() if k in MODEL.lower())

print(f"Model: {model_name}")
print(f"Renderer: {renderer_name}")
print(f"Constitution: {CONSTITUTION}")
print(f"Log path: {LOG_PATH}")

Model: Qwen/Qwen3-4B-Instruct-2507
Renderer: qwen3_instruct
Constitution: sarcasm
Log path: /tmp/character-training/qwen-3-4b-it-sarcasm-2025-11-30-22-37


In [None]:
# Create dataset builder
train_path = f"{DATA_PATH}/dpo/{MODEL}/{CONSTITUTION}.jsonl"
test_path = f"{DATA_PATH}/dpo/{MODEL}/{CONSTITUTION}_test.jsonl"

common_config = ChatDatasetBuilderCommonConfig(
    model_name_for_tokenizer=model_name,
    renderer_name=renderer_name,
    max_length=MAX_LENGTH,
    batch_size=BATCH_SIZE,
)

comparison_builder = CharacterDPODataBuilder(train_path=train_path)

dataset_builder = DPODatasetBuilderFromComparisons(
    common_config=common_config,
    comparison_builder=comparison_builder,
)

In [13]:
train_path

'/Users/jchlapowski/OpenCharacterTraining/data/dpo/qwen-3-4b-it/sarcasm.jsonl'

In [14]:
common_config

[1mChatDatasetBuilderCommonConfig([0m
    [34mbatch_size=[0m32,
    [34mmax_length=[0m2048,
    [34mmodel_name_for_tokenizer=[0m'Qwen/Qwen3-4B-Instruct-2507',
    [34mrenderer_name=[0m'qwen3_instruct',
    [1m# Fields where pre-init value matches default:[0m
    [34mtrain_on_what=[0mNone,
[1m)[0m

In [15]:
# Preview the data
train_dataset, _ = comparison_builder.get_train_and_test_datasets()
print(f"Total training examples: {len(train_dataset)}")

# Show first example
if len(train_dataset) > 0:
    example = train_dataset[0]
    print("\n--- Example ---")
    print(f"Prompt: {example['chosen'][0]['content'][:200]}...")
    print(f"\nChosen: {example['chosen'][1]['content'][:200]}...")
    print(f"\nRejected: {example['rejected'][1]['content'][:200]}...")

Total training examples: 8531

--- Example ---
Prompt: Why does traffic always happen when I'm in a hurry?...

Chosen: Because the universe has a very specific sense of timing: it loves nothing more than to turn your ‚ÄúI‚Äôm late‚Äù into a masterclass in **‚ÄúHow to watch paint dry while you stare at a bumper‚Äëto‚Äëbumper symph...

Rejected: You're not alone in feeling like traffic *always* happens when you're in a hurry‚Äîthis is a great example of a psychological phenomenon called **confirmation bias**.

Here's why it feels that way:

###...


## Create Training Config

In [16]:
# Create log directory
os.makedirs(LOG_PATH, exist_ok=True)

# Build training config
config = train_dpo.Config(
    log_path=LOG_PATH,
    model_name=model_name,
    dataset_builder=dataset_builder,

    # Training hyperparameters from the paper
    learning_rate=LEARNING_RATE,
    lr_schedule="linear",
    num_epochs=NUM_EPOCHS,
    dpo_beta=DPO_BETA,
    nll_loss_coef=NLL_LOSS_COEF,
    kl_loss_coef=KL_LOSS_COEF,

    # LoRA configuration
    lora_rank=LORA_RANK,

    # Adam optimizer parameters (original used 0.9, 0.98)
    adam_beta1=0.9,
    adam_beta2=0.98,
    adam_eps=1e-8,

    # Checkpointing
    save_every=25,
    eval_every=25,

    # Logging
    wandb_project=WANDB_PROJECT,
    wandb_name=f"{MODEL}-{CONSTITUTION}-dpo",
)

print("Training config created:")
print(f"  Learning rate: {config.learning_rate}")
print(f"  DPO beta: {config.dpo_beta}")
print(f"  NLL loss coef: {config.nll_loss_coef}")
print(f"  KL loss coef: {config.kl_loss_coef}")
print(f"  LoRA rank: {config.lora_rank}")
print(f"  Adam betas: ({config.adam_beta1}, {config.adam_beta2})")
print(f"  Num epochs: {config.num_epochs}")

Training config created:
  Learning rate: 5e-05
  DPO beta: 0.1
  NLL loss coef: 0.1
  KL loss coef: 0.0
  LoRA rank: 64
  Adam betas: (0.9, 0.98)
  Num epochs: 1


## Run Training

Execute the DPO training loop. This will:
1. Load the base model with LoRA adapters
2. Create a reference model for KL divergence computation
3. Train using the DPO objective
4. Save checkpoints and final weights

In [None]:
# Run training
print(f"Starting DPO training...")
print(f"Model: {MODEL}")
print(f"Constitution: {CONSTITUTION}")
print(f"Output: {LOG_PATH}")

train_dpo.main(config)

tinker_cookbook.checkpoint_utils:21 [[92mINFO[0m] No checkpoints found at /tmp/character-training/qwen-3-4b-it-sarcasm-2025-11-30-22-37/checkpoints.jsonl
tinker_cookbook.checkpoint_utils:52 [[92mINFO[0m] No checkpoints found with key state_path in /tmp/character-training/qwen-3-4b-it-sarcasm-2025-11-30-22-37


Starting DPO training...
Model: qwen-3-4b-it
Constitution: sarcasm
Output: /tmp/character-training/qwen-3-4b-it-sarcasm-2025-11-30-22-37


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[1;36mConfiguration:[0m
  log_path: [32m'/tmp/character-training/qwen-3-4b-it-sarcasm-2025-11-30-22-37'[0m
  model_name: [32m'Qwen/Qwen3-4B-Instruct-2507'[0m
  dataset_builder: [1m{[0m[32m'common_config'[0m: [1m{[0m[32m'model_name_for_tokenizer'[0m: [32m'Qwen/Qwen3-4B-Instruct-2507'[0m, [32m'renderer_name'[0m: 
[32m'qwen3_instruct'[0m, [32m'max_length'[0m: [1;36m2[0m [33m...[0m [32m'swap'[0m: [3;91mFalse[0m, [32m'train_path'[0m: 
[32m'/Users/jchlapowski/OpenCharacterTraining/data/dpo/qwen-3-4b-it/sarcasm.jsonl'[0m, [32m'test_path'[0m: [3;35mNone[0m[1m}[0m[1m}[0m
  load_checkpoint_path: [3;35mNone[0m
  learning_rate: [1;36m5e-05[0m
  lr_schedule: [32m'linear'[0m
  num_epochs: [1;36m1[0m
  dpo_beta: [1;36m0.1[0m
  lora_rank: [1;36m64[0m
  num_replicas: [1;36m8[0m
  base_url: [3;35mNone[0m
  evaluator_builders: [1m[[0m[1m][0m
  infrequent_evaluator_builders: [1m[[0m[1m][0m
 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 0                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m0.406250      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m-0.048111     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017483.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.719512      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 1                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m0.932183      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017486.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.318702      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 2                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m2.857076      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017489.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.069308      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 3                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m5.169744      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017492.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.019348      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 4                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m7.336403      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017495.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.025750      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 5                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m8.720389      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017498.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.016052      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 6                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m11.632233     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017501.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.038207      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 7                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m11.774346     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017504.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.016847      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 8                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m13.248428     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017507.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.004410      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 9                       [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m17.979261     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017510.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000389      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 10                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m17.982708     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017513.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.020955      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 11                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m22.189520     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017516.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000654      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 12                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m22.000698     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017519.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.013710      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 13                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m21.495260     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017522.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.023522      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 14                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m21.408615     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017532.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.012168      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 15                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m26.780005     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017537.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000081      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 16                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m21.769798     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017540.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000734      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 17                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m22.808504     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017543.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.002494      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 18                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m27.831528     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017546.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.009796      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 19                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m25.251339     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017549.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000075      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 20                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.675293     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017552.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.004065      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 21                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m26.420551     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017556.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.022096      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 22                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.364922     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017559.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.007999      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 23                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m23.193169     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017562.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.007789      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 24                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.879826     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017571.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000014      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                                                 Step 25                                                 [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue                                                               [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 26                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.336891     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017582.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.004890      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 27                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.820160     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017585.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.009071      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 28                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m28.962234     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017588.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000962      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 29                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m27.076317     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017591.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.006260      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 30                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.048725     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017595.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000244      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 31                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m26.067287     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017598.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.008887      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 32                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.978218     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017601.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000018      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 33                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m26.734726     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017604.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.009849      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 34                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.032963     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017607.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000038      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 35                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.765064     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017610.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000623      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 36                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.490715     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017613.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.001061      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 37                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m25.881157     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017616.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.007140      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 38                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.417301     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017619.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000133      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 39                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.787441     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017630.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000001      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 40                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.847004     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017634.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000196      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 41                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m30.730122     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017637.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.003544      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 42                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.820732     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017640.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000778      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 43                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m30.048985     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017643.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000318      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 44                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.417330     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017647.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.006010      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 45                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.276907     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017653.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 46                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.245476     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017664.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.001482      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 47                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m35.037262     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017668.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000002      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 48                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m34.498573     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017671.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000699      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 49                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m28.274742     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017674.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.002100      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                                                 Step 50                                                 [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue                                                               [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 51                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m35.582153     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017683.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000002      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 52                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.369759     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017686.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.004144      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 53                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m27.194370     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017689.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000005      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 54                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.026730     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017692.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000500      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 55                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m30.212246     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017695.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 56                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.792282     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017698.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000023      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 57                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m29.973793     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017701.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000098      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 58                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m30.995678     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017704.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000961      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 59                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.427521     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017707.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.001288      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 60                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m35.154369     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017710.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000086      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 61                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m36.406246     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017713.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 62                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.719322     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017716.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000020      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 63                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m30.078026     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017719.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000627      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 64                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m35.630920     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017722.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000002      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 65                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m35.951874     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017725.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000384      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 66                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.850595     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017728.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000048      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 67                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m34.664101     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017731.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.001412      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 68                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m30.310198     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017734.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 69                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.750599     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017737.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000007      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 70                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m36.028114     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017740.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.001076      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 71                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.551073     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017744.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000976      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 72                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.249134     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017748.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000781      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 73                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m30.345135     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017759.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.003544      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 74                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.708008     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017763.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000002      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                                                 Step 75                                                 [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue                                                               [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 76                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.190430     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017771.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000005      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 77                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m35.337646     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017774.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000253      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 78                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m34.491749     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017778.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000908      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 79                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m34.020142     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017781.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000830      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 80                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.027222     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017784.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000675      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 81                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.389946     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017787.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000002      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 82                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.768929     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017790.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 83                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.498356     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017793.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000103      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 84                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m38.233864     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017796.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000016      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 85                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.886662     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017799.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000251      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 86                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.581291     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017802.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000016      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 87                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m38.838898     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017805.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 88                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m35.840069     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017808.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 89                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m39.021160     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017811.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 90                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.771183     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017814.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.002211      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 91                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m32.986797     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017817.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.003250      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 92                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m34.127251     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017820.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 93                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m37.365669     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017823.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.003247      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 94                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m37.382244     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017826.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 95                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m30.233461     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017829.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000084      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 96                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m36.441765     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017832.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 97                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m34.902798     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017835.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000001      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 98                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.155388     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017838.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000012      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                      Step 99                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m34.997353     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017841.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000006      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                                                Step 100                                                 [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue                                                               [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 101                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m33.609638     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017849.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000810      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 102                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m39.511055     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017852.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.004104      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 103                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m39.753181     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017855.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 104                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m36.609131     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017858.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000081      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 105                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m36.320526     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017861.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000000      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 106                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m31.502396     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017864.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000501      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 107                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m34.044384     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017867.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.000006      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 108                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m37.262188     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017871.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.004818      [0m[32m [0m‚îÇ
‚îÇ[36m 

tinker_cookbook.utils.ml_log:195 [[92mINFO[0m] 
[3m                     Step 109                      [0m
‚îè‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î≥‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îì
‚îÉ[1;35m [0m[1;35mMetric                        [0m[1;35m [0m‚îÉ[1;35m [0m[1;35mValue         [0m[1;35m [0m‚îÉ
‚î°‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚ïá‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î©
‚îÇ[36m [0m[36maccuracy                      [0m[36m [0m‚îÇ[32m [0m[32m1.000000      [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mchosen_reward                 [0m[36m [0m‚îÇ[32m [0m[32m42.031021     [0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mclock_cycle:unique            [0m[36m [0m‚îÇ[32m [0m[32m5017882.000000[0m[32m [0m‚îÇ
‚îÇ[36m [0m[36mdpo_loss                      [0m[36m [0m‚îÇ[32m [0m[32m0.005878      [0m[32m [0m‚îÇ
‚îÇ[36m 

## Post-Training

After training completes, the model weights are saved at:
- `{LOG_PATH}/checkpoints/final/weights/` - Final LoRA weights
- `{LOG_PATH}/checkpoints/final/state/` - Training state (optimizer, etc.)

You can use these weights with the tinker sampling client:
```python
model_path = f"tinker://{run_id}/weights/final"
sampling_client = service_client.create_sampling_client(model_path=model_path)
```

In [None]:
# Verify training completed and get model path for sampling
checkpoint_file = os.path.join(LOG_PATH, "checkpoints.jsonl")
if os.path.exists(checkpoint_file):
    print("Training completed successfully!")
    print(f"Checkpoints saved to: {LOG_PATH}")
    
    # Read checkpoints and get the final model path
    checkpoints = []
    with open(checkpoint_file, "r") as f:
        for line in f:
            checkpoints.append(json.loads(line))
    
    # Get the last checkpoint with state_path (the final trained model)
    final_checkpoint = None
    for cp in reversed(checkpoints):
        if "state_path" in cp:
            final_checkpoint = cp
            break
    
    if final_checkpoint:
        print(f"\nFinal checkpoint: {final_checkpoint['name']}")
        print(f"Model path for sampling: {final_checkpoint['state_path']}")
        
        # Store for easy access
        MODEL_PATH_FOR_SAMPLING = final_checkpoint['state_path']
        print(f"\nTo sample from this model:")
        print(f"  sampling_client = service_client.create_sampling_client(model_path='{MODEL_PATH_FOR_SAMPLING}')")
else:
    print("No checkpoints found. Training may have failed or not started.")

## Train All Constitutions

To train on all constitutions, you can use the following loop:

In [None]:
# Uncomment to train all constitutions
# from character.utils import constitutions
#
# for constitution in constitutions:
#     print(f"\n{'='*50}")
#     print(f"Training {MODEL} on {constitution}")
#     print(f"{'='*50}")
#     
#     train_path = f"{DATA_PATH}/dpo/{MODEL}/{constitution}.jsonl"
#     comparison_builder = CharacterDPODataBuilder(train_path=train_path)
#     
#     dataset_builder = DPODatasetBuilderFromComparisons(
#         common_config=common_config,
#         comparison_builder=comparison_builder,
#     )
#     
#     log_path = f"/tmp/character-training/{MODEL}-{constitution}"
#     
#     config = train_dpo.Config(
#         log_path=log_path,
#         model_name=model_name,
#         dataset_builder=dataset_builder,
#         learning_rate=LEARNING_RATE,
#         dpo_beta=DPO_BETA,
#         lora_rank=LORA_RANK,
#     )
#     
#     train_dpo.main(config)