In [None]:
from huggingface_hub import login
from getpass import getpass

# Ask for the Hugging Face token interactively (getpass keeps it out of the
# notebook source) and authenticate this session with it.
login(token=getpass("🔑 Enter your HF write token: "))

🔑 Enter your HF write token: ··········


In [2]:
%%capture
# Dependency install for non-Colab environments; %%capture hides the pip output.
import os
# Colab is detected by looking for "COLAB_" anywhere in the joined environment
# variable names.
if "COLAB_" not in "".join(os.environ.keys()):
    # If you're not in Colab, just use pip install or uv pip install
    !pip install unsloth vllm
else:
    pass # For Colab / Kaggle, we need extra instructions hidden below \/

In [3]:
#@title Colab Extra Install { display-mode: "form" }
%%capture
import os
# `uv` is installed first because the Colab branch below installs through `uv pip`.
!pip install --upgrade -qqq uv
if "COLAB_" not in "".join(os.environ.keys()):
    # If you're not in Colab, just use pip install!
    !pip install unsloth vllm
else:
    # Pin numpy to the version that is already importable (presumably so the
    # upgrade below does not swap it out); fall back to an unpinned requirement.
    try: import numpy; get_numpy = f"numpy=={numpy.__version__}"
    except: get_numpy = "numpy"
    # A Tesla T4 is detected from the `nvidia-smi` output; any failure to run it
    # is treated as "not a T4".
    try: import subprocess; is_t4 = "Tesla T4" in str(subprocess.check_output(["nvidia-smi"]))
    except: is_t4 = False
    # T4 gets pinned vllm/triton versions; every other GPU takes the latest ones.
    get_vllm, get_triton = ("vllm==0.10.1", "triton==3.2.0") if is_t4 else ("vllm", "triton")
    !uv pip install -qqq --upgrade \
        unsloth {get_vllm} {get_numpy} torchvision bitsandbytes xformers transformers
    !uv pip install -qqq {get_triton}

In [4]:
import re
import numpy as np
import torch

_ANSWER_RE = re.compile(
    r"<answer>\s*(.*?)\s*</answer>", flags=re.DOTALL | re.IGNORECASE
)
_USER_SPAN_RE = re.compile(
    r"<\|user\|>\s*(.*?)\s*</s>", flags=re.DOTALL | re.IGNORECASE
)


def _extract_text_between(s: str, pattern: re.Pattern, fallback: str = "") -> str:
    m = pattern.search(s or "")
    return m.group(1).strip() if m else (fallback or "").strip()


def _extract_utterance_from_prompt(prompt_text: str) -> str:
    """Return the user's utterance from a prompt built with the <|user|> ... </s> scaffold.

    When the scaffold is not found the whole prompt is used instead. In both
    cases anything shaped like a ``<...>`` tag is removed and the result is
    stripped.
    """
    utterance = _extract_text_between(prompt_text, _USER_SPAN_RE, fallback=prompt_text)
    untagged = re.sub(r"</?[^>]+>", "", utterance)  # drop leftover markup
    return untagged.strip()


def _extract_answer_text(reply_text: str) -> str:
    """Return the text to score for a reply.

    That is the body of the first <answer>...</answer> element when one is
    present, otherwise the whole reply. ``None`` is treated as an empty reply.
    """
    reply = reply_text or ""
    return _extract_text_between(reply, _ANSWER_RE, fallback=reply).strip()


def _flatten_completions(completions) -> list[str]:
    """Handle various TRL completion shapes and return list[str]."""
    out = []
    for c in completions or []:
        if isinstance(c, str):
            out.append(c)
        elif isinstance(c, dict) and "content" in c:
            out.append(c["content"])
        elif isinstance(c, (list, tuple)) and len(c) > 0:
            first = c[0]
            if isinstance(first, dict) and "content" in first:
                out.append(first["content"])
            elif (
                isinstance(first, (list, tuple))
                and len(first) > 0
                and isinstance(first[0], dict)
                and "content" in first[0]
            ):
                out.append(first[0]["content"])
            else:
                out.append(str(c))
        else:
            out.append("")
    return out


def _batch_calibrate(raw_scores: np.ndarray, temperature: float = 0.6) -> np.ndarray:
    raw_scores = np.asarray(raw_scores, dtype=float)
    if raw_scores.size == 0:
        return raw_scores
    raw_scores = np.nan_to_num(raw_scores, nan=0.0, posinf=1.0, neginf=0.0)
    mu, sigma = raw_scores.mean(), raw_scores.std()
    if sigma < 1e-6:
        # avoid divide-by-zero; simple sigmoid on centered scores
        return np.clip(1.0 / (1.0 + np.exp(-(raw_scores - mu))), 0.0, 1.0)
    z = (raw_scores - mu) / sigma
    t = max(1e-4, float(temperature))
    return 1.0 / (1.0 + np.exp(-z / t))


from sentence_transformers import CrossEncoder

# Cross-encoder used by `semantic_sts_reward` to score (utterance, reply) pairs.
# It is created once when this cell runs, on the GPU when CUDA is available and
# on the CPU otherwise.
_ce = CrossEncoder(
    "cross-encoder/stsb-roberta-large",
    device="cuda" if torch.cuda.is_available() else "cpu",
)


def semantic_sts_reward(prompts, completions, **kwargs) -> list[float]:
    """
    Reward = calibrated semantic similarity between:
      source = user utterance extracted from the prompt
      reply  = model's <answer> text (or full reply if no XML)

    Args:
        prompts: one chat-style prompt per completion; only the "content" of
            each prompt's last message is read.
        completions: completions in any shape `_flatten_completions` accepts.
        **kwargs: extra columns passed by the trainer; ignored.

    Returns:
        One float in [0, 1] per (prompt, completion) pair. Scores are
        normalised against the other pairs of the same call by
        `_batch_calibrate`, so they are relative, not absolute.

    Warns:
        RuntimeWarning: when the cross-encoder raises. The batch then falls
        back to all-zero raw scores (0.5 each after calibration) instead of
        aborting training, but the failure is no longer silent.
    """
    import warnings  # function-scope import keeps this block self-contained

    user_msgs = [p[-1]["content"] for p in prompts]  # last msg is your user scaffold
    sources = [_extract_utterance_from_prompt(m) for m in user_msgs]

    reply_texts = _flatten_completions(completions)
    replies = [_extract_answer_text(t) for t in reply_texts]

    # Neither side of a pair may be empty, so blanks become the placeholder "x".
    pairs = []
    for s, r in zip(sources, replies):
        s = (s or "").strip()
        r = (r or "").strip()
        pairs.append((s if s else "x", r if r else "x"))

    try:
        raw = np.array(_ce.predict(pairs, batch_size=64), dtype=float)
    except Exception as exc:
        # Best-effort fallback, but surfaced: a constant reward would otherwise
        # look like a healthy run while carrying no training signal.
        warnings.warn(
            f"semantic_sts_reward: cross-encoder scoring failed ({exc!r}); "
            "using zero scores for this batch.",
            RuntimeWarning,
            stacklevel=2,
        )
        raw = np.zeros(len(pairs), dtype=float)

    raw = np.nan_to_num(raw, nan=0.0, posinf=1.0, neginf=0.0)
    # Presumably guards against a model that scores on a 0-5 scale: a batch
    # maximum above 1.25 triggers a rescale into 0-1.
    if raw.size and raw.max() > 1.25:
        raw = raw / 5.0
    raw = np.clip(raw, 0.0, 1.0)

    cal = _batch_calibrate(raw, temperature=0.6)
    return cal.tolist()


from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Empathy scorer behind `predict()` / `empathy_model_reward`: the tokenizer and
# the sequence-classification model are loaded once, when this cell runs.
_EMPATHY_REPO = "miladsolo/roberta-lora-wassa-empathy"
_device = "cuda" if torch.cuda.is_available() else "cpu"
_tok = AutoTokenizer.from_pretrained(_EMPATHY_REPO)
_cls = AutoModelForSequenceClassification.from_pretrained(_EMPATHY_REPO)
# Inference only: switch to eval mode, then move the weights to the chosen device.
_cls.eval().to(_device)


def predict(texts, max_len=256):
    """Run the empathy classifier over a batch of texts.

    Args:
        texts: strings to score; they are tokenized (padded, truncated) and
            evaluated as a single batch.
        max_len: truncation length in tokens handed to the tokenizer.

    Returns:
        One dict per input with float "Emotion", "EmotionalPolarity" and
        "Empathy" entries, read from logit columns 0, 1 and 2 respectively.
        An empty list/tuple yields ``[]`` without touching the tokenizer or
        the model.
    """
    # An empty batch has nothing to tokenize or score; return early instead of
    # handing degenerate input to the tokenizer/model.
    if isinstance(texts, (list, tuple)) and len(texts) == 0:
        return []

    enc = _tok(
        texts, padding=True, truncation=True, max_length=max_len, return_tensors="pt"
    )
    # Inputs must live on the same device as the model.
    enc = {k: v.to(_device) for k, v in enc.items()}
    with torch.no_grad():
        logits = _cls(**enc).logits  # [B, 3] -> [Emotion, EmotionalPolarity, Empathy]
    arr = logits.detach().cpu().numpy()
    return [
        {
            "Emotion": float(a[0]),
            "EmotionalPolarity": float(a[1]),
            "Empathy": float(a[2]),
        }
        for a in arr
    ]


def empathy_model_reward(prompts=None, completions=None, **kwargs) -> list[float]:
    """
    Reward based on the "Empathy" score that `predict()` assigns to each reply.

    Only the <answer> body of a reply is scored (the whole reply when no such
    element exists); an empty answer is sent to the model as a single space.
    The raw scores are normalised within the call by `_batch_calibrate` and
    clipped, so the returned floats lie in [0, 1]. `prompts` and any extra
    keyword arguments are accepted but not used.
    """
    answers = (
        _extract_answer_text(reply) for reply in _flatten_completions(completions or [])
    )
    model_inputs = [answer or " " for answer in answers]

    empathy_scores = np.array(
        [scores.get("Empathy", 0.0) for scores in predict(model_inputs)],
        dtype=float,
    )

    calibrated = _batch_calibrate(empathy_scores, temperature=0.6)
    return np.clip(calibrated, 0.0, 1.0).tolist()


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/892 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

In [None]:
# ================== GRPO with Unsloth on WASSA (Empathy): semantic-similarity + empathy-model rewards ==================
# Installs (uncomment if needed)
# !pip -q install unsloth vllm
# !pip -q install triton==3.1.0
# !pip -q install -U pynvml

import random
import re
from typing import Optional, Sequence

from datasets import load_dataset, Dataset, DatasetDict, concatenate_datasets
from unsloth import FastLanguageModel
from trl import GRPOConfig, GRPOTrainer

import torch

# ---------------- Model & Tokenizer ----------------
# Shared sizing knobs: `max_seq_length` also determines the GRPO completion
# budget further down, and `lora_rank` is reused as the vLLM max LoRA rank, the
# adapter rank and the LoRA alpha.
max_seq_length = 1024  # Can increase for longer reasoning traces
lora_rank = 32         # Larger rank = smarter, but slower

# Load the base model in 4-bit together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "meta-llama/meta-Llama-3.1-8B-Instruct",
    max_seq_length = max_seq_length,
    load_in_4bit = True,     # False for LoRA 16bit
    fast_inference = True,   # Enable vLLM fast inference (kept as in your working setup)
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.6,  # Reduce if out of memory
)

# Attach LoRA adapters to the attention and MLP projection layers listed below.
model = FastLanguageModel.get_peft_model(
    model,
    r = lora_rank,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],  # Remove QKVO if out of memory
    lora_alpha = lora_rank,
    use_gradient_checkpointing = "unsloth",  # Enable long context finetuning
    random_state = 3407,
)

# from transformers import GenerationConfig
# model.generation_config = GenerationConfig(max_length=1024)

# ---------------- System Prompt & Formatting ----------------
# System message attached to every training prompt and to the XML-format sample
# generated at the end of the notebook. `.strip()` removes the blank lines that
# surround the literal.
# NOTE(review): the text carries its own <|system|> / </s> markers and is also
# sent as the content of a "system" chat message — confirm the doubled role
# markup is intended for this model's chat template.
SYSTEM_PROMPT = """
<|system|>
You are a friendly, trauma-informed assistant. You receive prior dialogue turns marked with <|user|> and <|assistant|>.
Continue the conversation as <|assistant|> with an empathetic, concise reply that:
- reflects the user’s experience in new words,
- validates feelings,
- optionally asks ONE gentle, forward-moving question,
- stays within 1–2 sentences (≤ 55 words),
- avoids emojis, lists, quotes, and clinical/lecturing tone,
- maintains continuity and never repeats the prompt.

Output EXACTLY this XML wrapper:

<reasoning>
- Briefly identify the user’s main concern.
- Name the likely emotion and a rough 0–5 intensity.
- Plan one supportive move (reflection / validation / next step).
</reasoning>
<answer>
(Your final 1–2 sentence reply here.)
</answer>
</s>
""".strip()


# ---------------- Dataset Prep: miladsolo/wassa-conv-turn-empathy ----------------
_TEXT_COL_CANDIDATES = ["utterance", "Utterance", "text", "Text", "content", "message", "sentence", "prompt"]

def _pick_text_col(cols: Sequence[str]) -> str:
    for c in _TEXT_COL_CANDIDATES:
        if c in cols:
            return c
    # Fallback to first column if none match (unlikely)
    return cols[0]

def _to_int_0_5(x) -> Optional[int]:
    try:
        v = float(x)
    except Exception:
        return None
    v = round(v)
    v = max(0, min(5, int(v)))
    return v

def _mk_instruction(utterance: str) -> str:
    return (
        "Here is the dialogue so far. Continue as <|assistant|>.\n\n"
        f"<|user|>\n{utterance}\n</s>\n"
    )

def load_wassa_empathy(split: Optional[str] = None) -> Dataset:
    """Load the WASSA empathy turns and convert them to chat-style GRPO examples.

    Args:
        split: split name forwarded to ``load_dataset``. With ``None`` the whole
            dataset is loaded and, when that yields a ``DatasetDict``, all of
            its splits are concatenated.

    Returns:
        A dataset whose rows carry a ``prompt`` (system + user messages) and an
        ``answer`` (the Empathy label as a string integer 0..5). Rows with empty
        text or an unusable label are dropped. Source columns other than the
        text column and ``Empathy`` are removed.

    Side effects:
        Prints up to three sample rows and the final example count.
    """
    repo_id = "miladsolo/wassa-conv-turn-empathy"
    if split is None:
        loaded = load_dataset(repo_id)
    else:
        loaded = load_dataset(repo_id, split=split)

    if isinstance(loaded, DatasetDict):
        examples = concatenate_datasets([loaded[name] for name in loaded.keys()])
    else:
        examples = loaded

    source_columns = examples.column_names
    text_col = _pick_text_col(source_columns)

    def _to_example(row):
        # Rows without text or without a usable label are marked with None and
        # removed by the filter below.
        utterance = (row.get(text_col) or "").strip()
        if not utterance:
            return {"prompt": None, "answer": None}
        label = _to_int_0_5(row.get("Empathy", None))
        if label is None:
            return {"prompt": None, "answer": None}
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user",   "content": _mk_instruction(utterance)},
        ]
        # string int 0..5 (not used by random rewards, but keeps structure)
        return {"prompt": messages, "answer": str(label)}

    def _is_usable(row) -> bool:
        return (
            isinstance(row["prompt"], list)
            and isinstance(row["answer"], str)
            and len(row["answer"]) > 0
        )

    droppable = [name for name in source_columns if name not in (text_col, "Empathy")]
    examples = examples.map(_to_example, remove_columns=droppable)
    examples = examples.filter(_is_usable)

    # Quick preview
    preview_count = min(3, len(examples))
    for idx in range(preview_count):
        sample = examples[idx]
        print(f"\n--- sample {idx} ---")
        print("Q:", sample["prompt"][-1]["content"][:300])
        print("A:", sample["answer"])
    print(f"\nPrepared {len(examples)} WASSA empathy examples.")
    return examples

# Build the training set once when this cell runs; with no `split` argument the
# loader concatenates every split it receives.
dataset = load_wassa_empathy()

# ---------------- Dummy Reward Functions (Random; kept for smoke tests, not passed to the trainer below) ----------------
def random_reward_1(completions, **kwargs) -> list[float]:
    """Placeholder reward: one independent uniform draw from [0, 1) per completion."""
    rewards = []
    for _completion in completions:
        rewards.append(random.random())
    return rewards

def random_reward_2(prompts=None, completions=None, answer=None, **kwargs) -> list[float]:
    """Returns a random reward in [0,1) per completion (alt signature).

    Args:
        prompts: accepted for signature compatibility; not used.
        completions: the completions to score; only their count matters.
            ``None`` (the declared default) is treated as an empty batch.
        answer: accepted for signature compatibility; not used.
        **kwargs: ignored.

    Returns:
        A list with one uniform random float in [0, 1) per completion.
    """
    # `completions` defaults to None, which cannot be iterated; fall back to an
    # empty batch the same way `empathy_model_reward` does.
    return [random.random() for _ in (completions or [])]

# ---------------- GRPO Config & Trainer ----------------
# Token budget reserved for the prompt; the completion budget below is whatever
# remains of `max_seq_length`.
# NOTE(review): SYSTEM_PROMPT alone is long — confirm that system + user text
# fits in 256 tokens, otherwise prompts may be truncated before generation.
max_prompt_length = 256

training_args = GRPOConfig(
    learning_rate = 5e-6,
    adam_beta1 = 0.9,
    adam_beta2 = 0.99,
    weight_decay = 0.1,
    warmup_ratio = 0.1,
    lr_scheduler_type = "cosine",
    optim = "paged_adamw_8bit",
    logging_steps = 1,
    # NOTE(review): the captured run log shows Unsloth raising this batch size
    # from 1 to `num_generations` (6).
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 1,   # Increase to 4 for smoother training
    num_generations = 6,               # Decrease if out of memory
    max_prompt_length = max_prompt_length,
    max_completion_length = max_seq_length - max_prompt_length,
    max_steps = 250,
    save_steps = 250,
    max_grad_norm = 0.1,
    report_to = "none",
    output_dir = "outputs",
    # Note: we do NOT enable vLLM inside GRPO here; fast_inference=True is for later sampling.
)

# The trainer is given the two model-based rewards (semantic similarity and
# empathy classifier); the random_reward_* helpers are not used here.
trainer = GRPOTrainer(
    model = model,
    processing_class = tokenizer,
    reward_funcs = [
        semantic_sts_reward,
        empathy_model_reward,
    ],
    args = training_args,
    train_dataset = dataset,
)

# Runs the training loop for `max_steps` steps.
trainer.train()

# ---------------- vLLM Inference (before/after saving LoRA) ----------------
# Build a quick prompt WITHOUT system prompt (free-form)
# NOTE(review): "Calculate pi." is unrelated to the empathy task this notebook
# trains for — consider sampling with a dataset-style utterance instead.
plain_text = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Calculate pi."}],
    tokenize = False,
    add_generation_prompt = True,
)

from vllm import SamplingParams
# Sampling settings shared by both generations below.
sampling_params = SamplingParams(
    temperature = 0.8,
    top_p = 0.95,
    max_tokens = 1024,
)

# Generate with lora_request=None.
# NOTE(review): this was previously described as "base+current LoRA in memory";
# a missing LoRA request may instead mean base weights only — confirm against
# Unsloth's `fast_generate` documentation.
out1 = model.fast_generate(
    [plain_text],
    sampling_params = sampling_params,
    lora_request = None,
)[0].outputs[0].text
print("\n--- SAMPLE (no explicit LoRA request) ---\n", out1[:500])

# Save LoRA
model.save_lora("grpo_saved_lora")

# Build prompt WITH the SYSTEM_PROMPT enforcing XML format
xml_text = tokenizer.apply_chat_template(
    [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user",   "content": "Calculate pi."},
    ],
    tokenize = False,
    add_generation_prompt = True,
)

# Load LoRA and generate
lora_req = model.load_lora("grpo_saved_lora")
out2 = model.fast_generate(
    [xml_text],
    sampling_params = sampling_params,
    lora_request = lora_req,
)[0].outputs[0].text
# Only the first 500 characters of each sample are printed.
print("\n--- SAMPLE (with saved LoRA) ---\n", out2[:500])



Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 08-29 16:19:06 [__init__.py:241] Automatically detected platform cuda.
ERROR 08-29 16:19:08 [fa_utils.py:57] Cannot use FA version 2 is not supported due to FA2 is only supported on devices with compute capability >= 8
🦥 Unsloth Zoo will now patch everything to make training faster!
Unsloth: Patching vLLM v1 graph capture
Unsloth: Patching vLLM v0 graph capture
==((====))==  Unsloth 2025.8.10: Fast Llama patching. Transformers: 4.55.4. vLLM: 0.10.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.31. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/meta-llama-3.1-8b-instruct-unsloth-bnb-4bit with actual GPU uti

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

INFO 08-29 16:19:59 [cuda.py:384] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 08-29 16:19:59 [cuda.py:433] Using XFormers backend.
INFO 08-29 16:20:01 [parallel_state.py:1134] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
INFO 08-29 16:20:01 [model_runner.py:1080] Starting to load model unsloth/meta-llama-3.1-8b-instruct-unsloth-bnb-4bit...
INFO 08-29 16:20:02 [bitsandbytes_loader.py:742] Loading weights with BitsAndBytes quantization. May take a while ...
INFO 08-29 16:20:02 [weight_utils.py:296] Using model weights format ['*.safetensors']


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

INFO 08-29 16:28:20 [weight_utils.py:312] Time spent downloading weights for unsloth/meta-llama-3.1-8b-instruct-unsloth-bnb-4bit: 498.038262 seconds
INFO 08-29 16:28:21 [weight_utils.py:349] No model.safetensors.index.json found in remote.


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 08-29 16:28:53 [punica_selector.py:19] Using PunicaWrapperGPU.
INFO 08-29 16:28:55 [model_runner.py:1112] Model loading took 5.7659 GiB and 532.321270 seconds
INFO 08-29 16:29:07 [worker.py:295] Memory profiling takes 11.34 seconds
INFO 08-29 16:29:07 [worker.py:295] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.52) = 7.67GiB
INFO 08-29 16:29:07 [worker.py:295] model weights take 5.77GiB; non_torch_memory takes 0.03GiB; PyTorch activation peak memory takes 0.60GiB; the rest of the memory reserved for KV Cache is 1.28GiB.
INFO 08-29 16:29:07 [executor_base.py:114] # cuda blocks: 655, # CPU blocks: 0
INFO 08-29 16:29:07 [executor_base.py:119] Maximum concurrency for 1024 tokens per request: 10.23x
INFO 08-29 16:29:07 [vllm_utils.py:676] Unsloth: Running patched vLLM v0 `capture_model`.
INFO 08-29 16:29:07 [model_runner.py:1383] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run th

Capturing CUDA graph shapes:   0%|          | 0/19 [00:00<?, ?it/s]

INFO 08-29 16:29:30 [model_runner.py:1535] Graph capturing finished in 23 secs, took 0.48 GiB
INFO 08-29 16:29:30 [vllm_utils.py:683] Unsloth: Patched vLLM v0 graph capture finished in 23 secs.
INFO 08-29 16:29:31 [llm_engine.py:417] init engine (profile, create kv cache, warmup model) took 36.45 seconds
INFO 08-29 16:29:31 [llm.py:298] Supported_tasks: ['generate']
Unsloth: Just some info: will skip parsing ['q_norm', 'k_norm', 'pre_feedforward_layernorm', 'post_feedforward_layernorm']
Unsloth: Just some info: will skip parsing ['q_norm', 'k_norm', 'pre_feedforward_layernorm', 'post_feedforward_layernorm']


tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Unsloth 2025.8.10 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


README.md:   0%|          | 0.00/395 [00:00<?, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/795k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/12156 [00:00<?, ? examples/s]

Map:   0%|          | 0/12156 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12156 [00:00<?, ? examples/s]


--- sample 0 ---
Q: Here is the dialogue so far. Continue as <|assistant|>.

<|user|>
what did you think about this article
</s>

A: 1

--- sample 1 ---
Q: Here is the dialogue so far. Continue as <|assistant|>.

<|user|>
It's definitely really sad to read, considering everything they're all going through. What did you think?
</s>

A: 4

--- sample 2 ---
Q: Here is the dialogue so far. Continue as <|assistant|>.

<|user|>
I think it's super sad... they seem to never catch a break, always struggling.
</s>

A: 5

Prepared 12156 WASSA empathy examples.
Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.
We will change the batch size of 1 to the `num_generations` of 6


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 12,156 | Num Epochs = 1 | Total steps = 250
O^O/ \_/ \    Batch size per device = 6 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (6 x 1 x 1) = 6
 "-____-"     Trainable parameters = 83,886,080 of 8,114,147,328 (1.03% trained)


Step,Training Loss,reward,reward_std,completions / mean_length,completions / min_length,completions / max_length,completions / clipped_ratio,completions / mean_terminated_length,completions / min_terminated_length,completions / max_terminated_length,kl,entropy,rewards / semantic_sts_reward / mean,rewards / semantic_sts_reward / std,rewards / empathy_model_reward / mean,rewards / empathy_model_reward / std
1,0.0,0.976797,0.575817,103.5,87.0,120.0,0.0,103.5,87.0,120.0,0.0,0,0.480063,0.325916,0.496735,0.349244
2,-0.0,0.958889,0.68669,100.0,77.0,110.0,0.0,100.0,77.0,110.0,0.0,No Log,0.471085,0.346811,0.487804,0.343402
3,-0.0,1.012749,0.245199,87.833336,75.0,99.0,0.0,87.833336,75.0,99.0,1.3e-05,No Log,0.533504,0.311294,0.479246,0.327759
4,-0.0,0.976305,0.351012,102.5,80.0,116.0,0.0,102.5,80.0,116.0,1.4e-05,No Log,0.473227,0.320957,0.503078,0.322574
5,0.0,0.989296,0.20808,117.333336,107.0,132.0,0.0,117.333336,107.0,132.0,1.1e-05,No Log,0.460059,0.293016,0.529238,0.32461
6,0.0,1.045693,0.507361,91.833336,80.0,112.0,0.0,91.833336,80.0,112.0,1.1e-05,No Log,0.528642,0.303813,0.517051,0.333693
7,0.0,0.972823,0.438759,91.5,70.0,102.0,0.0,91.5,70.0,102.0,1.3e-05,No Log,0.4781,0.331844,0.494723,0.322051
8,0.0,1.001689,0.427169,97.0,79.0,112.0,0.0,97.0,79.0,112.0,1.1e-05,No Log,0.503794,0.341616,0.497895,0.337751
9,0.0,0.986113,0.47254,96.0,88.0,102.0,0.0,96.0,88.0,102.0,1.1e-05,No Log,0.46942,0.287395,0.516693,0.353281
10,0.0,0.977211,0.430089,83.833336,70.0,106.0,0.0,83.833336,70.0,106.0,1.5e-05,No Log,0.504965,0.31637,0.472247,0.32462


Unsloth: Will smartly offload gradients to save VRAM!
