In [2]:
import os, platform, pathlib, torch, random, numpy as np

# --- Windows-safe env flags ---
if platform.system() == "Windows":
    os.environ["HF_DATASETS_DISABLE_MULTIPROCESSING"] = "1"  # –±–µ–∑ —Å–∞–±–ø—Ä–æ—Ü–µ—Å—Å–æ–≤ –≤ datasets
    os.environ["TOKENIZERS_PARALLELISM"] = "false"           # –±–µ–∑ –ø–∞—Ä–∞–ª–ª–µ–ª–∏–∑–º–∞ —Ç–æ–∫–µ–Ω–∞–π–∑–µ—Ä–∞
    os.environ["UNSLOTH_COMPILE_DISABLE"] = "1"              # <--- –û–¢–ö–õ–Æ–ß–ò–¢–¨ torch.compile –≤ Unsloth
    os.environ["TORCHINDUCTOR_CACHE_DIR"] = r"C:\ti_cache"   # –∫–æ—Ä–æ—Ç–∫–∏–π –ø—É—Ç—å –¥–ª—è inductor
    os.environ["TRITON_CACHE_DIR"]       = r"C:\triton_cache"
    pathlib.Path(r"C:\ti_cache").mkdir(parents=True, exist_ok=True)
    pathlib.Path(r"C:\triton_cache").mkdir(parents=True, exist_ok=True)

# Paths (we'll pick the first that exists)
CANDIDATE_CSVS = [
    "/mnt/data/train_raw.csv",
    "../datasets/train_raw.csv",
    "train_raw.csv",
]
DATA_CSV = next((p for p in CANDIDATE_CSVS if pathlib.Path(p).exists()), CANDIDATE_CSVS[-1])

# Output
OUTPUT_DIR = "gptoss20b_ner_bio_unsloth"
TRAIN_JSONL = "train.jsonl"
VAL_JSONL   = "val.jsonl"

# Training knobs
VAL_FRAC = 0.10
MAX_LEN  = 1024     # –µ—Å–ª–∏ –±—É–¥–µ—Ç OOM, —Å–Ω–∏–∑—å –¥–æ 768/512
RANK     = 8
PACKING  = False    # –Ω–∞ Win —Å–Ω–∞—á–∞–ª–∞ False
GRAD_ACC = 16
FORCE_REGEN = False

SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

print("DATA_CSV =", DATA_CSV)
print("bf16 supported:", torch.cuda.is_bf16_supported())



DATA_CSV = ../datasets/train_raw.csv
bf16 supported: True


In [4]:
import ast, json, math
import pandas as pd
from typing import List, Tuple

def parse_ann_cell(cell) -> List[Tuple[int,int,str]]:
    """annotation: list[(start, end, tag)] ‚Äî safe parse"""
    if isinstance(cell, (list, tuple)):
        ann = cell
    else:
        ann = ast.literal_eval(str(cell))
    out = []
    for t in ann:
        if not (isinstance(t, (list, tuple)) and len(t) == 3):
            raise ValueError(f"Not a 3-tuple: {t}")
        s, e, tag = t
        out.append((int(s), int(e), str(tag)))
    return out

def word_spans_exact_spaces(text: str) -> List[Tuple[int,int]]:
    """Split by the exact space ' ' only (keeps indices)."""
    spans = []
    i, n = 0, len(text)
    while i < n:
        while i < n and text[i] == ' ':
            i += 1
        if i >= n: break
        j = i
        while j < n and text[j] != ' ':
            j += 1
        spans.append((i, j))  # [i, j)
        i = j
    return spans

def make_jsonl_if_needed(csv_path: str, train_path: str, val_path: str, val_frac: float, force: bool=False):
    if (not force) and pathlib.Path(train_path).exists() and pathlib.Path(val_path).exists():
        print("JSONL already exist; skipping regen. Set FORCE_REGEN=True to rebuild.")
        return

    print("Reading CSV:", csv_path)
    df = pd.read_csv(csv_path, sep=None, engine="python").dropna(subset=["sample","annotation"]).reset_index(drop=True)
    df.columns = df.columns.str.strip()

    rows = []
    bad_parse = 0
    bad_len_or_span = 0

    for _, r in df.iterrows():
        text = str(r["sample"])
        try:
            ann = parse_ann_cell(r["annotation"])
        except Exception:
            bad_parse += 1
            continue

        spans = word_spans_exact_spaces(text)
        if len(ann) != len(spans):
            bad_len_or_span += 1
            continue

        ok = all((s == ws and e == we) for (s, e, _), (ws, we) in zip(ann, spans))
        if not ok:
            bad_len_or_span += 1
            continue

        tags_str = " ".join(tag for _, _, tag in ann)
        rows.append({"sample": text, "tags": tags_str, "ann_tuples": ann})

    df2 = pd.DataFrame(rows)
    print(f"kept {len(df2)} rows; dropped parse_errors={bad_parse}, span/len_mismatches={bad_len_or_span}")
    if len(df2) == 0:
        raise RuntimeError("No valid rows after validation ‚Äî check indices/spaces.")

    # Entity label names (without 'O')
    entity_labels = sorted({
        tag.split("-", 1)[-1]
        for ann in df2["ann_tuples"]
        for (_, _, tag) in ann
        if tag != "O"
    })
    print("entity_labels:", entity_labels)

    # Train/val split (safe)
    if len(df2) == 1:
        train_df, val_df = df2.copy(), df2.iloc[[]].copy()
    else:
        val_n = max(1, int(round(len(df2) * val_frac)))
        val_n = min(val_n, len(df2) - 1)
        val_df = df2.sample(n=val_n, random_state=SEED)
        train_df = df2.drop(val_df.index).reset_index(drop=True)
        val_df = val_df.reset_index(drop=True)

    print(f"train={len(train_df)}, val={len(val_df)}")

    def to_messages(text: str, tags: str):
        n = len(tags.split())
        dev = {"role":"developer","content":(
            f"You are an NER tagger. Use BIO with labels: {', '.join(entity_labels)}. "
            f"Return exactly {n} tags separated by a single space ‚Äî one per whitespace-separated token. "
            "Allowed: O, B-<LABEL>, I-<LABEL>. Do not add or remove tokens."
        )}
        usr = {"role":"user","content": text}
        asst = {"role":"assistant","content": tags}
        return {"messages":[dev, usr, asst]}

    def dump_jsonl(df_in: pd.DataFrame, path: str):
        with open(path, "w", encoding="utf-8") as f:
            for _, r in df_in.iterrows():
                f.write(json.dumps(to_messages(r["sample"], r["tags"]), ensure_ascii=False) + "\n")

    dump_jsonl(train_df, train_path)
    dump_jsonl(val_df,   val_path)

    # Save entity labels for later reuse (inference)
    with open("entity_labels.json", "w", encoding="utf-8") as f:
        json.dump(entity_labels, f, ensure_ascii=False)

    print("Wrote:", train_path, "and", val_path)

make_jsonl_if_needed(DATA_CSV, TRAIN_JSONL, VAL_JSONL, VAL_FRAC, force=FORCE_REGEN)


Reading CSV: ../datasets/train_raw.csv
kept 27248 rows; dropped parse_errors=0, span/len_mismatches=3
entity_labels: ['BRAND', 'PERCENT', 'TYPE', 'VOLUME']
train=24523, val=2725
Wrote: train.jsonl and val.jsonl


In [3]:
from datasets import load_dataset, Features, Value
from transformers import AutoTokenizer

tok_for_render = AutoTokenizer.from_pretrained("openai/gpt-oss-20b", use_fast=True)

def map_row_to_text(ex):
    txt = tok_for_render.apply_chat_template(
        ex["messages"],
        add_generation_prompt=False,
        tokenize=False,
    )
    return {"text": txt}

features = Features({"text": Value("string")})
train_raw = load_dataset("json", data_files=TRAIN_JSONL, split="train")
val_raw   = load_dataset("json", data_files=VAL_JSONL,   split="train")

# Avoid stale cache; and don't use multiprocessing on Windows
train_ds = train_raw.map(map_row_to_text, remove_columns=train_raw.column_names,
                         features=features, load_from_cache_file=False)
val_ds   = val_raw.map(map_row_to_text,   remove_columns=val_raw.column_names,
                       features=features, load_from_cache_file=False)

print(train_ds.features)
print(train_ds[0]["text"][:300])


  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 24526/24526 [00:02<00:00, 11869.51 examples/s]
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2725/2725 [00:00<00:00, 11109.74 examples/s]

{'text': Value(dtype='string', id=None)}
<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.
Knowledge cutoff: 2024-06
Current date: 2025-09-27

Reasoning: medium

# Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>developer<|message|># Instructions

Yo





In [4]:
from unsloth import FastLanguageModel

# Use Unsloth's linearized gpt-oss 20B for QLoRA that fits ‚âà14 GB
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gpt-oss-20b",
    load_in_4bit = True,
    max_seq_length = MAX_LEN,
    dtype = None,            # auto (bf16 if supported)
    device_map = "auto",
)

# Insert LoRA into all linear layers (attn + experts)
model = FastLanguageModel.get_peft_model(
    model,
    r = RANK,
    lora_alpha = 2 * RANK,
    lora_dropout = 0.05,
    target_modules = "all-linear",
    bias = "none",
)

print("Model & tokenizer ready.")



Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel
Exception in thread Thread-5 (_readerthread):
Traceback (most recent call last):
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\site-packages\ipykernel\ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\subprocess.py", line 1515, in _readerthread
    buffer.append(fh.read())
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\codecs.py", line 322, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xad in position 7: invalid start byte


ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.


W0927 18:13:33.274000 25896 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


ü¶• Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.9.9: Fast Gpt_Oss patching. Transformers: 4.56.2.
   \\   /|    NVIDIA GeForce RTX 4070 Ti SUPER. Num GPUs = 1. Max memory: 15.992 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:06<00:00,  1.64s/it]
Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


Unsloth: Making `model.base_model.model.model` require gradients
Model & tokenizer ready.


In [5]:
from trl import SFTTrainer, SFTConfig
import torch

bf16_ok = torch.cuda.is_bf16_supported()

sft_args = SFTConfig(
    output_dir=OUTPUT_DIR,

    # —Ç–æ–∫–µ–Ω–∏–∑–∞—Ü–∏—è –∏ –±–∞—Ç—á–∏–Ω–≥
    max_seq_length=MAX_LEN,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=GRAD_ACC,
    packing=PACKING,

    # –æ–±—É—á–µ–Ω–∏–µ
    num_train_epochs=2,
    learning_rate=2e-4,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",

    # –ª–æ–≥/—Å–µ–π–≤—ã
    logging_steps=25,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=2,
    report_to="none",

    # precision
    bf16=bf16_ok,
    fp16=not bf16_ok,

    # –∫—Ä–∏—Ç–∏—á–Ω–æ –¥–ª—è Windows
    dataset_num_proc=1,
)

trainer = SFTTrainer(
    model=model,
    args=sft_args,
    train_dataset=train_ds,
    # –ø—Ä–∏ –∂–µ–ª–∞–Ω–∏–∏ –¥–æ–±–∞–≤—å –≤–∞–ª–∏–¥–∞—Ü–∏—é:
    # eval_dataset=val_ds,  # –∏ —Ç–æ–≥–¥–∞ –µ—â—ë sft_args.eval_strategy="steps"
    processing_class=tokenizer,
    dataset_text_field="text",
)

trainer.train()
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print("Saved adapter to:", OUTPUT_DIR)




The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998, 'pad_token_id': 200017}.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 24,526 | Num Epochs = 2 | Total steps = 3,066
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 16
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 16 x 1) = 16
 "-____-"     Trainable parameters = 3,981,312 of 20,918,738,496 (0.02% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
25,4.6203
50,1.0812
75,0.1534
100,0.0718
125,0.0648
150,0.0625
175,0.0629
200,0.0604
225,0.0618
250,0.0607


KeyboardInterrupt: 

In [8]:
import json

# Reload labels detected during data prep
# with open("entity_labels.json", "r", encoding="utf-8") as f:
#     ENTITY_LABELS = json.load(f)

def ner_predict(text: str, labels=None):
    labels = labels
    n = len(text.split(" "))
    dev = {"role":"developer","content":(
        f"You are an NER tagger. Use BIO with labels: {', '.join(labels)}. "
        f"Return exactly {n} tags separated by a single space ‚Äî one per whitespace-separated token. "
        "Allowed: O, B-<LABEL>, I-<LABEL>. Do not add or remove tokens."
    )}
    usr = {"role":"user","content": text}
    msgs = [dev, usr]

    inp = tokenizer.apply_chat_template(msgs, add_generation_prompt=True,
                                        tokenize=True, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inp,
            max_new_tokens=max(8, n*3),
            do_sample=False, temperature=0.0,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )
    resp = tokenizer.decode(out[0][inp["input_ids"].shape[1]:], skip_special_tokens=True).strip().splitlines()[0].strip()
    tags = resp.split()
    words = text.split(" ")
    if len(tags) < len(words): tags += ["O"] * (len(words) - len(tags))
    if len(tags) > len(words): tags = tags[:len(words)]
    return list(zip(words, tags))

# Quick smoke test:
ner_predict("–ö–æ–ª–∞ Zero 1.5 –ª–∏—Ç—Ä–∞", labels=['TYPE', 'BRAND', 'VOLUME', 'PERCENT'])


TypeError: peft.peft_model.PeftModelForCausalLM.generate() argument after ** must be a mapping, not Tensor

In [None]:
import pandas as pd
from seqeval.metrics import f1_score, classification_report

# Rebuild a tiny DataFrame from val_raw jsonl if needed:
val_texts, val_tags = [], []
import json as _json
with open(VAL_JSONL, "r", encoding="utf-8") as f:
    for line in f:
        obj = _json.loads(line)
        val_texts.append(obj["messages"][1]["content"])
        val_tags.append(obj["messages"][2]["content"])
val_frame = pd.DataFrame({"sample": val_texts, "tags": val_tags})

def quick_eval(val_df: pd.DataFrame, limit: int = 200):
    y_true, y_pred = [], []
    for _, r in val_df.head(limit).iterrows():
        gold = r["tags"].split()
        pred = [t for _, t in ner_predict(r["sample"])]
        y_true.append(gold); y_pred.append(pred)
    print("F1:", f1_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))

# quick_eval(val_frame, limit=200)


### –ò–∑ —á–µ–∫–ø–æ–∏–Ω—Ç–∞

In [1]:
import os, shutil, glob, pathlib, platform, torch

# 1) –ñ—ë—Å—Ç–∫–æ –æ—Ç–∫–ª—é—á–∞–µ–º –≤—Å—è–∫—É—é –∫–æ–º–ø–∏–ª—è—Ü–∏—é/–∞–≤—Ç–æ—Ç—é–Ω (–≤–∞–∂–Ω–æ —Å–¥–µ–ª–∞—Ç—å –î–û –∏–º–ø–æ—Ä—Ç–∞ unsloth)
os.environ["UNSLOTH_COMPILE_DISABLE"] = "1"          # –æ—Ç–∫–ª—é—á–∏—Ç—å torch.compile –≤–Ω—É—Ç—Ä–∏ Unsloth
os.environ["TORCHINDUCTOR_DISABLE"]   = "1"          # –æ—Ç–∫–ª—é—á–∏—Ç—å inductor —Ü–µ–ª–∏–∫–æ–º
os.environ["TRITON_AUTOTUNE"]         = "0"          # –æ—Ç–∫–ª—é—á–∏—Ç—å autotune –≤ Triton
os.environ["PYTORCH_TRITON_DISABLE_AUTOTUNE"] = "1"  # —Ç–æ –∂–µ

# (–æ–ø—Ü–∏–æ–Ω–∞–ª—å–Ω–æ) –∫–æ—Ä–æ—Ç–∫–∏–µ –ø—É—Ç–∏ –∫–µ—à–µ–π, —á—Ç–æ–±—ã —Ç–æ—á–Ω–æ –Ω–µ —É–ø–µ—Ä–µ—Ç—å—Å—è –≤ —Å—Ç—Ä–∞–Ω–Ω–æ—Å—Ç–∏ –ø—É—Ç–µ–π Windows
if platform.system() == "Windows":
    os.environ.setdefault("TORCHINDUCTOR_CACHE_DIR", r"C:\ti_cache")
    os.environ.setdefault("TRITON_CACHE_DIR",       r"C:\triton_cache")
    pathlib.Path(os.environ["TORCHINDUCTOR_CACHE_DIR"]).mkdir(parents=True, exist_ok=True)
    pathlib.Path(os.environ["TRITON_CACHE_DIR"]).mkdir(parents=True, exist_ok=True)

# 2) –ß–∏—Å—Ç–∏–º –ø—Ä–µ–¥—ã–¥—É—â–∏–µ —Å–∫–æ–º–ø–∏–ª–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ –∞—Ä—Ç–µ—Ñ–∞–∫—Ç—ã (–≤–∞–∂–Ω–æ, –∏–Ω–∞—á–µ –ø–æ–¥–Ω–∏–º–µ—Ç—Å—è —Å—Ç–∞—Ä—ã–π compiled-–º–æ–¥—É–ª—å)
shutil.rmtree("unsloth_compiled_cache", ignore_errors=True)

# –ò–Ω–æ–≥–¥–∞ –ø–æ–ª–µ–∑–Ω–æ –ø–æ–¥—á–∏—Å—Ç–∏—Ç—å –≤—Ä–µ–º–µ–Ω–Ω—ã–µ –∫–µ—à–∏ torchinductor –∏–∑ %TEMP%
for p in glob.glob(str(pathlib.Path.home() / "AppData/Local/Temp/torchinductor_*")):
    shutil.rmtree(p, ignore_errors=True)

# 3) –ù–∞ –≤—Å—è–∫–∏–π —Å–ª—É—á–∞–π –æ—Ç–∫–ª—é—á–∏–º Dynamo "–Ω–∞ —É—Ä–æ–≤–Ω–µ –ø—Ä–æ—Ü–µ—Å—Å–∞"
try:
    import torch._dynamo as dynamo
    dynamo.reset()
    dynamo.disable()
except Exception:
    pass

print("Compile disabled & caches cleaned. CUDA bf16:", torch.cuda.is_available() and torch.cuda.is_bf16_supported())


Compile disabled & caches cleaned. CUDA bf16: True


In [2]:
import os, glob, json, pathlib
from unsloth import FastLanguageModel
from peft import PeftModel

OUTPUT_DIR = "gptoss20b_ner_bio_unsloth"  # –ø–∞–ø–∫–∞, –≥–¥–µ –ª–µ–∂–∞—Ç checkpoint-XXXX
MAX_LEN = 1024

# 1) –≤—ã–±–∏—Ä–∞–µ–º –ø–æ—Å–ª–µ–¥–Ω–∏–π —á–µ–∫–ø–æ–∏–Ω—Ç
ckpts = sorted(glob.glob(os.path.join(OUTPUT_DIR, "checkpoint-*")),
               key=lambda p: int(p.rsplit("-", 1)[-1]))
assert ckpts, f"–í {OUTPUT_DIR} –Ω–µ—Ç checkpoint-*"
CHECKPOINT_DIR = ckpts[-1]
print("Using checkpoint:", CHECKPOINT_DIR)

# 2) –±–∞–∑–æ–≤–∞—è –º–æ–¥–µ–ª—å (4-–±–∏—Ç), –ª–∏–Ω–µ–π—Ä–∏–∑–æ–≤–∞–Ω–Ω–∞—è –ø–æ–¥ Unsloth
base, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/gpt-oss-20b",
    load_in_4bit=True,
    max_seq_length=MAX_LEN,
    dtype=None,
    device_map="auto",
)

# 3) –ø–æ–¥–∫–ª—é—á–∞–µ–º –∞–¥–∞–ø—Ç–µ—Ä –∏–∑ —á–µ–∫–ø–æ–∏–Ω—Ç–∞
model = PeftModel.from_pretrained(base, CHECKPOINT_DIR)
model.eval()

# (–Ω–∞ –≤—Å—è–∫–∏–π —Å–ª—É—á–∞–π) –ø–∞–¥-—Ç–æ–∫–µ–Ω
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# —Å–ø–∏—Å–æ–∫ –º–µ—Ç–æ–∫ —Å—É—â–Ω–æ—Å—Ç–µ–π
with open("entity_labels.json", "r", encoding="utf-8") as f:
    ENTITY_LABELS = json.load(f)

print("Model ready. Labels:", ENTITY_LABELS)


  from .autonotebook import tqdm as notebook_tqdm
Exception in thread Thread-4 (_readerthread):
Traceback (most recent call last):
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\site-packages\ipykernel\ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\subprocess.py", line 1515, in _readerthread
    buffer.append(fh.read())
  File "c:\Users\lexan\miniconda3\envs\gpt310\lib\codecs.py", line 322, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xad in position 7: invalid start byte


ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.


W0928 11:52:19.589000 4184 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


ü¶• Unsloth Zoo will now patch everything to make training faster!
Using checkpoint: gptoss20b_ner_bio_unsloth\checkpoint-500
==((====))==  Unsloth 2025.9.9: Fast Gpt_Oss patching. Transformers: 4.56.2.
   \\   /|    NVIDIA GeForce RTX 4070 Ti SUPER. Num GPUs = 1. Max memory: 15.992 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:07<00:00,  1.85s/it]


Model ready. Labels: ['BRAND', 'PERCENT', 'TYPE', 'VOLUME']


In [None]:
import re, torch
from transformers import GenerationConfig

def _as_int(x):
    if x is None: return None
    if isinstance(x, (list, tuple)): x = x[0]
    return int(x)

def _safe_gen_config(model, tokenizer, max_new_tokens: int):
    # —Å–æ–∑–¥–∞—ë–º ¬´—á–∏—Å—Ç—É—é¬ª –∫–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏—é –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –∏ –∑–∞–ø–æ–ª–Ω—è–µ–º —Ä—É–∫–∞–º–∏
    gc = GenerationConfig()
    eos_id = _as_int(tokenizer.eos_token_id)
    pad_id = _as_int(tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id)
    bos_id = _as_int(tokenizer.bos_token_id)
    gc.eos_token_id = eos_id
    gc.pad_token_id = pad_id
    gc.bos_token_id = bos_id
    gc.max_new_tokens = int(max_new_tokens)
    gc.do_sample = False
    gc.temperature = None
    gc.num_beams = 1
    gc.use_cache = True
    return gc

@torch.inference_mode()
def ner_predict(text: str, labels=None):
    labels = labels or ENTITY_LABELS

    # –Ω–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏—è –ø—Ä–æ–±–µ–ª–æ–≤ –∫ –æ–¥–Ω–æ–º—É –ø—Ä–æ–±–µ–ª—É
    norm_text = re.sub(r"[ ]+", " ", text.strip())
    n = len(norm_text.split(" "))

    dev = {"role": "developer", "content": (
        f"You are an NER tagger. Use BIO with labels: {', '.join(labels)}. "
        f"Return exactly {n} tags separated by a single space ‚Äî one per whitespace-separated token. "
        "Allowed: O, B-<LABEL>, I-<LABEL>. Do not add or remove tokens."
    )}
    usr = {"role": "user", "content": norm_text}
    messages = [dev, usr]

    # 1) —Ç–µ–∫—Å—Ç –ø—Ä–æ–º–ø—Ç–∞ –ø–æ Harmony-—à–∞–±–ª–æ–Ω—É
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

    # 2) —Ç–æ–∫–µ–Ω–∏–∑–∞—Ü–∏—è –≤ mapping –∏ –ø–µ—Ä–µ–Ω–æ—Å –Ω–∞ –¥–µ–≤–∞–π—Å
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(model.device).long() for k, v in inputs.items()}

    # 3) ¬´–±–µ–∑–æ–ø–∞—Å–Ω–∞—è¬ª –∫–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏—è –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏
    gen_cfg = _safe_gen_config(model, tokenizer, max_new_tokens=max(8, n * 3))

    # 4) –í–ê–ñ–ù–û: –ø—Ä–∏–Ω—É–¥–∏—Ç–µ–ª—å–Ω–æ –≤—ã–∫–ª—é—á–∞–µ–º any-compile –≤–æ –≤—Ä–µ–º—è generate
    try:
        import torch._dynamo as dynamo
        with dynamo.config.patch(cache_size_limit=0), torch._dynamo.disable():
            gen_ids = model.generate(**inputs, generation_config=gen_cfg)
    except Exception:
        # –µ—Å–ª–∏ –º–æ–¥—É–ª—è –Ω–µ—Ç/—Å–ª–æ–º–∞–ª—Å—è –ø–∞—Ç—á ‚Äî –ø—Ä–æ—Å—Ç–æ –±–µ–∑ –Ω–µ–≥–æ
        gen_ids = model.generate(**inputs, generation_config=gen_cfg)

    # 5) –¥–µ–∫–æ–¥ –∏ –≤—ã—Ä–∞–≤–Ω–∏–≤–∞–Ω–∏–µ –¥–ª–∏–Ω—ã
    out_text = tokenizer.decode(
        gen_ids[0, inputs["input_ids"].shape[1]:],
        skip_special_tokens=True
    ).strip().splitlines()[0].strip()

    tags = out_text.split()
    words = norm_text.split(" ")
    if len(tags) < len(words): tags += ["O"] * (len(words) - len(tags))
    if len(tags) > len(words): tags = tags[:len(words)]
    return list(zip(words, tags))


`generation_config` default values have been modified to match model-specific defaults: {'max_length': 131072, 'do_sample': True}. If this is not desired, please set these values explicitly.


[('–ö–æ–ª–∞', 'finalB-TYPE'), ('Zero', 'I-TYPE'), ('1.5', 'O'), ('–ª–∏—Ç—Ä–∞', 'O')]


In [5]:

# smoke-test
print(ner_predict("—è–µ—Ä–Ω–æ–≥–æ–ª–æ–≤–∫–∞ 1.5–ª"))


[('—è–µ—Ä–Ω–æ–≥–æ–ª–æ–≤–∫–∞', 'final—è–µ—Ä–Ω–æ–≥–æ–ª–æ–≤–∫–∞'), ('1.5–ª', 'O')]
