In [9]:
%pip -q install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
%pip -q install -U "pandas==2.2.2" "pyarrow==21.0.0"
%pip -q install -U "transformers>=4.44.2" "accelerate>=0.34.2"
%pip -q install -U transformers datasets accelerate peft bitsandbytes --no-deps

In [28]:
import os, sys, pathlib

IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
    from google.colab import drive
    drive.mount("/content/drive", force_remount=True)

BASE_DIR = "/content/drive/MyDrive" if IN_COLAB else os.getcwd()
PROJECT  = "infoxp_ai"
WORK_DIR = f"{BASE_DIR}/{PROJECT}"
DATA_DIR = f"{WORK_DIR}/data"
OUT_DIR  = f"{WORK_DIR}/checkpoints"   # for resume
FINAL_DIR= f"{WORK_DIR}/final"         # merged HF model
GGUF_DIR = f"{WORK_DIR}/gguf"          # Ollama export

for d in (DATA_DIR, OUT_DIR, FINAL_DIR, GGUF_DIR):
    os.makedirs(d, exist_ok=True)

CSV_PATH = f"{DATA_DIR}/cleaned_games.csv"
print("DATA_DIR:", DATA_DIR)
print("OUT_DIR:", OUT_DIR)
print("FINAL_DIR:", FINAL_DIR)
print("GGUF_DIR:", GGUF_DIR)
print("Expect CSV at:", CSV_PATH)

Mounted at /content/drive
DATA_DIR: /content/drive/MyDrive/infoxp_ai/data
OUT_DIR: /content/drive/MyDrive/infoxp_ai/checkpoints
FINAL_DIR: /content/drive/MyDrive/infoxp_ai/final
GGUF_DIR: /content/drive/MyDrive/infoxp_ai/gguf
Expect CSV at: /content/drive/MyDrive/infoxp_ai/data/cleaned_games.csv


In [11]:
MODEL_ID = "Qwen/Qwen2.5-3B-Instruct"

SEED = 42
MAX_LEN = 1024
BATCH_PER_DEVICE = 2
GRAD_ACC = 8
LR = 2e-4
EPOCHS = 1
SAVE_STEPS = 200
SAVE_TOTAL = 3

LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05

In [12]:
import pandas as pd
from datasets import Dataset

assert os.path.isfile(CSV_PATH), f"Missing: {CSV_PATH}"
df = pd.read_csv(CSV_PATH)

if "text" in df.columns:
    texts = df["text"].astype(str).tolist()
else:
    def row_to_text(sr):
        parts = []
        for c, v in sr.items():
            if pd.isna(v):
                continue
            s = str(v).strip()
            if s:
                parts.append(f"{c}: {s}")
        return " | ".join(parts)
    texts = [row_to_text(r) for _, r in df.iterrows()]

train_ds = Dataset.from_dict({"text": texts}).shuffle(seed=SEED)
len(train_ds), texts[0][:300]

(156,
 'AppID: 1172470 | Name: Apex Legends™ | Release date: 2020-11-04 | Required age: 0 | About the game: Conquer with character in Apex Legends, a free-to-play* Hero shooter where legendary characters with powerful abilities team up to battle for fame &amp; fortune on the fringes of the Frontier. Master ')

In [13]:
import torch
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True, trust_remote_code=True)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token
tok.pad_token_id, tok.eos_token_id

(151643, 151645)

In [14]:
def tok_fn(batch):
    return tok(batch["text"], truncation=True, max_length=MAX_LEN)

tokd = train_ds.map(tok_fn, batched=True, remove_columns=["text"])
len(tokd)

Map:   0%|          | 0/156 [00:00<?, ? examples/s]

156

In [15]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_cfg,
    device_map="auto",
    trust_remote_code=True,
)

lora_cfg = LoraConfig(
    r=LORA_R, lora_alpha=LORA_ALPHA, lora_dropout=LORA_DROPOUT,
    bias="none", task_type=TaskType.CAUSAL_LM
)
model = get_peft_model(base, lora_cfg)
model.print_trainable_parameters()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 3,686,400 || all params: 3,089,625,088 || trainable%: 0.1193


In [18]:
# Cell A — rebuild model correctly for QLoRA
import gc, torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training

# free VRAM if a previous model existed
try:
    del model, base
except:
    pass
gc.collect(); torch.cuda.empty_cache()

bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_cfg,
    device_map="auto",
    trust_remote_code=True,
)

# Prepare for k-bit training: sets input requires_grad, norm cast, use_cache=False, etc.
base = prepare_model_for_kbit_training(base)
base.gradient_checkpointing_enable()
base.config.use_cache = False

lora_cfg = LoraConfig(
    r=LORA_R, lora_alpha=LORA_ALPHA, lora_dropout=LORA_DROPOUT,
    bias="none", task_type=TaskType.CAUSAL_LM
)
model = get_peft_model(base, lora_cfg)
model.print_trainable_parameters()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 3,686,400 || all params: 3,089,625,088 || trainable%: 0.1193


In [19]:
# Cell B — memory-safe training from scratch (creates checkpoints)
import os, pathlib
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

os.environ["WANDB_DISABLED"] = "true"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# keep small seq/batch; adjust if you still OOM
MAX_LEN = 512
BATCH_PER_DEVICE = 1
GRAD_ACC = 16
SAVE_STEPS = 20
SAVE_TOTAL = 3

# re-tokenize if MAX_LEN changed
def tok_fn(batch): return tok(batch["text"], truncation=True, max_length=MAX_LEN)
tokd = train_ds.map(tok_fn, batched=True, remove_columns=["text"])

data_collator = DataCollatorForLanguageModeling(tokenizer=tok, mlm=False)

args = TrainingArguments(
    output_dir=OUT_DIR,
    per_device_train_batch_size=BATCH_PER_DEVICE,
    gradient_accumulation_steps=GRAD_ACC,
    learning_rate=LR,
    num_train_epochs=EPOCHS,
    logging_steps=10,
    save_steps=SAVE_STEPS,
    save_total_limit=SAVE_TOTAL,
    fp16=torch.cuda.is_available(),
    optim="paged_adamw_8bit",
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokd,
    data_collator=data_collator,
    tokenizer=tok,
)

trainer.train()

# list checkpoints
ckpts = sorted([p for p in pathlib.Path(OUT_DIR).glob("checkpoint-*") if p.is_dir()])
print("Checkpoints:", [str(p) for p in ckpts][-5:])


Map:   0%|          | 0/156 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


Step,Training Loss
10,2.5546


Checkpoints: ['/content/drive/MyDrive/infoxp_ai/checkpoints/checkpoint-10']


In [20]:
trainer.train(resume_from_checkpoint=last_ckpt)

Step,Training Loss
10,2.4561


TrainOutput(global_step=10, training_loss=2.4561435699462892, metrics={'train_runtime': 182.8685, 'train_samples_per_second': 0.853, 'train_steps_per_second': 0.055, 'total_flos': 1110522741350400.0, 'train_loss': 2.4561435699462892, 'epoch': 1.0})

In [21]:
ADAPTER_DIR = f"{FINAL_DIR}/lora_adapter"
os.makedirs(ADAPTER_DIR, exist_ok=True)
model.save_pretrained(ADAPTER_DIR)
tok.save_pretrained(FINAL_DIR)
print("Adapter ->", ADAPTER_DIR)

Adapter -> /content/drive/MyDrive/infoxp_ai/final/lora_adapter


In [22]:
from peft import PeftModel
from transformers import AutoModelForCausalLM

MERGED_DIR = f"{FINAL_DIR}/merged_hf"
os.makedirs(MERGED_DIR, exist_ok=True)

full_base = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    trust_remote_code=True,
)
peft_loaded = PeftModel.from_pretrained(full_base, ADAPTER_DIR)
merged = peft_loaded.merge_and_unload()

if merged.config.pad_token_id is None:
    merged.config.pad_token_id = merged.config.eos_token_id

merged.save_pretrained(MERGED_DIR, safe_serialization=True)
tok.save_pretrained(MERGED_DIR)
print("Merged HF ->", MERGED_DIR)

`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Merged HF -> /content/drive/MyDrive/infoxp_ai/final/merged_hf


In [23]:
import os, subprocess

LLAMACPP_DIR = "/content/llama.cpp"
if not os.path.isdir(LLAMACPP_DIR):
    !git clone -q https://github.com/ggerganov/llama.cpp.git {LLAMACPP_DIR}
%cd {LLAMACPP_DIR}
!cmake -S . -B build -DCMAKE_BUILD_TYPE=Release >/dev/null
!cmake --build build -j >/dev/null

os.makedirs(GGUF_DIR, exist_ok=True)
GGUF_BASE = f"{GGUF_DIR}/infoxp-f16.gguf"
!python3 convert_hf_to_gguf.py "{MERGED_DIR}" --outfile "{GGUF_BASE}" --vocab-dir "{MERGED_DIR}"

GGUF_Q4 = f"{GGUF_DIR}/infoxp-q4_k_m.gguf"
!./build/bin/llama-quantize "{GGUF_BASE}" "{GGUF_Q4}" q4_K_M

!ls -lh {GGUF_DIR}
%cd - >/dev/null

/content/llama.cpp
[0mCMAKE_BUILD_TYPE=Release[0m
Traceback (most recent call last):
  File "/content/llama.cpp/convert_hf_to_gguf.py", line 32, in <module>
    from mistral_common.tokens.tokenizers.base import TokenizerVersion
ModuleNotFoundError: No module named 'mistral_common'
main: build = 6688 (898acba6)
main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04.2) 11.4.0 for x86_64-linux-gnu
main: quantizing '/content/drive/MyDrive/infoxp_ai/gguf/infoxp-f16.gguf' to '/content/drive/MyDrive/infoxp_ai/gguf/infoxp-q4_k_m.gguf' as Q4_K_M
gguf_init_from_file: failed to open GGUF file '/content/drive/MyDrive/infoxp_ai/gguf/infoxp-f16.gguf'
llama_model_quantize: failed to quantize: llama_model_loader: failed to load model from /content/drive/MyDrive/infoxp_ai/gguf/infoxp-f16.gguf
main: failed to quantize model from '/content/drive/MyDrive/infoxp_ai/gguf/infoxp-f16.gguf'
total 0
[Errno 2] No such file or directory: '- >/dev/null'
/content/llama.cpp


In [None]:
# Qwen2.5 → GGUF → Modelfile → ZIP
import os, sys, pathlib, zipfile, subprocess

# Paths
IN_COLAB = "google.colab" in sys.modules
BASE_DIR   = "/content/drive/MyDrive"
PROJECT    = "infoxp_ai"
WORK_DIR   = globals().get("WORK_DIR", f"{BASE_DIR}/{PROJECT}")
MERGED_DIR = globals().get("MERGED_DIR", f"{WORK_DIR}/final/merged_hf")
GGUF_DIR   = globals().get("GGUF_DIR", f"{WORK_DIR}/gguf")
LLAMACPP_DIR = "/content/llama.cpp"

os.makedirs(GGUF_DIR, exist_ok=True)

# Sanity checks
assert os.path.isdir(MERGED_DIR), f"Missing merged HF model at {MERGED_DIR}"
assert os.path.isfile(f"{MERGED_DIR}/config.json"), "config.json missing"
assert any(os.path.isfile(os.path.join(MERGED_DIR, f)) for f in ("tokenizer.json","tokenizer.model")), "tokenizer file missing"

#  Clone + build llama.cpp
if not os.path.isdir(LLAMACPP_DIR):
    subprocess.run(["git","clone","--depth","1","https://github.com/ggerganov/llama.cpp",LLAMACPP_DIR], check=True)
subprocess.run(["cmake","-S",LLAMACPP_DIR,"-B",f"{LLAMACPP_DIR}/build","-DCMAKE_BUILD_TYPE=Release"], check=True)
subprocess.run(["cmake","--build",f"{LLAMACPP_DIR}/build","-j"], check=True)

# Converter deps (fixes 'mistral_common' and friends)
subprocess.run([sys.executable,"-m","pip","install","-U","-r",f"{LLAMACPP_DIR}/requirements.txt"], check=True)

# Convert HF → GGUF (float16). Use model hint for Qwen2.5.
GGUF_BASE = f"{GGUF_DIR}/infoxp-f16.gguf"
if not os.path.isfile(GGUF_BASE):
    subprocess.run(
        [sys.executable, "convert_hf_to_gguf.py", MERGED_DIR, "--model-name", "qwen2", "--outfile", GGUF_BASE],
        cwd=LLAMACPP_DIR, check=True
    )
assert os.path.isfile(GGUF_BASE), "convert_hf_to_gguf failed"

# Quantize to Q4_K_M for Ollama
GGUF_Q4 = f"{GGUF_DIR}/infoxp-q4_k_m.gguf"
subprocess.run([f"{LLAMACPP_DIR}/build/bin/llama-quantize", GGUF_BASE, GGUF_Q4, "q4_K_M"], check=True)
assert os.path.isfile(GGUF_Q4), "llama-quantize failed"

# Modelfile + ZIP bundle
MODEFILE_PATH = f"{GGUF_DIR}/Modelfile"
with open(MODEFILE_PATH, "w", encoding="utf-8") as f:
    f.write("FROM ./infoxp-q4_k_m.gguf\nPARAMETER temperature 0.7\nPARAMETER top_p 0.9\n")

ZIP_PATH = f"{WORK_DIR}/infoxp_ollama_package.zip"
with zipfile.ZipFile(ZIP_PATH, "w", zipfile.ZIP_DEFLATED) as z:
    z.write(GGUF_Q4, arcname="infoxp-q4_k_m.gguf")
    z.write(MODEFILE_PATH, arcname="Modelfile")

print("GGUF:", GGUF_Q4)
print("Package:", ZIP_PATH)
print("Local usage:\n  ollama create infoxp -f Modelfile\n  ollama run infoxp")


GGUF: /content/drive/MyDrive/infoxp_ai/gguf/infoxp-q4_k_m.gguf
Package: /content/drive/MyDrive/infoxp_ai/infoxp_ollama_package.zip
Local usage:
  ollama create infoxp -f Modelfile
  ollama run infoxp
