In [2]:
from pathlib import Path
import sys
# Locate the project root: the current working directory when it contains a
# "src" folder, otherwise the directory one level up.
_cwd = Path.cwd()
if (_cwd / "src").exists():
    ROOT = _cwd.resolve()
else:
    ROOT = _cwd.resolve().parent

# Make the project root available on sys.path (added once, at the end).
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

In [3]:
from src.train.sft_to_alpaca import to_alpaca
# Convert the SFT training file to an Alpaca-format JSONL written next to it.
sft_dir = ROOT / "data" / "sft"
to_alpaca(sft_dir / "train.jsonl", sft_dir / "alpaca.train.jsonl")

[OK] wrote 130 alpaca examples → D:\IIT BBS\Job Resources\Business Optima\pdf-agent\data\sft\alpaca.train.jsonl


In [4]:
# 05_train_lora_llamacpp.ipynb — Cell 1: setup
# Derives the working directories and input files from ROOT, creates the LoRA
# output directory, and reports whether the expected inputs are present.

DATA, MODELS = ROOT / "data", ROOT / "models"
ALPACA = DATA / "sft" / "alpaca.train.jsonl"

# Base model in GGUF format. Swap the file name to use a different model, e.g.
#   BASE_GGUF = MODELS / "TinyLlama-1.1B-Chat-v1.0.Q5_K_M.gguf"
BASE_GGUF = MODELS / "llama-3.1-8b-instruct-f16.gguf"

OUT = ROOT / "outputs" / "lora"
OUT.mkdir(exist_ok=True, parents=True)

# One status line per item, in a fixed order.
for label, value in (
    ("ROOT:", ROOT),
    ("ALPACA exists?", ALPACA.exists()),
    ("BASE_GGUF exists?", BASE_GGUF.exists()),
    ("OUT:", OUT),
):
    print(label, value)

ROOT: D:\IIT BBS\Job Resources\Business Optima\pdf-agent
ALPACA exists? True
BASE_GGUF exists? True
OUT: D:\IIT BBS\Job Resources\Business Optima\pdf-agent\outputs\lora


In [None]:
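# # Cell 2: make a tiny subset for faster CPU LoRA (optional)
# # NOTE: Cell 3 references alpaca.train.small.jsonl, which is only created when this cell is un-commented and run.
# SUBSET = DATA / "sft" / "alpaca.train.small.jsonl"
# keep = 200  # adjust down if CPU is struggling (the conversion above reported 130 rows, so 200 keeps everything)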

# cnt = 0
# with open(SUBSET, "w", encoding="utf-8") as w, open(ALPACA, "r", encoding="utf-8") as r:
#     for line in r:
#         w.write(line)
#         cnt += 1
#         if cnt >= keep:
#             break

# print("[OK] wrote subset:", SUBSET, "| rows:", cnt)

In [None]:
# Cell 3: generate a PowerShell script to launch CPU LoRA finetune
#
# Writes src/train/llamacpp_finetune.ps1, which invokes the llama.cpp finetune
# executable on the Alpaca-format training data and saves a LoRA adapter under
# OUT. Nothing is trained in this cell; the script is run separately.
LLAMACPP_DIR = ROOT / "llama.cpp"            # adjust if your llama.cpp lives elsewhere
FINETUNE_EXE = LLAMACPP_DIR / "finetune.exe" # or "llama-finetune.exe" depending on your build

ADAPTER_OUT = OUT / "title17-lora.gguf"

# Prefer the small subset for CPU runs, but only when it has actually been
# generated; otherwise fall back to the full set so the script never points
# at a file that does not exist.
TRAIN_SUBSET = DATA / "sft" / "alpaca.train.small.jsonl"
TRAIN_FILE = TRAIN_SUBSET if TRAIN_SUBSET.exists() else ALPACA

# NOTE(review): the finetune flag names below differ between llama.cpp
# versions — confirm them against the executable's --help output.
ps = f"""
# PowerShell: CPU LoRA finetune with llama.cpp
$ErrorActionPreference = "Stop"

$exe = "{FINETUNE_EXE.as_posix()}"
if (!(Test-Path $exe)) {{
  Write-Error "finetune.exe not found at $exe — build llama.cpp first."
}}

# Training data chosen when this script was generated
# (small subset if it existed, otherwise the full set)
$train = "{TRAIN_FILE.as_posix()}"
# $train = "{ALPACA.as_posix()}"  # <- use this if you want full set (slower)
if (!(Test-Path $train)) {{
  Write-Error "training data not found at $train"
}}

# Params tuned for CPU feasibility
& $exe `
  --model "{BASE_GGUF.as_posix()}" `
  --train-data "$train" `
  --out-lora "{ADAPTER_OUT.as_posix()}" `
  --lora-r 8 `
  --lora-alpha 16 `
  --epochs 1 `
  --batch 1 `
  --seq-len 512 `
  --threads 8

if ($LASTEXITCODE -ne 0) {{ exit $LASTEXITCODE }}
Write-Host "[OK] LoRA saved to {ADAPTER_OUT.as_posix()}"
"""

PS1 = ROOT / "src" / "train" / "llamacpp_finetune.ps1"
PS1.parent.mkdir(parents=True, exist_ok=True)
# Write with a UTF-8 BOM: Windows PowerShell 5.1 decodes BOM-less scripts with
# the ANSI code page, which garbles the non-ASCII dash in the Write-Error
# string (and any non-ASCII path characters) and can break parsing.
PS1.write_text(ps, encoding="utf-8-sig")
print("[OK] wrote", PS1)
print("Training data:", TRAIN_FILE)
print("Run in PowerShell from project root:\n  powershell -ExecutionPolicy Bypass -File", PS1.relative_to(ROOT))


In [None]:
# Cell 4: write an Ollama Modelfile that layers the LoRA adapter

# Ollama base model tag used in the FROM line; it must already be available
# locally. Change this if you serve a different tag.
# NOTE(review): the adapter is trained against BASE_GGUF from Cell 1 — confirm
# this Ollama tag corresponds to the same base model.
base_ollama = "llama3.1:8b-instruct-q8_0"

MODELDIR = ROOT / "configs" / "Modelfiles"
MODELFILE = MODELDIR / "title17-lora.Modelfile"
MODELDIR.mkdir(exist_ok=True, parents=True)

# Assemble the Modelfile line by line; the trailing "" yields the final newline.
txt = "\n".join([
    "# Modelfile layering a llama.cpp LoRA (GGUF)",
    f"FROM {base_ollama}",
    f"ADAPTER {ADAPTER_OUT.as_posix()}",
    "",
    "PARAMETER temperature 0.2",
    "",
])
MODELFILE.write_text(txt, encoding="utf-8")
print("[OK] wrote", MODELFILE)


In [None]:
# Cell 5: show shell commands to register & run in Ollama
# The Modelfile path is wrapped in double quotes because the project root may
# contain spaces, which would otherwise split the -f argument when the command
# is pasted into a shell.
print("Register model:")
print(f'  ollama create title17-lora -f "{MODELFILE.as_posix()}"')

print("\nTest it:")
print('  ollama run title17-lora "Summarize §107 fair use in 3 bullets. Cite pages if known."')