# Project Deep Dive: NPC Lore LoRA Training & Merging

This notebook automates the process of fine-tuning, merging, and quantizing a Llama 3 8B model with custom lore for the Project Deep Dive game.

### Workflow:
1.  **Configuration:** Set your desired output name and training parameters in Cell 2.
2.  **Login:** Run Cell 4 to log into Hugging Face (only needs to be done once).
3.  **Training:** Run Cell 6 to train the LoRA adapter using your GPU.
4.  **Merge & Quantize:** Run Cells 8, 9, and 10 to merge the LoRA into the base model and create a final GGUF file.
5.  **Deployment:** Load your new, custom `...-merged.gguf` file directly into LM Studio.

In [None]:
import os
import sys
import json
import subprocess
import shutil
from pathlib import Path

# --- 1. CORE CONFIGURATION ---
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
LORA_OUTPUT_NAME = "ProjectDeepDive-Lora-v1"
DATASET_NAME = "lore_training_data"

# --- 2. TRAINING HYPERPARAMETERS ---
EPOCHS = 5.0
BATCH_SIZE = 1
GRADIENT_ACCUMULATION = 4

# --- 2b. GGUF EXPORT SETTINGS ---
QUANT_TYPE = "q4_k_m"
KEEP_FP16_INTERMEDIATE = False
CONVERTER_SUPPORTED_OUTTYPES = {"f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"}

# --- 3. ENVIRONMENT & PATH SETUP ---
PROJECT_ROOT_OVERRIDE = r""  # Optional absolute path if the notebook starts elsewhere.
LLAMA_FACTORY_PATH = Path(os.environ.get("LLAMA_FACTORY_PATH", r"C:\Users\ruben\Documents\TrainingAI\LLaMA-Factory")).expanduser().resolve()
LLAMA_CPP_PATH = Path(os.environ.get("LLAMA_CPP_PATH", r"C:\Users\ruben\Documents\TrainingAI\llama.cpp")).expanduser().resolve()
LM_STUDIO_MODELS_DIR = Path(os.environ.get("LM_STUDIO_MODELS_DIR", r"C:\Users\ruben\.lmstudio\models")).expanduser().resolve()
HF_TOKEN = os.environ.get("HF_TOKEN", "...").strip()
PYTHON_EXECUTABLE = os.environ.get("PYTHON", sys.executable)

DATASET_FILE_NAME = f"{DATASET_NAME}.json"
DATASET_INFO_FILE_NAME = "dataset_info.json"


def resolve_project_root() -> Path:
    if PROJECT_ROOT_OVERRIDE:
        candidate = Path(PROJECT_ROOT_OVERRIDE).expanduser().resolve()
        if candidate.is_dir():
            return candidate
        raise FileNotFoundError(f"CRITICAL: PROJECT_ROOT_OVERRIDE='{PROJECT_ROOT_OVERRIDE}' does not exist.")

    cwd = Path().resolve()
    for candidate in [cwd, *cwd.parents]:
        dataset_file = candidate / DATASET_FILE_NAME
        dataset_info_file = candidate / DATASET_INFO_FILE_NAME
        if dataset_file.exists() and dataset_info_file.exists():
            return candidate

    raise FileNotFoundError(
        "CRITICAL: Unable to locate dataset files. Set PROJECT_ROOT_OVERRIDE to the folder that contains both "
        f"'{DATASET_FILE_NAME}' and '{DATASET_INFO_FILE_NAME}'."
    )


PROJECT_ROOT = resolve_project_root()
model_folder_name = MODEL_ID.split('/')[-1]
DATASET_FILE_PATH = PROJECT_ROOT / DATASET_FILE_NAME
DATASET_INFO_FILE_PATH = PROJECT_ROOT / DATASET_INFO_FILE_NAME

if not DATASET_FILE_PATH.exists():
    raise FileNotFoundError(f"CRITICAL: Dataset file not found at '{DATASET_FILE_PATH}'.")
if not DATASET_INFO_FILE_PATH.exists():
    raise FileNotFoundError(f"CRITICAL: dataset_info.json not found at '{DATASET_INFO_FILE_PATH}'.")

ARTIFACTS_ROOT = PROJECT_ROOT / "artifacts"
LORA_OUTPUT_DIR = ARTIFACTS_ROOT / "lora_adapters" / model_folder_name / LORA_OUTPUT_NAME
MERGED_MODEL_DIR = ARTIFACTS_ROOT / "merged_models" / f"{model_folder_name}-{LORA_OUTPUT_NAME}"
FINAL_GGUF_DIR = PROJECT_ROOT / "final_gguf_models"
FINAL_GGUF_FILE = FINAL_GGUF_DIR / f"{model_folder_name}-{LORA_OUTPUT_NAME}-{QUANT_TYPE.upper()}.gguf"

for path in [ARTIFACTS_ROOT, LORA_OUTPUT_DIR.parent, MERGED_MODEL_DIR.parent, FINAL_GGUF_DIR]:
    path.mkdir(parents=True, exist_ok=True)

LLAMA_FACTORY_SRC = LLAMA_FACTORY_PATH / "src"
if not LLAMA_FACTORY_SRC.exists():
    raise FileNotFoundError(f"CRITICAL: '{LLAMA_FACTORY_SRC}' not found. Verify LLAMA_FACTORY_PATH.")

TRAIN_SCRIPT = LLAMA_FACTORY_SRC / "train.py"

CONVERT_SCRIPT = None
for script_name in ("convert-hf-to-gguf.py", "convert_hf_to_gguf.py"):
    candidate = LLAMA_CPP_PATH / script_name
    if candidate.exists():
        CONVERT_SCRIPT = candidate
        break


def detect_quantize_binary(root: Path) -> Path | None:
    candidates = [
        root / "build" / "bin" / "quantize",
        root / "build" / "bin" / "quantize.exe",
        root / "build" / "bin" / "Release" / "quantize.exe",
        root / "build" / "Release" / "quantize.exe",
        root / "build" / "quantize.exe",
        root / "quantize",
        root / "quantize.exe",
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate.resolve()
    return None

QUANTIZE_BINARY = detect_quantize_binary(LLAMA_CPP_PATH)

if not TRAIN_SCRIPT.exists():
    raise FileNotFoundError(f"CRITICAL: train.py not found at '{TRAIN_SCRIPT}'. Update LLAMA_FACTORY_PATH.")
if CONVERT_SCRIPT is None:
    raise FileNotFoundError(
        "CRITICAL: convert-hf-to-gguf.py (or convert_hf_to_gguf.py) not found inside llama.cpp. "
        "Update LLAMA_CPP_PATH to point at your llama.cpp clone."
    )


def build_llamafactory_env() -> dict[str, str]:
    env = os.environ.copy()
    llama_src_str = str(LLAMA_FACTORY_SRC)
    existing = env.get("PYTHONPATH", "")
    paths = [p for p in existing.split(os.pathsep) if p]
    if llama_src_str not in paths:
        paths.insert(0, llama_src_str)
    env["PYTHONPATH"] = os.pathsep.join(paths) if paths else llama_src_str
    return env


LLAMA_FACTORY_ENV = build_llamafactory_env()

print("‚úÖ Configuration loaded successfully.")
print(f"   Model ID: {MODEL_ID}")
print(f"   Dataset: {DATASET_NAME}")
print(f"   Notebook Project Root: {PROJECT_ROOT}")
print(f"   LLaMA-Factory Path: {LLAMA_FACTORY_PATH}")
print(f"   llama.cpp Path: {LLAMA_CPP_PATH}")
print(f"   LoRA Output Directory: {LORA_OUTPUT_DIR}")
print(f"   Merge Output Directory: {MERGED_MODEL_DIR}")
print(f"   Final GGUF Directory: {FINAL_GGUF_DIR}")
print(f"   Final GGUF File: {FINAL_GGUF_FILE}")
print(f"   Quantize Binary: {QUANTIZE_BINARY if QUANTIZE_BINARY else 'Not found (build llama.cpp to enable Q4+)'}")

‚úÖ Configuration loaded successfully.
   Model ID: meta-llama/Meta-Llama-3-8B-Instruct
   Dataset: lore_training_data
   Notebook Project Root: R:\Files Ruben\GitRepos\DeepDiveV2AI
   LLaMA-Factory Path: C:\Users\ruben\Documents\TrainingAI\LLaMA-Factory
   llama.cpp Path: C:\Users\ruben\Documents\TrainingAI\llama.cpp
   LoRA Output Directory: R:\Files Ruben\GitRepos\DeepDiveV2AI\artifacts\lora_adapters\Meta-Llama-3-8B-Instruct\ProjectDeepDive-Lora-v1
   Merge Output Directory: R:\Files Ruben\GitRepos\DeepDiveV2AI\artifacts\merged_models\Meta-Llama-3-8B-Instruct-ProjectDeepDive-Lora-v1
   Final GGUF Directory: R:\Files Ruben\GitRepos\DeepDiveV2AI\final_gguf_models
   Final GGUF File: R:\Files Ruben\GitRepos\DeepDiveV2AI\final_gguf_models\Meta-Llama-3-8B-Instruct-ProjectDeepDive-Lora-v1-Q4_K_M.gguf
   Quantize Binary: Not found (build llama.cpp to enable Q4+)


In [11]:
# Verify the dataset can be loaded and count the entries
try:
    with open(DATASET_FILE_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    num_instructions = len(data)
    print(f"‚úÖ Dataset '{DATASET_FILE_PATH}' loaded successfully.")
    print(f"   Found {num_instructions} question/answer pairs for training.")
    if num_instructions < 10:
        print("   ‚ö†Ô∏è WARNING: Dataset is very small. Consider adding more examples for better results.")
except Exception as e:
    print(f"‚ùå ERROR: Failed to read or parse the dataset file. Please check for syntax errors in your JSON.")
    print(f"   Details: {e}")

‚úÖ Dataset 'R:\Files Ruben\GitRepos\DeepDiveV2AI\lore_training_data.json' loaded successfully.
   Found 7 question/answer pairs for training.


In [12]:
from huggingface_hub import HfFolder

if not HF_TOKEN:
    raise ValueError("CRITICAL: HF_TOKEN is empty. Set it in Cell 1 or via the HF_TOKEN env var.")

try:
    HfFolder.save_token(HF_TOKEN)
    print("‚úÖ Successfully stored Hugging Face token.")
except Exception as e:
    print("‚ùå Failed to store Hugging Face token.")
    print(f"   Details: {e}")

‚úÖ Successfully stored Hugging Face token.


In [13]:
command = [
    PYTHON_EXECUTABLE, str(TRAIN_SCRIPT),
    "--model_name_or_path", MODEL_ID,
    "--do_train",
    "--dataset", DATASET_NAME,
    "--dataset_dir", str(PROJECT_ROOT),
    "--finetuning_type", "lora",
    "--output_dir", str(LORA_OUTPUT_DIR),
    "--lora_target", "all",
    "--per_device_train_batch_size", str(BATCH_SIZE),
    "--gradient_accumulation_steps", str(GRADIENT_ACCUMULATION),
    "--num_train_epochs", str(EPOCHS),
    "--overwrite_output_dir",
    "--plot_loss",
    "--fp16"
]
print("--- Training Command ---")
print(subprocess.list2cmdline(command))
print("------------------------")

--- Training Command ---
c:\Users\ruben\AppData\Local\Programs\Python\Python311\python.exe C:\Users\ruben\Documents\TrainingAI\LLaMA-Factory\src\train.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --do_train --dataset lore_training_data --dataset_dir "R:\Files Ruben\GitRepos\DeepDiveV2AI" --finetuning_type lora --output_dir "R:\Files Ruben\GitRepos\DeepDiveV2AI\artifacts\lora_adapters\Meta-Llama-3-8B-Instruct\ProjectDeepDive-Lora-v1" --lora_target all --per_device_train_batch_size 1 --gradient_accumulation_steps 4 --num_train_epochs 5.0 --overwrite_output_dir --plot_loss --fp16
------------------------


In [14]:
print("üöÄ Starting training... This may take a while.")
process = subprocess.Popen(
    command,
    cwd=str(LLAMA_FACTORY_PATH),
    env=LLAMA_FACTORY_ENV,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    encoding='utf-8',
    bufsize=1,
)
while True:
    output = process.stdout.readline()
    if output == '' and process.poll() is not None:
        break
    if output:
        print(output.strip())
if process.returncode == 0:
    print("\nüéâ Training finished successfully! üéâ")
else:
    print(f"\n‚ùå Training failed with exit code {process.returncode}.")

üöÄ Starting training... This may take a while.
[INFO|2025-11-18 17:48:25] llamafactory.hparams.parser:468 >> Process rank: 0, world size: 1, device: cpu, distributed training: False, compute dtype: torch.float16
[INFO|tokenization_utils_base.py:2095] 2025-11-18 17:48:25,620 >> loading file tokenizer.json from cache at C:\Users\ruben\.cache\huggingface\hub\models--meta-llama--Meta-Llama-3-8B-Instruct\snapshots\8afb486c1db24fe5011ec46dfbe5b5dccdb575c2\tokenizer.json
[INFO|tokenization_utils_base.py:2095] 2025-11-18 17:48:25,620 >> loading file tokenizer.model from cache at None
[INFO|tokenization_utils_base.py:2095] 2025-11-18 17:48:25,620 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:2095] 2025-11-18 17:48:25,620 >> loading file special_tokens_map.json from cache at C:\Users\ruben\.cache\huggingface\hub\models--meta-llama--Meta-Llama-3-8B-Instruct\snapshots\8afb486c1db24fe5011ec46dfbe5b5dccdb575c2\special_tokens_map.json
[INFO|tokenization_utils

### Step 2: Merge LoRA and Quantize to GGUF

Now that the LoRA adapter is trained, we will perform two final steps:
1.  **Merge:** Combine the base Llama 3 model with our LoRA adapter to create a new, full-sized (unquantized) model.
2.  **Quantize:** Compress the large, merged model into a single, efficient GGUF file that LM Studio can use.

In [15]:
# --- MERGE THE TRAINED LORA ---
print("üöÄ Starting model merge process...")

merge_command = [
    PYTHON_EXECUTABLE,
    "-m", "llamafactory.launcher",
    "export",
    "--model_name_or_path", MODEL_ID,
    "--adapter_name_or_path", str(LORA_OUTPUT_DIR),
    "--template", "llama3",
    "--export_dir", str(MERGED_MODEL_DIR),
    "--export_size", "2"
]

print("--- Merge Command ---")
print(subprocess.list2cmdline(merge_command))
print("---------------------")

process = subprocess.Popen(
    merge_command,
    cwd=str(LLAMA_FACTORY_PATH),
    env=LLAMA_FACTORY_ENV,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    encoding='utf-8',
    bufsize=1,
)
while True:
    output = process.stdout.readline()
    if output == '' and process.poll() is not None:
        break
    if output:
        print(output.strip())

if process.returncode == 0:
    print(f"\nüéâ Model merged successfully! Full-precision model saved at:\n{MERGED_MODEL_DIR}")
else:
    print(f"\n‚ùå Model merge failed with exit code {process.returncode}.")

üöÄ Starting model merge process...
--- Merge Command ---
c:\Users\ruben\AppData\Local\Programs\Python\Python311\python.exe -m llamafactory.launcher export --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --adapter_name_or_path "R:\Files Ruben\GitRepos\DeepDiveV2AI\artifacts\lora_adapters\Meta-Llama-3-8B-Instruct\ProjectDeepDive-Lora-v1" --template llama3 --export_dir "R:\Files Ruben\GitRepos\DeepDiveV2AI\artifacts\merged_models\Meta-Llama-3-8B-Instruct-ProjectDeepDive-Lora-v1" --export_size 2
---------------------
usage: launcher.py [-h] [--model_name_or_path MODEL_NAME_OR_PATH]
[--adapter_name_or_path ADAPTER_NAME_OR_PATH]
[--adapter_folder ADAPTER_FOLDER] [--cache_dir CACHE_DIR]
[--use_fast_tokenizer [USE_FAST_TOKENIZER]]
[--no_use_fast_tokenizer] [--resize_vocab [RESIZE_VOCAB]]
[--split_special_tokens [SPLIT_SPECIAL_TOKENS]]
[--add_tokens ADD_TOKENS]
[--add_special_tokens ADD_SPECIAL_TOKENS]
[--new_special_tokens_config NEW_SPECIAL_TOKENS_CONFIG]
[--init_special_tokens {no

In [16]:
# --- QUANTIZE / CONVERT THE MERGED MODEL TO GGUF ---
print("\nüöÄ Starting quantization to GGUF format...")

if not LLAMA_CPP_PATH.exists():
    raise NotADirectoryError(f"CRITICAL: llama.cpp directory not found at '{LLAMA_CPP_PATH}'. Update LLAMA_CPP_PATH in Cell 1.")

quant_type_lower = QUANT_TYPE.lower()
needs_two_step = quant_type_lower not in CONVERTER_SUPPORTED_OUTTYPES
convert_outtype = QUANT_TYPE if not needs_two_step else "f16"
intermediate_fp16 = FINAL_GGUF_DIR / f"{model_folder_name}-{LORA_OUTPUT_NAME}-F16.gguf"
convert_target = FINAL_GGUF_FILE if not needs_two_step else intermediate_fp16

convert_command = [
    PYTHON_EXECUTABLE,
    str(CONVERT_SCRIPT),
    str(MERGED_MODEL_DIR),
    "--outfile", str(convert_target),
    "--outtype", convert_outtype
]

print("--- Convert Command ---")
print(subprocess.list2cmdline(convert_command))
print("-----------------------")

process = subprocess.Popen(convert_command, cwd=str(LLAMA_CPP_PATH), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, encoding='utf-8', bufsize=1)
while True:
    output = process.stdout.readline()
    if output == '' and process.poll() is not None:
        break
    if output:
        print(output.strip())

if process.returncode != 0:
    print(f"\n‚ùå Conversion failed with exit code {process.returncode}.")
else:
    if not needs_two_step:
        print(f"\nüéâ GGUF export successful! File saved at:\n{FINAL_GGUF_FILE}")
    else:
        if QUANTIZE_BINARY is None:
            raise FileNotFoundError(
                "Requested quantization type requires the llama.cpp 'quantize' binary. "
                "Build llama.cpp (cmake -S . -B build && cmake --build build --config Release) "
                "or switch QUANT_TYPE to one of the converter-supported values."
            )

        quantize_command = [
            str(QUANTIZE_BINARY),
            str(convert_target),
            str(FINAL_GGUF_FILE),
            QUANT_TYPE.upper()
        ]

        print("\n--- Quantize Command ---")
        print(subprocess.list2cmdline(quantize_command))
        print("------------------------")

        process = subprocess.Popen(quantize_command, cwd=str(QUANTIZE_BINARY.parent), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, encoding='utf-8', bufsize=1)
        while True:
            output = process.stdout.readline()
            if output == '' and process.poll() is not None:
                break
            if output:
                print(output.strip())

        if process.returncode == 0:
            print(f"\nüéâ Quantization successful! Your game-ready model is located at:\n{FINAL_GGUF_FILE}")
            if not KEEP_FP16_INTERMEDIATE and convert_target.exists():
                try:
                    convert_target.unlink()
                    print(f"   (Removed intermediate file: {convert_target})")
                except OSError as cleanup_error:
                    print(f"   ‚ö†Ô∏è Could not delete intermediate file: {cleanup_error}")
        else:
            print(f"\n‚ùå Quantization failed with exit code {process.returncode}.")

print("\nüí° You can now delete the large merged model folder to save space:")
print(f"   {MERGED_MODEL_DIR}")


üöÄ Starting quantization to GGUF format...
--- Convert Command ---
c:\Users\ruben\AppData\Local\Programs\Python\Python311\python.exe C:\Users\ruben\Documents\TrainingAI\llama.cpp\convert_hf_to_gguf.py "R:\Files Ruben\GitRepos\DeepDiveV2AI\artifacts\merged_models\Meta-Llama-3-8B-Instruct-ProjectDeepDive-Lora-v1" --outfile "R:\Files Ruben\GitRepos\DeepDiveV2AI\final_gguf_models\Meta-Llama-3-8B-Instruct-ProjectDeepDive-Lora-v1-F16.gguf" --outtype f16
-----------------------
INFO:hf-to-gguf:Loading model: Meta-Llama-3-8B-Instruct-ProjectDeepDive-Lora-v1
Traceback (most recent call last):
File "C:\Users\ruben\Documents\TrainingAI\llama.cpp\convert_hf_to_gguf.py", line 688, in load_hparams
config = AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\ruben\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\models\auto\configuration_auto.py", line 1380, in from_pretrained
r

In [17]:
# --- OPTIONAL: Copy the GGUF into LM Studio's local models folder ---
if FINAL_GGUF_FILE.exists():
    LM_STUDIO_MODELS_DIR.mkdir(parents=True, exist_ok=True)
    destination = LM_STUDIO_MODELS_DIR / FINAL_GGUF_FILE.name
    try:
        shutil.copy2(FINAL_GGUF_FILE, destination)
        print("‚úÖ Copied GGUF to LM Studio models directory.")
        print(f"   Location: {destination}")
    except Exception as exc:
        print("‚ö†Ô∏è Unable to copy GGUF into LM Studio's directory.")
        print(f"   Details: {exc}")
else:
    print("‚ö†Ô∏è Final GGUF file not found yet. Run the quantization cell first.")


‚ö†Ô∏è Final GGUF file not found yet. Run the quantization cell first.


### Workflow Complete!

1.  **Locate Your Final Model:**
    *   Your `final_gguf_models` folder now lives alongside this notebook (inside the repo on `R:`).
    *   Inside you'll find the `...-Q4_K_M.gguf` file ready for inference.

2.  **Load in LM Studio:**
    *   Either drag-and-drop the GGUF file into LM Studio or use the optional copy cell to push it into `LM_STUDIO_MODELS_DIR` automatically.
    *   The model will appear under "My Models" once the copy finishes.

3.  **Activate and Test:**
    *   Select the merged model (no extra LoRA adapters needed).
    *   Start the local server and try it inside Project Deep Dive!