In [1]:
import sys
import os
import torch

# Environment sanity check: show which interpreter the kernel is running,
# the current working directory, and whether PyTorch reports a CUDA device.
print(
    f"Python Executable: {sys.executable}",
    f"Working Directory: {os.getcwd()}",
    f"CUDA Available: {torch.cuda.is_available()}",
    sep="\n",
)

# Path configuration: folder that holds the external conversion/quantization tools.
TOOLS_DIR = r"C:\Users\ruben\Documents\TrainingAI"
print("Tools Directory set to: " + TOOLS_DIR)

Python Executable: r:\Files Ruben\GitRepos\DeepDiveV2AI\.venv\Scripts\python.exe
Working Directory: r:\Files Ruben\GitRepos\DeepDiveV2AI
CUDA Available: True
Tools Directory set to: C:\Users\ruben\Documents\TrainingAI


In [None]:
import torch
import json
import os
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from transformers import TrainerCallback
from huggingface_hub import login

# --- PATH CONFIGURATION ---
# 1. Where are the tools? (llama.cpp and quantize.exe)
TOOLS_DIR = r"C:\Users\ruben\Documents\TrainingAI"

# 2. Where should the final result go?
BASE_OUTPUT_DIR = r"R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged"

# --- VERSIONING LOGIC ---
# exist_ok=True replaces the separate "exists?" check, so there is no window
# between checking for the folder and creating it.
os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)

# Claim the lowest unused "Version<N>" folder. Creating the folder and treating
# FileExistsError as "taken" makes the claim atomic: the number is only accepted
# once the directory has actually been created by this run.
version_num = 1
while True:
    OUTPUT_VERSION_DIR = os.path.join(BASE_OUTPUT_DIR, f"Version{version_num}")
    try:
        os.makedirs(OUTPUT_VERSION_DIR)
        break
    except FileExistsError:
        version_num += 1

print(f"üìÇ Output Folder Created: {OUTPUT_VERSION_DIR}")

# --- SETTINGS ---
DATA_FILE = "lore_training_data_v2.json" # <--- Using the NEW v2 file
MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
NEW_MODEL_NAME = "Llama-3-8B-Gloom-Lore"
# Read the Hugging Face access token from the environment instead of pasting it
# into the notebook, where it would be saved (and shared) with the file.
# Falls back to the previous placeholder value ("") when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
TARGET_LOSS = 0.5 # Slightly lower for chat format

# --- STOPPING LOGIC ---
class StopAtLossCallback(TrainerCallback):
    """Trainer callback that requests an early stop once the logged loss is low enough.

    Args:
        threshold: Loss value at or below which training should stop.
    """

    def __init__(self, threshold):
        self.threshold = threshold

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Inspect a log event and flag the trainer to stop when `loss` <= threshold.

        Log events without a `loss` entry (or with no logs at all) are ignored.
        """
        if not logs or "loss" not in logs:
            return

        current_loss = logs["loss"]
        # Written as `not (<=)` rather than `>` so a NaN loss never triggers a stop.
        if not (current_loss <= self.threshold):
            return

        print(f"\nüõë STOPPING EARLY! Loss ({current_loss}) hit target.")
        control.should_training_stop = True

# Authenticate with the Hugging Face Hub before any tokenizer/model download.
# An empty HF_TOKEN is not a usable credential, so pass None in that case and
# let `login` ask for a token instead of submitting an empty string.
login(token=HF_TOKEN or None)

üìÇ Output Folder Created: R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged\Version9


In [3]:
# 1. Load Data
# A missing file is re-raised with the current working directory in the message,
# which makes a wrong kernel working directory easy to spot.
try:
    with open(DATA_FILE, 'r', encoding='utf-8') as f:
        data = json.load(f)
except FileNotFoundError:
    raise FileNotFoundError(f"Could not find {DATA_FILE} in {os.getcwd()}")
else:
    print(f"Loaded {len(data)} conversations from {os.path.abspath(DATA_FILE)}")

# 2. Convert to Hugging Face Dataset
dataset = Dataset.from_list(data)

# 3. Load Tokenizer (Needed for the Chat Template)
print("Loading Tokenizer for formatting...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token


# 4. Apply Llama 3 Chat Template
def format_chat_template(example):
    """Render one record's `messages` list into a single string under the key `text`.

    The tokenizer's chat template inserts the role headers
    (<|start_header_id|>system... etc); no generation prompt is appended.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}


dataset = dataset.map(format_chat_template, batched=False)

# 5. VISUALIZATION (Check your data!)
# Show the first conversation before and after templating.
print("\n--- üîç DATA VISUALIZATION ---")
print("Raw Input (JSON):")
print(json.dumps(data[0]['messages'], indent=2))

print("\nFormatted Output (What the model actually sees):")
print("-" * 40, dataset[0]['text'], "-" * 40, sep="\n")
print("-" * 40)

Loaded 90 conversations from r:\Files Ruben\GitRepos\DeepDiveV2AI\lore_training_data_v2.json
Loading Tokenizer for formatting...


Map:   0%|          | 0/90 [00:00<?, ? examples/s]


--- üîç DATA VISUALIZATION ---
Raw Input (JSON):
[
  {
    "role": "system",
    "content": "You are a survivor on the Ark submarine. You are gritty and superstitious."
  },
  {
    "role": "user",
    "content": "What is the Gloom?"
  },
  {
    "role": "assistant",
    "content": "[Mood: Dread] *Stares out the reinforced porthole, voice dropping to a whisper.* The Gloom... it's not just energy. It's the curse of these depths. It gives us the crystals, sure, but it births the horrors that hunt us. Don't stare into it too long, diver. It stares back."
  }
]

Formatted Output (What the model actually sees):
----------------------------------------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a survivor on the Ark submarine. You are gritty and superstitious.<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the Gloom?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

[Mood: Dread] *Stares out the reinforced porthole, voice dropping to a whisper.* 

In [4]:
# 4-bit NF4 quantization settings for loading the base model:
# float16 compute dtype, double quantization switched off.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Load the base checkpoint with the quantization config applied;
# device placement is left to `device_map="auto"`.
print("Loading Base Model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    device_map="auto",
    quantization_config=bnb_config,
)

Loading Base Model...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
# LoRA adapter hyper-parameters.
peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

training_arguments = TrainingArguments(
    # Output / logging
    output_dir="./results",
    logging_steps=25,
    # NOTE(review): save_steps=0 presumably disables intermediate checkpoints — confirm.
    save_steps=0,
    report_to="none",
    # Schedule
    num_train_epochs=15,
    max_steps=-1,
    # NOTE(review): warmup_ratio is set together with a "constant" scheduler;
    # verify the warmup is actually applied with this scheduler type.
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    # Batching
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # Optimizer
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # Precision
    fp16=True,
    bf16=False,
)

# Early stopping is delegated to StopAtLossCallback with the configured target loss.
early_stop = StopAtLossCallback(TARGET_LOSS)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    dataset_text_field="text",
    tokenizer=tokenizer,
    peft_config=peft_config,
    max_seq_length=1024, # Increased for chat history
    packing=False,
    callbacks=[early_stop],
)

print("Starting training...")
trainer.train()
print("Training finished.")

# Save adapters to the Version Folder (adapter weights and tokenizer side by side).
adapter_dir = os.path.join(OUTPUT_VERSION_DIR, NEW_MODEL_NAME)
trainer.model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)
print(f"LoRA adapters saved to: {OUTPUT_VERSION_DIR}")

Map:   0%|          | 0/90 [00:00<?, ? examples/s]

  super().__init__(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 128009}.


Starting training...


Step,Training Loss
25,3.3374
50,1.7705
75,1.6561
100,1.5004
125,1.3838
150,1.4222
175,1.3852
200,1.1238
225,1.0974
250,1.0895



üõë STOPPING EARLY! Loss (0.4486) hit target.
Training finished.
LoRA adapters saved to: R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged\Version9


In [6]:
import gc
import torch

# Drop the references to the training objects if this kernel still has them,
# reporting for each one whether it was removed or was already absent.
if "model" in globals():
    del model
    print("Deleted 'model' variable.")
else:
    print("'model' variable was already gone.")

if "trainer" in globals():
    del trainer
    print("Deleted 'trainer' variable.")
else:
    print("'trainer' variable was already gone.")

# Always run a garbage-collection pass, then release PyTorch's cached CUDA
# memory when a CUDA device is available.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print("‚úÖ VRAM & RAM cleared successfully.")

Deleted 'model' variable.
Deleted 'trainer' variable.
‚úÖ VRAM & RAM cleared successfully.


In [7]:
from peft import PeftModel
import gc
import shutil

# 1. Define working paths inside the version folder
offload_dir = os.path.join(OUTPUT_VERSION_DIR, "offload_temp")          # overflow folder for offloaded weights
adapter_path = os.path.join(OUTPUT_VERSION_DIR, NEW_MODEL_NAME)         # adapter saved by the training cell
output_merged_dir = os.path.join(OUTPUT_VERSION_DIR, "merged_model_temp")

try:
    # 2. Load the base model in float16 and merge the LoRA adapter into it
    print("Loading base model with Disk Offloading (Prevents Crash)...")
    # NOTE(review): the captured output of this cell shows a warning that
    # `torch_dtype` is deprecated in favour of `dtype`; kept as-is here so
    # library versions that only know `torch_dtype` keep working.
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        return_dict=True,
        torch_dtype=torch.float16,
        device_map="auto",          # Use GPU first, then RAM, then Disk
        offload_folder=offload_dir, # <--- The Safety Net
        low_cpu_mem_usage=True,
        token=HF_TOKEN
    )

    print("Applying LoRA adapter...")
    model_to_merge = PeftModel.from_pretrained(base_model, adapter_path)

    print("Merging weights...")
    merged_model = model_to_merge.merge_and_unload()

    # 3. Save with Sharding
    # Saves in 2GB chunks to prevent a RAM spike while writing to disk
    print(f"Saving merged model to: {output_merged_dir}")
    merged_model.save_pretrained(
        output_merged_dir,
        safe_serialization=True,
        max_shard_size="2GB"
    )
    tokenizer.save_pretrained(output_merged_dir)
finally:
    # 4. Clean up the offload folder whether or not the merge succeeded, so a
    # failed load/merge/save does not leave offloaded weights on disk.
    # Best-effort: a cleanup problem must not hide the original error.
    if os.path.exists(offload_dir):
        shutil.rmtree(offload_dir, ignore_errors=True)

# 5. Clean up the model from RAM to free up space for the next step
del base_model
del model_to_merge
del merged_model
gc.collect()
# Also hand cached GPU memory back, mirroring the post-training cleanup.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print("Merge & Save complete.")

`torch_dtype` is deprecated! Use `dtype` instead!


Loading base model with Disk Offloading (Prevents Crash)...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


Applying LoRA adapter...




Merging weights...
Saving merged model to: R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged\Version9\merged_model_temp




Saving checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

Merge & Save complete.


In [8]:
# Paths
llama_cpp_folder = os.path.join(TOOLS_DIR, "llama.cpp")
convert_script = os.path.join(llama_cpp_folder, "convert_hf_to_gguf.py")

# Points to the temp folder created in the previous cell
model_path = os.path.join(OUTPUT_VERSION_DIR, "merged_model_temp")
# Saves the heavy FP16 GGUF into the version folder
outfile_path = os.path.join(OUTPUT_VERSION_DIR, f"{NEW_MODEL_NAME}.fp16.gguf")   

if not os.path.exists(convert_script):
    print(f"Error: Could not find conversion script at: {convert_script}")
else:
    print(f"Using script: {convert_script}")
    print(f"Converting: {model_path}")
    !python "{convert_script}" "{model_path}" --outtype f16 --outfile "{outfile_path}"

Using script: C:\Users\ruben\Documents\TrainingAI\llama.cpp\convert_hf_to_gguf.py
Converting: R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged\Version9\merged_model_temp


INFO:hf-to-gguf:Loading model: merged_model_temp
INFO:hf-to-gguf:Model architecture: LlamaForCausalLM
INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'
INFO:hf-to-gguf:gguf: indexing model part 'model-00004-of-00009.safetensors'
INFO:hf-to-gguf:gguf: indexing model part 'model-00003-of-00009.safetensors'
INFO:hf-to-gguf:gguf: indexing model part 'model-00001-of-00009.safetensors'
INFO:hf-to-gguf:gguf: indexing model part 'model-00007-of-00009.safetensors'
INFO:hf-to-gguf:gguf: indexing model part 'model-00008-of-00009.safetensors'
INFO:hf-to-gguf:gguf: indexing model part 'model-00009-of-00009.safetensors'
INFO:hf-to-gguf:gguf: indexing model part 'model-00006-of-00009.safetensors'
INFO:hf-to-gguf:gguf: indexing model part 'model-00005-of-00009.safetensors'
INFO:hf-to-gguf:gguf: indexing model part 'model-00002-of-00009.safetensors'
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:bl

In [9]:
import subprocess
import shutil
import os
import glob

# --- CONFIGURATION (Must match Cell 2) ---
# Redefined here so this cell can run on a freshly restarted kernel.
TOOLS_DIR = r"C:\Users\ruben\Documents\TrainingAI"
BASE_OUTPUT_DIR = r"R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged"
NEW_MODEL_NAME = "Llama-3-8B-Gloom-Lore"

# --- 1. Auto-Detect the Latest Version Folder ---
# Since we restarted the kernel, we need to find where the file is.
# Only directories count: a stray file whose name starts with "Version"
# must not be picked up as a build folder.
version_folders = [
    candidate
    for candidate in glob.glob(os.path.join(BASE_OUTPUT_DIR, "Version*"))
    if os.path.isdir(candidate)
]
if not version_folders:
    raise FileNotFoundError("‚ùå No Version folders found! Did you run the training step?")

# Pick the latest run by os.path.getctime (creation time on Windows), not by
# the number in the name: the versioning cell reuses the lowest free number,
# so the newest folder is not necessarily the highest-numbered one.
latest_version_dir = max(version_folders, key=os.path.getctime)
print(f"üìÇ Detected latest build folder: {latest_version_dir}")

# Define Paths
input_gguf = os.path.join(latest_version_dir, f"{NEW_MODEL_NAME}.fp16.gguf")      # FP16 GGUF from the conversion cell
output_gguf = os.path.join(latest_version_dir, f"{NEW_MODEL_NAME}.Q4_K_M.gguf")   # quantized result
quantize_exe = os.path.join(TOOLS_DIR, "llama-quantize.exe")
temp_merged_folder = os.path.join(latest_version_dir, "merged_model_temp")

# --- 2. Execution Logic ---
# Preconditions: the quantizer binary and the FP16 GGUF must both exist.
if not os.path.exists(quantize_exe):
    print(f"‚ùå Error: Tool not found at: {quantize_exe}")
elif not os.path.exists(input_gguf):
    print(f"‚ùå Error: Input file not found: {input_gguf}")
    print("   üëâ Did the conversion step finish successfully?")
else:
    print(f"Quantizing (Fresh RAM Mode)...")
    print(f"   Input: {input_gguf}")

    try:
        # Run Command (check=True turns a non-zero exit code into CalledProcessError)
        result = subprocess.run(
            [quantize_exe, input_gguf, output_gguf, "Q4_K_M"],
            cwd=TOOLS_DIR,
            capture_output=True,
            text=True,
            check=True
        )
    except subprocess.CalledProcessError as e:
        print("\n‚ùå Quantization failed!")
        print("--- Error Details ---")
        # Fall back to stdout when the tool wrote nothing to stderr.
        print(e.stderr or e.stdout)
        print("---------------------")
    else:
        # The intermediates below cannot be recreated without re-running the
        # merge and conversion, so only delete them once the quantized file is
        # confirmed to exist and to be non-empty.
        if not os.path.isfile(output_gguf) or os.path.getsize(output_gguf) == 0:
            print(f"\n‚ùå Error: Quantizer exited without producing: {output_gguf}")
            print("   Temporary files were kept so the step can be retried.")
        else:
            print("\n‚úÖ Quantization Complete.")

            # --- 3. Cleanup Logic ---
            print("üßπ Cleaning up massive temporary files...")

            if os.path.exists(input_gguf):
                os.remove(input_gguf)
                print(f"   - Deleted: {os.path.basename(input_gguf)}")

            if os.path.exists(temp_merged_folder):
                shutil.rmtree(temp_merged_folder)
                print(f"   - Deleted: merged_model_temp folder")

            print(f"\nüéâ SUCCESS! Final model saved to:\n{output_gguf}")

üìÇ Detected latest build folder: R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged\Version9
Quantizing (Fresh RAM Mode)...
   Input: R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged\Version9\Llama-3-8B-Gloom-Lore.fp16.gguf

‚úÖ Quantization Complete.
üßπ Cleaning up massive temporary files...
   - Deleted: Llama-3-8B-Gloom-Lore.fp16.gguf
   - Deleted: merged_model_temp folder

üéâ SUCCESS! Final model saved to:
R:\Files Ruben\GitRepos\DeepDiveV2AI\TrainedAndMerged\Version9\Llama-3-8B-Gloom-Lore.Q4_K_M.gguf
