In [3]:
import os
import re
import json
import numpy as np
from pathlib import Path

def clean_chapter(text):
    """Return ``text`` with every '#' removed and surrounding whitespace trimmed."""
    return re.sub(r'#', '', text).strip()

def extract_chapter_title(content):
    """Return the heading text of the first line that begins with '#'.

    Lines are examined one at a time (split on newlines). For the first line
    that starts with '#' and has at least one further character, everything
    after that leading '#' is returned with surrounding whitespace stripped.
    Returns "" when no line qualifies.
    """
    heading = re.compile(r"^#\s*(.+)$")
    hits = (heading.match(line) for line in content.split("\n"))
    first_hit = next((hit for hit in hits if hit), None)
    return first_hit.group(1).strip() if first_hit else ""

# Window into the sorted file listing: skip the first two entries and the last three.
start_index = 2
end_index = -3
# 1-based positions to leave out, shifted down to 0-based list indices.
exclude = np.array([21, 36]) - 1

file_paths = sorted("Data/LabelWork/" + file for file in os.listdir("Data/LabelWork"))

# A negative end index counts back from the end of the listing.
if end_index < 0:
    resolved_end = len(file_paths) + end_index
else:
    resolved_end = end_index
excluded_positions = set(exclude.tolist())
indices = [i for i in range(start_index, resolved_end) if i not in excluded_positions]
sliced_paths = [file_paths[i] for i in indices]

# One cleaned heading per selected file.
chapter_list = []
for file_path in sliced_paths:
    chapter_text = Path(file_path).read_text(encoding="utf-8")
    chapter = extract_chapter_title(chapter_text).strip()
    chapter_list.append(clean_chapter(chapter))

# Bullet list that gets embedded in the system prompt.
chapters = "- " + "\n- ".join(chapter_list)


# System prompt used as the first message wherever this notebook renders a chat.
# It embeds the bullet list held in `chapters` between <start_chapters> and
# <end_chapters>, and spells out the exact answer layout ("# Chapter:" followed
# by a single "- <number> <title>" line).
system_message = {
    "role": "system",
    "content": (
        "Given a description of a situation, you must identify the most relevant chapter from the provided list ONLY.\n"
        "You MUST strictly select exactly one chapter from the list below. Do NOT invent, paraphrase, abbreviate, or modify any chapter titles.\n\n"
        "Here are the chapters to select from (enclosed within the <start_chapters> and <end_chapters> tags):\n"
        "<start_chapters>\n"
        # `chapters` is interpolated once, when this cell runs.
        f"{chapters}\n"
        "<end_chapters>\n\n"
        "Response format is STRICTLY required as follows (Follow this format exactly with no deviations):\n"
        "# Chapter:\n"
        "- [chapter number] [chapter title]"
       
    )
}

# Show the final prompt so the embedded chapter list can be checked by eye.
print(system_message["content"])

Given a description of a situation, you must identify the most relevant chapter from the provided list ONLY.
You MUST strictly select exactly one chapter from the list below. Do NOT invent, paraphrase, abbreviate, or modify any chapter titles.

Here are the chapters to select from (enclosed within the <start_chapters> and <end_chapters> tags):
<start_chapters>
- 03 Unconscious / decreased level of consciousness, breathing normally
- 04 Choking / foreign object
- 05 Mental health issue
- 06 Mental health crisis - suicide risk
- 07 Burns
- 08 Drowning
- 09 Scuba diving accident
- 10 Chemicals / gasses / CBRN
- 11 Accident / injury
- 12 Major incident
- 13 Road traffic accident - RTA
- 14 Violence / abuse
- 15 Allergic reaction
- 16 Child / infant - illness
- 17 Bite / insect sting
- 18 Bleeding - non-traumatic
- 19 Chest pain / cardiac disease
- 20 Diabetes
- 22 Fever / infection / sepsis
- 23 Poisoning - not related to alcohol or drugs
- 24 Functional decline
- 25 Childbirth
- 26 Gyneco

In [2]:
from unsloth import FastLanguageModel
import torch
seed = 42
torch.manual_seed(seed)

# Training hyperparameters. They are also written into `param_str` below, so the
# output directory name records the configuration of the run.
bs = 4                            # per-device train batch size
lr = 5e-4                         # learning rate
epochs = 1
wd = 0.001                        # weight decay
log_steps = 500                   # logging / evaluation interval, in steps
gradient_accumulation_steps = 1
rank = 16                         # LoRA rank
lora_alpha = 16
lora_dropout = 0
max_seq_length = 1024
model_name = "unsloth/Qwen3-4B-unsloth-bnb-4bit"

param_str = f"-LORA-(ep={epochs}, bs={bs}, lr={lr}, grad_acc={gradient_accumulation_steps}, wd={wd}, rank={rank}, lora_alpha={lora_alpha}, lora_dropout={lora_dropout}, log_steps={log_steps})"
output_dir = f"./Models/{model_name.replace('/','-')}" + param_str

# Load through `model_name` rather than a repeated literal, so the checkpoint
# that is loaded and the names derived from `model_name` cannot drift apart.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = torch.bfloat16,
    load_in_4bit = True,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.4.7: Fast Qwen3 patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4070 SUPER. Num GPUs = 1. Max memory: 11.836 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [3]:
# Wrap the loaded model with LoRA adapters on the attention projections
# (q/k/v/o) and the MLP projections (gate/up/down). Rank, alpha and dropout
# come from the hyperparameters defined when the base model was loaded.
model = FastLanguageModel.get_peft_model(
    model,
    r = rank,           # Choose any number > 0! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = lora_alpha,  # Best to choose alpha = rank or rank*2
    lora_dropout = lora_dropout, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = seed,  # same seed as torch.manual_seed
    use_rslora = False,   # We support rank stabilized LoRA
    loftq_config = None,  # And LoftQ
)

Unsloth 2025.4.7 patched 36 layers with 36 QKV layers, 36 O layers and 36 MLP layers.


In [4]:
from datasets import load_dataset, concatenate_datasets


def extract_relevant_chapters(text):
    """Return the "# Relevant Chapters:" header together with its bullet lines.

    The match covers the header line and every directly following line that
    starts with "- "; the result is whitespace-stripped. Returns None when the
    header (followed by a newline) does not occur in ``text``.
    """
    found = re.search(r"(# Relevant Chapters:\s*\n(?:- .*\n?)*)", text)
    return found.group(1).strip() if found else None

def extract_first_number(text):
    """Return the first standalone run of digits in ``text`` as a string, or ""."""
    hit = re.search(r'\b\d+\b', text)
    return hit.group().strip() if hit else ""

def _load_jsonl(path):
    """Load one .jsonl file through the `datasets` "json" builder as a single split."""
    return load_dataset("json", data_files=path, split="train")

# Training
train_intra = _load_jsonl("Data/TrainData/intra_chapter_dataset.jsonl")
train_paraphrase = _load_jsonl("Data/TrainData/paraphrase_dataset.jsonl")
train_dataset = concatenate_datasets([train_intra, train_paraphrase])

# Validation
val_intra = _load_jsonl("Data/ValData/intra_chapter_dataset.jsonl")
val_paraphrase = _load_jsonl("Data/ValData/paraphrase_dataset.jsonl")
val_dataset = concatenate_datasets([val_intra, val_paraphrase])


def formatting_prompts_func(examples):
    """Validate a batch of conversations and render each one to chat-template text.

    Every entry of ``examples["QandA"]`` must hold exactly two messages. The
    first (the user's description) must not mention "relevant chapters". The
    second (the assistant's answer) must contain a "# Relevant Chapters:"
    section with at least one number in it; its content is overwritten in place
    with just that section, the header renamed to "# Chapter:".

    Returns:
        {"text": [...]} with one rendered string per conversation, built from
        ``system_message`` followed by the two messages, without a generation
        prompt.

    Raises:
        ValueError: if any of the checks above fails.
    """
    convos = examples["QandA"]

    for convo in convos:
        if len(convo) != 2:
            raise ValueError(f"Expected exactly 2 messages in conversation, found {len(convo)}: {convo}")
        question, answer = convo

        # The user's side must not already contain the answer section.
        if "relevant chapters" in question["content"].lower():
            raise ValueError(f"Unexpected 'relevant chapters' in the first message: {question['content']}")

        section = extract_relevant_chapters(answer["content"])
        if not section:
            raise ValueError(f"Relevant chapter section not found in: {answer['content']}")

        if extract_first_number(section) == "":
            raise ValueError(f"Chapter number not found in: {section}")

        # Keep only the chapter section, under the header the system prompt asks for.
        answer["content"] = re.sub(r"#\s*Relevant Chapters\s*:", "# Chapter:", section)

    return {
        "text": [
            tokenizer.apply_chat_template([system_message] + convo, tokenize = False, add_generation_prompt = False)
            for convo in convos
        ]
    }
	
# Render both splits to text, then drop the raw conversation column.
train_rendered = train_dataset.map(formatting_prompts_func, batched=True)
train_dataset = train_rendered.remove_columns(["QandA"])
val_rendered = val_dataset.map(formatting_prompts_func, batched=True)
val_dataset = val_rendered.remove_columns(["QandA"])

print(train_dataset)
print(val_dataset)


print(f"Training for {epochs} epochs")
# Number of optimizer steps implied by the dataset size and the effective batch size.
effective_batch = bs * gradient_accumulation_steps
steps_per_epoch = np.ceil(len(train_dataset) / effective_batch)
target_steps = int(steps_per_epoch * epochs)
target_steps

Dataset({
    features: ['text'],
    num_rows: 10465
})
Dataset({
    features: ['text'],
    num_rows: 1136
})
Training for 1 epochs


2617

#### Ensuring each example is correct

In [5]:
# Sanity-check every rendered training text.
#
# The checks look only at the text after the last assistant-turn marker. The
# system prompt itself contains "# Chapter:" and chapter numbers, so testing
# the whole text (as this cell used to) could never fail.
ASSISTANT_TURN = "<|im_start|>assistant"
ANSWER_HEADER = "# Chapter:"

for example in train_dataset:
    rendered = example["text"]
    _, turn_marker, reply = rendered.rpartition(ASSISTANT_TURN)
    if not turn_marker:
        raise ValueError(f"No assistant turn found in training data: {rendered}")

    # The original header must have been renamed during formatting.
    if "relevant chapters" in reply.lower():
        raise ValueError(f"Found 'relevant chapters' in training data: {rendered}")

    _, header, answer = reply.partition(ANSWER_HEADER)
    if not header:
        raise ValueError(f"'# Chapter:' header missing from the assistant reply: {rendered}")

    if extract_first_number(answer) == "":
        raise ValueError(f"Chapter number missing from the assistant reply: {rendered}")

print(train_dataset[100]["text"])

<|im_start|>system
Given a description of a situation, you must identify the most relevant chapter from the provided list ONLY.
You MUST strictly select exactly one chapter from the list below. Do NOT invent, paraphrase, abbreviate, or modify any chapter titles.

Here are the chapters to select from (enclosed within the <start_chapters> and <end_chapters> tags):
<start_chapters>
- 03 Unconscious / decreased level of consciousness, breathing normally
- 04 Choking / foreign object
- 05 Mental health issue
- 06 Mental health crisis - suicide risk
- 07 Burns
- 08 Drowning
- 09 Scuba diving accident
- 10 Chemicals / gasses / CBRN
- 11 Accident / injury
- 12 Major incident
- 13 Road traffic accident - RTA
- 14 Violence / abuse
- 15 Allergic reaction
- 16 Child / infant - illness
- 17 Bite / insect sting
- 18 Bleeding - non-traumatic
- 19 Chest pain / cardiac disease
- 20 Diabetes
- 22 Fever / infection / sepsis
- 23 Poisoning - not related to alcohol or drugs
- 24 Functional decline
- 25 Chi

In [6]:
from trl import SFTTrainer, SFTConfig
from unsloth import unsloth_train

class CustomSFTTrainer(SFTTrainer):
    """SFTTrainer that also keeps every logged "loss" and "eval_loss" value."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.train_losses, self.eval_losses = [], []

    def log(self, logs, *args, **kwargs):
        """Record loss values found in ``logs``, then defer to the parent logger."""
        for key, history in (("loss", self.train_losses), ("eval_loss", self.eval_losses)):
            if key in logs:
                history.append(logs[key])

        super().log(logs, *args, **kwargs)

    def get_train_losses(self):
        """Return the list of recorded "loss" values."""
        return self.train_losses

    def get_eval_losses(self):
        """Return the list of recorded "eval_loss" values."""
        return self.eval_losses


training_args = SFTConfig(
    # Optimisation
    learning_rate = lr,
    weight_decay = wd,
    optim = "adamw_8bit",
    lr_scheduler_type = "linear",
    warmup_steps = 10,
    bf16 = True,
    seed = seed,

    # Schedule
    num_train_epochs = epochs,
    #max_steps=target_steps,
    per_device_train_batch_size = bs,
    gradient_accumulation_steps = gradient_accumulation_steps, # Use GA to mimic batch size!

    # Logging / evaluation share one interval; no checkpoints are written during training.
    logging_steps = log_steps,
    eval_strategy = "steps",
    eval_steps = log_steps,
    save_strategy = "no",
    report_to = "none", # Use this for WandB etc

    dataset_text_field = "text",
)

trainer = CustomSFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_args,
)

In [7]:
# @title Show current memory stats
BYTES_PER_GIB = 1024 ** 3

gpu_stats = torch.cuda.get_device_properties(0)
# Peak reserved memory so far and total device memory, both in GB rounded to 3 decimals.
start_gpu_memory = round(torch.cuda.max_memory_reserved() / BYTES_PER_GIB, 3)
max_memory = round(gpu_stats.total_memory / BYTES_PER_GIB, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GeForce RTX 4070 SUPER. Max memory = 11.836 GB.
4.207 GB of memory reserved.


#### Train Model

In [8]:
# Run training, then persist the adapter and tokenizer side by side.
trainer_stats = unsloth_train(trainer)
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")

# Loss curves collected by CustomSFTTrainer.log during the run.
train_losses = trainer.get_train_losses()
eval_losses = trainer.get_eval_losses()

losses_dict = {
    "train_losses": train_losses,
    "eval_losses": eval_losses
}

# Save the dictionary to a JSON file
losses_file_path = f"./Data/Plots/{model_name.replace('/','-')}" + param_str + ".json"

print(losses_file_path)
# Training is the expensive step: create the target directory first so a
# missing folder cannot make the write fail and lose the loss history.
Path(losses_file_path).parent.mkdir(parents=True, exist_ok=True)
with open(losses_file_path, 'w') as f:
    json.dump(losses_dict, f, indent=2)
print(f"Losses saved to {losses_file_path}")

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 10,465 | Num Epochs = 1 | Total steps = 2,617
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 1 x 1) = 4
 "-____-"     Trainable parameters = 33,030,144/4,000,000,000 (0.83% trained)


Step,Training Loss,Validation Loss
500,0.1541,0.279091
1000,0.0798,0.253875
1500,0.066,0.275452
2000,0.0585,0.271457
2500,0.0537,0.274872


Unsloth: Will smartly offload gradients to save VRAM!
Model saved to ./Models/unsloth-Qwen3-4B-unsloth-bnb-4bit-LORA-(ep=1, bs=4, lr=0.0005, grad_acc=1, wd=0.001, rank=16, lora_alpha=16, lora_dropout=0, log_steps=500)
./Data/Plots/unsloth-Qwen3-4B-unsloth-bnb-4bit-LORA-(ep=1, bs=4, lr=0.0005, grad_acc=1, wd=0.001, rank=16, lora_alpha=16, lora_dropout=0, log_steps=500).json
Losses saved to ./Data/Plots/unsloth-Qwen3-4B-unsloth-bnb-4bit-LORA-(ep=1, bs=4, lr=0.0005, grad_acc=1, wd=0.001, rank=16, lora_alpha=16, lora_dropout=0, log_steps=500).json


#### Load Model

In [4]:
from unsloth import FastLanguageModel
import torch
# Reload a fine-tuned adapter from disk and switch it to inference mode.
seed = 42
torch.manual_seed(seed)
 
# Alternative run (rank=8, lora_alpha=8), kept for quick switching:
#model_path = "Models/unsloth-Qwen3-4B-unsloth-bnb-4bit-LORA-(ep=1, bs=4, lr=0.0005, grad_acc=1, wd=0.001, rank=8, lora_alpha=8, lora_dropout=0, log_steps=500)"
# Directory name follows the "<model name>-LORA-(<hyperparameters>)" layout.
model_path = "Models/unsloth-Qwen3-4B-unsloth-bnb-4bit-LORA-(ep=1, bs=4, lr=0.0005, grad_acc=1, wd=0.001, rank=16, lora_alpha=16, lora_dropout=0, log_steps=500)"
model, tokenizer = FastLanguageModel.from_pretrained(
	model_name = model_path,
	max_seq_length = 1024,  # NOTE(review): hard-coded; presumably should match the training max_seq_length - confirm
	dtype = torch.bfloat16,
	load_in_4bit = True,
)
# As the last expression of the cell, this call's return value is also displayed.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.4.7: Fast Qwen3 patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4070 SUPER. Num GPUs = 1. Max memory: 11.836 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.4.7 patched 36 layers with 36 QKV layers, 36 O layers and 36 MLP layers.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen3ForCausalLM(
      (model): Qwen3Model(
        (embed_tokens): Embedding(151936, 2560, padding_idx=151654)
        (layers): ModuleList(
          (0-1): 2 x Qwen3DecoderLayer(
            (self_attn): Qwen3Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2560, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2560, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear(


#### Inference

In [3]:
from transformers import TextIteratorStreamer
import threading

# Streaming demo: generate() runs in a background thread and pushes decoded
# text into `streamer`, while this cell iterates over the streamer and prints
# the pieces as they arrive.
streamer = TextIteratorStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True
)

user_query = """
Fell down from height, injured leg and bleeding
"""

messages = [
    system_message,
    {"role" : "user", "content" : user_query}
]


text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
    enable_thinking = True, # Thinking is left ENABLED here; the evaluation prompts later in the notebook pass False
)



# Start generation in a separate thread
generation_thread = threading.Thread(target=model.generate, kwargs={
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    "streamer": streamer,
    "max_new_tokens": 20,
    "use_cache": True,
    "do_sample": False,  # greedy decoding; the sampling knobs below stay disabled
    # "temperature": 0.7, 
    # "top_p": 0.8, 
    # "top_k": 20,
})
generation_thread.start()

# Capture output
captured_output = ""
for token in streamer:
    print(token, end="", flush=True)
    captured_output += token
    
# Wait for generate() to return before the cell finishes.
generation_thread.join()

<think>

</think>

# Chapter:
- 11 Accident / injury

In [4]:
def clean_response(text):
    """Return the assistant's reply from a decoded generation, without think tags.

    Everything up to and including the last "<|im_start|>assistant" marker is
    dropped. When the marker is absent the whole input is treated as the reply.
    "<think>" / "</think>" tags are then removed and the result is stripped.
    Other special tokens (for example a trailing "<|im_end|>") are left in place.
    """
    # re.split always yields at least one piece, so [-1] is the text after the
    # last marker, or the entire input when there is no marker. (Keeping the
    # list in the no-marker case made the re.sub below raise TypeError.)
    reply = re.split(r"<\|im_start\|>assistant\n?", text)[-1]
    return re.sub(r"</?\|?think\|?>", "", reply).strip()

# One-shot (non-streaming) inference on a single free-text description.

# User query
user_query = """
So I went to visit my cousin today to find him down on the ground unable to get up. His head was seriously bleeding alot. What should I do with all the blood?
"""

messages = [
	system_message,
	{"role": "user", "content": user_query}
]

# Prepare the chat prompt
prompt = tokenizer.apply_chat_template(
	messages,
	tokenize=False,
	add_generation_prompt=True,
	enable_thinking=True,  # thinking left enabled, unlike the evaluation prompts (enable_thinking=False)
)

# Tokenize
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Generate response
outputs = model.generate(
	**inputs,
	max_new_tokens=128,
	do_sample=False,  # greedy decoding
	use_cache=True
)

# Decode and extract only the assistant response
# Special tokens are kept so clean_response can find the assistant marker;
# tokens it does not strip (e.g. "<|im_end|>") remain in the printed text.
decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)

# Clean thinktags and print
print(clean_response(decoded))

# Chapter:
- 14 Violence / abuse<|im_end|>


In [6]:
import evaluate
from datasets import load_dataset, concatenate_datasets
# Load metric objects from the `evaluate` library.
# NOTE(review): none of these are referenced in the cells visible here - confirm they are used further on.
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")
meteor = evaluate.load("meteor")
f1_metric = evaluate.load("f1")

[nltk_data] Downloading package wordnet to /home/erik/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/erik/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/erik/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Test dataset

In [4]:
# Held-out test data: both JSONL files merged into one dataset.
test_intra = load_dataset("json", data_files="Data/TestData/intra_chapter_dataset.jsonl", split="train")
test_paraphrase = load_dataset("json", data_files="Data/TestData/paraphrase_dataset.jsonl", split="train")
test_dataset = concatenate_datasets([test_intra, test_paraphrase])

def formatting_prompts_func(examples):
    """Turn a batch of conversations into generation prompts plus reference labels.

    For each conversation in ``examples["QandA"]`` the assistant messages are
    removed from the prompt; the content of the first assistant message becomes
    the label. The prompt is ``system_message`` followed by the remaining
    messages, rendered with a generation prompt and thinking disabled.

    Returns:
        {"text": [...], "labels": [...]}, one entry per conversation.
    """
    texts, labels = [], []
    for convo in examples["QandA"]:
        # Split the conversation into prompt messages and assistant answers.
        prompt_messages = [system_message]
        assistant_contents = []
        for msg in convo:
            if msg["role"] == "assistant":
                assistant_contents.append(msg["content"])
            else:
                prompt_messages.append(msg)
        label = assistant_contents[0]

        rendered = tokenizer.apply_chat_template(
            prompt_messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        texts.append(rendered)
        labels.append(label)
    return {"text": texts, "labels": labels}

test_rendered = test_dataset.map(formatting_prompts_func, batched=True)
test_dataset = test_rendered.remove_columns(["QandA"])

print(test_dataset)

Map:   0%|          | 0/1136 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'labels'],
    num_rows: 1136
})


Vague dataset

In [7]:
# Vague-description test set; replaces whatever `test_dataset` held before.
test_dataset = load_dataset("json", data_files="Data/TestData/vague_data.jsonl", split="train")

def formatting_prompts_func(examples):
    """Build one generation prompt and one string label per (input_text, label) row.

    Each prompt is ``system_message`` plus a single user message holding the
    row's ``input_text``, rendered with a generation prompt and thinking
    disabled. Labels are converted with ``str``.

    Returns:
        {"text": [...], "labels": [...]}, aligned row by row.
    """
    rows = list(zip(examples["input_text"], examples["label"]))
    texts = [
        tokenizer.apply_chat_template(
            [system_message, {"role": "user", "content": input_text}],
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        for input_text, _ in rows
    ]
    labels = [str(label) for _, label in rows]
    return {"text": texts, "labels": labels}

vague_rendered = test_dataset.map(formatting_prompts_func, batched=True)
test_dataset = vague_rendered.remove_columns(["input_text", "label"])

print(test_dataset)

Map:   0%|          | 0/35 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'labels'],
    num_rows: 35
})


In [8]:
print(test_dataset[0]["text"])

<|im_start|>system
Given a description of a situation, you must identify the most relevant chapter from the provided list ONLY.
You MUST strictly select exactly one chapter from the list below. Do NOT invent, paraphrase, abbreviate, or modify any chapter titles.

Here are the chapters to select from (enclosed within the <start_chapters> and <end_chapters> tags):
<start_chapters>
- 03 Unconscious / decreased level of consciousness, breathing normally
- 04 Choking / foreign object
- 05 Mental health issue
- 06 Mental health crisis - suicide risk
- 07 Burns
- 08 Drowning
- 09 Scuba diving accident
- 10 Chemicals / gasses / CBRN
- 11 Accident / injury
- 12 Major incident
- 13 Road traffic accident - RTA
- 14 Violence / abuse
- 15 Allergic reaction
- 16 Child / infant - illness
- 17 Bite / insect sting
- 18 Bleeding - non-traumatic
- 19 Chest pain / cardiac disease
- 20 Diabetes
- 22 Fever / infection / sepsis
- 23 Poisoning - not related to alcohol or drugs
- 24 Functional decline
- 25 Chi

In [9]:
import re
import json
from tqdm import tqdm  
from torch.utils.data import DataLoader
# Evaluation setup. The three lists are reset every time this cell runs and are
# filled by the loops further down in the cell.
device = "cuda" if torch.cuda.is_available() else "cpu"

BATCH_SIZE = 1    # prompts per generate() call
input_texts = []  # rendered prompts taken from test_dataset["text"]
references = []   # first number found in each reference label
predictions = []  # first number found in each generated reply

def extract_first_number(text):
    """Return the first standalone run of digits in ``text`` as a string, or ""."""
    found = re.search(r'\b\d+\b', text)
    if found is None:
        return ""
    return found.group().strip()

def clean_response(text):
    """Return the assistant's reply from a decoded generation, without pad/think tags.

    Everything up to and including the last "<|im_start|>assistant" marker is
    dropped. When the marker is absent the whole input is treated as the reply.
    "<|vision_pad|>" tokens (presumably the tokenizer's padding token - confirm)
    and "<think>" / "</think>" tags are removed and the result is stripped.
    Other special tokens, such as a trailing "<|im_end|>", are left in place.
    """
    # re.split always yields at least one piece, so [-1] is the text after the
    # last marker, or the entire input when there is no marker. (Keeping the
    # list in the no-marker case made the re.sub calls below raise TypeError.)
    reply = re.split(r"<\|im_start\|>assistant\n?", text)[-1]
    reply = re.sub(r"</?\|?vision_pad\|?>", "", reply).strip()
    return re.sub(r"</?\|?think\|?>", "", reply).strip()

# Collect the prompts and the chapter number of each reference label.
for entry in test_dataset:
    input_text = entry["text"]
    label = entry["labels"]
    input_texts.append(input_text)
    references.append(extract_first_number(label.strip()))


# Create batches manually
dataloader = DataLoader(list(zip(input_texts, references)), batch_size=BATCH_SIZE)

# Run inference
for batch in tqdm(dataloader, desc="Running Batched Evaluation (Inference) on Test Data"):
    batch_input_texts, batch_labels = batch

    # Tokenize
    inputs = tokenizer(list(batch_input_texts), padding=True, truncation=True, return_tensors="pt").to(device)

    # Generate response (greedy, short: only the chapter line is needed)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=20,
            do_sample=False,
            use_cache=True
        )

    # Special tokens are kept so clean_response can locate the assistant marker.
    decoded_texts = tokenizer.batch_decode(outputs, skip_special_tokens=False)
    batch_text = [clean_response(decoded_text) for decoded_text in decoded_texts]
    # Extract each chapter number once and reuse it for both the check and the results.
    batch_numbers = [extract_first_number(response) for response in batch_text]
    predictions.extend(batch_numbers)

    # Fail fast, showing the offending replies, when no chapter number was produced.
    if "" in batch_numbers:
        display(batch_text)
        print(batch_numbers)
        raise ValueError("Empty chapter number found in batch predictions.")



# Prepare data for saving
output_data = {
    "references": references,
    "predictions": predictions
}

# Save the data to a JSON file
# NOTE(review): when model_path has no leading "./" the first replace() matches
# nothing, so "Models-" ends up inside the file name. Left unchanged because
# other code may rely on the file name this produces - confirm before fixing.
output_file = "./Data/Evaluation/NewVagueTest" + model_path.replace("./Models/","").replace("/", "-") + ".json"

# Make sure the target directory exists so a finished inference run is not
# lost to a missing folder.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
with open(output_file, "w") as f:
    json.dump(output_data, f, indent=2)

print(f"Inference results saved to {output_file}")

Running Batched Evaluation (Inference) on Test Data: 100%|██████████| 35/35 [00:11<00:00,  3.14it/s]

Inference results saved to ./Data/Evaluation/NewVagueTestModels-unsloth-Qwen3-4B-unsloth-bnb-4bit-LORA-(ep=1, bs=4, lr=0.0005, grad_acc=1, wd=0.001, rank=16, lora_alpha=16, lora_dropout=0, log_steps=500).json



