In [None]:
# Perform my imports
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM
import heapq
import json
import time
import random
import os
import re
import math
import pandas as pd
import sys
import numpy as np
from typing import Optional, List
from transformers import AutoTokenizer, LlamaForCausalLM, BitsAndBytesConfig, TrainingArguments
import json, csv
from sklearn.model_selection import train_test_split
from trl import SFTTrainer
from datasets import Dataset
from peft import LoraConfig

# Add the CeRTS modules, which live in the parent directory.
# ".." is put on sys.path only for the duration of these imports; the
# try/finally guarantees it is removed again even if an import raises, so a
# failed (and later re-run) cell does not leave stale or duplicate entries.
sys.path.append("..")
try:
    from CeRTS_beam_multi import *
    from CeRTS_utils import *
finally:
    sys.path.remove("..")

# Add some fine-tuning specific functions
from functions.generation_functions import *
from functions.dataset_creation_functions import *

# Use the GPU when torch can see one, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data Initializations

In [None]:
# Read the validation CSV (path is relative to the notebook's working directory)
df_val = pd.read_csv("clinical_data/validation.csv")

# Extraction schema: one entry per feature; the values look like
# "|"-separated allowed answers (presumably consumed by the prompt helpers).
json_template = {
    "asthma": "yes|no",
    "smoking": "yes|no",
    "pneu": "yes|no",
    "common_cold": "yes|no",
    "pain": "yes|no",
    "fever": "high|low|no",
    "antibiotics": "yes|no",
}
features = json_template.keys()

# Attach the gold-standard output as text to the frame (project helper)
df_val = create_gold_standard_extractions(df_val, json_template)

# Temporary 80/20 train/test split with a fixed seed; each part gets a fresh
# 0..n-1 index so rows can be addressed positionally afterwards.
df_val_train, df_val_test = train_test_split(df_val, test_size=0.2, random_state=42)
df_val_train = df_val_train.reset_index(drop=True)
df_val_test = df_val_test.reset_index(drop=True)

# Load in Model

In [None]:
# Local checkpoint directory of the base model to fine-tune
model_path = "models/Qwen2.5-1.5B-Instruct"

# Tokenizer and model both come from the same checkpoint directory.
# device_map="auto" lets the library decide device placement; no dtype is
# passed here, so the precision is whatever from_pretrained defaults to.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

# Format the Dataset for Fine-Tuning

In [None]:
# Turn the training rows into conversations via the project helper
# (each item is expected to be a list of role/content dicts).
conversations = format_for_standard_SFT(df_val_train, json_template)

# Wrap every conversation under a "messages" key and load the records
# into a datasets.Dataset in one go.
train_data = Dataset.from_list([{"messages": conv} for conv in conversations])

# Define Training Arguments

In [None]:
# ---- LoRA adapter configuration ----
peft_config = LoraConfig(
    r=8,                        # low-rank dimension
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# ---- Training arguments ----
# output_dir is derived from the value `model_path` holds when this cell runs.
# Effective batch size per device is 2 * 8 = 16 (batch size x accumulation).
training_args = TrainingArguments(
    output_dir=f'{model_path}-SFT-Levi-test',
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-5,
    num_train_epochs=50,
    logging_steps=10,
    save_steps=100,
    save_total_limit=2,
    bf16=True
)

# ---- Trainer with PEFT/LoRA ----
# `train_data` is already a Dataset of {"messages": ...} records, so it is
# passed as-is. (A leftover `updated_data` list that re-wrapped those records
# a second time and was never used has been removed.)
trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    processing_class=tokenizer,
    args=training_args,
    peft_config=peft_config   # <<< LoRA here
)

# Actually Train the Model

In [11]:
# Run the fine-tuning loop
trainer.train()

# Write the LoRA adapter and the tokenizer into the training output directory
adapter_dir = training_args.output_dir
trainer.model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)
print(f"✅ LoRA adapter saved to {training_args.output_dir}")

# Also save a merged model (so we don't have to merge and unload when using
# it later), in a sibling directory with a "-merged" suffix.
final_model_dir = f"{training_args.output_dir}-merged"
merged_model = trainer.model.merge_and_unload()
merged_model.save_pretrained(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

('models/llama-3.2-3b-instruct-SFT-Levi-test-merged/tokenizer_config.json',
 'models/llama-3.2-3b-instruct-SFT-Levi-test-merged/special_tokens_map.json',
 'models/llama-3.2-3b-instruct-SFT-Levi-test-merged/chat_template.jinja',
 'models/llama-3.2-3b-instruct-SFT-Levi-test-merged/tokenizer.json')

# Scratch: experiment with my data to get familiar with transformers

### Load in Model

In [9]:
# NOTE: this cell rebinds `model_path`, `tokenizer` and `model`, replacing the
# objects created earlier in the notebook with this checkpoint.
model_path = "models/llama-3.2-3b-instruct"

# Tokenizer and model both come from the same checkpoint directory.
# device_map="auto" lets the library decide device placement; no dtype is
# passed here, so the precision is whatever from_pretrained defaults to.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Now Test the Model

In [25]:
# Row label in df_val to try the model on
n = 14

# Build the zero-shot chat messages for the selected note (project helper)
messages = gen_prompt_no_shot(df_val.loc[n, 'advanced_text'], features, json_template)

# Render the messages into a single prompt string using the tokenizer's chat
# template, ending with the generation prompt for the assistant turn.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize the rendered prompt. The chat template already emits any special
# tokens it needs, so add_special_tokens=False avoids the tokenizer prepending
# a second BOS token on tokenizers that add one by default.
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

# Sample a response. pad_token_id is passed explicitly so generate() does not
# have to fall back to the EOS token with a warning on every call.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

# Keep only the newly generated tokens (everything after the prompt) and
# decode them; special tokens are deliberately left visible in the text.
generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
response = tokenizer.decode(generated_tokens, skip_special_tokens=False)

# Display the raw response as the cell output
response

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


'{"asthma": "no","smoking": "no","pneu": "no","common_cold": "yes","pain": "no","fever": "no","antibiotics": "no"}<|eot_id|>'

### Make Our Training Arguments