# Fine Tuning use LoRA
<hr>

In [1]:
import pandas as pd

df = pd.read_parquet("hf://datasets/TachyHealth/ADA_Dental_Code_to_SBS_V2/data/train-00000-of-00001.parquet")

In [4]:
# display the first 5 rows of the dataframe
print(df.head())
# display the column names of the dataframe
print(df.columns)

   ADA Code                      Description SBS V2.0 Code   \
0      11.0   Comprehensive oral examination    97011-00-00   
1      12.0        Periodic oral examination    97012-00-00   
2      13.0       Oral examination – limited    97013-00-00   
3       NaN                             None    97011-00-10   
4       NaN                             None    97011-00-40   

                                Short description  \
0                  Comprehensive oral examination   
1                              Periodic oral exam   
2                              Limited oral exam    
3  Oral examination; post operative re-evaluation   
4       Oral evaluation; under three years of age   

                                    Long Description  Block Comments/Guidance  
0                     Comprehensive oral examination    450              None  
1                          Periodic oral examination    450              None  
2                           Limited oral examination    450   

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2.5-1.5B",
    trust_remote_code=True,
    use_fast=False
)
tokenizer.pad_token = tokenizer.eos_token

# 2. Apply LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
        "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj",
        "mlp.gate_proj", "mlp.up_proj", "mlp.down_proj"
    ],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)
model = get_peft_model(model, lora_config)
model = model.to(device)

# 3. Load and format dataset
dataset = load_dataset("TachyHealth/ADA_Dental_Code_to_SBS_V2", split="train")

def format_prompt(example):
    if not example.get("ADA Code") or not example.get("Short description") or not example.get("Long Description"):
        return None
    prompt = f"""### Instruction:
Given a dental procedure code and its short name, explain what it means in detail so a patient can understand.

### Code:
{int(example['ADA Code'])} - {example['Short description']}

### Response:
{example['Long Description']}"""
    tokenized = tokenizer(prompt, padding="max_length", truncation=True, max_length=512)
    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "labels": [token if token != tokenizer.pad_token_id else -100 for token in tokenized["input_ids"]]
    }

tokenized_dataset = dataset.map(format_prompt, remove_columns=dataset.column_names)
tokenized_dataset = tokenized_dataset.filter(lambda x: x and x["input_ids"] is not None)

# 4. Collate function
def collate_fn(batch):
    return {
        "input_ids": torch.tensor([item["input_ids"] for item in batch], dtype=torch.long),
        "attention_mask": torch.tensor([item["attention_mask"] for item in batch], dtype=torch.long),
        "labels": torch.tensor([item["labels"] for item in batch], dtype=torch.long),
    }

# 5. DataLoader, optimizer, scheduler
train_loader = DataLoader(tokenized_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
lr_scheduler = get_scheduler("linear", optimizer, 0, len(train_loader))

# 6. Training loop
model.train()
for epoch in range(1):
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
    for batch in loop:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        loop.set_postfix(loss=loss.item())

# 7. Save adapter
model.save_pretrained("./qwen2.5-lora-dental")

# Fine Tuning for ADA code

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



In [2]:
# 1. Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2.5-1.5B",
    trust_remote_code=True,
    use_fast=False
)
tokenizer.pad_token = tokenizer.eos_token

# 2. Apply LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
        "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj",
        "mlp.gate_proj", "mlp.up_proj", "mlp.down_proj"
    ],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)
model = get_peft_model(model, lora_config)
model = model.to(device)



Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Could not load bitsandbytes native library: 'NoneType' object has no attribute 'split'
Traceback (most recent call last):
  File "c:\Users\shirl\Desktop\Dev\my_finetune_project\venv_finetune\Lib\site-packages\bitsandbytes\cextension.py", line 85, in <module>
    lib = get_native_library()
          ^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\shirl\Desktop\Dev\my_finetune_project\venv_finetune\Lib\site-packages\bitsandbytes\cextension.py", line 64, in get_native_library
    cuda_specs = get_cuda_specs()
                 ^^^^^^^^^^^^^^^^
  File "c:\Users\shirl\Desktop\Dev\my_finetune_project\venv_finetune\Lib\site-packages\bitsandbytes\cuda_specs.py", line 39, in get_cuda_specs
    cuda_version_string=(get_cuda_version_string()),
                         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\shirl\Desktop\Dev\my_finetune_project\venv_finetune\Lib\site-packages\bitsandbytes\cuda_specs

In [3]:
# 3. Load and format dataset
dataset = load_dataset("TachyHealth/ADA_Dental_Code_to_SBS_V2", split="train")

def format_prompt(example):
    if not example.get("ADA Code") or not example.get("Short description") or not example.get("Long Description"):
        return None
    prompt = f"""### Instruction:
Given a dental procedure code and its short name, explain what it means in detail so a patient can understand.

### Code:
{int(example['ADA Code'])} - {example['Short description']}

### Response:
{example['Long Description']}"""
    tokenized = tokenizer(prompt, padding="max_length", truncation=True, max_length=512)
    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "labels": [token if token != tokenizer.pad_token_id else -100 for token in tokenized["input_ids"]]
    }

tokenized_dataset = dataset.map(format_prompt, remove_columns=dataset.column_names)
tokenized_dataset = tokenized_dataset.filter(lambda x: x and x["input_ids"] is not None)



Map:   0%|          | 0/594 [00:00<?, ? examples/s]

Filter:   0%|          | 0/341 [00:00<?, ? examples/s]

In [4]:
# 4. Collate function
def collate_fn(batch):
    return {
        "input_ids": torch.tensor([item["input_ids"] for item in batch], dtype=torch.long),
        "attention_mask": torch.tensor([item["attention_mask"] for item in batch], dtype=torch.long),
        "labels": torch.tensor([item["labels"] for item in batch], dtype=torch.long),
    }

# 5. DataLoader, optimizer, scheduler
train_loader = DataLoader(tokenized_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
lr_scheduler = get_scheduler("linear", optimizer, 0, len(train_loader))



In [5]:
# 6. Training loop
model.train()
for epoch in range(1):
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
    for batch in loop:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        loop.set_postfix(loss=loss.item())

# 7. Save adapter
model.save_pretrained("./qwen2.5-lora-dental")

Epoch 1: 100%|██████████| 341/341 [1:01:58<00:00, 10.90s/it, loss=1]    
