In [1]:
!pip install transformers accelerate bitsandbytes peft torch torchvision torchaudio datasets

Collecting transformers
  Downloading transformers-4.48.3-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-1.3.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting peft
  Downloading peft-0.14.0-py3-none-any.whl.metadata (13 kB)
Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting huggingface-hub<1.0,>=0.24.0 (from transformers)
  Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokeniz

In [10]:
import os
import torch
from datasets import load_dataset, DatasetDict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoConfig,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    LlamaConfig,
    DataCollatorForSeq2Seq,
    EarlyStoppingCallback
) 
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import shutil
import zipfile
from huggingface_hub import login

In [None]:
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
DATASET_NAME = "ShivomH/Medical-and-Mental-Health"
HF_TOKEN = "your_access_token(API)"
OUTPUT_DIR = "elixir-llama3b-health"
MAX_SEQ_LENGTH = 1024

In [5]:
# LoRA Configuration
LORA_RANK = 64 
LORA_ALPHA = 128
LORA_DROPOUT = 0.1
TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "up_proj", "down_proj"]

In [None]:
# Training Parameters
BATCH_SIZE = 8
EPOCHS = 3
LEARNING_RATE = 1e-4
WARMUP_RATIO = 0.05
GRAD_ACCUM_STEPS = 1
OPTIMIZER = "paged_adamw_32bit"

In [7]:
# Dataset Splitting
TRAIN_RATIO = 0.90
VAL_RATIO = 0.07
TEST_RATIO = 0.03

In [8]:
login(token=HF_TOKEN, add_to_git_credential=True)
# os.makedirs(OUTPUT_DIR, exist_ok=True)

Token has not been saved to git credential helper.


[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m


In [9]:
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,  
    llm_int8_has_fp16_weight=False 
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=HF_TOKEN
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    model_max_length=MAX_SEQ_LENGTH
)
tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [11]:
# Prepare Model
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

peft_config = LoraConfig(
    r=LORA_RANK,
    lora_alpha=LORA_ALPHA,
    target_modules=TARGET_MODULES,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
    modules_to_save=["lm_head", "embed_tokens"]
)
model = get_peft_model(model, peft_config)

In [12]:
# Dataset Preparation
dataset = load_dataset(DATASET_NAME, token=HF_TOKEN)['train']

# Stratified split preserving conversation types
split = dataset.train_test_split(test_size=VAL_RATIO+TEST_RATIO, seed=42)
val_test = split['test'].train_test_split(
    test_size=TEST_RATIO/(VAL_RATIO+TEST_RATIO), 
    seed=42
)

dataset = DatasetDict({
    'train': split['train'],
    'validation': val_test['train'],
    'test': val_test['test']
})

README.md:   0%|          | 0.00/835 [00:00<?, ?B/s]

llama3_final_dataset.jsonl:   0%|          | 0.00/201M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/75477 [00:00<?, ? examples/s]

In [13]:
# Tokenization
def preprocess_function(examples):
    # Tokenize the input text
    inputs = tokenizer(
        examples["text"],
        padding="max_length",  
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
        add_special_tokens=False
    )

    # Labels should be same as input_ids, but pad tokens should be ignored (-100)
    inputs["labels"] = inputs["input_ids"].copy()
    inputs["labels"] = [
        [(label if label != tokenizer.pad_token_id else -100) for label in labels]
        for labels in inputs["labels"]
    ]

    return inputs

tokenized_dataset = dataset.map(
    preprocess_function, 
    batched=True,
    remove_columns=["text"]
)

Map:   0%|          | 0/67929 [00:00<?, ? examples/s]

Map:   0%|          | 0/5283 [00:00<?, ? examples/s]

Map:   0%|          | 0/2265 [00:00<?, ? examples/s]

In [None]:
# Training Setup 
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    pad_to_multiple_of=8,
    padding=True
)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=EPOCHS + 1,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM_STEPS,
    eval_accumulation_steps = 1,
    eval_strategy="steps",
    eval_steps=8000,
    save_strategy="steps",
    save_steps=8000,
    logging_steps=2000,
    learning_rate=LEARNING_RATE,
    weight_decay=0.05,
    bf16=True,
    fp16=False,
    max_grad_norm=0.3,
    warmup_ratio=WARMUP_RATIO,
    lr_scheduler_type="cosine",
    optim=OPTIMIZER,
    gradient_checkpointing=True,
    report_to="none",
    remove_unused_columns=False,
    group_by_length=False,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
)

In [18]:
# Training Execution
# print("Starting Llama-3B fine-tuning...")
# trainer.train()

Starting Llama-3B fine-tuning...




Step,Training Loss,Validation Loss
2000,0.806,0.816207
4000,0.7825,0.785775
6000,0.7549,0.766011
8000,0.7379,0.750552




KeyboardInterrupt: 

In [None]:
print("Resuming Training of Elixir-Llama3...")
trainer.train(resume_from_checkpoint="elixir-llama3b-health/checkpoint-33966")

Resuming Training of Elixir-Llama3...


	logging_steps: 2000 (from args) != 500 (from trainer_state.json)
	eval_steps: 4000 (from args) != 2000 (from trainer_state.json)


Step,Training Loss,Validation Loss
10000,0.7267,0.732127
12000,0.6156,0.723635
14000,0.6093,0.716406
16000,0.6047,0.705376
18000,0.6043,0.693454
20000,0.5812,0.678173
22000,0.5871,0.665541
24000,0.4879,0.67942
26000,0.4755,0.674529
28000,0.4709,0.670396


Could not locate the best model at elixir-llama3b-health/checkpoint-22000/pytorch_model.bin, if you are running a distributed training on multiple nodes, you should activate `--save_on_each_node`.


TrainOutput(global_step=33966, training_loss=0.42852340434989217, metrics={'train_runtime': 69673.1559, 'train_samples_per_second': 2.925, 'train_steps_per_second': 0.488, 'total_flos': 4.1052910159512207e+18, 'train_loss': 0.42852340434989217, 'epoch': 3.0})

In [26]:
print("Saving final model...")

BASE_MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
LORA_DIR = "Elixir_Llama3_LoRA"  # Folder where LoRA weights will be saved
MERGED_DIR = "Elixir_Llama3_Merged" # Final merged model directory

# Save LoRA model and tokenizer
trainer.save_model(LORA_DIR)  
tokenizer.save_pretrained(LORA_DIR)

Saving final model...


('Elixir_Llama3_LoRA/tokenizer_config.json',
 'Elixir_Llama3_LoRA/special_tokens_map.json',
 'Elixir_Llama3_LoRA/tokenizer.json')

In [27]:
# Load the base model
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [30]:
from peft import PeftModel

# Load LoRA-adapted model
lora_model = PeftModel.from_pretrained(model, LORA_DIR)

# Merge LoRA adapters into the base model
merged_model = lora_model.merge_and_unload()

In [31]:
merged_model.save_pretrained(MERGED_DIR)
tokenizer.save_pretrained(MERGED_DIR)

('Elixir_Llama3_Merged/tokenizer_config.json',
 'Elixir_Llama3_Merged/special_tokens_map.json',
 'Elixir_Llama3_Merged/tokenizer.json')