INSTALLING REQUIRED LIBRARIES

In [None]:
# Install the pinned training stack (quiet mode).
# NOTE(review): transformers 4.31.0 was released before Llama 3; it may not honour
# Llama-3-specific config fields such as the RoPE base — confirm this pin is
# appropriate for the model loaded below.
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [None]:
!pip install evaluate

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

CHECKING GPU

In [None]:
import torch

# Report whether CUDA is usable and, when it is, the name of device 0.
gpu_available = torch.cuda.is_available()
print(gpu_available)
if gpu_available:
    device_name = torch.cuda.get_device_name(0)
    print(device_name)


LOGIN TO HUGGING FACE FOR ACCESS TO LOCKED MODELS/DATASETS AND UPLOADING

In [None]:
# Authenticate this session with the Hugging Face Hub; needed for the gated
# base model download and for the upload at the end of the notebook.
import huggingface_hub
huggingface_hub.login()

SETTING ALL THE PARAMETERS FOR FOLLOWING CODE

r = 16 during LORA

using 4 bit quantization

In [None]:
# Hub id of the base model that is loaded, quantized and fine-tuned below
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# Local directory name the trained adapter is saved under (passed to save_pretrained)
new_model = "llama-3-8B-FineTuned"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension (rank r of the low-rank update matrices)
lora_r = 16

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Quantized loading mode for the base model; these two flags are passed as
# load_in_4bit / load_in_8bit. Enable at most one of them.
use_4bit = True  # 4-bit loading (the mode this notebook uses)
use_8bit = False   # Set to True (and use_4bit to False) for 8-bit loading

# Compute dtype for 4-bit base models (name of a torch dtype, resolved via getattr)
bnb_4bit_compute_dtype = "float16"  # Adjust as needed

# Quantization type (fp4 or nf4) for 4-bit loading
bnb_4bit_quant_type = "nf4"  # Adjust as needed

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
# Both are off here, so no mixed-precision flag is requested from the trainer.
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 2

# Batch size per GPU for evaluation
per_device_eval_batch_size = 2

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs); -1 leaves the epoch count in charge
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X update steps
save_steps = 0

# Log every X update steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use (None defers to the SFTTrainer default)
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0 (the empty-string key covers the whole model)
device_map = {"": 0}




USING AVAILABLE MEDICAL DATASET

In [None]:
# Hub id of the doctor/patient conversation dataset
data_name = "ruslanmv/ai-medical-chatbot"

# Only the "train" split is loaded; later cells sample from it
data = load_dataset(data_name, split="train")

# Display the dataset summary
data

In [None]:
# Peek at one raw row to see the Description / Patient / Doctor fields
data[3]

{'Description': 'Q. Why do I have uncomfortable feeling between the middle of my spine and left shoulder blade?',
 'Patient': 'Hello doctor,I am having an uncomfortable feeling in between the middle of my spine and left shoulder blade. It seems to get stiff, and my bones pop a lot around there, and it is very uncomfortable to sit in specific ways feels like my bones shift, well the other night it seemed as if my shoulder would pop out of place causing more stiffness after a while of dealing with it. I ended up feeling a popping sensation, where the stiffness was an instant relief. It was so shocking it put my anxiety through the roof. I thought I was going to die because my whole body was shaking and tingling. What am I dealing with? Right now I have no pain or tender near that area still relief but curious on what happened and why I got this shock and that popping feeling so loud it scared me half to death.',
 'Doctor': 'Hello. The popping and discomfort what you felt is either becaus

CONVERTING DATA TO THE FORM PRESCRIBED IN THE MODEL CARD FOR FINE-TUNING

In [None]:
from datasets import load_dataset, Dataset
import random

# Assuming you have loaded your dataset into a variable called `data`
# If the dataset is not loaded, you can load it from a file or source.
# data = load_dataset('path_to_your_dataset')

# Row-level formatter used with Dataset.map
def transform_entry(entry):
    """Turn one dataset row into a single training string.

    Args:
        entry: mapping with at least the keys ``'Patient'`` (the question)
            and ``'Doctor'`` (the answer).

    Returns:
        A dict with one key, ``"text"``, holding the question and answer
        wrapped as ``<s>[INST] question [/INST] answer </s>``.

    NOTE(review): the ``[INST]`` markers look like the Llama-2 chat layout,
    while the base model configured in this notebook is a Llama-3 Instruct
    checkpoint — confirm the template against the model card.
    """
    question = entry['Patient']
    answer = entry['Doctor']
    return {"text": f"<s>[INST] {question} [/INST] {answer} </s>"}

# Apply the transformation to the dataset, keeping only the generated 'text' column
transformed_data = data.map(transform_entry, remove_columns=['Description', 'Patient', 'Doctor'])

# Draw a random subset of rows for training.
# A dedicated seeded generator makes the subset identical on every run without
# touching the global `random` state; the size is capped so a dataset smaller
# than the requested sample does not raise ValueError.
sample_size = min(1000, len(transformed_data))
sample_rng = random.Random(42)
sampled_indices = sample_rng.sample(range(len(transformed_data)), sample_size)
sampled_data = transformed_data.select(sampled_indices)

# Create the new dataset with the 'text' feature
new_dataset = Dataset.from_dict({"text": sampled_data['text']})

# Print the new dataset format to verify
print(new_dataset)

# Optionally, save the new dataset to disk
# new_dataset.save_to_disk('path_to_save_new_dataset')

NOTE- DATA SEEMS TO HAVE PARTS WHICH ARE NOT IDEAL

In [None]:
# Spot-check one formatted training example
new_dataset[5]

{'text': "<s>[INST] Hello, I am taking a water pill for high blood pressure (hydrocholorot). I recently had a root canal. I am taking Advil for the pain and penicillin for the bone infection. I have noticed a sharp spike in my resting blood pressure. Which drug is causing the spike [/INST] Welcome to ' Ask a doctor ' service .I have reviewed your query and here is my answer .Please note that all pain killers and antiallergic cause decreased effect og drugs given for high blood pressure .Penicillin has no such effect .I hope I have solved your query .Let me know if I can assist you further in this query .With regards dr varinder joshi </s>"}

SETTING UP QUANTIZATION

In [None]:
# Resolve the configured dtype name (e.g. "float16") to the torch dtype object
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# 4-bit and 8-bit loading are alternatives; fail early on a config that asks for both
if use_4bit and use_8bit:
    raise ValueError("Set only one of use_4bit / use_8bit to True.")

# Quantization settings handed to from_pretrained when the base model is loaded
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    load_in_8bit=use_8bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,  # 4-bit quantization type (fp4 / nf4)
    bnb_4bit_compute_dtype=compute_dtype,  # dtype used for compute on 4-bit weights
    bnb_4bit_use_double_quant=use_nested_quant,  # nested (double) quantization toggle
)

LOADING A QUANTIZED MODEL

In [None]:
# Fetch the base checkpoint, quantized per bnb_config and placed per device_map
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map=device_map, quantization_config=bnb_config
)

# Config tweaks applied before training
model.config.pretraining_tp = 1
model.config.use_cache = False



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForCausalLM were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B-Instruct and are newly initialized: ['model.layers.11.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.22.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_attn.rotary_emb.inv_freq', 'model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layers.27.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.30.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.r

In [None]:
# Confirm which base checkpoint is in use
model_name

'meta-llama/Meta-Llama-3-8B-Instruct'

In [None]:
# Tokenizer that matches the base checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Pad on the right (fix weird overflow issue with fp16 training) and
# reuse the end-of-sequence token as the padding token
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

ADDING SOME EXTRA ELEMENTS TO THE TOKENIZER BECAUSE THEY WERE REQUIRED FOR EVALUATION

In [None]:
# Register the prompt markers used by the training text as special tokens
special_tokens_dict = {
    'additional_special_tokens': ['<s>', '[INST]', '[/INST]'],
}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

# Grow the embedding matrix to the new vocabulary size; the returned
# embedding module is displayed as the cell output
model.resize_token_embeddings(len(tokenizer))

Embedding(128259, 4096)

TESTING PERFORMANCE BEFORE TRAINING

In [None]:
# Baseline: ask the untuned model one dataset-style question
prompt = "Q. Why do I have uncomfortable feeling between the middle of my spine and left shoulder blade??"

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)

# Wrap the question in the same markers used for the training text
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

SETTING THE LORA CONFIGURATION

In [None]:
# Adapter settings for causal-LM fine-tuning, taken from the config cell
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
)

In [None]:
# Set training parameters.
# Every knob comes from the configuration cell; gradient_checkpointing and
# per_device_eval_batch_size are forwarded too so the values declared there
# are no longer silently ignored.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

In [None]:
# Build the supervised fine-tuning trainer: the quantized base model plus the
# LoRA config, trained on the 'text' column of the sampled dataset
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=new_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=packing,
)

In [None]:
# Run the fine-tuning loop with the arguments configured above
trainer.train()

# Write the trained model to the local `new_model` directory
trainer.model.save_pretrained(new_model)

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,3.1071
50,2.9092
75,2.6795
100,2.852
125,2.6256
150,2.6803
175,2.6789
200,2.5377
225,2.5884
250,2.6304


CHECKING PERFORMANCE AFTER TRAINING

In [None]:
# Repeat the baseline question now that training has run, for a side-by-side look
prompt = "Q. Why do I have uncomfortable feeling between the middle of my spine and left shoulder blade??"

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)

# Same prompt wrapping as the pre-training check
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [None]:
import evaluate

# The model is a causal (generative) LM and the dataset has Description / Patient /
# Doctor columns, so the extractive "question-answering" pipeline and evaluator
# (which expect SQuAD-style question/context/answers data) do not apply here.
# Instead: generate an answer for a few patient questions and score the output
# against the doctor's reply with BLEU.
#
# The tokenizer from the training cells is reused (not reloaded) so it stays
# consistent with the resized embedding matrix.
#
# NOTE(review): rows are drawn from the same split the training subset was
# sampled from, so some overlap with training data is possible — use a held-out
# split if one is available.

EVAL_SAMPLE_SIZE = 20      # kept small: generation is slow
EVAL_SEED = 0              # fixed so the evaluated rows are the same on every run
EVAL_MAX_NEW_TOKENS = 128  # budget for the generated answer, independent of prompt length

eval_rows = data.shuffle(seed=EVAL_SEED).select(range(min(EVAL_SAMPLE_SIZE, len(data))))

eval_pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

predictions = []
references = []
for row in eval_rows:
    generation = eval_pipe(
        f"<s>[INST] {row['Patient']} [/INST]",
        max_new_tokens=EVAL_MAX_NEW_TOKENS,
        return_full_text=False,  # score only the continuation, not the echoed prompt
    )
    predictions.append(generation[0]["generated_text"].strip())
    references.append([row["Doctor"]])

if any(predictions):
    bleu_metric = evaluate.load("bleu")
    eval_results = bleu_metric.compute(predictions=predictions, references=references)
else:
    # Nothing was generated; BLEU is undefined on empty output, so report zero
    eval_results = {"bleu": 0.0}

# Print or use eval_results as needed
print(eval_results)


In [None]:
# Upload the fine-tuned model to Hugging Face Hub
from huggingface_hub import HfApi, HfFolder, Repository

UPLOADING MODEL TO HF WITH APPROPRIATE FILES

In [None]:
# Save trained adapter and tokenizer into the local `new_model` folder
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

# The saved files keep their PEFT names (adapter_config.json, adapter_model.*).
# PEFT looks for exactly those names when loading an adapter, and the adapter
# weights are not a full model checkpoint, so they must not be renamed to
# config.json / pytorch_model.bin.

# Upload the fine-tuned adapter to Hugging Face Hub
username = "PradyumSomebody"
repo_name = "finetunedLlamaTest2.2-llama-3"
repo_id = f"{username}/{repo_name}"

api = HfApi()
# exist_ok=True makes re-running this cell safe when the repo already exists
api.create_repo(repo_id=repo_id, exist_ok=True)
api.upload_folder(
    folder_path=new_model,
    path_in_repo="",
    repo_id=repo_id,
    commit_message="Upload fine-tuned model"
)

print(f"Model uploaded to Hugging Face Hub: https://huggingface.co/{repo_id}")

adapter_model.bin:   0%|          | 0.00/27.3M [00:00<?, ?B/s]

Model uploaded to Hugging Face Hub: https://huggingface.co/PradyumSomebody/finetunedLlamaTest2.1-llama-3
