In [None]:

!pip install -q -U bitsandbytes
!pip install -q -U transformers
!pip install -q -U xformers
!pip install -q -U peft
!pip install -q -U accelerate
!pip install -q -U datasets
!pip install -q -U trl
!pip install -q -U einops
!pip install -q -U nvidia-ml-py3
!pip install -q -U huggingface_hub

In [None]:
from datasets import load_dataset
# Hub dataset id and split expression; the slice keeps only the first 20% of "train".
DATASET_ID = "ZappY-AI/MASHQA-JSON"
TRAIN_SPLIT = "train[:20%]"

dataset = load_dataset(DATASET_ID, split=TRAIN_SPLIT)
dataset

In [None]:
def create_prompt(sample):
  """Build the supervised fine-tuning text for one question/answer record.

  Args:
    sample: mapping with a 'question' entry and an 'answer' entry.

  Returns:
    A dict with a single key, "inputs", holding the instruction text with the
    question filled in under "### Question:" and the reference answer appended
    after the "### Answer:" header.
  """
  system_prompt_template = """[INST]Act as a Multiple Answer Spans Healthcare Question Answering helpful assistant and answer the user's questions in details with reasoning. Do not give any false information. In case you don't have answer, specify why the question can't be answered.

### Question:
{question}

### Answer:
"""
  user_message = sample['question']
  user_response = sample['answer']
  # The template's placeholder is "{question}"; the previous code searched for "<>",
  # which never occurs, so neither the question nor the answer reached the prompt.
  # str.replace (rather than str.format) leaves any braces in the template untouched.
  prompt_template = system_prompt_template.replace("{question}", f"{user_message}")
  # The template ends right after "### Answer:\n", so the target answer is appended there.
  prompt_template = prompt_template + f"{user_response}"

  return {"inputs":prompt_template}

# Run create_prompt over every record; the dict it returns adds an "inputs" column.
instruct_tune_dataset = dataset.map(create_prompt)
# Print the mapped dataset.
print(instruct_tune_dataset)
     

In [None]:
# Display the first mapped record to check the generated "inputs" text.
instruct_tune_dataset[0]

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from pynvml import *
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
import time, torch

def print_gpu_utilization(device_index=0):
    """Print how much memory is currently in use on one NVIDIA GPU.

    Args:
        device_index: NVML index of the GPU to query. Defaults to 0, the
            device this helper always queried before the parameter existed.

    Returns:
        None. The result is printed as ``GPU memory occupied: <N> MB.``, where
        N is the used byte count floor-divided by 1024**2.
    """
    nvmlInit()
    try:
        handle = nvmlDeviceGetHandleByIndex(device_index)
        info = nvmlDeviceGetMemoryInfo(handle)
        print(f"GPU memory occupied: {info.used//1024**2} MB.")
    finally:
        # Pair every nvmlInit() with nvmlShutdown(), even when the query raises,
        # so repeated calls do not leave NVML sessions open.
        nvmlShutdown()

In [None]:
base_model_id = "mistralai/Mistral-7B-v0.1"

# Load the tokenizer. Padding is applied on the right and reuses the EOS token as pad token.
# NOTE(review): `max_length=250` is passed to from_pretrained here, while the trainer cell
# sets max_seq_length=512 — presumably the latter is what governs truncation; confirm.
tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_eos_token=True, use_fast=True, max_length=250)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; matmuls are computed in `compute_dtype`.
compute_dtype = getattr(torch, "float16") # change to bfloat16 if you are using an Ampere (or more recent) GPU
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=True,
)

# Load the quantized base model onto GPU 0.
# Removed relative to the previous call:
#   * flash_attn / flash_rotary / fused_dense — options of a different model's custom code;
#     they are not Mistral model arguments.
#   * revision="refs/pr/23" — pinned the weights to an unrelated pull-request ref while the
#     tokenizer above is loaded from the default revision.
#   * trust_remote_code=True — not needed for a model implemented inside transformers, and
#     avoids executing repository-supplied code.
model = AutoModelForCausalLM.from_pretrained(
          base_model_id,
          quantization_config=bnb_config,
          device_map={"": 0},
          torch_dtype="auto",
)
# The helper prints the figure itself and returns None, so it is called directly
# (wrapping it in print() emitted an extra "None" line).
print_gpu_utilization()

# Prepare the quantized model for k-bit (QLoRA) training.
model = prepare_model_for_kbit_training(model)
     

In [None]:

# LoRA adapter configuration for causal-LM fine-tuning.
# target_modules must name layers that exist in the base model. The model loaded in this
# notebook is Mistral, whose attention projections are q_proj / k_proj / v_proj / o_proj;
# the previous names ("Wqkv", "out_proj") belong to a different architecture and match
# no module in Mistral.
peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0.05,
        r=16,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"])

In [None]:
# Hyper-parameters for the fine-tuning run, grouped by concern.
training_arguments = TrainingArguments(
        # --- reproducibility / output location ---
        seed=42,
        output_dir="./mistral-MASHQA-results2",

        # --- batch geometry: 2 samples per device x 12 accumulation steps ---
        per_device_train_batch_size=2,
        gradient_accumulation_steps=12,

        # --- optimizer and learning-rate schedule ---
        optim='paged_adamw_8bit',
        learning_rate=1e-4,
        lr_scheduler_type="linear",
        warmup_steps=100,
        fp16=True, # change to bf16 if you are using an Ampere GPU

        # --- run length ---
        # NOTE(review): both an epoch count and a step cap are given; per the
        # TrainingArguments docs a positive max_steps takes precedence — confirm intent.
        num_train_epochs=3,
        max_steps=500,

        # --- logging / checkpointing / evaluation cadence ---
        # NOTE(review): save_steps presumably has no effect while save_strategy="epoch",
        # and eval_steps none without an evaluation dataset — verify and prune if so.
        log_level="debug",
        logging_steps=25,
        save_strategy="epoch",
        save_steps=100,
        eval_steps=50)

In [None]:

# Drop the raw 'answer' and 'question' columns from the dataset used for training.
# Dataset.remove_columns states this directly; the previous function-less
# map(batched=True, remove_columns=...) ran an identity pass over every batch only
# for its column-removal side effect.
train_dataset = instruct_tune_dataset.remove_columns(['answer', 'question'])
train_dataset

In [None]:

# Supervised fine-tuning trainer: quantized base model + LoRA adapter config,
# trained on the text found in the "inputs" column. No eval_dataset is passed.
# NOTE(review): dataset_text_field / max_seq_length / tokenizer / packing are given as
# direct SFTTrainer keyword arguments — confirm the installed trl release still accepts them.
trainer = SFTTrainer(
        # model and adapter
        model=model,
        peft_config=peft_config,
        tokenizer=tokenizer,
        # data
        train_dataset=train_dataset,
        dataset_text_field="inputs",
        max_seq_length=512,
        packing=False,
        # optimisation settings
        args=training_arguments,
)

In [None]:

# Start fine-tuning with the trainer built in the previous cell.
trainer.train()

In [None]:
import locale
from huggingface_hub import notebook_login

# Force the reported preferred encoding to UTF-8.
# NOTE(review): presumably a workaround for notebook hosts whose locale is not UTF-8 —
# confirm it is still required.
# The replacement keeps the standard-library signature (`do_setlocale=True`), so callers
# that pass the argument, e.g. `locale.getpreferredencoding(False)`, keep working; the
# previous zero-argument lambda raised TypeError for them.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

# Interactive Hugging Face login; the token is entered at runtime, never stored in the notebook.
notebook_login()

# Upload the trainer's output to the Hub under the given commit message.
trainer.push_to_hub(commit_message="fine-tuned adapter")

In [None]:
import matplotlib.pyplot as plt

# Loss readings entered by hand from the reported metrics: one value every 25 steps (25..475).
steps = list(range(25, 500, 25))
training_loss = [
    1.1485, 1.0147, 0.7141, 0.6297, 0.6046, 0.5911, 0.5822, 0.5772, 0.5741, 0.565,
    0.5635, 0.5659, 0.5514, 0.5472, 0.561, 0.5643, 0.5606, 0.5521, 0.5567,
]

# Lowest recorded loss (first occurrence) and the step it was logged at.
min_loss = min(training_loss)
min_loss_index = training_loss.index(min_loss)
min_loss_step = steps[min_loss_index]

# Draw the loss curve.
plt.figure(figsize=(10, 6))
plt.plot(steps, training_loss, marker='o', linestyle='-', color='b')

# Title, axis labels and grid.
plt.title('Mistral-MASHQA\nQLoRA Training Loss Over Time', fontsize=16)
plt.xlabel('Training Steps', fontsize=14)
plt.ylabel('Training Loss', fontsize=14)
plt.grid(True)

# One x tick per logged step, rotated so the labels do not collide.
plt.xticks(steps, rotation=45)
plt.yticks(fontsize=12)

plt.legend(['Training Loss'], fontsize=12)

# Arrow pointing at the minimum, with its label offset up and to the right.
plt.annotate(
    f'Min Loss: {min_loss:.4f}',
    xy=(min_loss_step, min_loss),
    xytext=(min_loss_step + 25, min_loss + 0.05),
    arrowprops=dict(facecolor='black', shrink=0.05),
    fontsize=12,
)

plt.tight_layout()
plt.show()
     
