In [None]:
%%capture
# Dependency installation cell; the %%capture cell magic above suppresses the installer output.
# NOTE(review): the "kaggle-new" extra suggests this targets a Kaggle GPU image — confirm.
!mamba install --force-reinstall aiohttp -y
# xformers is capped below 0.0.26 and pulled from the CUDA 12.1 PyTorch wheel index.
!pip install -U "xformers<0.0.26" --index-url https://download.pytorch.org/whl/cu121
# NOTE(review): unsloth is installed from the repository's default branch with no pinned
# commit, so a later re-run may pick up different code than the one this notebook was written for.
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"

# Temporary fix for https://github.com/huggingface/datasets/issues/6753
!pip install datasets==2.16.0 fsspec==2023.10.0 gcsfs==2023.10.0

import os
# Disable Weights & Biases logging through its environment switch.
os.environ["WANDB_DISABLED"] = "true"

Loading the base model

In [None]:
from unsloth import FastLanguageModel
import torch
# Sequence length passed to the model loader here and to the trainer further down.
max_seq_length = 2048 
dtype = None # None for auto detection
load_in_4bit = True # 4bit quantization


# Load the pre-quantized 4-bit Llama-3 8B checkpoint together with its tokenizer.
# `model` and `tokenizer` are reused by the LoRA, training and inference cells below.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit", 
    max_seq_length = max_seq_length, 
    dtype = dtype, 
    load_in_4bit = load_in_4bit
)

We now add LoRA adapters so that only a small fraction of the model's parameters is trained; the size of that fraction is controlled by the rank "r".


In [None]:
# Wrap the loaded model with LoRA adapters on the attention and MLP projection layers.
# NOTE(review): `model` is rebound to the wrapped model, so this cell is not idempotent —
# re-running it without reloading passes an already wrapped model to get_peft_model again.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # LoRA rank; the original note here read "Used 8" (presumably an earlier run — confirm)
    # The rank "r" is user-chosen: a smaller r means fewer trainable parameters and lower compute cost.
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32, # scaling factor: how strongly the LoRA update is weighted relative to the base weights
    lora_dropout = 0, 
    bias = "none",   
    use_gradient_checkpointing = "unsloth", 
    random_state = 3407,
    use_rslora = False,  # rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Loading the data, splitting it into train and test sets, and formatting each record into the prompt structure the model expects

In [None]:
# Few-shot prompt template used for both training and inference.
# It is rendered with str.format and has exactly two positional "{}" slots: the patient
# question (under "### Input:") and the answer (under "### Response:"). The answer slot is
# filled with the reference answer for training and left empty for generation.
# Any literal brace added to this text must be doubled, otherwise str.format will fail.
# The "### Input:" / "### Response:" markers are also used by the inference cells to cut
# the generated answer out of the decoded text, so keep them in sync with that parsing.
medical_prompt = """You are a medical expert providing accurate and relevant answers to medical inquiries based on your specialized knowledge. Always start your answer saying "Hello, welcome to the medical chat forum,"

### Example-1: 

**Input:**  Hello doctor,My friend aged 30 had two drops of phenol mistaking for milk. He vomited and had lot of salt water. Please advice for any side effect.

**Response:**  1. The side effects of phenol include irritation to the eyes, skin rashes, increased heart rate, respiratory depression, nausea, dizziness, headache, sweating, tremors, seizures, coma, and even death.2. Since your friend has already taken it, he should be fine. There is no need to worry about any long-term damage.3. However, if he develops symptoms such as rapid breathing, low blood pressure, chest pain or discomfort, confusion, extreme sleepiness, irregular heartbeats, severe headaches, or vomiting, then he needs immediate medical attention. Hope I have answered your query. Let me know if I can assist you further.

### Example-2: 

**Input:** Hi doctor,I have been treated with for a yeast infection and given antibiotics last month end and then last week had another infection (milky like discharge no smell). Is it advisable to wait for one month to be seen by my gynecologist or go to an urgent care right away?

**Response:** 1) yeast infections can occur again if not cured properly.2) It is better to get examined by your doctor and take proper treatment at the earliest to avoid complications.3) You may also need blood tests such as CBC, ESR, FBS, PPS, etc., as well as culture and sensitivity test of the vaginal swab to confirm the diagnosis.4) The best way to prevent recurrent yeast infections is to keep the area clean and Chat Doctor. 5) Avoid wearing tight-fitting panties, especially those made of synthetic materials.6) Take more of probiotic food items like yogurt, buttermilk, curd, etc.

### Example-3: 

**Input:** Hello doctor,I have spastic paraplegia. Everything I have tried has not worked. I was told to try a medication called L-Dopa. Will this medication help me?

**Response:** 1) Dopamine is a chemical in the brain that helps nerve cells send signals. It also helps control movement. L-dopa is converted into dopamine in the body and can be used for treating Parkinsonism (which includes tremors, stiffness, slow movements and muscle rigidity).2) Yes, it will help you. But there are certain precautions to take while using it as follows:1) Do not use if you are allergic to any of its ingredients. 2) Avoid alcohol when taking this medicine.3) Take with food or milk to avoid stomach upset.4) You may need to increase your dose slowly over time.5) This medicine may cause dizziness, lightheadedness, or fainting. Do not drive, operate machinery, or do other dangerous activities until you know how this medicine affects you.

#####
Follow the following rule:
- Always start your answer saying "Hello, welcome to the medical chat forum,"
- Do not say "Hello, welcome to Chat Doctor forum", "Hi, welcome to Chat Doctor forum", or "I am Chat Doctor" in your answer. 
- Provide your answer in numbered points.


### Input:
{}


### Response:
 {}
"""

# End-of-sequence token of the loaded tokenizer; appended to every training example below.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Render a batch of records into complete training prompts.

    Args:
        examples: Batched mapping holding parallel sequences under the keys
            "input" (patient questions) and "answer_chatdoctor" (reference answers).

    Returns:
        A dict with the single key "text": one rendered prompt per record,
        each terminated with EOS_TOKEN.
    """
    questions = examples["input"]
    answers = examples["answer_chatdoctor"]
    # The EOS suffix is mandatory: without it the model never learns where an answer ends.
    rendered = [
        medical_prompt.format(question, answer) + EOS_TOKEN
        for question, answer in zip(questions, answers)
    ]
    return {"text": rendered}


from datasets import load_dataset, DatasetDict, Dataset
from sklearn.model_selection import train_test_split

# Load the "train" split of the medical Q&A corpus from the Hugging Face Hub.
dataset = load_dataset("Sid404/medical_data", split = "train")

df = dataset.to_pandas()

# Split the DataFrame into training and testing sets (80/20, fixed seed for reproducibility)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Convert the DataFrames back to Dataset objects.
# preserve_index=False: after the shuffle the pandas index is no longer a plain range, and
# it would otherwise be stored as an extra "__index_level_0__" column in both datasets.
train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)

# Add the rendered "text" column (full prompt + reference answer + EOS) to both splits.
train_dataset = train_dataset.map(formatting_prompts_func, batched = True)
test_dataset = test_dataset.map(formatting_prompts_func, batched = True)

Training parameters 

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Decide the mixed-precision mode once: bf16 where the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir = "outputs",
    seed = 3407,
    report_to = "none",
    # Batching: 1 sample per device, gradients accumulated over 4 steps.
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 4,
    # Schedule: a single epoch, linear decay after 5 warm-up steps.
    num_train_epochs = 1,
    warmup_steps = 5,
    learning_rate = 4e-4,
    lr_scheduler_type = "linear",
    # Optimizer.
    optim = "adamw_8bit",
    weight_decay = 0.01,
    # Precision flags are mutually exclusive.
    bf16 = use_bf16,
    fp16 = not use_bf16,
    logging_steps = 1,
    # Checkpointing: a float below 1 is read by transformers as a fraction of the
    # total number of training steps, i.e. a checkpoint every 1/16 of the run.
    save_strategy = "steps",
    save_steps = 1/16,
)

# Supervised fine-tuning on the rendered "text" column; the held-out split is attached
# as eval_dataset so it can be scored after training.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    args = training_args,
    train_dataset = train_dataset,
    eval_dataset = test_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
)

In [None]:
# Run the fine-tuning loop; the object returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
# Score the trained model on the eval_dataset attached to the trainer (the held-out test split).
eval_stats = trainer.evaluate()

Manual Inference: 

In [None]:
# Switch the fine-tuned model into unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)
inputs = tokenizer(
    [
        medical_prompt.format(
            "Hello doctor, I have acne scars. What medical treatments should I use?",  # input
            "",  # output - leave this blank for generation!
        )
    ],
    return_tensors = "pt",
).to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 150, use_cache = True)
# skip_special_tokens drops markers such as the begin/end-of-text tokens, which would
# otherwise end up inside the extracted answer.
text = tokenizer.batch_decode(outputs, skip_special_tokens = True)
parts = text[0].split("### Response:")

# Keep what follows the first response marker and cut off any further "### Input:"
# section the model may start generating on its own.
response = parts[1].split("### Input:")[0].strip()
print(response)

Now we shall evaluate how this fine-tuned model performs on the test data.

In [None]:
# Generated answers of the fine-tuned model, one per test question, in dataset order.
fine_tuned = []

# Read the question column once; indexing test_dataset["input"] inside the loop
# re-reads the whole column on every iteration.
test_questions = test_dataset["input"]

for i, question in enumerate(test_questions):
    print(i)
    inputs = tokenizer(
        [medical_prompt.format(question, "")],  # answer slot left blank for generation
        return_tensors = "pt",
    ).to("cuda")

    outputs = model.generate(**inputs, max_new_tokens = 250, use_cache = True)
    # skip_special_tokens keeps begin/end-of-text markers out of the answers, so they
    # do not distort the BERTScore/BLEU comparison against the reference answers.
    text = tokenizer.batch_decode(outputs, skip_special_tokens = True)
    parts = text[0].split("### Response:")

    # Get the response part and clean it
    fine_tuned.append(parts[1].split("### Input:")[0].strip())
    

Using BERTScore and BLEU score to evaluate its performance (Fine-tuned Model)

In [None]:
import statistics
from evaluate import load
# BERTScore of the fine-tuned answers against the ChatDoctor reference answers.
bertscore = load("bertscore")
results_1 = bertscore.compute(
    predictions=fine_tuned,
    references=test_dataset["answer_chatdoctor"],
    lang="en",
)

# Mean precision, recall and F1 over the test set, printed in that order.
for metric_name in ("precision", "recall", "f1"):
    print(statistics.mean(results_1[metric_name]))

In [None]:
# BLEU of the fine-tuned answers against the ChatDoctor reference answers.
bleu = load("bleu")
results_2 = bleu.compute(predictions=fine_tuned,
                         references=test_dataset["answer_chatdoctor"])
# Show the metric; without this the cell computes BLEU but never reports it.
print(results_2)

Loading the base model to compare its responses for the test data with the fine-tuned responses

In [None]:
# Load a fresh copy of the same 4-bit checkpoint, with no LoRA adapters applied in this
# cell, to serve as the comparison baseline. It is bound to model_base / tokenizer_base
# so that the fine-tuned `model` / `tokenizer` remain available.
# NOTE(review): the fine-tuned model is still referenced at this point, so both models
# are presumably resident on the GPU at once — confirm there is enough memory.
model_base, tokenizer_base = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit", 
    max_seq_length = max_seq_length, 
    dtype = dtype, 
    load_in_4bit = load_in_4bit #4-bit quantization
)

In [None]:
# Generated answers of the baseline (not fine-tuned) model, one per test question.
base = []

# Put the baseline model into unsloth's inference mode, as is done for the fine-tuned
# model before its generation run.
FastLanguageModel.for_inference(model_base)

# Read the question column once instead of re-reading it on every iteration.
test_questions = test_dataset["input"]

for i, question in enumerate(test_questions):
    print(i)
    # Use the baseline tokenizer and model here: generating with the fine-tuned
    # `model` / `tokenizer` would only reproduce the fine-tuned answers.
    inputs = tokenizer_base(
        [medical_prompt.format(question, "")],  # answer slot left blank for generation
        return_tensors = "pt",
    ).to("cuda")

    # Same generation budget as the fine-tuned run (250 new tokens) so both sets of
    # answers are comparable; a budget of max_seq_length on top of the prompt would
    # also exceed the sequence length the model was loaded with.
    outputs = model_base.generate(**inputs, max_new_tokens = 250, use_cache = True)
    # skip_special_tokens keeps begin/end-of-text markers out of the scored answers.
    text = tokenizer_base.batch_decode(outputs, skip_special_tokens = True)
    parts = text[0].split("### Response:")

    # Get the response part and clean it
    base.append(parts[1].split("### Input:")[0].strip())

Using BERTScore and BLEU score to evaluate its performance (Base Model)

In [None]:
# BERTScore of the baseline answers against the ChatDoctor reference answers.
results_1_base = bertscore.compute(
    predictions=base,
    references=test_dataset["answer_chatdoctor"],
    lang="en",
)

# Mean precision, recall and F1 over the test set, printed in that order.
for metric_name in ("precision", "recall", "f1"):
    print(statistics.mean(results_1_base[metric_name]))

In [None]:
# BLEU of the baseline answers against the ChatDoctor reference answers.
bleu = load("bleu")
results_2_base = bleu.compute(predictions=base,
                         references=test_dataset["answer_chatdoctor"])
# Show the metric; without this the cell computes BLEU but never reports it.
print(results_2_base)