In [None]:
!pip install -q accelerate peft bitsandbytes transformers trl datasets torch

In [None]:
# Imports
import accelerate
import peft
import bitsandbytes
import transformers
import trl
import datasets

import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import TrainingArguments
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import warnings


warnings.filterwarnings('ignore')

In [None]:
# Report the visible CUDA hardware. Only device 0 is described in detail,
# even when several GPUs are counted.
if torch.cuda.is_available():
    primary_gpu = 0
    gpu_count = torch.cuda.device_count()
    gpu_name = torch.cuda.get_device_name(primary_gpu)
    gpu_memory_gb = torch.cuda.get_device_properties(primary_gpu).total_memory / 1e9

    print("GPUs number:", gpu_count)
    print("GPU Model:", gpu_name)
    print("GPU Total Memory [GB]:", gpu_memory_gb)

In [None]:
# GPU memory reset (if needed)
# Free memory through PyTorch itself rather than resetting the device with
# numba: numba is not installed by this notebook, the reset fails on machines
# without CUDA, and tearing down the device context can invalidate the context
# PyTorch relies on in the same process.
import gc

# Drop unreachable Python objects first so their tensors can be released.
gc.collect()
if torch.cuda.is_available():
    # Return cached, currently unused blocks held by PyTorch's allocator to the driver.
    torch.cuda.empty_cache()

In [None]:
# Sizes of the small subsets used for this quick viability run.
TRAIN_SAMPLES = 3500
EVAL_SAMPLES = 300

ft_dataset = load_dataset("nlpie/Llama2-MedTuned-Instructions")

# Using only a small part of the total dataset.
# In a professional environment, this is a cheap way to test the viability of the
# fine-tuning: first train on a small sample, then proceed to train on all the
# data, which will take far more time.
full_train_split = ft_dataset["train"]
assert len(full_train_split) >= TRAIN_SAMPLES + EVAL_SAMPLES, "dataset too small for the requested subsets"

ft_dataset["train"] = full_train_split.select(range(TRAIN_SAMPLES))
# Take the evaluation rows from *after* the training slice. Selecting them from
# the already-truncated training subset would evaluate on rows the model was
# trained on (data leakage).
ft_dataset["test"] = full_train_split.select(range(TRAIN_SAMPLES, TRAIN_SAMPLES + EVAL_SAMPLES))

In [None]:
def create_prompt(sample):
    """Format one dataset row as a single-turn training example.

    The row's instruction and input are joined with a single space (an empty or
    missing input is skipped), followed by the ``<|end_of_turn|>`` marker and the
    expected answer.

    Args:
        sample: Mapping with the keys ``"instruction"``, ``"input"`` and ``"output"``.

    Returns:
        The text ``"Instruction: <instruction> <input><|end_of_turn|>AI Assistant: <output>"``.
    """
    instruction = (sample["instruction"] or "").strip()
    user_input = (sample["input"] or "").strip()
    # Without a separator the last word of the instruction gets glued to the
    # first word of the input; the inference prompts keep them space-separated.
    prompt = " ".join(part for part in (instruction, user_input) if part)

    return f"Instruction: {prompt}<|end_of_turn|>AI Assistant: {sample['output']}"

In [None]:
# Quantization settings for the language model.
# 4-bit quantization is used to minimize memory usage
# while maintaining reasonable performance.
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_use_double_quant": True,
}
bnb_config = BitsAndBytesConfig(**quantization_options)

In [None]:
# Base checkpoint: an LLM based on Mistral-7B, trained only in English
# (per the original author's note).
hf_repo = "berkeley-nest/Starling-LM-7B-alpha"

# Load the checkpoint with the 4-bit quantization config, let the library place
# the weights on the available devices, and pass use_cache=False at load time.
model_load_options = {
    "quantization_config": bnb_config,
    "device_map": "auto",
    "use_cache": False,
}
llm_model = AutoModelForCausalLM.from_pretrained(hf_repo, **model_load_options)

In [None]:
# Tokenizer that matches the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(hf_repo)

# Pad on the right and reuse the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def generate_answer_before_fine_tunning(prompt, model, max_new_tokens=1024):
    """Sample a completion for ``prompt`` and return only the newly generated text.

    Args:
        prompt: Full text prompt to feed to the model.
        model: Causal LM exposing ``generate`` and ``device``.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded continuation, without the prompt and without special tokens.
    """
    encoded_input = tokenizer(prompt,
        return_tensors="pt",
        add_special_tokens=True,
    )

    # Follow the model's own device instead of assuming a CUDA device exists.
    model_inputs = encoded_input.to(model.device)
    prompt_length = model_inputs["input_ids"].shape[1]

    generated_ids = model.generate(**model_inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Drop the prompt by token position. Removing it with str.replace on the
    # decoded text is fragile: the decoded string carries special tokens and may
    # not reproduce the prompt character-for-character.
    new_token_ids = generated_ids[0, prompt_length:]

    return tokenizer.decode(new_token_ids, skip_special_tokens=True)

In [None]:
# Baseline check: query the model before any fine-tuning on a clinical
# sentence-pair classification prompt.
prompt = (
    "Instruction: Your goal is to determine the relationship between the two provided clinical sentences and classify them into one of the following categories:\n"
    "Contradiction: If the two sentences contradict each other. Neutral: If the two sentences are unrelated to each other. Entailment: If one of the sentences logically entails the other. "
    "Sentence 1: For his hypotension, autonomic testing confirmed orthostatic hypotension. Sentence 2: the patient has orthostatic hypotension <|end_of_turn|>"
    "AI Assistant:"
)

generate_answer_before_fine_tunning(prompt, llm_model)

In [None]:
# LoRA (Low-Rank Adaptation):
## Adapts a pre-trained model by adding low-rank weight updates,
## which reduces memory and compute costs.

# PEFT (Parameter-Efficient Fine-Tuning):
## Fine-tunes large models efficiently by modifying only a small subset of
## parameters, improving adaptability with minimal resource usage.
lora_settings = {
    "r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
peft_config = LoraConfig(**lora_settings)

# Prepare the quantized model for k-bit training, then wrap it with the LoRA
# adapter. `llm_model` is rebound to the wrapped model, so re-running this cell
# would wrap the already wrapped model again.
llm_model = get_peft_model(prepare_model_for_kbit_training(llm_model), peft_config)

In [None]:
from trl import SFTConfig

In [None]:
# Hyper-parameters for the supervised fine-tuning run.
# NOTE(review): both num_train_epochs and max_steps are set; presumably max_steps
# takes precedence — confirm against the TrainingArguments documentation.
sft_options = {
    "output_dir": "adjusted_model",
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "optim": "paged_adamw_32bit",
    "learning_rate": 2e-4,
    "lr_scheduler_type": "cosine",
    "save_strategy": "epoch",
    "logging_steps": 10,
    "num_train_epochs": 1,
    "max_steps": 250,
    "fp16": True,
    "max_seq_length": 512,
    "packing": True,
}
training_arguments = SFTConfig(**sft_options)

# Trainer that formats each row with create_prompt before training.
# NOTE(review): an eval_dataset is supplied but no evaluation strategy is
# configured here; presumably no evaluation runs during training — confirm.
trainer = SFTTrainer(
    model=llm_model,
    args=training_arguments,
    train_dataset=ft_dataset["train"],
    eval_dataset=ft_dataset["test"],
    tokenizer=tokenizer,
    formatting_func=create_prompt,
    peft_config=peft_config,
)

In [None]:
%%time
# Run the fine-tuning loop; the %%time cell magic reports how long the cell took.
trainer.train()

In [None]:
# Persist the trainer's model under the "adjusted_model" directory.
trainer.save_model("adjusted_model")

In [None]:
# Merge the adapter into the underlying model and keep the result as `final_model`
# for the inference cells below.
# NOTE(review): the base model was loaded with 4-bit quantization; merging adapter
# weights into quantized layers may introduce rounding differences — confirm this
# is acceptable, or merge into a higher-precision copy of the base model instead.
final_model = llm_model.merge_and_unload()

In [None]:
def generate_answer_after_fine_tunning(prompt, model, max_new_tokens=512):
    """Sample a completion for ``prompt`` and return the full decoded sequence.

    Args:
        prompt: Full text prompt to feed to the model.
        model: Causal LM exposing ``generate`` and ``device``.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text of the first generated sequence: the prompt followed by
        the model's continuation, with special tokens left in place.
    """
    encoded_input = tokenizer(prompt,
        return_tensors="pt",
        add_special_tokens=True,
    )

    # Follow the model's own device instead of assuming a CUDA device exists.
    model_inputs = encoded_input.to(model.device)

    # NOTE(review): use_cache=False is kept from the original; it presumably
    # disables key/value caching and slows decoding — confirm it is still needed.
    generated_ids = model.generate(**model_inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        use_cache=False,
        pad_token_id=tokenizer.eos_token_id,
    )

    decoded_output = tokenizer.batch_decode(generated_ids)

    return decoded_output[0]

In [None]:
%%time
prompt = "Instruction: In your role as a medical professional, address the user's medical questions and concerns. "
prompt += "I have a white tab under my tounge that is not only painful when i touch it but bleeds as well. not sure what it is, or why I got it. Can you give me any advise? <|end_of_turn|> "
prompt += "\nAI Assistant:"
response = generate_answer_after_fine_tunning(prompt, final_model)
print(response)

In [None]:
%%time
prompt = "Instruction: In your capacity as a healthcare expert, offer insights and recommendations in response to users' medical inquiries. "
prompt += "I have terrible anxiety and depression. I've tried various therapists and pills, but nothing's helped. <|end_of_turn|> "
prompt += "\nAI Assistant:"
response = generate_answer_after_fine_tunning(prompt, final_model)
print(response)

In [None]:
%%time
prompt = "Instruction: As a medical chatbot, your responsibility is to provide information and guidance on medical matters to users. "
prompt += "Hi sir, I am so happy with this website. First of all thanks for giving this opportunity. I am the  Software employee.My age is 24. My height is 169cm .Recently I got back pain and some pain in chest. How can i get relief from those pains.How i improve my health and which type of diseases will attack to my life in future. Please give Some health tips for heart and kidneys protection. <|end_of_turn|> "
prompt += "\nAI Assistant:"
response = generate_answer_after_fine_tunning(prompt, final_model)
print(response)