## Installing and Loading Packages

In [None]:
!pip install -q -U watermark

In [None]:
!pip install -q accelerate peft bitsandbytes transformers trl datasets torch

In [2]:
# Imports
import transformers
import bitsandbytes
import accelerate
import datasets
import peft
import trl

In [None]:
# Imports
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import TrainingArguments
from datasets import load_dataset
from trl import SFTTrainer
import warnings
import torch



warnings.filterwarnings('ignore')

In [None]:
%reload_ext watermark
%watermark -a Pack Version

In [None]:
# Report the visible CUDA hardware; nothing is printed when CUDA is unavailable
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    gpu_name = torch.cuda.get_device_name(0)
    # total_memory is divided by 1e9 to match the "[GB]" label below
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print('Number of GPUs:', gpu_count)
    print('GPU Model:', gpu_name)
    print('Total GPU Memory [GB]:', gpu_memory_gb)

In [None]:
# Reset GPU memory (when needed)
# Fetches numba's handle for the current CUDA device and calls reset() on it.
# NOTE(review): presumably meant to be run by hand between experiments rather
# than while a loaded model is still in use — confirm before running all cells.
from numba import cuda
device = cuda.get_current_device()
device.reset()

## Loading the Dataset


In [None]:
# Load the dataset
dataset = load_dataset("nlpie/Llama2-MedTuned-Instructions")

# Carve a held-out sample out of the "train" split so the test sample never
# contains rows used for training (the fixed seed keeps the split reproducible)
split = dataset["train"].train_test_split(test_size = 0.1, seed = 42)

# Training sample
dataset["train"] = split["train"]

# Test sample
dataset["test"] = split["test"]

## Adjusting the Input Prompt Format for LLM

In [None]:
# Function for creating the prompt
def create_prompt(sample):
    """Format one dataset row as a single-turn training prompt.

    Args:
        sample: Mapping with the keys 'instruction', 'input' and 'output'.
            'instruction' and 'input' may be empty or None.

    Returns:
        The string
        "Instruction: <instruction> <input><|end_of_turn|>AI Assistant: <output>",
        where blank parts are omitted.
    """
    # Join instruction and input with a single space so the end of the
    # instruction is not fused with the start of the user input; blank parts
    # are skipped so no stray whitespace lands before the turn marker.
    parts = (sample['instruction'], sample['input'])
    prompt = " ".join(part.strip() for part in parts if part and part.strip())
    return f"Instruction: {prompt}<|end_of_turn|>AI Assistant: {sample['output']}"

## Quantization Parameters

In [None]:
# Quantization settings passed to the model loader: 4-bit loading with the
# "nf4" quantization type, "float32" compute dtype and double quantization on
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float32",
    bnb_4bit_use_double_quant=True,
)

## Loading the LLM and Tokenizer

In [None]:
# Repository name on Hugging Face (both the model and the tokenizer are loaded from it)
repository_hf = "berkeley-nest/Starling-LM-7B-alpha"

In [None]:
# Load the base LLM from the Hugging Face repository with the quantization
# settings applied; device placement is left to device_map="auto" and the
# use_cache option is turned off
model_llm = AutoModelForCausalLM.from_pretrained(
    repository_hf,
    quantization_config=bnb_config,
    device_map="auto",
    use_cache=False,
)

In [None]:
# Loads the LLM tokenizer
tokenizer = AutoTokenizer.from_pretrained(repository_hf)

# Reuses the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

# Sets the padding direction
tokenizer.padding_side = "right"

## LoRA Parameters for PEFT

In [None]:
# LoRA adapter hyperparameters used for parameter-efficient fine-tuning
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
# Prepare the quantized model for fine-tuning (rebinds model_llm to the prepared model)
model_llm = prepare_model_for_kbit_training(model_llm)

In [None]:
# Wrap the base model with the LoRA adapters described by peft_config
model_llm = get_peft_model(model_llm, peft_config)

## Training Arguments

In [None]:
# Define the model training arguments
training_arguments = TrainingArguments(
    output_dir="adjusted_model",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",
    logging_steps=10,
    # Training length is fixed by max_steps alone: a positive max_steps
    # overrides num_train_epochs, so the former num_train_epochs=2 had no
    # effect and was dropped to avoid a misleading setting.
    max_steps=250,
    fp16=True,
)

## Supervised Fine-tuning Trainer (SFTT) Parameters


In [None]:
# Assemble the supervised fine-tuning trainer: rows are formatted by
# create_prompt, with packing enabled and max_seq_length set to 512.
# NOTE(review): model_llm was already wrapped by get_peft_model in an earlier
# cell, so passing peft_config again may be redundant — confirm against the
# installed trl version.
trainer = SFTTrainer(
    model=model_llm,
    peft_config=peft_config,
    max_seq_length=512,
    tokenizer=tokenizer,
    packing=True,
    formatting_func=create_prompt,
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

## LLM Training (Fine-Tuning)

In [None]:
%%time
# Run the fine-tuning loop; the %%time cell magic reports how long it took
trainer.train()

In [None]:
# Merge the trained LoRA weights into the base model, drop the PEFT wrapper
# and switch the result to evaluation mode so inference does not run with
# training-mode behavior left over from trainer.train()
final_model = model_llm.merge_and_unload().eval()

## Generating Text with LLM

In [None]:
# LLM response generation function
def generate_response_after_fine_tuning(prompt, model):
    """Generate a sampled completion for `prompt` with `model`.

    Encoding and decoding use the module-level `tokenizer`.

    Args:
        prompt: Text that is tokenized and passed to the model.
        model: Model object exposing `device` and `generate`.

    Returns:
        The decoded text of the first sequence returned by `model.generate`.
    """

    # Apply the tokenizer
    encoded_input = tokenizer(prompt,
                              return_tensors = "pt",
                              add_special_tokens = True)

    # Move the encoded input to the device the model lives on instead of
    # assuming 'cuda', so the function also works for other placements
    model_inputs = encoded_input.to(model.device)

    # Generate the response (sampling enabled, at most 512 new tokens; the
    # end-of-sequence token doubles as the padding token)
    generated_ids = model.generate(**model_inputs,
                                   max_new_tokens = 512,
                                   do_sample = True,
                                   use_cache = False,
                                   pad_token_id = tokenizer.eos_token_id)

    # Decode the response
    decoded_output = tokenizer.batch_decode(generated_ids)

    return decoded_output[0]

### Inference 1

In [None]:
%%time
prompt = "Instruction: In your role as a medical professional, address the user's medical questions and concerns. "
prompt += "I have a white tab under my tounge that is not only painful when i touch it but bleeds as well. not sure what it is, or why I got it. Can you give me any advise? <|end_of_turn|> "
prompt += "AI Assistant:"
response = generate_response_after_fine_tuning(prompt, final_model)
print(response)

### Inference 2

In [None]:
%%time
prompt = "Instruction: In your capacity as a healthcare expert, offer insights and recommendations in response to users' medical inquiries. "
prompt += "I have terrible anxiety and depression. I've tried various therapists and pills, but nothing's helped. <|end_of_turn|> "
prompt += "AI Assistant:"
response = generate_response_after_fine_tuning(prompt, final_model)
print(response)

### Inference 3

In [None]:
%%time
prompt = "Instruction: As a medical chatbot, your responsibility is to provide information and guidance on medical matters to users. "
prompt += "Hi sir, I am so happy with this website. First of all thanks for giving this opportunity. I am the  Software employee.My age is 24. My height is 169cm .Recently I got back pain and some pain in chest. How can i get relief from those pains.How i improve my health and which type of diseases will attack to my life in future. Please give Some health tips for heart and kidneys protection. <|end_of_turn|> "
prompt += "AI Assistant:"
response = generate_response_after_fine_tuning(prompt, final_model)
print(response)

In [None]:
%watermark -a Pack Version

# Deploy

In [None]:
# Saves the merged model and the tokenizer to disk
# NOTE(review): "adjusted_model" is also the TrainingArguments output_dir, so
# training checkpoints presumably end up in the same folder — confirm intended.
final_model.save_pretrained("adjusted_model")
tokenizer.save_pretrained("adjusted_model")

# Loads the model and tokenizer saved on disk
loaded_model = AutoModelForCausalLM.from_pretrained("adjusted_model", device_map="auto")
tokenizer_loaded = AutoTokenizer.from_pretrained("adjusted_model")

In [None]:
# Function to generate text
def generate_response_loaded(prompt, model, tokenizer):
    """Generate a sampled completion using an explicitly supplied model and tokenizer.

    Args:
        prompt: Text that is tokenized and passed to the model.
        model: Model object exposing `device` and `generate`.
        tokenizer: Callable tokenizer exposing `eos_token_id` and `batch_decode`.

    Returns:
        The decoded text of the first sequence returned by `model.generate`.
    """
    encoded_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    # Follow the model's own device rather than assuming 'cuda', so the helper
    # also works when the reloaded model is placed elsewhere
    model_inputs = encoded_input.to(model.device)
    # Sampling enabled, at most 512 new tokens; EOS doubles as the padding token
    generated_ids = model.generate(**model_inputs, max_new_tokens=512, do_sample=True, pad_token_id=tokenizer.eos_token_id)
    decoded_output = tokenizer.batch_decode(generated_ids)
    return decoded_output[0]

In [None]:
# Free-text symptom description that is interpolated into the prompt
symptoms = "When urinating, I felt an intense burning sensation, as if I were being burned from the inside. The feeling was uncomfortable and prevented me from getting a good night's sleep. Additionally, I noticed that my urine had a strong smell and cloudy appearance. I felt a slight pain in the bladder area and constant pressure. The low-grade fever and chills left me weak and without energy."

In [None]:
# Assemble the prompt from the instruction, the symptom description and the
# closing question. The space after the symptoms keeps the question from
# being glued to the last sentence of the description.
prompt = ("Instruction: In your role as a medical professional, address the user's medical questions and concerns. "
          f"{symptoms} "
          "Can you give me any advice? <|end_of_turn|> AI Assistant:")

In [None]:
response = generate_response_loaded(prompt, loaded_model, tokenizer_loaded)
print(response)

## Download the adjusted model to your computer

In [None]:
# Importing libraries and setting path
import os
import shutil

folder_to_zip = '/content/adjusted_model'
zip_file = '/content/adjusted_model_full_trained.zip'

# make_archive receives the archive path without its extension. splitext
# strips only the final extension, whereas str.replace would also remove a
# '.zip' occurring anywhere else in the path.
shutil.make_archive(os.path.splitext(zip_file)[0], 'zip', folder_to_zip)

In [None]:
# Start downloading the zipped model through the Colab files helper
# (google.colab is imported here, so this cell presumably only works inside Colab)
from google.colab import files
files.download(zip_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [63]:
%watermark -v -m

Python implementation: CPython
Python version       : 3.10.12
IPython version      : 7.34.0

Compiler    : GCC 11.4.0
OS          : Linux
Release     : 6.1.85+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 12
Architecture: 64bit



In [64]:
%watermark --iversions

datasets    : 3.1.0
accelerate  : 0.34.2
transformers: 4.46.2
torch       : 2.5.0+cu121
google      : 2.0.3
peft        : 0.13.2
bitsandbytes: 0.44.1
numba       : 0.60.0
trl         : 0.12.0

