In [None]:
!pip install -q accelerate peft bitsandbytes transformers trl datasets torch

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/314.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━[0m [32m174.1/314.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.1/314.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.8/245.8 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.4/103.4 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [

In [None]:
import accelerate
import peft
import bitsandbytes
import transformers
import trl
import datasets

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import TrainingArguments
from peft import AutoPeftModelForCausalLM, LoraConfig,get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import warnings
warnings.filterwarnings("ignore")

In [None]:
if torch.cuda.is_available():
  print('Number of GPUs:', torch.cuda.device_count())
  print('GPU name:', torch.cuda.get_device_name(0))
  print('GPU memory [GB]: ', torch.cuda.get_device_properties(0).total_memory / 1e9)

Number of GPUs: 1
GPU name: Tesla T4
GPU memory [GB]:  15.835660288


In [None]:
# Reset GPU Memory
from numba import cuda
device = cuda.get_current_device()
device.reset()

# **Loading Dataset**

In [None]:
dataset = load_dataset('nlpie/Llama2-MedTuned-Instructions')

Downloading readme:   0%|          | 0.00/2.72k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/91.1M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/6.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/200252 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/70066 [00:00<?, ? examples/s]

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 200252
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 70066
    })
})

In [None]:
for i in range(3):
  data = dataset['train'][i]
  print(f'Data Point {i + 1}')
  print(data['instruction'])
  print(data['input'])
  print(data['output'])
  print('\n--------------------------------\n')

Data Point 1
In your role as a medical professional, address the user's medical questions and concerns.
My relative suffering from secondary lever cancer ( 4th stage as per Allopathic doctor) and primary is in rectum. He is continuously with 103 to 104 degree F fever. Allpathic doctor suggested chemo only after fever subsidises. Is treatment possible at Lavanya & what is the time scale of recover.
Hi, dairy have gone through your question. I can understand your concern. He has rectal cancer with liver metastasis. It is stage 4 cancer. Surgery is not possible at this stage. Only treatment options are chemotherapy and radiotherapy according to type of cancer. Inspite of all treatment prognosis is poor. Life expectancy is not good. Consult your doctor and plan accordingly. Hope I have answered your question, if you have any doubts then contact me at bit.ly/ Chat Doctor. Thanks for using Chat Doctor. Wish you a very good health.

---------------------

Data Point 2
Your role as a doctor re

In [None]:
# Train Sample
dataset['train'] = dataset['train'].select(range(3600))
# Test Sample
dataset['test'] = dataset['validation'].select(range(400))

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 3600
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 70066
    })
    test: Dataset({
        features: ['instruction', 'input', 'output', 'source'],
        num_rows: 400
    })
})

In [None]:
def create_prompt(sample):
  # Initialize the prompt with the instruction from the sample
  prompt = sample['instruction']

  # Append the input to the prompt
  prompt += sample['input']

  # Format the prompt and output into a single string for a single turn
  single_turn_prompt = f'Instruction: {prompt}AI Assistant: {sample["output"]}'

  # Return the formatted prompt
  return single_turn_prompt


In [None]:
#Prompt Example:
create_prompt(dataset['train'][0])

"Instruction: In your role as a medical professional, address the user's medical questions and concerns.My relative suffering from secondary lever cancer ( 4th stage as per Allopathic doctor) and primary is in rectum. He is continuously with 103 to 104 degree F fever. Allpathic doctor suggested chemo only after fever subsidises. Is treatment possible at Lavanya & what is the time scale of recover.<|end_of_turn|>AI Assistant: Hi, dairy have gone through your question. I can understand your concern. He has rectal cancer with liver metastasis. It is stage 4 cancer. Surgery is not possible at this stage. Only treatment options are chemotherapy and radiotherapy according to type of cancer. Inspite of all treatment prognosis is poor. Life expectancy is not good. Consult your doctor and plan accordingly. Hope I have answered your question, if you have any doubts then contact me at bit.ly/ Chat Doctor. Thanks for using Chat Doctor. Wish you a very good health."

In [None]:
# Configuration for BitsAndBytes using 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Load data in 4-bit precision
    bnb_4bit_quant_type='nf4',  # Use 'nf4' as the quantization type
    bnb_4bit_compute_dtype='float16',  # Use 16-bit floating point for computation
    bnb_4bit_use_double_quant=True  # Enable double quantization for improved precision
)

# **Loading LLM And Tokenizer**

In [None]:
# Loading LLM applying quantization
hf_repository = 'berkeley-nest/Starling-LM-7B-alpha'

llm_model = AutoModelForCausalLM.from_pretrained(hf_repository,
                                                 quantization_config = bnb_config,
                                                 device_map = 'auto',
                                                 use_cache = False)

config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

In [None]:
# Loading Tokenizer
tokenizer = AutoTokenizer.from_pretrained(hf_repository)

tokenizer_config.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Define End Sentence
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Setting Padding Direction
tokenizer.padding_side = 'right'

# **Generating Answer with LLM Before Fine-Tuning**

In [None]:
def generate_answer(prompt, model):
  # Encode the input prompt using the tokenizer and convert to tensor format
  encoded_input = tokenizer(prompt,
                            return_tensors='pt',
                            add_special_tokens=True)

  # Move the encoded input to the GPU
  model_inputs = encoded_input.to('cuda')

  # Generate output tokens from the model with specified parameters
  generated_ids = model.generate(**model_inputs,
                                 max_new_tokens=1024,
                                 do_sample=True,
                                 pad_token_id=tokenizer.eos_token_id)

  # Decode the generated token IDs into a string
  decoded_output = tokenizer.batch_decode(generated_ids)

  # Return the decoded output with the prompt part removed
  return decoded_output[0].replace(prompt, '')


In [None]:
prompt = dataset['validation'][0]['instruction'] + dataset['validation'][0]['input']
prompt += '<|end_of_turn|>AI Assistant:'

In [None]:
print(prompt)

Your goal is to detect disease-related Named Entities within the text and apply the BIO labeling scheme. Begin by labeling the first word of a disease-related phrase as B (Begin), and then label the subsequent words in that phrase as I (Inner). Any words not related to diseases should be labeled as O.BRCA1 is secreted and exhibits properties of a granin .<|end_of_turn|>AI Assistant:


In [None]:
generate_answer(prompt, llm_model)

'<s> Your goal is to detect disease-related Named Entities within the text and apply the BIO labeling scheme. Begin by labeling the first word of a disease-related phrase as B (Begin), and then label the subsequent words in that phrase as I (Inner). Any words not related to diseases should be labeled as O.BRCA1 is secreted and exhibits properties of a granin .<|end_of_turn|> AI Assistant: BRCA1_B\n\nThe sentence provided does not contain any disease-related Named Entities, therefore all the other words should be labeled as O:\n\nO BRCA1_B O O O O O O O O<|end_of_turn|>'

# **LoRA for PEFT**

In [None]:
# Configuration for LoRA (Low-Rank Adaptation) using specific parameters
peft_config = LoraConfig(
    r=8,  # Rank of the low-rank adaptation matrices
    lora_alpha=16,  # Scaling factor for the adaptation matrices
    lora_dropout=0.05,  # Dropout rate for regularization
    bias='none',  # Type of bias to use ('none' means no bias)
    task_type='CAUSAL_LM'  # Task type indicating causal language modeling
)


In [None]:
# Prepare the language model for k-bit (e.g., 4-bit) training
llm_model = prepare_model_for_kbit_training(llm_model)

In [None]:
# Apply the LoRA configuration to the language model to get a parameter-efficient fine-tuning model
llm_model = get_peft_model(llm_model, peft_config)

In [None]:
# Set up training arguments for fine-tuning the model
training_arguments = TrainingArguments(
    output_dir='fitted_model',  # Directory to save the trained model
    per_device_train_batch_size=1,  # Batch size per device during training
    gradient_accumulation_steps=4,  # Number of steps to accumulate gradients before updating model parameters
    optim='paged_adamw_32bit',  # Optimizer to use (paged AdamW with 32-bit precision)
    learning_rate=2e-4,  # Learning rate for the optimizer
    lr_scheduler_type='cosine',  # Learning rate scheduler type (cosine annealing)
    save_strategy='epoch',  # Strategy for saving checkpoints (save at the end of each epoch)
    logging_steps=10,  # Number of steps between logging training metrics
    num_train_epochs=1,  # Number of epochs to train the model
    max_steps=250,  # Maximum number of training steps
    fp16=True  # Use 16-bit floating point precision (mixed precision training)
)


In [None]:
# Initialize the trainer for supervised fine-tuning (SFT)
trainer = SFTTrainer(
    model=llm_model,  # The language model to be trained
    peft_config=peft_config,  # Parameter-efficient fine-tuning configuration (LoRA)
    max_seq_length=512,  # Maximum sequence length for inputs
    tokenizer=tokenizer,  # Tokenizer to preprocess the inputs
    packing=True,  # Enable input packing for efficient training
    formatting_func=create_prompt,  # Function to format the input prompts
    args=training_arguments,  # Training arguments specifying various hyperparameters
    train_dataset=dataset['train'],  # Training dataset
    eval_dataset=dataset['test']  # Evaluation dataset
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


# **Training Model**

In [None]:
%%timeit
trainer.train()

In [None]:
trainer.save_movel('fitted_model')

In [None]:
final_model = model_llm.merge_and_unload()

# **Generating Text with LLM**

In [None]:
def generate_answer_ft(prompt, model):
  encoded_input = tokenizer(prompt,
                            return_tensors = 'pt',
                            add_special_tokens = True)

  model_inputs = encoded_input.to('cuda')

  generated_ids = model.generate(**model_inputs,
                                 max_new_tokens = 512,
                                 do_sample , True,
                                 use_cache = False,
                                 pad_token_id = tokenizer.eos_token_id)

  decoded_output = tokenizer.batch_decode(generated_ids)

  return decoded_output[0]

In [None]:
%%time
prompt = dataset['validation'][7900]['instruction'] + dataset['validation'][7000]['input']
prompt += '<|end_of_turn|>AI Assistant:'

result = generate_answer_ft(prompt, final_model)
print(result)