In [1]:
!pip install -q -U accelerate peft bitsandbytes
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset
from peft import LoraConfig,get_peft_model  

In [2]:
from huggingface_hub import login
token=""
login(token=token)

In [3]:
# Configuration
#MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"  # Or your Llama 2 model
MODEL_NAME = "meta-llama/Llama-2-7b-hf"  # Or your Llama 2 model
DATASET_NAME = "bio-nlp-umass/bioinstruct"
OUTPUT_DIR = "./llama2-bioinstruct-finetuned"
LEARNING_RATE = 2e-4
BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 8
EPOCHS = 1

In [4]:
# Quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# quantization_config = BitsAndBytesConfig(
#     load_in_8bit=True, # Load in 8 bit
#     bnb_8bit_quant_type="nf8", # specify nf8 type
#     bnb_8bit_compute_dtype=torch.float16, # compute type
# )

# quantization_config = BitsAndBytesConfig(
#     load_in_8bit=True,
# )

# Load model and tokenizer with quantization
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",
)

# LoRA configuration
lora_config = LoraConfig(
    r=8,  # Rank of the update matrices
    lora_alpha=8,  # Scaling factor
    lora_dropout=0.1,  # Dropout probability
    bias="none",
    task_type="CAUSAL_LM", # Set the correct task type
    target_modules=["v_proj"] # Target Query and Value projection matrices.
)

# lora_config = LoraConfig(
#     r=8,
#     lora_alpha=8,
#     lora_dropout=0.1,
#     bias="none",
#     task_type="CAUSAL_LM",
#     target_modules=["q_proj", "v_proj"]
# )

# Add LoRA adapters to the model
model = get_peft_model(model, lora_config)
model.print_trainable_parameters() # print trainable parameters.

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 2,097,152 || all params: 6,740,512,768 || trainable%: 0.0311


In [5]:
# Load dataset
dataset = load_dataset(DATASET_NAME, split="train")
dataset = dataset.select(range(10000))

In [6]:
# Preprocessing function (format for instruction following)
def preprocess_function(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]

    prompts = [f"Instruction: {instruction}\nInput: {input_text}\nOutput: {output_text}"
               for instruction, input_text, output_text in zip(instructions, inputs, outputs)]

    model_inputs = tokenizer(prompts, max_length=512, truncation=True, padding="max_length")
    model_inputs["labels"] = model_inputs["input_ids"].copy()
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [7]:
# Training arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    logging_dir="./logs",
    logging_steps=50,
    optim="paged_adamw_8bit", # Optimized for memory
    fp16=True,
    lr_scheduler_type="linear",
    warmup_ratio=0.03,
    save_strategy="steps",
    push_to_hub=False,
    report_to = "tensorboard",
    save_total_limit = 1
)

In [8]:
# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

  trainer = Trainer(


In [9]:
# Train
trainer.train()

Step,Training Loss
50,81.5002
100,1.7859
150,1.6623
200,1.6515
250,1.6508
300,1.6221


TrainOutput(global_step=312, training_loss=14.46341899725107, metrics={'train_runtime': 8442.1885, 'train_samples_per_second': 1.185, 'train_steps_per_second': 0.037, 'total_flos': 2.0271715316033126e+17, 'train_loss': 14.46341899725107, 'epoch': 0.9984})

In [10]:
# Example input (instruction and input text)
instruction = "Explain the mechanism of action of a given drug in non-medical terms."
input_text = "Metformin"

# Construct the prompt
prompt = f"Instruction: {instruction}\nInput: {input_text}\nOutput:"

# Tokenize the prompt
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only 
# used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
#  UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used 
# in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.

# Generate the output
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    outputs = model.generate(
        **model_inputs,
        max_length=200,  # Adjust as needed
        num_return_sequences=1,  # Generate one output
        temperature=0.7,  # Adjust for creativity
        top_p=0.9,  # Adjust for sampling
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id #Set pad token id
    )

# Decode the generated output
generated_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the results
print("Instruction:", instruction)
print("Input:", input_text)
print("Generated Output:", generated_output)

Instruction: Explain the mechanism of action of a given drug in non-medical terms.
Input: Metformin
Generated Output: Instruction: Explain the mechanism of action of a given drug in non-medical terms.
Input: Metformin
Output: Metformin helps reduce blood sugar levels by increasing the sensitivity of cells in the body to insulin, allowing the insulin to work more effectively. It also decreases the amount of glucose produced by the liver and increases the body's use of glucose.


In [11]:
# Example input (instruction and input text)
instruction = "Explain the concept of 'gene expression' in simple terms."
input_text = ""  # No input text is needed for this example

# Construct the prompt
prompt = f"Instruction: {instruction}\nInput: {input_text}\nOutput:"

# Tokenize the prompt
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only 
# used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
#  UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used 
# in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.

# Generate the output
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    outputs = model.generate(
        **model_inputs,
        max_length=200,  # Adjust as needed
        num_return_sequences=1,  # Generate one output
        temperature=0.7,  # Adjust for creativity
        top_p=0.9,  # Adjust for sampling
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id #Set pad token id
    )

# Decode the generated output
generated_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the results
print("Instruction:", instruction)
print("Input:", input_text)
print("Generated Output:", generated_output)

Instruction: Explain the concept of 'gene expression' in simple terms.
Input: 
Generated Output: Instruction: Explain the concept of 'gene expression' in simple terms.
Input: 
Output: Gene expression refers to the process of how genes are turned into functional proteins in the body. It's the result of various factors, including DNA methylation, chromatin remodeling, and RNA splicing. Gene expression can be influenced by external factors like environmental exposure and lifestyle choices.


In [12]:
# Example input (instruction and input text)
instruction = "Answer the following question."
input_text = "What are the main causes of type 2 diabetes?"

# Construct the prompt
prompt = f"Instruction: {instruction}\nInput: {input_text}\nOutput:"

# Tokenize the prompt
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only 
# used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
#  UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used 
# in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.

# Generate the output
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    outputs = model.generate(
        **model_inputs,
        max_length=200,  # Adjust as needed
        num_return_sequences=1,  # Generate one output
        temperature=0.7,  # Adjust for creativity
        top_p=0.9,  # Adjust for sampling
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id #Set pad token id
    )

# Decode the generated output
generated_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the results
print("Instruction:", instruction)
print("Input:", input_text)
print("Generated Output:", generated_output)

Instruction: Answer the following question.
Input: What are the main causes of type 2 diabetes?
Generated Output: Instruction: Answer the following question.
Input: What are the main causes of type 2 diabetes?
Output: The main causes of type 2 diabetes include a combination of genetic, environmental, and lifestyle factors. These include age, obesity, a sedentary lifestyle, family history, ethnicity, and poor diet.


In [13]:
!zip -r "llama2_bio_instruct_ft.zip" "/kaggle/working"

  adding: kaggle/working/ (stored 0%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/ (stored 0%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/ (stored 0%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/optimizer.pt (deflated 11%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/special_tokens_map.json (deflated 73%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/tokenizer_config.json (deflated 68%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/training_args.bin (deflated 51%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/trainer_state.json (deflated 64%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/adapter_model.safetensors (deflated 8%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/adapter_config.json (deflated 54%)
  adding: kaggle/working/llama2-bioinstruct-finetuned/checkpoint-312/tokenizer.json 