In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType
from datasets import load_dataset

In [4]:
pretrain_model = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

In [5]:
tokenizer = AutoTokenizer.from_pretrained(pretrain_model)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [6]:
import zipfile
import os
# Path to your zip file
zip_path = "/content/tinyllama-instruction.zip"

# Extract all files
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall()

In [7]:
model_path = "/content/checkpoint-3"

In [8]:
instruction_model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

In [21]:
prompt = "Explain the clinical findings about the combination of Atorvastatin and Ezetimibe in clinical trials."

In [25]:
prompt = "Explain how artificial intelligence is improving the process of drug discovery and development in the pharmaceutical industry."

In [26]:
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

In [27]:
outputs = instruction_model.generate(
    **inputs,
    max_new_tokens=100,
    temperature=0.8,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1
)

In [28]:
print("\nModel Output:\n")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Model Output:

Explain how artificial intelligence is improving the process of drug discovery and development in the pharmaceutical industry.
15. How does AI/ML affect drug development?
Drug discovery and development have always been difficult tasks, but with advances in computer science, AI has made it easier to develop new drugs. AI has also made it possible to automate many steps in the process, allowing companies to focus on research and development rather than administrative tasks like data analysis or IT support.
AI can also help pharmaceutical companies make more informed decisions about which clin


## Now lets start with prefrence base tuning or preference based alignment

In [43]:
!pip install -U trl



In [59]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2


In [1]:
from trl import DPOTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import PeftModel
from datasets import load_dataset
import torch

In [2]:
base_model = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

In [3]:
instruction_checkpoint = "/content/tinyllama-instruction/checkpoint-3"

In [4]:
# Load dataset
dataset = load_dataset("csv", data_files="/content/pharma_preference_data.csv")["train"]

In [5]:
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [6]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [7]:
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=True,
    device_map="auto"
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [8]:
# Attach the LoRA (instruction adapter)
model = PeftModel.from_pretrained(model, instruction_checkpoint)

In [9]:
from trl import DPOTrainer, DPOConfig

In [10]:
dpo_args = DPOConfig(
    output_dir="./tinyllama-dpo",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    beta=0.1,
    report_to=None,  # disable logging to wandb or tensorboard
    loss_type="sigmoid",  # or "hinge", depending on experiment
    remove_unused_columns=False
)


In [11]:
trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=dpo_args,
    train_dataset=dataset,
    processing_class=tokenizer,   # instead of tokenizer argument
    # you can pass data_collator if needed,
    # optionally eval_dataset etc.
)

Extracting prompt in train dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/5 [00:00<?, ? examples/s]

In [12]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:


Abort: 

### Testing with Non-Instruction Model

In [None]:
model_path = "/content/checkpoint-3"
instruction_model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = instruction_model.generate(
    **inputs,
    max_new_tokens=100,
    temperature=0.8,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1
)
print("\nModel Output:\n")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

### Testing with Instruction-Fine-Tuned Model

In [None]:
model_path = "/content/checkpoint-3"
instruction_model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = instruction_model.generate(
    **inputs,
    max_new_tokens=100,
    temperature=0.8,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1
)
print("\nModel Output:\n")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

### Testing with DPO (Preference-Aligned) Model

In [None]:
question = "Explain how Metformin works in the human body and why some researchers believe it could have benefits beyond diabetes treatment."

In [None]:
model_path = "/content/tinyllama-lora/checkpoint-5"

In [None]:
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

In [None]:
inputs = tokenizer(question, return_tensors="pt").to("cuda")

In [None]:
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    temperature=0.8,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1
)

In [None]:
print("\nModel Output:\n")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))