In [None]:
# @title # Disable Weights and Biases

# @markdown ### Uncomment the lines below if you are running this notebook on Kaggle

# import wandb
# wandb.init(mode="disabled")



In [None]:
# @title # Installing Libraries

!pip install -q -U accelerate
!pip install -q -U datasets
!pip install -q -U trl

In [None]:
# @title # Importing Libraries

import gc
import os

import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from trl import DPOTrainer

# Use the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Bare expression so the notebook displays the selected device.
device

In [None]:
# @title ## Free Memory

def clean():
    """Run Python garbage collection, then call ``torch.cuda.empty_cache()``.

    The order is kept: collection runs first, the cache is emptied second.
    Returns ``None``.
    """
    for release in (gc.collect, torch.cuda.empty_cache):
        release()


# Free memory once right away when the cell is executed.
clean()

In [None]:

# @title ## Your Model and Dataset

# @markdown ### Model
# @markdown Select your model

# Identifier handed to `from_pretrained` for both the model and the tokenizer
# in the "Load Model and Dataset" cell.
model_name = "Sharathhebbar24/SSH_300M" # @param {type:"string"}

# @markdown ### Dataset
# @markdown Select your dataset

# Identifier handed to `load_dataset` in the "Load Model and Dataset" cell.
dataset_name = "Sharathhebbar24/orca_dpo_pairs" # @param {type:"string"}

# @markdown ### Choose your split

# Split handed to `load_dataset`; the loading cell replaces an empty value with "train".
split = "train" # @param {type: "string"}



In [None]:
# @title ## Load Model and Dataset

# Load the causal LM (moved to `device`) and its tokenizer.
# A failure prints a hint and is then re-raised: swallowing it would only defer
# the problem to a later cell as a NameError on `model` / `tokenizer`.
try:
  model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  # Reuse the EOS token for padding and pad on the left side.
  tokenizer.pad_token = tokenizer.eos_token
  tokenizer.padding_side = "left"
except Exception:
  print("Check if the model exists or not")
  raise

# An unset or empty split falls back to "train".
if not split:
  split = "train"

# Load the requested split, show a short preview and report its size.
# As above, the error is surfaced instead of leaving `dataset` undefined.
try:
  dataset = load_dataset(dataset_name, split=split)
  num_rows = dataset.num_rows
  # Convert only the previewed rows to pandas rather than the whole dataset.
  print(dataset.select(range(min(5, num_rows))).to_pandas().head())
  print("Total number of rows in dataset is: ", num_rows)
except Exception:
  print("Check if dataset or split exists or not")
  raise


In [None]:
# @title ## Training Arguments

# Form-editable hyperparameters.
# NOTE(review): `batch_size` is not passed to TrainingArguments below;
# `per_device_train_batch_size` is the value actually used in this cell.
batch_size = 2 # @param {type:"integer"}
max_steps = 100 # @param {type:"integer"}
gradient_accumulation_steps = 2 # @param {type:"integer"}
per_device_train_batch_size = 2 # @param {type:"integer"}
learning_rate = 2e-5 # @param {type:"number"}
output_dir = "./models/gpt2" # @param {type:"string"}

# @markdown ## Enable it if you are using GPU
fp16 = True # @param {type:"boolean"}

# Only request fp16 mixed precision when CUDA is actually available, so the
# notebook's CPU fallback (see `device`) is not handed a GPU-only option.
use_fp16 = fp16 and torch.cuda.is_available()
if fp16 and not use_fp16:
    print("CUDA is not available; disabling fp16 mixed precision.")

training_args = TrainingArguments(
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=True,
    learning_rate=learning_rate,
    lr_scheduler_type="cosine",
    max_steps=max_steps,
    # No intermediate checkpoints are written during training.
    save_strategy="no",
    logging_steps=50,
    output_dir=output_dir,
    # Warm up over the first quarter of the training steps.
    warmup_steps=max_steps//4,
    fp16=use_fp16,
)



In [None]:
# @title ## Training DPO

# Sequence-length limits, editable through the form.
max_prompt_length = 512 # @param {type:"integer"}
max_length = 1024 # @param {type:"integer"}

# Build the DPO trainer from the model, tokenizer, dataset and training
# arguments prepared in the earlier cells.
dpo_trainer = DPOTrainer(
    model=model,
    args=training_args,
    beta=0.1,
    train_dataset=dataset,
    tokenizer=tokenizer,
    max_length=max_length,
    max_prompt_length=max_prompt_length,
)

# Run the DPO fine-tuning; the returned value is displayed as the cell output.
dpo_trainer.train()

In [None]:
# @title ## Pushing to Hub
MODEL_PATH = "Sharathhebbar24/Instruct_GPT_small" # @param {type:"string"}
# Prefer leaving this field blank so the secret is not saved in the notebook;
# the HF_TOKEN environment variable is used as a fallback below.
HF_TOKEN = "" # @param {type:"string"}

# Resolve the token: form value first, then the HF_TOKEN environment variable.
# If both are empty, pass None instead of an empty string.
# NOTE(review): with token=None the library presumably falls back to its own
# stored credentials (e.g. a prior login) — confirm for the installed version.
hub_token = HF_TOKEN or os.environ.get("HF_TOKEN") or None

# Upload the tokenizer first, then the model, to the same repository.
tokenizer.push_to_hub(
    MODEL_PATH,
    token=hub_token
)

model.push_to_hub(
    MODEL_PATH,
    token=hub_token
)