In [None]:
%%capture
# Install the dependencies
# NOTE(review): `-U` installs whatever release is newest at run time, so two runs of this
# notebook may execute against different library APIs. The SFTTrainer keyword arguments used
# further down (max_seq_length, dataset_text_field, tokenizer) look tied to one TRL API
# generation — TODO pin known-good versions once confirmed (e.g. `%pip install -q trl==<version>`).

# Dependencies to play around with the LLM
%pip install -U bitsandbytes    # 8-bit optimizers and quantization functions (to compress the model size)
%pip install -U transformers    # High-level wrapper to easily setup transformer-based neural networks
%pip install -U accelerate      # High-level wrapper to easily integrate with multi GPU/TPU training & inference pipelines

# Dependencies to conduct fine tuning
%pip install -U peft            # Parameter-efficient fine-tuning (QLora, Lora etc.)
%pip install -U trl             # Transformer Reinforcement Learning - Full-stack library to fine-tune and align LLMs
%pip install -U datasets        # Datasets from Huggingface

In [None]:
# Authenticate with third-party platforms: Weights & Biases (wandb) to log training
# progress, and the Hugging Face Hub, which the model checkpoints are loaded from below.

from kaggle_secrets import UserSecretsClient
from huggingface_hub import login
import wandb

# Credentials are read from the Kaggle secrets store instead of being hard-coded here.
user_secrets = UserSecretsClient()

# The labels passed to get_secret must match the secret names attached to this notebook.
secret_hf = user_secrets.get_secret('HuggingFace')
secret_wandb = user_secrets.get_secret('WanDB')

login(secret_hf)
wandb.login(key=secret_wandb)


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

def instantiate(model_path):
    """Load a 4-bit quantized causal language model together with its tokenizer.

    Args:
        model_path: Hub identifier or local directory handed to ``from_pretrained``
            for both the model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # 4-bit NF4 weights with bfloat16 compute: the compressed model is what lets
    # it fit into GPU memory.
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # Tuple elements are evaluated left to right, so the model is still loaded
    # before the tokenizer.
    return (
        AutoModelForCausalLM.from_pretrained(
            model_path,
            quantization_config=quant_config,
            device_map='auto',
        ),
        AutoTokenizer.from_pretrained(model_path),
    )

In [None]:
# Identifiers of the pretrained and the instruction-tuned Gemma 2B checkpoints.
# The trailing comments keep what look like local Kaggle input paths as an alternative source.
base_model_path = 'google/gemma-2b' #  '/kaggle/input/gemma/transformers/2b/2'
instruction_tuned_model_path = 'google/gemma-2b-it' # '/kaggle/input/gemma/transformers/2b-it/3'

# Load both variants (each with its own tokenizer) so their responses can be compared.
base_model, base_tokenizer = instantiate(base_model_path)
it_model, it_tokenizer = instantiate(instruction_tuned_model_path)

In [None]:
# Role-play prompt: `system` describes the persona and `user` holds the question put to it.
system = "In the bustling streets of Victorian London, there exists a figure of unparalleled intellect and deductive prowess - Sherlock Holmes. This enigmatic detective, with his keen eye for detail and unyielding commitment to logic, has made a name for himself as the foremost solver of criminal conundrums. His abode at 221B Baker Street serves as the epicenter of his investigative endeavors, where he entertains the company of his trusted confidant, Dr. John Watson. Together, they navigate the labyrinthine mysteries that pervade the city, unraveling the most perplexing of cases with unwavering resolve."
user = "How do you approach a new case, Sherlock?"

# The prompt ends with an open "<|assistant|>" turn; extract_response later splits on that marker.
# NOTE(review): these turn markers are not Gemma's own chat template — presumably they mirror
# the text format of the fine-tuning dataset; confirm against a dataset sample.
prompt = f"<|system|>{system}</s> <|user|>{user}</s> <|assistant|>"

In [None]:
def tokenize(prompt, tokenizer, model):
    """Encode ``prompt`` as PyTorch tensors placed on the model's device.

    Args:
        prompt: Text to encode.
        tokenizer: Callable tokenizer; invoked with ``return_tensors='pt'``.
        model: Object exposing a ``device`` attribute that the encoding is moved to.

    Returns:
        Whatever the encoding's ``.to(...)`` call returns.
    """
    encoded = tokenizer(prompt, return_tensors='pt')
    return encoded.to(model.device)

In [None]:
# Encode the same prompt once per model, using that model's own tokenizer and device.
base_input = tokenize(prompt, base_tokenizer, base_model)
it_input = tokenize(prompt, it_tokenizer, it_model)

In [None]:
def generate(tokenized_input, tokenizer, model, max_length=1024):
    """Generate one completion for an encoded prompt and decode it to text.

    Args:
        tokenized_input: Mapping of tensors produced by ``tokenize``; unpacked as
            keyword arguments into ``model.generate``.
        tokenizer: Tokenizer used to decode the generated ids.
        model: Model exposing ``generate``.
        max_length: Length cap forwarded to ``model.generate``. Defaults to 1024,
            the value that used to be hard-coded.

    Returns:
        The first (and only) generated sequence decoded with special tokens removed.
    """
    sequences = model.generate(
        **tokenized_input,
        max_length=max_length,
        num_return_sequences=1,
    )
    # Exactly one sequence is requested, so index 0 is the whole result.
    return tokenizer.decode(sequences[0], skip_special_tokens=True)

def extract_response(generation):
    """Return the text that follows the first ``<|assistant|>`` marker.

    When the marker occurs more than once, only the text between the first and
    the second occurrence is returned. When it does not occur at all, the
    generation is returned unchanged instead of raising ``IndexError``.

    Args:
        generation: Decoded model output containing the prompt and the completion.

    Returns:
        The assistant's part of the generation, or the whole string if the marker
        is missing.
    """
    pieces = generation.split("<|assistant|>")
    if len(pieces) < 2:
        # Marker missing (e.g. stripped during decoding): fall back to the raw text.
        return generation
    return pieces[1]

In [None]:
# Produce a completion for the shared prompt from each of the two models.
base_model_generation = generate(base_input, base_tokenizer, base_model)
it_model_generation = generate(it_input, it_tokenizer, it_model)

In [None]:
# Display the base model's text after the "<|assistant|>" marker.
extract_response(base_model_generation)

In [None]:
# Display the instruction-tuned model's text after the "<|assistant|>" marker.
extract_response(it_model_generation)

# Fine-tuning!

In [None]:
from datasets import load_dataset

# Only the first 300 rows of the train split are loaded.
dataset_name = 'hieunguyenminh/roleplay'
dataset = load_dataset(dataset_name, split="train[:300]")

# Have a look at the dataset
dataset

In [None]:
# Remove any records that have to do with Sherlock or Watson, so the evaluation prompt's
# persona is not part of the training data. Matching is case-insensitive and by substring
# so that name variants (e.g. a full name) are held out too; a missing name is kept.
dataset = dataset.filter(
    lambda x: not any(
        held_out in (x['name'] or '').lower()
        for held_out in ('sherlock', 'watson')
    )
)
dataset

In [None]:
from transformers import (
    TrainingArguments,
    logging,
)

from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)

from trl import SFTTrainer


def prepare_for_finetuning(model, tokenizer, model_output_dir, train_dataset=None):
    """Attach LoRA adapters to a quantized model and build an SFT trainer for it.

    Both ``model.config`` and ``tokenizer`` are modified in place.

    Args:
        model: Causal language model to fine-tune (loaded in 4-bit by ``instantiate``).
        tokenizer: Tokenizer matching ``model``; its padding and BOS/EOS settings
            are changed for training.
        model_output_dir: Directory handed to ``TrainingArguments(output_dir=...)``.
        train_dataset: Dataset with a ``"text"`` column to train on. When omitted,
            the notebook-level ``dataset`` variable is used, as before.

    Returns:
        A ``(trainer, model)`` tuple where ``model`` is the adapter-wrapped model
        that ``trainer`` will train.
    """
    if train_dataset is None:
        # Backward-compatible default: fall back to the notebook-level dataset.
        train_dataset = dataset

    # The generation cache is turned off for training; prepare_for_eval turns it back on.
    model.config.use_cache = False
    model.config.pretraining_tp = 1

    tokenizer.padding_side = 'right'
    # Reuse EOS as the pad token only when the tokenizer has none. Overwriting an
    # existing pad token with EOS lets the collator's padding mask (presumably applied
    # by pad-token id) also hide the real EOS labels, so the model would not learn to stop.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Every training example is wrapped in BOS ... EOS.
    tokenizer.add_eos_token = True
    tokenizer.add_bos_token = True

    # LoRA adapters on the attention and MLP projection layers.
    peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0.1,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=['o_proj', 'q_proj', 'up_proj', 'v_proj', 'k_proj', 'down_proj', 'gate_proj'],
    )

    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, peft_config)

    training_arguments = TrainingArguments(
        output_dir=model_output_dir,
        num_train_epochs=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        optim="paged_adamw_32bit",
        save_strategy="epoch",
        logging_steps=100,
        logging_strategy="steps",
        learning_rate=2e-4,
        fp16=False,
        bf16=False,
        group_by_length=True,
        # Explicit target: `None` does not disable reporting and its meaning is
        # version-dependent, while this notebook logs in to wandb to track training.
        report_to="wandb",
    )

    # The model is already adapter-wrapped above, so no peft_config is passed here;
    # handing the trainer both a wrapped model and a config is redundant.
    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        max_seq_length=512,
        dataset_text_field="text",
        tokenizer=tokenizer,
        args=training_arguments,
        packing=False,
    )

    return trainer, model
    

def prepare_for_eval(model, tokenizer):
    """Switch a fine-tuned model and its tokenizer from training to inference settings.

    Args:
        model: Model to generate with; modified in place.
        tokenizer: Tokenizer that ``prepare_for_finetuning`` configured for training;
            modified in place.

    Returns:
        None.
    """
    # Undo the training-time settings applied by prepare_for_finetuning.
    model.config.use_cache = True
    model.gradient_checkpointing_disable()

    # Leave training mode so dropout (including the adapters' dropout) is inactive
    # while generating.
    model.eval()

    # Prompts encoded from here on must not end with EOS, otherwise the model is
    # asked to continue a sequence that is already terminated.
    tokenizer.add_eos_token = False


In [None]:
# Wrap the base model for fine-tuning and build its trainer; training output goes to the given directory.
base_trainer, peft_base_model = prepare_for_finetuning(base_model, base_tokenizer, './gemma-2b-v2-role-play')

In [None]:
# Report how many of the wrapped model's parameters are trainable.
peft_base_model.print_trainable_parameters()

In [None]:
# Run the fine-tuning loop with the arguments set in prepare_for_finetuning.
base_trainer.train()

In [None]:
# Save the trained model to the same directory that was used as the trainer's output_dir
# (presumably only the adapter weights are written — TODO confirm).
base_trainer.model.save_pretrained('./gemma-2b-v2-role-play')

In [None]:
# Switch the fine-tuned model over to inference settings before generating with it.
prepare_for_eval(peft_base_model, base_tokenizer)

In [None]:
# Re-run the already-tokenized Sherlock prompt (base_input) through the fine-tuned model.
outputs = generate(base_input, base_tokenizer, peft_base_model)

In [None]:
# Display the fine-tuned model's text after the "<|assistant|>" marker.
extract_response(outputs)