## Required Installation

In [None]:
!pip install auto_gptq
!pip install optimum
!pip install -U accelerate bitsandbytes datasets peft transformers

In [None]:
from transformers.utils import is_auto_gptq_available, is_optimum_available

# Print, one result per line, what each availability probe reports
# (auto-gptq first, then optimum).
for backend_probe in (is_auto_gptq_available, is_optimum_available):
    print(backend_probe())

In [None]:
!pip install datasets==2.15.0

!pip install trl

!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/

!pip install ipywidgets==8.1.5

!pip show optimum

# Select the model and load its tokenizer from Hugging Face

In [None]:
from peft import prepare_model_for_kbit_training

from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig

# Hub id of the checkpoint used throughout the notebook
# (the name suggests a GPTQ-quantized Gemma-2 9B instruct model).
model_id = "marcsun13/gemma-2-9b-it-GPTQ"

# Only the tokenizer is loaded here; `model_id` is passed to the trainer later on.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load RAFT dataset

In [None]:
from datasets import Dataset

# Path to the RAFT dataset file; it must be filled in before running this cell.
dataset_path = ""

# Fail fast with an actionable message instead of handing an empty path to the loader.
if not dataset_path:
    raise ValueError("Set `dataset_path` to the RAFT dataset file before running this cell.")

ds = Dataset.from_file(dataset_path)

In [None]:
# Display the loaded dataset (last expression of the cell).
ds

In [None]:
import pandas as pd

# Materialise the dataset as a pandas DataFrame so a new column can be built with pandas.
df = pd.DataFrame(ds)

# Preview the first rows.
df.head()

In [None]:
def _row_to_messages(row):
    """Build the two-turn chat (user instruction, assistant answer) for one row."""
    user_turn = {'content': row['instruction'], 'role': 'user'}
    assistant_turn = {'content': row['cot_answer'], 'role': 'assistant'}
    return [user_turn, assistant_turn]


# Pair each row's 'instruction' and 'cot_answer' values into a new 'messages'
# column holding a list of role/content dictionaries.
df['messages'] = df.apply(_row_to_messages, axis=1)


In [None]:
import datasets

# Convert the DataFrame (which now carries the 'messages' column) back into a
# Dataset; note that this rebinds the name `ds`.
ds = datasets.Dataset.from_pandas(df)

# Display the rebuilt dataset.
ds

# Chat Template for prompt

In [None]:
# Set chat template
# The Jinja template renders every message as a role marker (<|user|>, <|system|>
# or <|assistant|>), a newline, the message content and the tokenizer's eos_token.
# When `add_generation_prompt` is set, a bare <|assistant|> marker is appended
# after the last message.

DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"

# Assigning the attribute replaces whatever chat template the tokenizer had before.
tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE

In [None]:
import re

import random

from multiprocessing import cpu_count



def apply_chat_template(example, tokenizer):
    """Render a conversation into one training string.

    Args:
        example: Mapping with a ``"messages"`` entry, a list of
            ``{"role": ..., "content": ...}`` dictionaries.
        tokenizer: Object exposing ``apply_chat_template(messages, tokenize=False)``.

    Returns:
        The same ``example`` with the rendered conversation stored under ``"text"``.
    """
    # Work on a copy so the caller's list is never modified in place.
    messages = list(example["messages"])

    # We add an empty system message if there is none (this also covers an
    # empty conversation, which previously raised IndexError).
    if not messages or messages[0]["role"] != "system":
        messages.insert(0, {"role": "system", "content": ""})

    example["text"] = tokenizer.apply_chat_template(messages, tokenize=False)
    return example



# Every column present before templating is dropped after mapping, so the
# result keeps only what the mapping function adds (the "text" field).
column_names = list(ds.features)

map_options = dict(
    num_proc=cpu_count(),
    fn_kwargs={"tokenizer": tokenizer},
    remove_columns=column_names,
    desc="Applying chat template",
)
raw_datasets = ds.map(apply_chat_template, **map_options)

In [None]:
# Display the templated dataset.
raw_datasets

In [None]:
# Inspect the first rendered example; indexing the row first avoids
# materialising the whole "text" column just to read one entry.
raw_datasets[0]["text"]

## Create train/test split

In [None]:
# Hold out 10% of the examples for evaluation. The split is seeded so that
# re-running the notebook produces the same train/test partition.
raw_datasets = raw_datasets.train_test_split(test_size=0.1, seed=42)

# create the splits
train_dataset = raw_datasets["train"]
eval_dataset = raw_datasets["test"]


# PEFT (LoRA) config

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for causal-LM fine-tuning; adapters are attached to
# the four projection modules listed in `target_modules`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["k_proj", "o_proj", "q_proj", "v_proj"],
)

# Quantization Config

In [None]:
from transformers import GPTQConfig



# 4-bit GPTQ with the ExLlama kernels switched off. `use_exllama=False` is the
# current spelling of the deprecated `disable_exllama=True`.
quantization_config = GPTQConfig(bits=4, use_exllama=False)

# Keyword arguments intended for loading the model.
# NOTE(review): nothing in the visible notebook passes this dict on (the
# `model_init_kwargs=model_kwargs` line in the trainer cell is commented out).
model_kwargs = dict(
    torch_dtype="auto",
    use_cache=False,  # set to False as we're going to use gradient checkpointing
    device_map="auto",
    quantization_config=quantization_config,
)


# SFT Training

In [None]:


from trl import SFTTrainer

from peft import LoraConfig

from transformers import TrainingArguments



# path where the Trainer will save its checkpoints and logs
output_dir = 'raft-sft-output'

# based on config
training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    seed=42,
    # --- precision ---
    fp16=True,  # specify bf16=True instead when training on GPUs that support bf16
    # --- batching ---
    per_device_train_batch_size=1,  # originally set to 8
    per_device_eval_batch_size=1,  # originally set to 8
    gradient_accumulation_steps=64,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # --- optimisation schedule ---
    learning_rate=2.0e-05,
    lr_scheduler_type="cosine",
    num_train_epochs=4,
    max_steps=-1,
    # --- evaluation ---
    do_eval=True,
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    eval_strategy="epoch",
    # --- logging ---
    log_level="info",
    logging_strategy="steps",
    logging_steps=5,
    # --- checkpointing ---
    save_strategy="no",
    save_total_limit=None,
)

In [None]:
# Build the SFT trainer. The model is handed over as the hub id string
# (`model_id`), not as an already loaded model object.
# NOTE(review): `model_init_kwargs=model_kwargs` is commented out below, so the
# `model_kwargs` dict (torch_dtype, use_cache, device_map, quantization_config)
# is not passed to the trainer — confirm this is intentional.
trainer = SFTTrainer(

        model=model_id,

        #model_init_kwargs=model_kwargs,

        args=training_args,

        train_dataset=train_dataset,

        eval_dataset=eval_dataset,

        #dataset_text_field="text",

        tokenizer=tokenizer,

        # packing=True,

        peft_config=peft_config,

        # max_seq_length=tokenizer.model_max_length,

        # max_seq_length=512,

        #max_seq_length=2048

    )

## Setting pad_token=eos_token

In [None]:
# Reuse the end-of-sequence token as the padding token.
# NOTE(review): this runs after the SFTTrainer was constructed with this
# tokenizer — confirm the ordering is intended.
tokenizer.pad_token = tokenizer.eos_token

## Start Training

In [None]:
#!pip install transformers accelerate peft datasets trl

import json
from transformers import TrainingArguments, TrainerCallback, TrainerState, TrainerControl
from transformers.integrations import TensorBoardCallback

class CustomTensorBoardCallback(TensorBoardCallback):
    """
    TensorBoard callback that tolerates non-JSON-serializable training arguments.

    Only ``on_train_begin`` is overridden: the training arguments are written
    to TensorBoard as JSON after dropping every entry that ``json.dumps``
    cannot encode, and the model config is written when a model is supplied.
    """

    @staticmethod
    def _json_safe(args_dict):
        """Return a copy of ``args_dict`` without the entries ``json.dumps`` rejects.

        Each value is probed individually, so nested containers holding
        non-serializable objects are dropped as well.
        """
        safe_args = {}
        for key, value in args_dict.items():
            try:
                json.dumps(value)
            except (TypeError, ValueError):
                continue
            safe_args[key] = value
        return safe_args

    def on_train_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
        """
        Event called at the beginning of training.

        Overridden to handle non-serializable objects in TrainingArguments.
        """
        # Only the main process writes to TensorBoard.
        if not state.is_world_process_zero:
            return

        # Because this override does not call the parent implementation, the
        # summary writer must be created here; otherwise `tb_writer` is still
        # None at this point and nothing below would ever be logged.
        if self.tb_writer is None:
            self._init_summary_writer(args)

        if self.tb_writer is not None:
            safe_args = self._json_safe(args.to_dict())
            self.tb_writer.add_text("args", json.dumps(safe_args, indent=2))

            model = kwargs.get("model")
            if getattr(model, "config", None) is not None:
                model_config_json = model.config.to_json_string()
                self.tb_writer.add_text("model_config", model_config_json)


# Replace the default TensorBoardCallback with the custom one
# NOTE(review): keep the removal before the addition — the custom class
# subclasses TensorBoardCallback, so removing afterwards would presumably drop
# the custom callback as well (verify against the Trainer callback handler).
trainer.remove_callback(TensorBoardCallback)  # Remove default if present
trainer.add_callback(CustomTensorBoardCallback)  # Add custom callback

In [None]:
import wandb
import os
# Never store the key in the notebook: keep a key that is already present in
# the environment and only prompt (without echo) when it is missing.
if not os.environ.get("WANDB_API_KEY"):
    from getpass import getpass

    os.environ["WANDB_API_KEY"] = getpass("Weights & Biases API key: ")

In [None]:
import wandb
# No key literal in the notebook: without an explicit `key`, wandb resolves
# the credential itself (e.g. from the WANDB_API_KEY environment variable).
wandb.login()

In [None]:
# Run the fine-tuning loop and keep the returned result object for the metrics below.
train_result = trainer.train()

In [None]:
 # Metrics attached to the result returned by `trainer.train()`.
 # NOTE(review): this cell is indented by one space; presumably tolerated by
 # the notebook kernel, but it would not run as plain Python — consider dedenting.
 metrics = train_result.metrics

In [None]:
# Show the training metrics collected above.
print(metrics)

## Save the finetuned_raft_model

In [None]:
# Directory (relative to the working directory) that receives the fine-tuned model.
output_dir_new = "finetuned_raft_model"

# Write the trainer's model to that directory.
trainer.save_model(output_dir_new)

In [None]:
import os
import shutil
import tempfile

# Define the directory to be zipped and the output zip file name
directory_to_zip = '/kaggle/working/'
output_zip_file = '/kaggle/working/working_directory.zip'

# The archive lives inside the directory being archived. Building it in place
# would let the zip pick up itself (or a stale copy from an earlier run), so
# drop any old archive, build the new one in a temporary directory, and move
# it into place only once it is complete.
if os.path.exists(output_zip_file):
    os.remove(output_zip_file)

# splitext removes only the trailing ".zip", unlike str.replace.
archive_stem = os.path.splitext(os.path.basename(output_zip_file))[0]

with tempfile.TemporaryDirectory() as staging_dir:
    staged_archive = shutil.make_archive(
        os.path.join(staging_dir, archive_stem), 'zip', directory_to_zip
    )
    shutil.move(staged_archive, output_zip_file)

# Display the path of the finished archive.
output_zip_file

# Inference

## Load model

In [None]:
# Directory holding the fine-tuned model saved earlier; redefined here so the
# inference section can be run without the save cell.
output_dir_new = "finetuned_raft_model"

In [None]:
# 4-bit GPTQ loading config with the ExLlama kernels switched off.
# `use_exllama=False` is the current spelling of the deprecated `disable_exllama=True`.
quantization_config_loading = GPTQConfig(bits=4, use_exllama=False)

# Load the fine-tuned model from the saved directory, letting `device_map="auto"`
# decide the device placement.
model = AutoModelForCausalLM.from_pretrained(
    output_dir_new,
    quantization_config=quantization_config_loading,
    device_map="auto",
)
