In [1]:
 ! pip install -U bitsandbytes accelerate transformers datasets trl peft evaluate rouge_score wandb

Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting accelerate
  Downloading accelerate-1.1.1-py3-none-any.whl.metadata (19 kB)
Collecting transformers
  Downloading transformers-4.46.2-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting trl
  Downloading trl-0.12.0-py3-none-any.whl.metadata (10 kB)
Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l- done
Collecting wandb
  Downloading wandb-0.18.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Downloading bi

In [2]:
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    DistilBertTokenizer,
    TrainingArguments,
    pipeline,
)
import evaluate
from datasets import load_dataset, Dataset
from trl import (
    SFTTrainer,
    PPOTrainer,
    RewardTrainer,
    PPOConfig,
    RewardConfig,
    AutoModelForCausalLMWithValueHead,
)
from peft import LoraConfig, get_peft_model
from bitsandbytes.optim import AdamW8bit
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader, Dataset as torchDataset
import numpy as np
import wandb

# Hugging face and wandb login

In [3]:
from huggingface_hub import login
login(token='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')
wandb.login(key="ba3349aecf7f23a3abb849de3155be527d3585f1")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Hyperparameter

In [4]:
dataset = load_dataset("openai/summarize_from_feedback", "comparisons")
base_reward_model_checkpoint = "google/gemma-2-2b"
reward_model_repo_name="reward_model"
reward_model_checkpoint=f"JaishreeramCoder/{reward_model_repo_name}"
output_dir="/content/sample_data"
base_sft_model_checkpoint = "meta-llama/Llama-3.1-8B"
sft_model_repo_name = "sft_model"
sft_model_checkpoint=f"JaishreeramCoder/{sft_model_repo_name}"
rlhf_model_repo_name="ppo_gpt2_summary"
rlhf_model_checkpoint=f"JaishreeramCoder/{rlhf_model_repo_name}"
num_train_epochs_reward_model = 5
num_train_epochs_sft = 5
num_train_epochs_ppo_outer=5
ppo_training_batch_size=8
eval_batch_size = 8
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

README.md:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

summarize_from_feedback.py:   0%|          | 0.00/9.38k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/21.1M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/22.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/92858 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/86086 [00:00<?, ? examples/s]

In [5]:
def count_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params, trainable_params

# Supervised fine tuned model

In [6]:
sft_tokenizer = AutoTokenizer.from_pretrained(base_sft_model_checkpoint)
sft_tokenizer.pad_token = sft_tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [7]:
def get_sft_dataset(data):
    input_ids, attention_mask, label_ids = ([], [], [])
    for i in range(len(data["choice"])):
        input = f"Summarize the following text:\n\n{data['info'][i]['post']}"
        cur = sft_tokenizer(
            input,
            padding="max_length",
            truncation=True,
            max_length=512,
            padding_side="left",
        )
        cur_input_ids = cur.input_ids
        cur_attention_mask = cur.attention_mask
        completion = (
            data["summaries"][i][1]["text"]
            if data["choice"][i] == 1
            else data["summaries"][i][0]["text"]
        )
        cur_label_ids = sft_tokenizer(
            completion,
            padding="max_length",
            truncation=True,
            max_length=512,
            padding_side="left",
        ).input_ids
        input_ids.append(cur_input_ids)
        attention_mask.append(cur_attention_mask)
        label_ids.append(cur_label_ids)
    output = {
        "input_ids": input_ids,
        "attention_masks": attention_mask,
        "labels": label_ids,
    }
    output = Dataset.from_dict(output)
    return output

In [8]:
sft_train_dataset = get_sft_dataset(dataset["train"][1000:2000])
sft_eval_dataset = get_sft_dataset(dataset["validation"][1000:2000])

In [9]:
compute_dtype = getattr(torch, "float16")
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)
sft_model = AutoModelForCausalLM.from_pretrained(
    base_sft_model_checkpoint,
    quantization_config=quantization_config,
)

config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [10]:
lora_config =  LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)
sft_model = get_peft_model(sft_model, lora_config)

In [11]:
!nvidia-smi

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Sat Nov  9 11:43:12 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off |   00000000:00:04.0 Off |                    0 |
| N/A   37C    P0             34W /  250W |    6265MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                     

In [12]:
print(count_parameters(sft_model))

(4567863296, 27262976)


In [13]:
sft_training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    logging_steps=1,
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    num_train_epochs=num_train_epochs_sft,
    evaluation_strategy="epoch",
    eval_steps=0.2,
    warmup_ratio=0.05,
    save_strategy="epoch",
    group_by_length=True,
    output_dir="/content/sample_data",
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,
    load_best_model_at_end=True,
    push_to_hub=True,
)
param_to_update = []
for param in sft_model.parameters():
    if param.requires_grad == True:
        param_to_update.append(param)
optimizers = AdamW8bit(param_to_update, lr=2e-5)
model_trainer = SFTTrainer(
    model=sft_model,
    tokenizer=sft_tokenizer,
    train_dataset=sft_train_dataset,
    eval_dataset=sft_eval_dataset,
    args=sft_training_args,
    optimizers=(optimizers, None),
)

In [14]:
model_trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33msharmaadarsh345678[0m ([33msharmaadarsh345678-iit-kharagpur[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.18.6
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20241109_114315-3bim5s4q[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33m/content/sample_data[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/sharmaadarsh345678-iit-kharagpur/huggingface[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/sharmaadarsh345678-iit-kharagpur/huggingface/runs/3bim5s4q[0m


Epoch,Training Loss,Validation Loss
1,2.4166,2.422211
2,2.2969,2.420995
3,2.2232,2.432209
4,2.2567,2.424953
5,1.604,2.430543


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=625, training_loss=2.3870511182785035, metrics={'train_runtime': 18450.2278, 'train_samples_per_second': 0.271, 'train_steps_per_second': 0.034, 'total_flos': 1.1569440227328e+17, 'train_loss': 2.3870511182785035, 'epoch': 5.0})

In [15]:
rouge_metric = evaluate.load("rouge")
def compute_metrics(decoded_preds, decoded_actual_labels):
    result = rouge_metric.compute(
        predictions=decoded_preds, references=decoded_actual_labels
    )
    print(f"SFT Model ROUGE values: {result}")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [16]:
generation_kwargs = {
    "min_length": -1,  # don't ignore the EOS token
    "top_k": 0.0,  # no top-k sampling

    "top_p": 1.0,  # no nucleus sampling

    "do_sample": True,  # yes, we want to sample

    "eos_token_id": sft_tokenizer.eos_token_id,

    "bos_token_id": sft_tokenizer.bos_token_id,

    "pad_token_id": sft_tokenizer.eos_token_id,  # most decoder models don't have a padding token - use EOS token instead

    "max_new_tokens": 32,  # specify how many tokens you want to generate at most

}

In [None]:
def evaluate_sft_model(sft_model, sft_eval_dataset):

    with torch.no_grad():

        sft_model.eval()

        decoded_preds = []

        decoded_actual_labels = []

        for i in tqdm(range(0, len(sft_eval_dataset["input_ids"]), eval_batch_size)):

            cur_data = torch.tensor(

                sft_eval_dataset["input_ids"][i : i + eval_batch_size]

            )

            cur_data=cur_data.to(device)

            cur_preds = sft_model.generate(cur_data, **generation_kwargs)

            cur_preds = cur_preds[:, cur_data.shape[1] :]

            for j in range(eval_batch_size):

                generated_text = sft_tokenizer.decode(

                    cur_preds[j], skip_special_tokens=True

                )

                decoded_preds.append(generated_text)

            cur_actual_label_ids = torch.tensor(

                sft_eval_dataset["labels"][i : i + eval_batch_size]

            )

            for j in range(eval_batch_size):

                decoded_actual_labels.append(

                    sft_tokenizer.decode(

                        cur_actual_label_ids[j], skip_special_tokens=True

                    )

                )

        sft_model_eval_result = compute_metrics(

            decoded_preds=decoded_preds, decoded_actual_labels=decoded_actual_labels

        )





evaluate_sft_model(sft_model, sft_eval_dataset)

# Push to hub

In [18]:
sft_model=sft_model.merge_and_unload()

sft_model.push_to_hub(sft_model_repo_name)

sft_tokenizer.push_to_hub(sft_model_repo_name)



model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/JaishreeramCoder/sft_model/commit/75fb88aae70908443de3aeb21da522d362b8f5bb', commit_message='Upload tokenizer', commit_description='', oid='75fb88aae70908443de3aeb21da522d362b8f5bb', pr_url=None, repo_url=RepoUrl('https://huggingface.co/JaishreeramCoder/sft_model', endpoint='https://huggingface.co', repo_type='model', repo_id='JaishreeramCoder/sft_model'), pr_revision=None, pr_num=None)