# Setup

In [9]:
!pip install transformers
!python -m pip install --upgrade pip
!pip install -U -q transformers
!pip install -q -U bitsandbytes
!pip install -q -U peft
!pip install -q -U accelerate
!pip install -q datasets
!pip install -q -U trl



In [None]:
import torch
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoModelForCausalLM, DataCollatorForLanguageModeling, Trainer, TrainingArguments
from trl import DPOTrainer
from typing import Dict

In [None]:
from huggingface_hub import notebook_login
notebook_login()

In [None]:
import wandb
wandb.login()

In [None]:
print(torch.cuda.is_available())  # Check if CUDA is available
print(torch.cuda.get_device_name(0))  # Get the GPU name

In [None]:
!transformers-cli env

# Fine Tuning Expert 1- Coder

In [3]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, DataCollatorForLanguageModeling, Trainer, TrainingArguments
from datasets import load_dataset
import torch


# Load the dataset
dataset = load_dataset("code_search_net", "python")

# Initialize the tokenizer and set the padding token
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # Set the eos_token as the pad_token

# Preprocess and tokenize the dataset
def preprocess_function(examples):
    return tokenizer(examples['func_code_string'], padding="max_length", truncation=True, max_length=512)

tokenized_dataset = dataset.map(preprocess_function, batched=True)
tokenized_dataset.set_format('torch', columns=['input_ids', 'attention_mask'])

# Use a subset of the tokenized dataset for training
#train_dataset = tokenized_dataset["train"].select(range(1000))  # Adjust the range as needed
train_dataset = tokenized_dataset["train"].select(range(2001, 4001))  # Select data from indices 1001 to 2000

# Initialize the model

fine_tuned_model_path = "./gpt2-codesearchnet-dpo-py-js"
model = GPT2LMHeadModel.from_pretrained(fine_tuned_model_path)
model.resize_token_embeddings(len(tokenizer))


# Move model to GPU if available
if torch.cuda.is_available():
    model = model.to('cuda')
    print("Model moved to cuda")
else:
    print("CUDA is not available. Using CPU instead.")

# Define a data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./gpt2-codesearchnet",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,  # Adjust based on your GPU's capability
    save_steps=500,
    save_total_limit=2,
    logging_steps=20,  # Add this line to log every 20 steps
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
)

# Start fine-tuning
trainer.train()

# After fine-tuning
fine_tuned_model_path = "./gpt2-codesearchnet-dpo-py-js"
trainer.save_model(fine_tuned_model_path)

from transformers import TrainingArguments, AutoModelForCausalLM, GPT2Tokenizer
from datasets import load_dataset
from trl import DPOTrainer
from typing import Dict
import torch

training_args = TrainingArguments(
    output_dir="./model_output",  # Directory where the model checkpoints will be saved.
    num_train_epochs=1,          # Total number of training epochs.
    per_device_train_batch_size=2,  # Batch size per device during training.
    per_device_eval_batch_size=2,   # Batch size for evaluation.
    gradient_accumulation_steps=6, # Accumulate gradients over two steps to save memory
    fp16=True, 
    warmup_steps=100,             # Number of warmup steps for learning rate scheduler.
    weight_decay=0.01,            # Strength of weight decay.
    logging_dir='./logs',         # Directory for storing logs.
    logging_steps=10,             # Log every X updates steps.
    save_steps=10,              # Save checkpoint every X updates steps.
    remove_unused_columns=False,  # Add this line,
    # Add other parameters as needed
)
fine_tuned_model_path = "./gpt2-codesearchnet-dpo-py-js"
model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_path)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
tokenizer.pad_token = tokenizer.eos_token
def tokenize_function(examples):
    return tokenizer(examples["text"], max_length=1024, truncation=True, padding="max_length")


# Move model to GPU if available
if torch.cuda.is_available():
    model = model.to('cuda')
    print("Model moved to cuda")
else:
    print("CUDA is not available. Using CPU instead.")

def return_prompt_and_responses(samples) -> Dict[str, list]:
    return {
        "prompt": [
            "Question: " + question + "\n\nAnswer: "
            for question in samples["question"]
        ],
        "chosen": samples["response_j"],   # rated better than k
        "rejected": samples["response_k"], # rated worse than j
    }

dataset = load_dataset(
    "lvwerra/stack-exchange-paired",
    split="train",
    data_dir="data/rl"
)




subset_size = 1000
start_index = 500  # Starting index for the second chunk
end_index = start_index + subset_size  # Ending index for the second chunk
# Select the second chunk of the dataset
subset_indices = range(start_index, end_index)
dataset = dataset.select(subset_indices)

original_columns = dataset.column_names

dataset = dataset.map(
    return_prompt_and_responses,
    batched=True,
    remove_columns=original_columns
)

dpo_trainer = DPOTrainer(
    model=model,
    args=training_args,
    beta=0.5,
    train_dataset=dataset,
    tokenizer=tokenizer,
    max_length = 256,
    max_prompt_length = 128

)

dpo_trainer.train()

model_save_path_after_dpo = "./gpt2-codesearchnet-dpo-py-js"
dpo_trainer.save_model(model_save_path_after_dpo)


from transformers import GPT2Tokenizer, GPT2LMHeadModel, DataCollatorForLanguageModeling, Trainer, TrainingArguments
from datasets import load_dataset
import torch


# Load the dataset
dataset = load_dataset("code_search_net", "javascript")

# Initialize the tokenizer and set the padding token
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # Set the eos_token as the pad_token

# Preprocess and tokenize the dataset
def preprocess_function(examples):
    return tokenizer(examples['func_code_string'], padding="max_length", truncation=True, max_length=512)

tokenized_dataset = dataset.map(preprocess_function, batched=True)
tokenized_dataset.set_format('torch', columns=['input_ids', 'attention_mask'])

# Use a subset of the tokenized dataset for training
#train_dataset = tokenized_dataset["train"].select(range(1000))  # Adjust the range as needed
train_dataset = tokenized_dataset["train"].select(range(2000, 4001))  # Select data from indices 1001 to 2000

# Initialize the model

fine_tuned_model_path = "./gpt2-codesearchnet-dpo-py-js"
model = GPT2LMHeadModel.from_pretrained(fine_tuned_model_path)
model.resize_token_embeddings(len(tokenizer))


# Move model to GPU if available
if torch.cuda.is_available():
    model = model.to('cuda')
    print("Model moved to cuda")
else:
    print("CUDA is not available. Using CPU instead.")

# Define a data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./gpt2-codesearchnet-finetuned-dpo-args",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,  # Adjust based on your GPU's capability
    save_steps=500,
    logging_steps=20
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
)

# Start fine-tuning
trainer.train()


# After fine-tuning
fine_tuned_model_path = "./gpt2-codesearchnet-dpo-py-js"
trainer.save_model(fine_tuned_model_path)

training_args = TrainingArguments(
    output_dir="./model_output",  # Directory where the model checkpoints will be saved.
    num_train_epochs=1,          # Total number of training epochs.
    per_device_train_batch_size=2,  # Batch size per device during training.
    per_device_eval_batch_size=2,   # Batch size for evaluation.
    gradient_accumulation_steps=6, # Accumulate gradients over two steps to save memory
    fp16=True, 
    warmup_steps=100,             # Number of warmup steps for learning rate scheduler.
    weight_decay=0.01,            # Strength of weight decay.
    logging_dir='./logs',         # Directory for storing logs.
    logging_steps=10,             # Log every X updates steps.
    save_steps=10,              # Save checkpoint every X updates steps.
    remove_unused_columns=False,  # Add this line,
    # Add other parameters as needed
)
fine_tuned_model_path = "./gpt2-codesearchnet-dpo-py-js"
model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_path)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
tokenizer.pad_token = tokenizer.eos_token
def tokenize_function(examples):
    return tokenizer(examples["text"], max_length=1024, truncation=True, padding="max_length")


# Move model to GPU if available
if torch.cuda.is_available():
    model = model.to('cuda')
    print("Model moved to cuda")
else:
    print("CUDA is not available. Using CPU instead.")

def return_prompt_and_responses(samples) -> Dict[str, list]:
    return {
        "prompt": [
            "Question: " + question + "\n\nAnswer: "
            for question in samples["question"]
        ],
        "chosen": samples["response_j"],   # rated better than k
        "rejected": samples["response_k"], # rated worse than j
    }

dataset = load_dataset(
    "lvwerra/stack-exchange-paired",
    split="train",
    data_dir="data/rl"
)

start_index = 0  # Starting index for the second chunk
end_index = start_index + subset_size  # Ending index for the second chunk

# Select the second chunk of the dataset
subset_indices = range(start_index, end_index)
dataset = dataset.select(subset_indices)

original_columns = dataset.column_names

dataset = dataset.map(
    return_prompt_and_responses,
    batched=True,
    remove_columns=original_columns
)

dpo_trainer = DPOTrainer(
    model=model,
    args=training_args,
    beta=0.5,
    train_dataset=dataset,
    tokenizer=tokenizer,
    max_length = 256,
    max_prompt_length = 128

)

dpo_trainer.train()

model_save_path_after_dpo = "./gpt2-codesearchnet-dpo-py-js"
dpo_trainer.save_model(model_save_path_after_dpo)

Model moved to cuda


  0%|          | 0/1500 [00:00<?, ?it/s]

{'loss': 1.271, 'learning_rate': 4.933333333333334e-05, 'epoch': 0.04}
{'loss': 1.2574, 'learning_rate': 4.866666666666667e-05, 'epoch': 0.08}
{'loss': 1.2408, 'learning_rate': 4.8e-05, 'epoch': 0.12}
{'loss': 1.2417, 'learning_rate': 4.7333333333333336e-05, 'epoch': 0.16}
{'loss': 1.2925, 'learning_rate': 4.666666666666667e-05, 'epoch': 0.2}
{'loss': 1.3231, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.24}
{'loss': 1.301, 'learning_rate': 4.5333333333333335e-05, 'epoch': 0.28}
{'loss': 1.2404, 'learning_rate': 4.466666666666667e-05, 'epoch': 0.32}
{'loss': 1.148, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.36}
{'loss': 1.3483, 'learning_rate': 4.3333333333333334e-05, 'epoch': 0.4}
{'loss': 1.2259, 'learning_rate': 4.266666666666667e-05, 'epoch': 0.44}
{'loss': 1.2077, 'learning_rate': 4.2e-05, 'epoch': 0.48}
{'loss': 1.2801, 'learning_rate': 4.133333333333333e-05, 'epoch': 0.52}
{'loss': 1.281, 'learning_rate': 4.066666666666667e-05, 'epoch': 0.56}
{'loss': 1.2697, 'lear

Checkpoint destination directory ./gpt2-codesearchnet\checkpoint-500 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 1.201, 'learning_rate': 3.3333333333333335e-05, 'epoch': 1.0}
{'loss': 1.0745, 'learning_rate': 3.266666666666667e-05, 'epoch': 1.04}
{'loss': 1.1324, 'learning_rate': 3.2000000000000005e-05, 'epoch': 1.08}
{'loss': 1.1152, 'learning_rate': 3.1333333333333334e-05, 'epoch': 1.12}
{'loss': 1.1383, 'learning_rate': 3.066666666666667e-05, 'epoch': 1.16}
{'loss': 1.0826, 'learning_rate': 3e-05, 'epoch': 1.2}
{'loss': 1.1426, 'learning_rate': 2.9333333333333336e-05, 'epoch': 1.24}
{'loss': 1.0812, 'learning_rate': 2.8666666666666668e-05, 'epoch': 1.28}
{'loss': 1.0586, 'learning_rate': 2.8000000000000003e-05, 'epoch': 1.32}
{'loss': 1.1316, 'learning_rate': 2.733333333333333e-05, 'epoch': 1.36}
{'loss': 1.1482, 'learning_rate': 2.6666666666666667e-05, 'epoch': 1.4}
{'loss': 1.1202, 'learning_rate': 2.6000000000000002e-05, 'epoch': 1.44}
{'loss': 1.1005, 'learning_rate': 2.5333333333333337e-05, 'epoch': 1.48}
{'loss': 1.0816, 'learning_rate': 2.466666666666667e-05, 'epoch': 1.52}
{'l

Resolving data files:   0%|          | 0/20 [00:00<?, ?it/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed
Token indices sequence length is longer than the specified maximum sequence length for this model (1047 > 1024). Running this sequence through the model will result in indexing errors
Checkpoint destination directory ./model_output\checkpoint-10 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.6849, 'learning_rate': 3e-06, 'rewards/chosen': -0.03544330596923828, 'rewards/rejected': -0.056198056787252426, 'rewards/accuracies': 0.25833332538604736, 'rewards/margins': 0.020754750818014145, 'logps/rejected': -393.04083251953125, 'logps/chosen': -383.63580322265625, 'logits/rejected': -27.472835540771484, 'logits/chosen': -27.7117862701416, 'epoch': 0.12}


Checkpoint destination directory ./model_output\checkpoint-20 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.6236, 'learning_rate': 8.000000000000001e-06, 'rewards/chosen': -0.1736384481191635, 'rewards/rejected': -0.5431264042854309, 'rewards/accuracies': 0.6583333611488342, 'rewards/margins': 0.3694879710674286, 'logps/rejected': -367.2397766113281, 'logps/chosen': -368.5068359375, 'logits/rejected': -27.445049285888672, 'logits/chosen': -27.823726654052734, 'epoch': 0.24}


Checkpoint destination directory ./model_output\checkpoint-30 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.6734, 'learning_rate': 1.25e-05, 'rewards/chosen': -1.2601524591445923, 'rewards/rejected': -2.220651149749756, 'rewards/accuracies': 0.6833333373069763, 'rewards/margins': 0.9604988694190979, 'logps/rejected': -403.0313720703125, 'logps/chosen': -394.6908264160156, 'logits/rejected': -27.852750778198242, 'logits/chosen': -27.752178192138672, 'epoch': 0.36}


Checkpoint destination directory ./model_output\checkpoint-40 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.6259, 'learning_rate': 1.75e-05, 'rewards/chosen': -0.8907012939453125, 'rewards/rejected': -3.220024585723877, 'rewards/accuracies': 0.699999988079071, 'rewards/margins': 2.3293235301971436, 'logps/rejected': -411.392578125, 'logps/chosen': -407.0198669433594, 'logits/rejected': -28.221221923828125, 'logits/chosen': -28.57944107055664, 'epoch': 0.48}


Checkpoint destination directory ./model_output\checkpoint-50 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.8121, 'learning_rate': 2.25e-05, 'rewards/chosen': 0.4933929443359375, 'rewards/rejected': -2.754119873046875, 'rewards/accuracies': 0.7250000238418579, 'rewards/margins': 3.2475128173828125, 'logps/rejected': -419.8365173339844, 'logps/chosen': -401.4595947265625, 'logits/rejected': -28.402685165405273, 'logits/chosen': -27.50358772277832, 'epoch': 0.6}


Checkpoint destination directory ./model_output\checkpoint-60 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 1.1894, 'learning_rate': 2.7500000000000004e-05, 'rewards/chosen': 1.541765570640564, 'rewards/rejected': -2.8910584449768066, 'rewards/accuracies': 0.7250000238418579, 'rewards/margins': 4.43282413482666, 'logps/rejected': -398.05450439453125, 'logps/chosen': -393.93072509765625, 'logits/rejected': -26.181493759155273, 'logits/chosen': -26.767601013183594, 'epoch': 0.72}


Checkpoint destination directory ./model_output\checkpoint-70 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 1.3071, 'learning_rate': 3.2500000000000004e-05, 'rewards/chosen': 0.37925484776496887, 'rewards/rejected': -4.146799564361572, 'rewards/accuracies': 0.7666666507720947, 'rewards/margins': 4.526054382324219, 'logps/rejected': -370.5465393066406, 'logps/chosen': -410.6764221191406, 'logits/rejected': -27.533910751342773, 'logits/chosen': -28.77449607849121, 'epoch': 0.84}


Checkpoint destination directory ./model_output\checkpoint-80 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 1.7924, 'learning_rate': 3.7500000000000003e-05, 'rewards/chosen': 2.93487286567688, 'rewards/rejected': -3.9991438388824463, 'rewards/accuracies': 0.6916666626930237, 'rewards/margins': 6.934017658233643, 'logps/rejected': -362.28466796875, 'logps/chosen': -391.1781921386719, 'logits/rejected': -26.513166427612305, 'logits/chosen': -27.775205612182617, 'epoch': 0.96}
{'train_runtime': 586.0087, 'train_samples_per_second': 1.706, 'train_steps_per_second': 0.142, 'train_loss': 1.0003162809165127, 'epoch': 1.0}
Model moved to cuda


  0%|          | 0/1503 [00:00<?, ?it/s]

{'loss': 1.4792, 'learning_rate': 4.9401197604790424e-05, 'epoch': 0.04}
{'loss': 1.4541, 'learning_rate': 4.873586161011311e-05, 'epoch': 0.08}
{'loss': 1.3566, 'learning_rate': 4.80705256154358e-05, 'epoch': 0.12}
{'loss': 1.3989, 'learning_rate': 4.7405189620758485e-05, 'epoch': 0.16}
{'loss': 1.4902, 'learning_rate': 4.673985362608118e-05, 'epoch': 0.2}
{'loss': 1.3003, 'learning_rate': 4.6074517631403865e-05, 'epoch': 0.24}
{'loss': 1.3627, 'learning_rate': 4.540918163672655e-05, 'epoch': 0.28}
{'loss': 1.4296, 'learning_rate': 4.474384564204924e-05, 'epoch': 0.32}
{'loss': 1.417, 'learning_rate': 4.4078509647371926e-05, 'epoch': 0.36}
{'loss': 1.3579, 'learning_rate': 4.341317365269461e-05, 'epoch': 0.4}
{'loss': 1.3682, 'learning_rate': 4.274783765801731e-05, 'epoch': 0.44}
{'loss': 1.3835, 'learning_rate': 4.2082501663339994e-05, 'epoch': 0.48}
{'loss': 1.3177, 'learning_rate': 4.141716566866268e-05, 'epoch': 0.52}
{'loss': 1.2993, 'learning_rate': 4.075182967398537e-05, 'epoch

Checkpoint destination directory ./gpt2-codesearchnet-finetuned-dpo-args\checkpoint-500 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 1.275, 'learning_rate': 3.343313373253493e-05, 'epoch': 1.0}
{'loss': 1.2533, 'learning_rate': 3.276779773785762e-05, 'epoch': 1.04}
{'loss': 1.1588, 'learning_rate': 3.2102461743180304e-05, 'epoch': 1.08}
{'loss': 1.2272, 'learning_rate': 3.143712574850299e-05, 'epoch': 1.12}
{'loss': 1.2371, 'learning_rate': 3.0771789753825685e-05, 'epoch': 1.16}
{'loss': 1.1376, 'learning_rate': 3.013972055888224e-05, 'epoch': 1.2}
{'loss': 1.2122, 'learning_rate': 2.9474384564204926e-05, 'epoch': 1.24}
{'loss': 1.1916, 'learning_rate': 2.8809048569527613e-05, 'epoch': 1.28}
{'loss': 1.1891, 'learning_rate': 2.81437125748503e-05, 'epoch': 1.32}
{'loss': 1.2364, 'learning_rate': 2.7478376580172987e-05, 'epoch': 1.36}
{'loss': 1.2102, 'learning_rate': 2.6813040585495673e-05, 'epoch': 1.4}
{'loss': 1.1813, 'learning_rate': 2.6147704590818367e-05, 'epoch': 1.44}
{'loss': 1.1859, 'learning_rate': 2.5482368596141054e-05, 'epoch': 1.48}
{'loss': 1.1703, 'learning_rate': 2.481703260146374e-05, 'epo

Resolving data files:   0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/83 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (4511 > 1024). Running this sequence through the model will result in indexing errors
Could not estimate the number of tokens of the input, floating-point operations will not be computed
Checkpoint destination directory ./model_output\checkpoint-10 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.6868, 'learning_rate': 2.5e-06, 'rewards/chosen': -0.006717236712574959, 'rewards/rejected': -0.020466836169362068, 'rewards/accuracies': 0.18333333730697632, 'rewards/margins': 0.01374959945678711, 'logps/rejected': -404.7783508300781, 'logps/chosen': -400.45892333984375, 'logits/rejected': 3.5620877742767334, 'logits/chosen': 2.15421986579895, 'epoch': 0.12}


Checkpoint destination directory ./model_output\checkpoint-20 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.6131, 'learning_rate': 7.5e-06, 'rewards/chosen': -0.6784206628799438, 'rewards/rejected': -1.1542631387710571, 'rewards/accuracies': 0.625, 'rewards/margins': 0.47584250569343567, 'logps/rejected': -405.0921325683594, 'logps/chosen': -386.9043273925781, 'logits/rejected': 4.737513065338135, 'logits/chosen': 3.506503105163574, 'epoch': 0.24}


Checkpoint destination directory ./model_output\checkpoint-30 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.6768, 'learning_rate': 1.25e-05, 'rewards/chosen': -0.641642689704895, 'rewards/rejected': -2.0049540996551514, 'rewards/accuracies': 0.7083333134651184, 'rewards/margins': 1.363311529159546, 'logps/rejected': -399.8486328125, 'logps/chosen': -406.9419860839844, 'logits/rejected': 2.7293238639831543, 'logits/chosen': 2.1308655738830566, 'epoch': 0.36}


Checkpoint destination directory ./model_output\checkpoint-40 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.631, 'learning_rate': 1.7000000000000003e-05, 'rewards/chosen': 0.44887658953666687, 'rewards/rejected': -1.8868328332901, 'rewards/accuracies': 0.7583333253860474, 'rewards/margins': 2.3357090950012207, 'logps/rejected': -393.0673828125, 'logps/chosen': -392.3971862792969, 'logits/rejected': -0.7178871035575867, 'logits/chosen': -1.4798275232315063, 'epoch': 0.48}


Checkpoint destination directory ./model_output\checkpoint-50 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.829, 'learning_rate': 2.2000000000000003e-05, 'rewards/chosen': 1.1804136037826538, 'rewards/rejected': -1.8838536739349365, 'rewards/accuracies': 0.6916666626930237, 'rewards/margins': 3.064267158508301, 'logps/rejected': -386.8988037109375, 'logps/chosen': -393.7730407714844, 'logits/rejected': 0.4945967495441437, 'logits/chosen': -2.4242303371429443, 'epoch': 0.6}


Checkpoint destination directory ./model_output\checkpoint-60 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 1.1651, 'learning_rate': 2.7000000000000002e-05, 'rewards/chosen': -3.2439167499542236, 'rewards/rejected': -8.207462310791016, 'rewards/accuracies': 0.7749999761581421, 'rewards/margins': 4.9635467529296875, 'logps/rejected': -428.8787841796875, 'logps/chosen': -378.6533203125, 'logits/rejected': 0.9358721971511841, 'logits/chosen': 0.8868151307106018, 'epoch': 0.72}


Checkpoint destination directory ./model_output\checkpoint-70 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 1.3287, 'learning_rate': 3.2000000000000005e-05, 'rewards/chosen': 0.021077314391732216, 'rewards/rejected': -5.554483890533447, 'rewards/accuracies': 0.800000011920929, 'rewards/margins': 5.575562477111816, 'logps/rejected': -399.04974365234375, 'logps/chosen': -357.0007019042969, 'logits/rejected': -0.2401006668806076, 'logits/chosen': -0.8948389887809753, 'epoch': 0.84}


Checkpoint destination directory ./model_output\checkpoint-80 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 2.0623, 'learning_rate': 3.7e-05, 'rewards/chosen': 1.2345842123031616, 'rewards/rejected': -4.46947717666626, 'rewards/accuracies': 0.699999988079071, 'rewards/margins': 5.704061508178711, 'logps/rejected': -407.26837158203125, 'logps/chosen': -394.05206298828125, 'logits/rejected': 1.1850199699401855, 'logits/chosen': -0.8237319588661194, 'epoch': 0.96}
{'train_runtime': 713.7572, 'train_samples_per_second': 1.401, 'train_steps_per_second': 0.116, 'train_loss': 1.02782260366233, 'epoch': 1.0}


# How many parameters in Expert 1 Coder

In [2]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())

# Load the model
model_path = "./gpt2-codesearchnet-dpo-py-js"
model = AutoModelForCausalLM.from_pretrained(model_path)

# Count the number of parameters
total_parameters = count_parameters(model)
print(f"Total number of parameters: {total_parameters}")

Total number of parameters: 124439808


# Test Expert 1 Coder

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

model_path = "./gpt2-codesearchnet-dpo-py-js"

# Configure bitsandbytes quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant = False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the model with quantization config
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    #quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    cache_dir=cache_dir  
)
model.eval()

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)

# Move model to GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

# Function to generate text
def generate_simple(model, tokenizer, prompt):
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    outputs = model.generate(inputs, max_length=50)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Generate text with a custom prompt
prompt = "Write python code to add 3 and 1"
generated_text = generate_simple(model, tokenizer, prompt)
print(generated_text)

# Expert 2- Preference Aligner

In [2]:
cache_dir = ''

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
import torch

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v0.6"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant = True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

#config = AutoConfig.from_pretrained(model_id)
# config.max_position_embeddings = 4096

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    #config = config,
    #quantization_config = bnb_config,
    #rope_scaling = {"type": "Linear","factor": 2.0},
    device_map = 'auto',
    #trust_remote_code = False,
    torch_dtype = torch.bfloat16,
    #use_flash_attention_2 = True, # dep apparently
    #attn_implementation="flash_attention_2",
    cache_dir = cache_dir
)

model.to("cuda")


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head): Line

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast = True)

def generate_simple(model, tokenizer, prompt):
  inputs = tokenizer.encode(prompt, return_tensors = "pt").to("cuda")
  outputs = model.generate(inputs, max_length= 50)
  return tokenizer.decode(outputs[0], skip_special_tokens = True)

prompt = "The Quick brown fox"
generated_text = generate_simple(model, tokenizer, prompt)
print(generated_text)

The Quick brown fox jumps over the lazy dog.

The dog: (laughing)

The Quick: (laughing)

The dog: (laughing)

The Quick: (laughing)



# LoRA

In [5]:
from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()

In [5]:
def print_trainable_parameters(model):
  trainable_params = 0
  non_trainable_params = 0
  all_params = 0

  print("Trainable Parameters: ")
  for name, param in model.named_parameters():
    all_params += param.numel()
    if param.requires_grad:
      trainable_params += param.numel()
      print(f" {name}")
    else:
      non_trainable_params += param.numel()

  print("\nNon-Trainable Parameters: ")
  for name, param in model.named_parameters():
    if not param.requires_grad:
      print(f" {name}")


  print(
      f"\nSummary:\n Trainable Params: {trainable_params} \n Non-Trainable params: {non_trainable_params}"
  )
print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head): Line

In [10]:
from peft import LoraConfig, get_peft_model
peft_config = LoraConfig(
    r = 8,
    lora_alpha = 32,
    target_modules = ["self_attn.q_proj",
                      "self_attn.k_proj",
                      "self_attn.v_proj",
                      "self_attn.o_proj",
                      #"self_attn.rotary_emb.inv_freq",
                      "mlp.gate_proj",
                      "mlp.up_proj",
                      "mlp.down_proj",
                      ],
    lora_dropout = 0.1,
    bias = "none",
    task_type = "CAUSAL_LM"
)
model = get_peft_model(model, peft_config)


The following directories listed in your path were found to be non-existent: {WindowsPath('C')}
The following directories listed in your path were found to be non-existent: {WindowsPath('vs/workbench/api/node/extensionHostProcess')}
The following directories listed in your path were found to be non-existent: {WindowsPath('/matplotlib_inline.backend_inline'), WindowsPath('module')}
The following directories listed in your path were found to be non-existent: {WindowsPath('/usr/local/cuda/lib64')}
DEBUG: Possible options found for libcudart.so: set()
CUDA SETUP: PyTorch settings found: CUDA_VERSION=121, Highest Compute Capability: 8.6.
CUDA SETUP: To manually override the PyTorch CUDA version please see:https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md
CUDA SETUP: Loading binary c:\Users\robbi\anaconda3\envs\my_gpu_env_llm\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.so...
argument of type 'WindowsPath' is not iterable
CUDA SETUP: Problem: The 

RuntimeError: 
        CUDA Setup failed despite GPU being available. Please run the following command to get more information:

        python -m bitsandbytes

        Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them
        to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes
        and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast= True)
print(tokenizer)
print(tokenizer.vocab_size)

LlamaTokenizerFast(name_or_path='TinyLlama/TinyLlama-1.1B-Chat-v0.6', vocab_size=32000, model_max_length=2048, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
32000


In [8]:
if '<pad>' not in tokenizer.get_vocab():
  added_tokens = tokenizer.add_special_tokens({"pad_token":"<pad>"})

else:
  added_tokens = 0


if added_tokens > 0:
  model.resize_token_embeddings(len(tokenizer))
  print('\n\nResizing token embeddings for the model\n\n')

model.config.pad_token_id = tokenizer.pad_token_id

assert model.config.pad_token_id == tokenizer.pad_token_id, "The model's pad token is wrong length"
assert model.config.eos_token_id == tokenizer.eos_token_id, "The model's pad token is wrong length"

print('Tokenizer EOS token ID: ', tokenizer.eos_token_id)
print('Tokenizer EOS token: ', tokenizer.decode([tokenizer.eos_token_id]))

print('Model EOS token ID: ', model.config.eos_token_id)
print('Model EOS token: ', tokenizer.decode([model.config.eos_token_id]))

print('Model BOS token ID: ', model.config.eos_token_id)
print('Model BOS token: ', tokenizer.decode([model.config.eos_token_id]))

print(tokenizer)

Tokenizer EOS token ID:  2
Tokenizer EOS token:  </s>
Model EOS token ID:  2
Model EOS token:  </s>
Model BOS token ID:  2
Model BOS token:  </s>
LlamaTokenizerFast(name_or_path='TinyLlama/TinyLlama-1.1B-Chat-v0.6', vocab_size=32000, model_max_length=2048, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	32000: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}


In [9]:
from transformers import TextStreamer
from peft import PeftModel
import torch
import gc
# Define a stream
def stream(user_prompt, model_type, checkpoint=''):

    if model_type == 'base':
        eval_model = model

    elif model_type == 'fine-tuned':
        eval_model = PeftModel.from_pretrained(model, checkpoint)  # Assuming PeftModel is a pre-trained model class
        eval_model = eval_model.to("cuda")

        for n, p in eval_model.named_parameters():
            if p.device.type == "cpu":
                print(f"{n} is on cpu!")

    else:
        print("You must set the model_type to base or fine-tuned'")
        exit()  # or raise an exception

    #print(f'Proceeding to inference with peft adapters from {checkpoint}')

    eval_model.config.use_cache = True

    # Llama style
    system_prompt = ""
    B_INST, E_INST = "[INST]", "[/INST]"
    prompt = f"{B_INST} {user_prompt.strip()} {E_INST}"

    print(prompt)

    inputs = tokenizer((prompt), return_tensors="pt").to("cuda")

    if "token_type_ids" in inputs:
        del inputs ["token_type_ids"]

    streamer = TextStreamer(tokenizer)

    print(f'eval_model is on: (next(eval_model.parameters()).device)') # Debug i
    print(f'input_ids are on: <inputs ["input_ids"].device)') # Debug line

    # Despite returning the usual output, the streamer will also print the gener
    # = eval_model.generate(**inputs, streamer-streamer)

    _ = eval_model.generate(**inputs, streamer=streamer, max_new_tokens=50,
                          no_repeat_ngram_size=2, top_p=0.9, num_beams=1, do_sample=True)

    torch.cuda.empty_cache()
    gc.collect()

def evaluation(model_type, checkpoint=''):
  questions = [
      "Who are you?",
      "What is the meaning of life?",
      "What is the purpose of building AI?"
  ]

  answers = [
      "",
      "",
      ""
  ]
  for question, answer in zip(questions, answers):
    stream(question, model_type, checkpoint)
    print('\n\n')

print(model.generation_config)

evaluation("base")

GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "max_length": 2048,
  "pad_token_id": 0
}

[INST] Who are you? [/INST]
eval_model is on: (next(eval_model.parameters()).device)
input_ids are on: <inputs ["input_ids"].device)
<s> [INST] Who are you? [/INST]

[INST1] Hello, this is [NAME]. What's up?
You look pretty, I mean really nice. Do you want to go out tonight? I want us to hang out at the bar.
How about



[INST] What is the meaning of life? [/INST]
eval_model is on: (next(eval_model.parameters()).device)
input_ids are on: <inputs ["input_ids"].device)
<s> [INST] What is the meaning of life? [/INST]

The meaning is that we must look within ourselves and make a choice about how we wish to use our life on earth. We can choose to be a good person, to contribute to society, or to live life for ourselves. This quote encourag



[INST] What is the purpose of building AI? [/INST]
eval_model is on: (next(eval_model.parameters()).device)
input_ids are on: <inputs ["input_ids"

In [10]:
from datasets import load_dataset

#dataset = "insub/imdb_prefix20_forDPO_gpt2-large-imdb-FT_siebert_sentiment-roberta-large-english"
dataset = "objects76/Anthropic-hh-rlhf-dpo"
data = load_dataset(dataset)

In [11]:
#extract text from first row of test
text = data['train'][0]['prompt']

# Tokenize the text
tokens = tokenizer.encode(text, add_special_tokens = True)

# Decode back to text
decoded_text = tokenizer.decode(tokens)

# Print tokens and decoded text
print("Token IDs:", tokens)
print("Decoded text:", decoded_text)

Token IDs: [1, 518, 25580, 29962, 1724, 526, 777, 274, 1558, 3838, 297, 3033, 1674, 29973, 518, 29914, 25580, 29962, 2266, 30010, 29879, 385, 28907, 1051, 29889, 13, 13, 7900, 29892, 270, 860, 29892, 6494, 914, 29892, 274, 2390, 29892, 285, 2707, 29892, 528, 277, 29892, 289, 2335, 29892, 7013, 29881, 29892, 528, 277, 2813, 29892, 528, 277, 23156, 29892, 885, 5450, 398, 29892, 274, 1657, 29892, 377, 487, 29892, 285, 29583, 29892, 528, 277, 29899, 29872, 1218, 29892, 13299, 29892, 13299, 21454, 29892, 285, 2707, 29876, 688, 657, 29892, 541, 386, 1772, 29892, 772, 459, 29892, 28015, 465, 29892, 1302, 384, 2146, 4937, 29892, 408, 845, 1772, 29892, 7339, 16846, 29876, 29892, 282, 790, 29892, 269, 17858, 29892, 13031, 29892, 281, 804, 29892, 432, 1608, 29892, 13299, 29899, 2146, 384, 292, 29892, 286, 579, 9265, 403, 29892, 285, 351, 7085, 29892, 712, 261, 29892, 432, 4981, 29892, 432, 4981, 29899, 1406, 292, 29892, 16810, 12356, 29892, 2243, 329, 29892, 923, 1008, 29892, 363, 7823, 1061, 298

In [17]:
# Trimming the test split to the first 960 rows
data['test'] = data['test'].select(range(50))

In [18]:
model_name = model_id.split("/")[-1]
dataset_name = dataset.split("/")[-1]

context_length = 512
grad_accum = 20
batch_size = 1
fine_tune_tag = 'ROB-DPO'

epochs = 0.1
drive_base_path = './abstractor-dpo-lora'

# Update the save_dir path
save_dir = f'{drive_base_path}/{model_name}_{dataset_name}_{epochs}_epochs_{context_length}_length_{fine_tune_tag}'
print(save_dir)

./abstractor-dpo-lora/TinyLlama-1.1B-Chat-v0.6_Anthropic-hh-rlhf-dpo_0.1_epochs_512_length_ROB-DPO


In [19]:
from transformers import TrainingArguments
from trl import DPOTrainer

training_arguments = TrainingArguments(
    output_dir="Results",
    evaluation_strategy="steps",
    do_eval=True,
    eval_steps=0.25,
    #optim="paged_adamw_8bit",

    optim="adamw_torch",
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=grad_accum,
    per_device_eval_batch_size=batch_size,

    log_level="debug",
    save_steps=0.25,

    logging_steps=10,
    # learning_rate=5e-7,

    learning_rate=1e-5,
    num_train_epochs=epochs,

    # max_steps=steps,
    # warmup_steps=20,

    # ir_scheduler_type="linear",
    lr_scheduler_type="constant",
)

trainer = DPOTrainer(
    model,
    #model_ref,
    args = training_arguments,
    beta = 0.1,
    #peft_config = peft_config,
    train_dataset = data['train'],
    eval_dataset = data['test'],
    tokenizer = tokenizer,

)

model.config.use_cache = False

trainer.train()

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


OutOfMemoryError: CUDA out of memory. Tried to allocate 126.00 MiB. GPU 0 has a total capacty of 4.00 GiB of which 0 bytes is free. Of the allocated memory 10.29 GiB is allocated by PyTorch, and 421.19 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
model.save_pretrained(save_dir)