In [None]:
# Install required packages
!pip install bitsandbytes peft trl --quiet
!pip install --upgrade datasets --quiet

In [None]:
# Import necessary libraries
from datasets import Dataset, load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer
import torch
import wandb
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login

In [None]:
# Load the "train" split of the preference dataset
data = load_dataset(path="August4293/Preference-Dataset", split="train")

In [None]:
# Build the single-string SFT sample for one record
def preprocess_data(example):
    """Attach a ``processed`` field holding the instruction-formatted sample.

    The record's ``prompt`` is wrapped in ``[INST] ... [/INST]`` markers and
    followed by the ``chosen`` response; the whole string is delimited by
    ``<s>`` and ``</s>``.

    Args:
        example: Mapping providing string ``prompt`` and ``chosen`` entries.

    Returns:
        The same ``example`` object, updated in place with ``processed``.

    NOTE(review): the literal ``<s>`` may end up duplicated if the tokenizer
    also prepends its own BOS token at tokenization time -- confirm against
    the tokenization settings used by the trainer.
    """
    pieces = ("<s>[INST] ", example['prompt'], " [/INST] ", example['chosen'], "</s>")
    example["processed"] = "".join(pieces)
    return example

In [None]:
# Format every record, then hold out 5% of the rows for evaluation.
# The fixed seed keeps the train/eval partition identical across kernel
# restarts, so evaluation losses from different runs stay comparable.
data = data.map(preprocess_data).train_test_split(test_size=0.05, seed=42)

In [None]:
# Pull the two splits out of the DatasetDict, exposing the formatted sample
# under the column name "text"
train_dataset, eval_dataset = (
    data[split_name].rename_column('processed', 'text')
    for split_name in ('train', 'test')
)

In [None]:
# Drop the raw preference fields so only the formatted "text" column remains
columns_to_drop = ['prompt', 'rejected', 'chosen']
train_dataset = train_dataset.remove_columns(columns_to_drop)
eval_dataset = eval_dataset.remove_columns(columns_to_drop)

In [None]:
# Read both API tokens from Kaggle secrets (nothing is hard-coded here)
user_secrets = UserSecretsClient()
wandb_token = user_secrets.get_secret("wandb_august")  # consumed by the W&B cell
HF_token = user_secrets.get_secret("HF_august")

# Authenticate this session against the Hugging Face Hub
login(token=HF_token)

In [None]:
# Authenticate with Weights & Biases and open the tracking run
wandb.login(key=wandb_token)

# The run notes record how many training rows this run saw
run_notes = f"Initial SFT run on full dataset of {len(train_dataset)} and 1 epoch"
run = wandb.init(
    project='mistral self-alignment',
    name="test run",
    job_type="training",
    notes=run_notes,
)

In [None]:
# Filesystem location of the base model weights (a Kaggle input path);
# used by both the model and tokenizer loading cells
base_model = "/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1"

In [None]:
# Quantization settings used when loading the base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load weights in 4-bit
    bnb_4bit_use_double_quant=False,        # double quantization disabled
    bnb_4bit_quant_type="nf4",              # NF4 quantization type
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute dtype: bfloat16
)

In [None]:
# Load the base causal-LM with the quantization config defined above
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
)

In [None]:
# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Use a pad token that is distinct from EOS. The language-modeling data
# collator excludes every pad-token position from the loss, so reusing EOS as
# the pad token would also hide the genuine "</s>" that ends each training
# sample, leaving the model with no signal for when to stop generating.
# Fall back to EOS only when the tokenizer defines no unk token.
tokenizer.pad_token = (
    tokenizer.unk_token if tokenizer.unk_token is not None else tokenizer.eos_token
)
tokenizer.padding_side = 'right'

In [None]:
# Run PEFT's k-bit training preparation on the quantized model
model = prepare_model_for_kbit_training(model=model)

In [None]:
# LoRA adapter hyper-parameters
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    # Projection layers that receive an adapter
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

In [None]:
# Wrap the prepared model with the LoRA adapter configuration
model = get_peft_model(model=model, peft_config=peft_config)

In [None]:
# Tally the element count of every parameter that requires gradients
total_params = 0
for param in model.parameters():
    if param.requires_grad:
        total_params += param.numel()
print(f"Total trainable parameters in PEFT adapter: {total_params}")

In [None]:
# Training hyper-parameters, grouped by concern
training_arguments = TrainingArguments(
    # Optimization schedule
    num_train_epochs=1,
    learning_rate=2e-4,
    warmup_ratio=0.03,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    # Logging and evaluation cadence (in optimizer steps)
    logging_steps=200,
    evaluation_strategy="steps",
    eval_steps=200,
    # Checkpointing
    output_dir="/kaggle/working/checkpoints",
    save_strategy="steps",
    save_steps=300,
)

In [None]:
# Assemble the supervised fine-tuning trainer.
# NOTE(review): `model` has already been wrapped by get_peft_model in an
# earlier cell, yet `peft_config` is passed again here -- confirm how the
# installed TRL version treats that combination.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=training_arguments,
    # Data: both splits expose the formatted sample in the "text" column
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=2048,
)

In [None]:
# Start training and always close the W&B run afterwards.
# Without the guard, an exception or a manual kernel interrupt during training
# would skip wandb.finish() and leave the run open; here the run is closed on
# every path and marked as failed when training did not complete.
try:
    trainer.train()
except BaseException:
    # Non-zero exit code flags the run as failed; the error is re-raised so
    # the cell still stops and later cells do not run on a half-trained model.
    wandb.finish(exit_code=1)
    raise
else:
    # Finish WandB run
    wandb.finish()

In [None]:
# Write the trained model to a local directory; the same name is reused as the
# repository name when pushing to the Hub
fine_tuned_model_name = "mistral_self_alignment_SFT"
trainer.model.save_pretrained(save_directory=fine_tuned_model_name)

In [None]:
# Commit message attached to the Hub uploads of the model and the tokenizer
commit_message: str = "Initial adapter with SFT on full dataset and 1 epoch"

In [None]:
# Upload the trained model first, then the tokenizer, to the same Hub
# repository with the same commit message
for artifact in (trainer.model, tokenizer):
    artifact.push_to_hub(fine_tuned_model_name, commit_message=commit_message, use_temp_dir=False)