In [1]:
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
!pip install torchviz

In [9]:
from unsloth import FastLanguageModel

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW

from trl import DPOTrainer
from datasets import load_dataset
from functools import partial

from tqdm import tqdm

import wandb

from transformers import TrainingArguments



In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

In [17]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "tayyibsupercool/Phi_3.5_mini-resource_allocation-energy_efficiecy_instruct_10k",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = "", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2025.1.6: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [6]:
model_ref, _ = FastLanguageModel.from_pretrained(
    model_name = "tayyibsupercool/Phi_3.5_mini-resource_allocation-energy_efficiecy_instruct_10k",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = "", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2025.1.6: Fast Llama patching. Transformers: 4.47.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [7]:
# Map dataset to the format DPOTrainer expects
def preprocess(example):
    return {
        "prompt": example["instruction"] + example["input"],
        "chosen": example["most_preferred_response"],
        "rejected": example["least_preferred_response"]
    }

In [31]:
dataset = load_dataset("json", data_files="dpo_dataset(1).json")
dataset = dataset.map(preprocess)
dataset['train'][0]

{'sample_index': '4178',
 'instruction': 'Take a deep breath and work on this problem step-by-step. You are a mathematical tool to predict some model. Your job is to predict B for given A. The following is the dataset that you can use for the prediction.',
 'input': 'If A is -56, -352, -172, -32,',
 'most_preferred_response': '0,52.',
 'least_preferred_response': '0,13.',
 'prompt': 'Take a deep breath and work on this problem step-by-step. You are a mathematical tool to predict some model. Your job is to predict B for given A. The following is the dataset that you can use for the prediction.If A is -56, -352, -172, -32,',
 'chosen': '0,52.',
 'rejected': '0,13.'}

In [18]:
# Define the output directory
output_dir = "./results"

In [34]:
batch_size = 4
max_length = 512
lr = 1e-6
epochs = 1

In [29]:
dataset = load_dataset("json", data_files=("dpo_dataset(1).json"), split="train")


In [39]:
def train(model, ref_model, dataset, tokenizer, beta, training_args):
    model.train()
    ref_model.eval()

    dpo_trainer = DPOTrainer(
        model,
        ref_model,
        beta=beta,
        train_dataset=dataset,
        tokenizer=tokenizer,
        args=training_args,
        fp16=True,
        max_length=1024,
        max_prompt_length=512
    )

    dpo_trainer.train()

training_args = TrainingArguments(
        learning_rate=lr,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        report_to="wandb",
        output_dir='./results',
        logging_steps=10,
        remove_unused_columns=False,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 300,
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 2003,
    )

train(model, model_ref, dataset['train'], tokenizer, 0.1, training_args)

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 10,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 300
 "-____-"     Number of trainable parameters = 29,884,416


Step,Training Loss
10,0.438
20,0.4529
30,0.5067
40,0.4432
50,0.3984
60,0.3992
70,0.451
80,0.4112
90,0.4276
100,0.3577


In [40]:
!pip install huggingface_hub



In [41]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [42]:


model.save_pretrained("phi3.5mini_dpo_10k") # Local saving
tokenizer.save_pretrained("phi3.5mini_dpo_10k")
model.push_to_hub("aamina/phi3.5mini_dpo_10k") # Online saving
tokenizer.push_to_hub("aamina/phi3.5mini_dpo_10k") # Online saving



README.md:   0%|          | 0.00/595 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/120M [00:00<?, ?B/s]

Saved model to https://huggingface.co/aamina/phi3.5mini_dpo_10k


  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]