In [1]:
import json

# Sample dataset of Physics and Math problems
dataset = [
    {
        "subject": "Physics",
        "topic": "Kinematics",
        "difficulty": "Class 11",
        "problem": "A car starts from rest and accelerates uniformly to reach a speed of 20 m/s in 10 seconds. What is the acceleration?",
        "step_by_step_solution": [
            "Initial velocity (u) = 0 m/s (since it starts from rest)",
            "Final velocity (v) = 20 m/s",
            "Time (t) = 10 s",
            "Use the formula: v = u + at",
            "20 = 0 + a * 10",
            "a = 20 / 10 = 2 m/s²"
        ],
        "final_answer": "2 m/s²",
        "formula_used": ["v = u + at"],
        "answer_type": "numerical",
        "tags": ["kinematics", "acceleration"]
    },
    {
        "subject": "Math",
        "topic": "Quadratic Equations",
        "difficulty": "Class 10",
        "problem": "Solve the quadratic equation: x² - 5x + 6 = 0",
        "step_by_step_solution": [
            "We have a quadratic equation in the form ax² + bx + c = 0",
            "Here, a = 1, b = -5, c = 6",
            "Use the quadratic formula: x = [-b ± √(b² - 4ac)] / (2a)",
            "x = [5 ± √(25 - 24)] / 2",
            "x = [5 ± √1] / 2",
            "x = (5 + 1)/2 = 3 or x = (5 - 1)/2 = 2"
        ],
        "final_answer": "x = 2 or x = 3",
        "formula_used": ["x = [-b ± √(b² - 4ac)] / (2a)"],
        "answer_type": "symbolic",
        "tags": ["quadratic", "roots"]
    },
    {
        "subject": "Physics",
        "topic": "Newton's Laws of Motion",
        "difficulty": "Class 11",
        "problem": "A box of mass 10 kg is pulled with a force of 50 N on a frictionless surface. What is the acceleration of the box?",
        "step_by_step_solution": [
            "Force (F) = 50 N",
            "Mass (m) = 10 kg",
            "Use Newton's Second Law: F = ma",
            "50 = 10 * a",
            "a = 50 / 10 = 5 m/s²"
        ],
        "final_answer": "5 m/s²",
        "formula_used": ["F = ma"],
        "answer_type": "numerical",
        "tags": ["newtons laws", "force", "acceleration"]
    },
    {
        "subject": "Math",
        "topic": "Probability",
        "difficulty": "Class 10",
        "problem": "A coin is tossed twice. What is the probability of getting exactly one head?",
        "step_by_step_solution": [
            "Sample space = {HH, HT, TH, TT}",
            "Favorable outcomes = {HT, TH}",
            "Number of favorable outcomes = 2",
            "Total outcomes = 4",
            "Probability = 2 / 4 = 0.5"
        ],
        "final_answer": "0.5",
        "formula_used": ["Probability = (Favorable outcomes) / (Total outcomes)"],
        "answer_type": "numerical",
        "tags": ["probability", "basic probability"]
    }
]

# Save to JSONL format
with open("physics_math_dataset.jsonl", "w", encoding="utf-8") as f:
    for item in dataset:
        f.write(json.dumps(item) + "\n")

print(" Dataset saved to physics_math_dataset.jsonl")


 Dataset saved to physics_math_dataset.jsonl


In [1]:
pip install -U transformers peft accelerate datasets bitsandbytes




In [3]:
from datasets import load_dataset

dataset = load_dataset("json", data_files="physics_math_dataset.jsonl", split="train")

# Check an example
print(dataset[0])


Generating train split: 0 examples [00:00, ? examples/s]

{'subject': 'Physics', 'topic': 'Kinematics', 'difficulty': 'Class 11', 'problem': 'A car starts from rest and accelerates uniformly to reach a speed of 20 m/s in 10 seconds. What is the acceleration?', 'step_by_step_solution': ['Initial velocity (u) = 0 m/s (since it starts from rest)', 'Final velocity (v) = 20 m/s', 'Time (t) = 10 s', 'Use the formula: v = u + at', '20 = 0 + a * 10', 'a = 20 / 10 = 2 m/s²'], 'final_answer': '2 m/s²', 'formula_used': ['v = u + at'], 'answer_type': 'numerical', 'tags': ['kinematics', 'acceleration']}


In [1]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
from transformers import AutoTokenizer

# model_id = "meta-llama/Llama-2-7b-hf"  # You can use Mistral, Zephyr, etc.
model_id = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # Important for fp16
tokenizer.padding_side = "right"           # Avoids overflow errors


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [6]:
def format_prompt(example):
    # Combine step-by-step solution and final answer for the output
    solution = "\n".join(example['step_by_step_solution']) + "\n" + example['final_answer']
    return {
        "input_ids": tokenizer(
            f"### Instruction:\n{example['problem']}\n\n### Solution:\n{solution}",
            truncation=True,
            padding="max_length",
            max_length=512,
            return_tensors="pt"
        )["input_ids"][0]
    }

tokenized_dataset = dataset.map(format_prompt)

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

In [7]:
tokenized_dataset

Dataset({
    features: ['subject', 'topic', 'difficulty', 'problem', 'step_by_step_solution', 'final_answer', 'formula_used', 'answer_type', 'tags', 'input_ids'],
    num_rows: 4
})

In [2]:
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16"
)

In [9]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16"
)




lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Added target modules for Phi-3
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

In [12]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Phi3ForCausalLM(
      (model): Phi3Model(
        (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
        (layers): ModuleList(
          (0-31): 32 x Phi3DecoderLayer(
            (self_attn): Phi3Attention(
              (o_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (

In [13]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

training_args = TrainingArguments(
    output_dir="./physics_math_finetune",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=True,
    num_train_epochs=3,
    logging_dir="./logs",
    save_total_limit=2,
    save_strategy="epoch",
    report_to="none"
)

In [14]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

In [15]:

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator
)


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Supervised Fine-Tuning (SFT) base model

In [16]:
trainer.train()

Step,Training Loss


TrainOutput(global_step=3, training_loss=0.7422953446706136, metrics={'train_runtime': 7.9575, 'train_samples_per_second': 1.508, 'train_steps_per_second': 0.377, 'total_flos': 137287132250112.0, 'train_loss': 0.7422953446706136, 'epoch': 3.0})

In [17]:
model.save_pretrained("physics-math-qlora")

In [18]:
tokenizer.save_pretrained("physics-math-qlora")

('physics-math-qlora/tokenizer_config.json',
 'physics-math-qlora/special_tokens_map.json',
 'physics-math-qlora/chat_template.jinja',
 'physics-math-qlora/tokenizer.model',
 'physics-math-qlora/added_tokens.json',
 'physics-math-qlora/tokenizer.json')

Reward Model (RM) Training separate model

In [19]:
pip install trl peft accelerate transformers datasets wandb


Collecting trl
  Downloading trl-0.20.0-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.20.0-py3-none-any.whl (504 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m504.6/504.6 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trl
Successfully installed trl-0.20.0


Train Reward Model (RM)

In [2]:
from datasets import load_dataset
from trl import RewardTrainer, RewardConfig
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [4]:
dataset = load_dataset("json", data_files="reward_dataset.jsonl", split="train")

In [5]:
dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 1
})

In [22]:
def preprocess(example):
    chosen = tokenizer(example["prompt"] + example["chosen"], truncation=True, padding="max_length", max_length=512)
    rejected = tokenizer(example["prompt"] + example["rejected"], truncation=True, padding="max_length", max_length=512)
    return {
        "input_ids_chosen": chosen["input_ids"],
        "attention_mask_chosen": chosen["attention_mask"],
        "input_ids_rejected": rejected["input_ids"],
        "attention_mask_rejected": rejected["attention_mask"]
    }

In [7]:
model_id = "microsoft/Phi-3-mini-4k-instruct"

In [10]:
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [23]:
tokenized_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

In [12]:
model_rm = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

In [13]:


bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16"
)




lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Added target modules for Phi-3
    bias="none",
    task_type="CAUSAL_LM"
)

model_rm = get_peft_model(model_rm, lora_config)

In [19]:
config = RewardConfig(
    output_dir="./reward_model",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    report_to="none"  # Disable reporting to Weights & Biases
)

In [24]:
trainer = RewardTrainer(model=model_rm, args=config, train_dataset=tokenized_dataset, processing_class=tokenizer)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [25]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss


TrainOutput(global_step=3, training_loss=1.162075122197469, metrics={'train_runtime': 24.2462, 'train_samples_per_second': 0.124, 'train_steps_per_second': 0.124, 'total_flos': 0.0, 'train_loss': 1.162075122197469, 'epoch': 3.0})

In [28]:
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead

config = PPOConfig(
    batch_size=4,
    mini_batch_size=1,
    gradient_accumulation_steps=1
)

In [None]:
model = AutoModelForCausalLM.from_pretrained("/content/physics-math-qlora")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:

tokenizer = AutoTokenizer.from_pretrained("physics-math-qlora")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:



ppo_trainer = PPOTrainer(config, model=model, tokenizer=tokenizer, dataset=prompt_dataset)

ppo_trainer.train()
