In [29]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install transformers accelerate peft


Looking in indexes: https://download.pytorch.org/whl/cpu


In [41]:
# Validate the datsets

import pandas as pd

# Load dataset
file_path = "Datasets/probability_dataset.csv"
df = pd.read_csv(file_path)

# Display first few rows
print(df.head())
print(df.columns)  # Show column names


       Subject                                           Question Answer  \
0  Probability  What is the probability of rolling a 3 on a fa...    1/6   
1  Probability  What is the probability of rolling a 2 on a fa...    1/6   
2  Probability  What is the probability of rolling a 4 on a fa...    1/6   
3  Probability  What is the probability of rolling a 5 on a fa...    1/6   
4  Probability  What is the probability of rolling a 4 on a fa...    1/6   

  Difficulty  
0       Easy  
1       Easy  
2       Easy  
3       Easy  
4       Easy  
Index(['Subject', 'Question', 'Answer', 'Difficulty'], dtype='object')


In [42]:
# Rename relevant columns
df = df.rename(columns={"Question": "prompt", "Answer": "response"})

# Remove unnecessary columns
df = df[["prompt", "response"]]

# Save cleaned dataset
cleaned_file_path = "Datasets/cleaned_probability_dataset.csv"
df.to_csv(cleaned_file_path, index=False)

print("Dataset cleaned and saved!")


Dataset cleaned and saved!


In [43]:
from datasets import load_dataset

# Load from cleaned CSV file
dataset = load_dataset("csv", data_files="Datasets/cleaned_probability_dataset.csv")

# Check dataset structure
print(dataset)


Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['prompt', 'response'],
        num_rows: 300
    })
})


In [48]:
from transformers import AutoTokenizer

# Load Phi-2 tokenizer
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set padding token
tokenizer.pad_token = tokenizer.eos_token  # ✅ Fix missing padding token

# Tokenization function
def tokenize_function(examples):
    text_list = [q + " " + a for q, a in zip(examples["prompt"], examples["response"])]  # ✅ Ensure list format
    tokenized_output = tokenizer(text_list, truncation=True, padding="max_length", max_length=512)

    # ✅ Add labels (labels = input_ids for causal language modeling)
    tokenized_output["labels"] = tokenized_output["input_ids"].copy()
    
    return tokenized_output

# Apply tokenization
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Remove original text columns
tokenized_dataset = tokenized_dataset.remove_columns(["prompt", "response"])

print("✅ Dataset tokenized successfully with labels!")




Map:   0%|          | 0/300 [00:00<?, ? examples/s]

✅ Dataset tokenized successfully with labels!


In [20]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "microsoft/phi-2"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model in BF16 (bfloat16) - Best for Mac M3
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # Optimized for Apple Silicon
    device_map="auto"  # Uses Apple Metal backend
)

print("Phi-2 model loaded successfully with BF16!")




model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Phi-2 model loaded successfully with BF16!


In [None]:
# Fine-tuning full models requires a lot of memory, so we use LoRA (efficient fine-tuning).

from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,  # LoRA rank (lower = less memory)
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],  # Focus on attention layers
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)

print("LoRA applied to Phi-2 for efficient fine-tuning!")





LoRA applied to Phi-2 for efficient fine-tuning!


In [49]:
# set up the training parameters and fine-tune

from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./fine-tuned-phi2",
    per_device_train_batch_size=1,  # Small batch size for Mac M3
    gradient_accumulation_steps=8,  # Helps with memory efficiency
    learning_rate=2e-4,
    num_train_epochs=3,
    bf16=True,  # Enable BF16 for best performance
    save_strategy="epoch",
    logging_steps=10,
    remove_unused_columns=False  # ✅ Fixes column mismatch issue
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],  # ✅ Now using tokenized inputs
    tokenizer=tokenizer
)

trainer.train()




  trainer = Trainer(


Step,Training Loss
10,2.9506
20,0.0827
30,0.0509
40,0.0308
50,0.0256
60,0.0152
70,0.0116
80,0.0107
90,0.0087
100,0.0081


TrainOutput(global_step=111, training_loss=0.288666075701733, metrics={'train_runtime': 1333.8506, 'train_samples_per_second': 0.675, 'train_steps_per_second': 0.083, 'total_flos': 7167237999820800.0, 'train_loss': 0.288666075701733, 'epoch': 2.9333333333333336})

In [59]:
## Training took ~22 min

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_name = "microsoft/phi-2"  # Base model
fine_tuned_model_path = "/Users/abhijitroy/Downloads/Edumate_phi/fine-tuned-phi2"

# Load base Phi-2 model
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load fine-tuned LoRA adapter
model = PeftModel.from_pretrained(base_model, fine_tuned_model_path)

print("✅ LoRA adapter loaded successfully!")




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ LoRA adapter loaded successfully!




In [60]:
# Merge LoRA adapters into the base model
merged_model = model.merge_and_unload()

# Save the full model
save_path = "/Users/abhijitroy/Downloads/Edumate_phi/final_model"
merged_model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print("✅ Full model saved at:", save_path)


✅ Full model saved at: /Users/abhijitroy/Downloads/Edumate_phi/final_model


In [63]:

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Define the model path
model_path = "/Users/abhijitroy/Downloads/Edumate_phi/final_model"

# Load the merged model
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

print("✅ Model loaded successfully!")


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use mps:0


✅ Model loaded successfully!


In [64]:
# List of probability questions for testing
questions = [
    "What is the probability of rolling two sixes in a row?",
    "If a coin is flipped twice, what is the probability of getting at least one heads?",
    "A bag contains 3 red and 2 blue balls. What is the probability of drawing a red ball?"
]

# Generate answers
for q in questions:
    result = pipe(q, max_length=100, num_return_sequences=1)
    print(f"🔹 Question: {q}")
    print(f"✅ Model Answer: {result[0]['generated_text']}\n")


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🔹 Question: What is the probability of rolling two sixes in a row?
✅ Model Answer: What is the probability of rolling two sixes in a row?

Answer: The probability of rolling two sixes in a row is 1/36.

Exercise 3:
What is the probability of flipping a coin and getting heads twice in a row?

Answer: The probability of flipping a coin and getting heads twice in a row is 1/4.

Exercise 4:
What is the probability of rolling a number less than 4 on a six-sided



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🔹 Question: If a coin is flipped twice, what is the probability of getting at least one heads?
✅ Model Answer: If a coin is flipped twice, what is the probability of getting at least one heads?
    """
    total_outcomes = 2**2
    no_of_outcomes_with_no_heads = 1
    no_of_outcomes_with_at_least_one_heads = total_outcomes - no_of_outcomes_with_no_heads
    probability_of_at_least_one_heads

🔹 Question: A bag contains 3 red and 2 blue balls. What is the probability of drawing a red ball?
✅ Model Answer: A bag contains 3 red and 2 blue balls. What is the probability of drawing a red ball?

Answer: The probability of drawing a red ball is 3/5.

Exercise 2:
A coin is flipped 3 times. What is the probability of getting heads on all 3 flips?

Answer: The probability of getting heads on all 3 flips is 1/8.

Exercise 3:
A deck of cards contains 52 cards. What is the probability of



In [None]:
def generate_probability_question(difficulty="medium"):
    """
    Generates a probability question based on the selected difficulty level.

    Args:
    - difficulty (str): Choose from "easy", "medium", or "hard".

    Returns:
    - str: Generated probability question.
    """
    
    # Define prompt based on difficulty level
    prompts = {
        "easy": "Generate an easy probability question:",
        "medium": "Generate a medium-level probability question:",
        "hard": "Generate a hard probability question:"
    }

    # Ensure valid difficulty
    if difficulty not in prompts:
        raise ValueError("Invalid difficulty! Choose from: 'easy', 'medium', or 'hard'.")

    # Generate question
    question = pipe(prompts[difficulty], 
                    max_length=100, 
                    num_return_sequences=1, 
                    temperature=0.8,  # Add randomness
                    top_p=0.9,  # Nucleus sampling
                    top_k=50)  # Limit word selection

    # Extract generated text and clean up prompt
    return question[0]['generated_text'].replace(prompts[difficulty], '').strip()


In [1]:
# Generate questions of different difficulties
print("🔹 Easy Question:", generate_probability_question("easy"))
print("🔹 Medium Question:", generate_probability_question("medium"))
print("🔹 Hard Question:", generate_probability_question("hard"))


NameError: name 'generate_probability_question' is not defined