## Install Libraries

In [7]:
!pip install -q -U transformers datasets accelerate peft trl bitsandbytes

## Load Dataset

In [13]:
from datasets import load_dataset, Dataset
from itertools import islice

# Step 1: open the train split in streaming mode so examples are read lazily
streamed_dataset = load_dataset(
    "XenArcAI/MathX-5M",
    split="train",
    streaming=True,
)

# Step 2: Normalize columns on the fly
def unify_columns(ex):
    """Rename a ``question`` field to ``problem`` so all examples share one key.

    The example dict is modified in place and the same object is returned.
    Examples that have no ``question`` key are returned untouched.
    """
    try:
        question_text = ex.pop("question")
    except KeyError:
        # Nothing to rename for this example.
        return ex
    ex["problem"] = question_text
    return ex

# Step 2 (cont.): apply the column rename lazily as examples stream in
streamed_dataset = streamed_dataset.map(unify_columns)

# Step 3: materialize only the first NUM_EXAMPLES examples in memory.
# The dataset name suggests ~5M rows, so 10,000 examples is roughly 0.2% of it
# (not 1%); raise NUM_EXAMPLES for a larger training subset.
NUM_EXAMPLES = 10_000
subset = list(islice(streamed_dataset, NUM_EXAMPLES))

# Keep only the three columns used downstream (display, prompt formatting).
dataset = Dataset.from_list(subset).select_columns(
    ["problem", "generated_solution", "expected_answer"]
)

# Inspect
print(dataset)
print(dataset[0])

Resolving data files:   0%|          | 0/213 [00:00<?, ?it/s]

Dataset({
    features: ['problem', 'generated_solution', 'expected_answer'],
    num_rows: 10000
})
{'problem': 'Given a group of \\( N \\) balls consisting of \\( C \\) colors, where the number of balls in each color is represented as \\( n_1, n_2, \\ldots, n_C \\) (with \\( n_1 + n_2 + \\ldots + n_C = N \\)), what is the probability that when \\( A \\) balls are randomly picked (where \\( A \\leq N \\)), the picked balls consist of \\( a_1, a_2, \\ldots, a_C \\) balls of each color, where \\( a_1 + a_2 + \\ldots + a_C = A \\)?', 'generated_solution': "<think>\nOkay, so I need to find the probability that when I pick A balls out of N, where there are C different colors, the number of each color I pick is exactly a1, a2, ..., aC. Hmm, let's think about how to approach this.\n\nFirst, probability problems often involve combinations. The general formula for probability is the number of favorable outcomes divided by the total number of possible outcomes. So, in this case, the favorable o

In [15]:
# Show the dataset summary (feature names and row count) again.
print(dataset)

Dataset({
    features: ['problem', 'generated_solution', 'expected_answer'],
    num_rows: 10000
})


In [29]:
from IPython.display import display, Markdown

def show_example_jupyter(dataset, index=0, max_solution_len=300):
    """Render one example as Markdown: problem, shortened solution, expected answer.

    Args:
        dataset: Indexable collection whose items are mappings with the keys
            ``problem``, ``generated_solution`` and ``expected_answer``.
        index: Position of the example to show; the heading is numbered
            ``index + 1``.
        max_solution_len: Maximum number of solution characters considered
            before the solution is cut and marked with ``" ..."``.

    Returns:
        None. The four Markdown fragments are emitted through ``display``.
    """
    example = dataset[index]

    def to_dollar_math(text):
        # Swap \( ... \) delimiters for $ ... $ so the notebook's Markdown
        # renderer typesets them as inline math.
        return text.replace("\\(", "$").replace("\\)", "$")

    # Truncate solution for readability
    solution = example['generated_solution']
    if len(solution) > max_solution_len:
        solution = solution[:max_solution_len]
        # Prefer ending on a full sentence when one is available ...
        last_period = solution.rfind('.')
        if last_period != -1:
            solution = solution[:last_period + 1]
        # ... but always mark the cut, even when no period was found.
        solution += " ..."

    # All three fields are rendered as Markdown, so all three get the same
    # delimiter conversion (the solution is converted after truncation).
    problem = to_dollar_math(example['problem'])
    solution = to_dollar_math(solution)
    answer = to_dollar_math(example['expected_answer'])

    display(Markdown(f"### Problem #{index+1}\n{problem}"))
    display(Markdown(f"**Generated Solution (truncated):**\n{solution}"))
    display(Markdown(f"**Expected Answer:**\n{answer}"))
    display(Markdown("---"))

# Render the first example (index 0) with the default 300-character solution limit.
show_example_jupyter(dataset, 0)

### Problem #1
Given a group of $ N $ balls consisting of $ C $ colors, where the number of balls in each color is represented as $ n_1, n_2, \ldots, n_C $ (with $ n_1 + n_2 + \ldots + n_C = N $), what is the probability that when $ A $ balls are randomly picked (where $ A \leq N $), the picked balls consist of $ a_1, a_2, \ldots, a_C $ balls of each color, where $ a_1 + a_2 + \ldots + a_C = A $?

**Generated Solution (truncated):**
<think>
Okay, so I need to find the probability that when I pick A balls out of N, where there are C different colors, the number of each color I pick is exactly a1, a2, ..., aC. Hmm, let's think about how to approach this.

First, probability problems often involve combinations. ...

**Expected Answer:**
$\frac{C_{n_1}^{a_1} \cdot C_{n_2}^{a_2} \cdot \ldots \cdot C_{n_C}^{a_C}}{C_N^A}$

---

## Load the tokenizer

In [39]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: an access token used to be hard-coded in this cell. Treat that token
# as leaked: revoke it in the Hugging Face account settings and issue a new one.
# The token is now read from the HF_TOKEN environment variable, falling back to
# an interactive prompt, so the secret never lands in the notebook file.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

# Run this cell before any cell that downloads from a gated repo; an
# unauthenticated request to a gated model fails with a 401 error.
login(token=hf_token)

In [34]:
from transformers import AutoTokenizer

# Hub repo id of the base checkpoint; the same name is reused later when the
# base model is reloaded for merging.
model_name = "meta-llama/Llama-3.1-8B-Instruct"  
# Downloading from a gated repo requires an authenticated Hugging Face session
# with access granted; otherwise this call raises OSError (401).
# NOTE(review): the recorded error output names Meta-Llama-3-8B-Instruct, not
# the repo id set above, so it appears to come from an earlier run - confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name)

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct.
401 Client Error. (Request ID: Root=1-68e8f21a-26e182df7281e86e63fef5ee;a68303ff-9520-40c0-9f77-d92d2838ee6f)

Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in.

## Preprocess data

In [27]:
def format_prompt(sample):
    """Turn one dataset example into a chat-formatted training string.

    The problem is the user turn and the reference solution is the assistant
    turn, so the text the model is trained on has the same shape as a real
    conversation (user asks, assistant answers). Putting the solution inside
    the user message would teach the model nothing about answering as the
    assistant.

    Args:
        sample: Mapping with the keys ``problem`` and ``generated_solution``.

    Returns:
        ``{"text": <rendered conversation>}``.

    Relies on the notebook-level ``tokenizer`` and its chat template, so the
    tokenizer cell must have run successfully before this function is used.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant that solves math problems."},
        {"role": "user", "content": sample["problem"]},
        {"role": "assistant", "content": sample["generated_solution"]},
    ]

    # tokenize=False returns the rendered string instead of token ids.
    return {"text": tokenizer.apply_chat_template(messages, tokenize=False)}

# Apply to dataset: run format_prompt on every example. Each call returns a
# dict with a "text" entry, the field the trainer is later pointed at via
# dataset_text_field="text".
formatted_dataset = dataset.map(format_prompt)

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

NameError: name 'tokenizer' is not defined

## Training

In [None]:
from transformers import TrainingArguments
from peft import LoraConfig

# LoRA adapter settings for causal language modelling
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    # adapter rank and its scaling factor
    r=16,
    lora_alpha=32,
    # dropout applied inside the adapter layers
    lora_dropout=0.05,
    # bias terms are left untouched
    bias="none",
)

# Trainer hyperparameters, grouped by concern
training_args = TrainingArguments(
    # where checkpoints are written, and how often
    output_dir="./llama3-8b-math-tuned",
    save_steps=50,
    # batching: 4 samples per device, gradients accumulated over 4 steps
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # optimisation
    learning_rate=2e-4,
    max_steps=100,      # Increase this for a more thorough training
    fp16=True,
    # progress reporting
    logging_steps=10,
)

In [None]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer

# `model` was never defined anywhere in this notebook, so building the trainer
# failed with NameError on a fresh kernel. Load the base model here.
# It is loaded 4-bit quantized: the merge step further down reloads the base
# model "without quantization", which implies the training copy is quantized.
# NOTE(review): these quantization settings are a standard QLoRA choice, not
# recovered from the original notebook - confirm they match the intended setup.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,  # matches fp16=True in the training arguments
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# NOTE(review): recent TRL releases moved dataset_text_field / max_seq_length
# into SFTConfig and renamed `tokenizer`; because the install cell uses -U,
# confirm this call against the installed TRL version.
trainer = SFTTrainer(
    model=model,
    train_dataset=formatted_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_args,
)

In [None]:
# Start fine-tuning with the trainer built in the previous cell.
trainer.train()

## Export adapters

In [None]:
# Directory the trainer's model is saved to; the merge step reads the adapters
# back from this same path.
adapter_path = "./llama3-8b-math-tuned-adapters"
trainer.save_model(adapter_path)

print(f"LoRA adapters saved to {adapter_path}")

## Merge with base model

In [None]:
# torch and AutoModelForCausalLM are used below but were never imported in this
# notebook, so the cell raised NameError on a fresh kernel.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# --- Reload the base model without quantization ---
# This is important for merging and for Ollama compatibility
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# --- Load the PeftModel with the adapters ---
# Attaches the adapters saved under adapter_path to the freshly loaded base model.
model = PeftModel.from_pretrained(base_model, adapter_path)

# --- Merge the weights and save the new model ---
# After merging, `model` is a plain model with the adapter weights folded in.
model = model.merge_and_unload()

# The tokenizer is saved next to the weights so the directory is self-contained.
merged_model_path = "./llama3-8b-math-merged"
model.save_pretrained(merged_model_path)
tokenizer.save_pretrained(merged_model_path)

print(f"Merged model saved to {merged_model_path}")

In [None]:
# Pack the merged model directory recursively into a single zip archive
# (presumably to make it easier to download - confirm).
!zip -r llama3-8b-math-merged.zip ./llama3-8b-math-merged