In [1]:
import tensorflow as tf
import tqdm
import time
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import tiktoken  # Ensure this is installed

# Report the installed library versions so the recorded outputs can be tied to an environment.
print("TensorFlow version:", tf.__version__)
print("tqdm version:", tqdm.__version__)


TensorFlow version: 2.18.0
tqdm version: 4.66.5


In [2]:
# -------------------------------
# 1. Download and Load GPT-2 Weights
# -------------------------------
# `gpt_download3` is a helper module that is not defined in this notebook.
from gpt_download3 import download_and_load_gpt2

# Download pretrained GPT-2 weights (124M)
# `params` is indexed later with keys such as 'wpe', 'wte', 'blocks', 'g' and 'b'
# when the weights are copied into the PyTorch model; `settings` is not referenced
# again in the cells shown here.
settings, params = download_and_load_gpt2(model_size="124M", models_dir="gpt2")



File already exists and is up-to-date: gpt2/124M/checkpoint




File already exists and is up-to-date: gpt2/124M/encoder.json




File already exists and is up-to-date: gpt2/124M/hparams.json




File already exists and is up-to-date: gpt2/124M/model.ckpt.data-00000-of-00001




File already exists and is up-to-date: gpt2/124M/model.ckpt.index




File already exists and is up-to-date: gpt2/124M/model.ckpt.meta




File already exists and is up-to-date: gpt2/124M/vocab.bpe


In [3]:
# -------------------------------
# 2. Define Base Configuration and Update for GPT-2 Small
# -------------------------------
# Baseline hyperparameters. The context length and QKV-bias entries are
# placeholders that NEW_CONFIG overrides below.
GPT_CONFIG_124M = {
    "vocab_size": 50257,
    "context_length": 256,
    "emb_dim": 768,
    "n_heads": 12,
    "n_layers": 12,
    "drop_rate": 0.1,
    "qkv_bias": False,
}

# Size-specific overrides for each GPT-2 variant.
model_configs = {
    "gpt2-small (124M)": {"emb_dim": 768, "n_layers": 12, "n_heads": 12},
    "gpt2-medium (355M)": {"emb_dim": 1024, "n_layers": 24, "n_heads": 16},
    "gpt2-large (774M)": {"emb_dim": 1280, "n_layers": 36, "n_heads": 20},
    "gpt2-xl (1558M)": {"emb_dim": 1600, "n_layers": 48, "n_heads": 25},
}

model_name = "gpt2-small (124M)"

# Merge order matters: base values first, then the size-specific overrides,
# then the context length and QKV bias needed for compatibility with the
# pretrained weights. GPT_CONFIG_124M itself is left unmodified.
NEW_CONFIG = {
    **GPT_CONFIG_124M,
    **model_configs[model_name],
    "context_length": 1024,
    "qkv_bias": True,
}


In [5]:
# -------------------------------
# 3. Define Model Classes
# -------------------------------

class MultiHeadAttention(nn.Module):
    """Causal multi-head self-attention.

    The input is projected to queries, keys and values, split into
    ``num_heads`` heads of size ``d_out // num_heads``, combined with scaled
    dot-product attention under a causal mask, and merged back through a
    final linear projection.

    Args:
        d_in: Size of the last dimension of the input.
        d_out: Size of the last dimension of the output; must be divisible
            by ``num_heads``.
        context_length: Side length of the square causal mask, i.e. the
            longest sequence the mask can cover.
        dropout: Dropout probability applied to the attention weights.
        num_heads: Number of attention heads.
        qkv_bias: Whether the query/key/value projections carry a bias.
    """

    def __init__(self, d_in, d_out, context_length, dropout, num_heads, qkv_bias=False):
        super().__init__()
        assert (d_out % num_heads == 0), "d_out must be divisible by num_heads"
        self.d_out = d_out
        self.num_heads = num_heads
        self.head_dim = d_out // num_heads

        self.W_query = nn.Linear(d_in, d_out, bias=qkv_bias)
        self.W_key = nn.Linear(d_in, d_out, bias=qkv_bias)
        self.W_value = nn.Linear(d_in, d_out, bias=qkv_bias)
        self.out_proj = nn.Linear(d_out, d_out)
        self.dropout = nn.Dropout(dropout)

        # Ones strictly above the diagonal mark the "future" positions that
        # a token must not attend to.
        future_positions = torch.triu(torch.ones(context_length, context_length), diagonal=1)
        self.register_buffer("mask", future_positions)

    def _split_heads(self, projected, batch, seq_len):
        """Reshape (batch, seq_len, d_out) to (batch, num_heads, seq_len, head_dim)."""
        per_head = projected.view(batch, seq_len, self.num_heads, self.head_dim)
        return per_head.transpose(1, 2)

    def forward(self, x):
        """Apply causal self-attention to ``x`` of shape (batch, seq_len, d_in).

        Returns:
            Tensor of shape (batch, seq_len, d_out).
        """
        batch, seq_len, _ = x.shape

        q = self._split_heads(self.W_query(x), batch, seq_len)
        k = self._split_heads(self.W_key(x), batch, seq_len)
        v = self._split_heads(self.W_value(x), batch, seq_len)

        # Raw dot-product scores; future positions are set to -inf so that
        # softmax gives them zero weight.
        scores = q @ k.transpose(2, 3)
        blocked = self.mask.bool()[:seq_len, :seq_len]
        scores.masked_fill_(blocked, -float('inf'))

        weights = torch.softmax(scores / (self.head_dim ** 0.5), dim=-1)
        weights = self.dropout(weights)

        # Put the heads back next to each other before the output projection.
        merged = (weights @ v).transpose(1, 2).contiguous().view(batch, seq_len, self.d_out)
        return self.out_proj(merged)

class LayerNorm(nn.Module):
    """Layer normalization over the last dimension with learnable scale and shift.

    Args:
        emb_dim: Size of the last (normalized) dimension.
    """

    def __init__(self, emb_dim):
        super().__init__()
        self.eps = 1e-5  # added to the variance to avoid division by zero
        self.scale = nn.Parameter(torch.ones(emb_dim))
        self.shift = nn.Parameter(torch.zeros(emb_dim))

    def forward(self, x):
        """Normalize ``x`` along its last dimension, then scale and shift it."""
        centered = x - x.mean(dim=-1, keepdim=True)
        # Biased (population) variance, i.e. division by N rather than N - 1.
        variance = x.var(dim=-1, keepdim=True, unbiased=False)
        normalized = centered / torch.sqrt(variance + self.eps)
        return self.scale * normalized + self.shift

class GELU(nn.Module):
    """GELU activation computed with the tanh approximation."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))``."""
        coeff = torch.sqrt(torch.tensor(2.0 / torch.pi))
        inner = coeff * (x + 0.044715 * torch.pow(x, 3))
        return 0.5 * x * (1 + torch.tanh(inner))

class FeedForward(nn.Module):
    """Position-wise feed-forward network: expand to 4x width, GELU, project back.

    Args:
        cfg: Configuration mapping; only ``cfg["emb_dim"]`` is read.
    """

    def __init__(self, cfg):
        super().__init__()
        emb_dim = cfg["emb_dim"]
        hidden_dim = 4 * emb_dim
        expand = nn.Linear(emb_dim, hidden_dim)
        activation = GELU()
        contract = nn.Linear(hidden_dim, emb_dim)
        # Positions 0 and 2 of this Sequential are the two linear layers.
        self.layers = nn.Sequential(expand, activation, contract)

    def forward(self, x):
        """Apply the network to ``x``; the last dimension stays ``emb_dim``."""
        return self.layers(x)

class TransformerBlock(nn.Module):
    """Transformer block with pre-normalization.

    Each of the two sub-layers (self-attention, feed-forward) is preceded by
    a LayerNorm and followed by dropout and a residual connection.

    Args:
        cfg: Configuration mapping with the keys "emb_dim", "context_length",
            "drop_rate", "n_heads" and "qkv_bias".
    """

    def __init__(self, cfg):
        super().__init__()
        emb_dim = cfg["emb_dim"]
        self.att = MultiHeadAttention(
            d_in=emb_dim,
            d_out=emb_dim,
            context_length=cfg["context_length"],
            dropout=cfg["drop_rate"],
            num_heads=cfg["n_heads"],
            qkv_bias=cfg["qkv_bias"],
        )
        self.ff = FeedForward(cfg)
        self.norm1 = LayerNorm(emb_dim)
        self.norm2 = LayerNorm(emb_dim)
        # The same dropout module is shared by both residual branches.
        self.drop_shortcut = nn.Dropout(cfg["drop_rate"])

    def forward(self, x):
        """Run the attention and feed-forward sub-layers on ``x``."""
        # Attention sub-layer with its residual connection.
        residual = x
        x = self.drop_shortcut(self.att(self.norm1(x))) + residual

        # Feed-forward sub-layer with its residual connection.
        residual = x
        x = self.drop_shortcut(self.ff(self.norm2(x))) + residual
        return x

class GPTModel(nn.Module):
    """GPT-style decoder: token + position embeddings, a stack of transformer
    blocks, a final LayerNorm and a bias-free linear output head.

    Args:
        cfg: Configuration mapping with the keys "vocab_size", "emb_dim",
            "context_length", "drop_rate" and "n_layers", plus whatever
            ``TransformerBlock`` reads from it.
    """

    def __init__(self, cfg):
        super().__init__()
        emb_dim = cfg["emb_dim"]
        self.tok_emb = nn.Embedding(cfg["vocab_size"], emb_dim)
        self.pos_emb = nn.Embedding(cfg["context_length"], emb_dim)
        self.drop_emb = nn.Dropout(cfg["drop_rate"])
        blocks = [TransformerBlock(cfg) for _ in range(cfg["n_layers"])]
        self.trf_blocks = nn.Sequential(*blocks)
        self.final_norm = LayerNorm(emb_dim)
        self.out_head = nn.Linear(emb_dim, cfg["vocab_size"], bias=False)

    def forward(self, in_idx):
        """Map token IDs of shape (batch, seq_len) to logits of shape
        (batch, seq_len, vocab_size)."""
        _, seq_len = in_idx.shape
        positions = torch.arange(seq_len, device=in_idx.device)
        # The position embeddings broadcast over the batch dimension.
        hidden = self.tok_emb(in_idx) + self.pos_emb(positions)
        hidden = self.drop_emb(hidden)
        hidden = self.trf_blocks(hidden)
        hidden = self.final_norm(hidden)
        return self.out_head(hidden)


In [6]:
# -------------------------------
# 4. Load Pretrained GPT-2 Weights into Custom Model
# -------------------------------
def assign(left, right):
    """Wrap ``right`` in a new ``torch.nn.Parameter`` after a shape check.

    Args:
        left: Existing parameter; only its shape is inspected.
        right: Array-like holding the replacement values.

    Returns:
        A new ``torch.nn.Parameter`` built from ``right``.

    Raises:
        ValueError: If the shapes of ``left`` and ``right`` differ.
    """
    if left.shape == right.shape:
        return torch.nn.Parameter(torch.tensor(right))
    raise ValueError(f"Shape mismatch. Left: {left.shape}, Right: {right.shape}")

def load_weights_into_gpt(gpt, params):
    """Copy pretrained weights from ``params`` into ``gpt`` in place.

    Every target parameter is replaced via ``assign``, which raises
    ``ValueError`` on a shape mismatch. Weight matrices of linear layers are
    transposed before assignment. The fused ``c_attn`` weight and bias are
    split into three equal parts along the last axis for the query, key and
    value projections.

    Args:
        gpt: Model exposing ``tok_emb``, ``pos_emb``, ``trf_blocks``,
            ``final_norm`` and ``out_head``.
        params: Nested mapping with the keys 'wpe', 'wte', 'blocks', 'g'
            and 'b'.
    """
    gpt.pos_emb.weight = assign(gpt.pos_emb.weight, params['wpe'])
    gpt.tok_emb.weight = assign(gpt.tok_emb.weight, params['wte'])

    for b in range(len(params["blocks"])):
        src = params["blocks"][b]
        block = gpt.trf_blocks[b]
        att = block.att
        ff = block.ff

        # Attention: split the fused QKV weight, then the fused QKV bias.
        q_w, k_w, v_w = np.split(src["attn"]["c_attn"]["w"], 3, axis=-1)
        att.W_query.weight = assign(att.W_query.weight, q_w.T)
        att.W_key.weight = assign(att.W_key.weight, k_w.T)
        att.W_value.weight = assign(att.W_value.weight, v_w.T)
        q_b, k_b, v_b = np.split(src["attn"]["c_attn"]["b"], 3, axis=-1)
        att.W_query.bias = assign(att.W_query.bias, q_b)
        att.W_key.bias = assign(att.W_key.bias, k_b)
        att.W_value.bias = assign(att.W_value.bias, v_b)
        att.out_proj.weight = assign(att.out_proj.weight, src["attn"]["c_proj"]["w"].T)
        att.out_proj.bias = assign(att.out_proj.bias, src["attn"]["c_proj"]["b"])

        # Feed-forward: layers[0] is the expanding linear, layers[2] the contracting one.
        ff.layers[0].weight = assign(ff.layers[0].weight, src["mlp"]["c_fc"]["w"].T)
        ff.layers[0].bias = assign(ff.layers[0].bias, src["mlp"]["c_fc"]["b"])
        ff.layers[2].weight = assign(ff.layers[2].weight, src["mlp"]["c_proj"]["w"].T)
        ff.layers[2].bias = assign(ff.layers[2].bias, src["mlp"]["c_proj"]["b"])

        # Layer norms: 'g' feeds the scale, 'b' feeds the shift.
        block.norm1.scale = assign(block.norm1.scale, src["ln_1"]["g"])
        block.norm1.shift = assign(block.norm1.shift, src["ln_1"]["b"])
        block.norm2.scale = assign(block.norm2.scale, src["ln_2"]["g"])
        block.norm2.shift = assign(block.norm2.shift, src["ln_2"]["b"])

    gpt.final_norm.scale = assign(gpt.final_norm.scale, params["g"])
    gpt.final_norm.shift = assign(gpt.final_norm.shift, params["b"])
    # The output head is initialized from the token-embedding matrix.
    gpt.out_head.weight = assign(gpt.out_head.weight, params["wte"])

# Instantiate our custom GPT model and load the weights
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gpt = GPTModel(NEW_CONFIG)
# Weights are loaded before the model is moved to the target device.
load_weights_into_gpt(gpt, params)
gpt.to(device)
# eval() switches off dropout; as the last expression of the cell its return
# value (the module) is also what the notebook displays.
gpt.eval()

GPTModel(
  (tok_emb): Embedding(50257, 768)
  (pos_emb): Embedding(1024, 768)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=768, out_features=768, bias=True)
        (W_key): Linear(in_features=768, out_features=768, bias=True)
        (W_value): Linear(in_features=768, out_features=768, bias=True)
        (out_proj): Linear(in_features=768, out_features=768, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU()
          (2): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (1): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=7

In [7]:
# -------------------------------
# 5. Fine-Tuning Setup: Dataset, DataLoader, Loss Function
# -------------------------------
class MathProblemsDataset(Dataset):
    """Next-token-prediction dataset built from a JSON file of math problems.

    The JSON file must hold a list of records, each with a "question" and an
    "explanation" string. Every record is rendered as
    ``"Problem: <question>\\nSolution: <explanation>\\n<|endoftext|>"`` and
    tokenized once, up front.

    Args:
        json_file: Path to the JSON file.
        tokenizer: Object exposing ``encode(text, allowed_special=...)`` that
            returns a list of token IDs.
        max_length: Maximum number of tokens kept per sample. Longer samples
            are truncated from the right, which also drops the trailing
            end-of-text token.
    """

    def __init__(self, json_file, tokenizer, max_length):
        # Decode explicitly as UTF-8. Relying on the platform default encoding
        # can corrupt (or fail on) non-ASCII characters such as "÷" on
        # systems whose locale encoding is not UTF-8.
        with open(json_file, 'r', encoding="utf-8") as f:
            data = json.load(f)
        self.samples = []
        self.tokenizer = tokenizer
        self.max_length = max_length
        for record in data:
            text = "Problem: " + record["question"] + "\nSolution: " + record["explanation"] + "\n<|endoftext|>"
            token_ids = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
            if len(token_ids) > max_length:
                token_ids = token_ids[:max_length]
            self.samples.append(torch.tensor(token_ids, dtype=torch.long))

    def __len__(self):
        """Return the number of samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(input_ids, target_ids)`` for sample ``idx``.

        The targets are the inputs shifted left by one token, so position
        ``i`` of the input is trained to predict token ``i + 1``.
        """
        tokens = self.samples[idx]
        input_ids = tokens[:-1]
        target_ids = tokens[1:]
        return input_ids, target_ids

def collate_fn(batch):
    """Pad a list of ``(input_ids, target_ids)`` pairs into two batch tensors.

    Inputs are right-padded with token ID 0 and targets with -100, each up to
    the longest sequence in the batch.

    Returns:
        Tuple ``(input_ids, target_ids)`` of tensors with the batch
        dimension first.
    """
    pad = torch.nn.utils.rnn.pad_sequence
    padded_inputs = pad([sample[0] for sample in batch], batch_first=True, padding_value=0)
    padded_targets = pad([sample[1] for sample in batch], batch_first=True, padding_value=-100)
    return padded_inputs, padded_targets

# Use tiktoken GPT-2 encoding for our dataset
tokenizer = tiktoken.get_encoding("gpt2")
max_length = NEW_CONFIG["context_length"]  # 1024 tokens as per NEW_CONFIG
# The JSON path is relative, so it is resolved against the current working directory.
dataset = MathProblemsDataset("diverse_math_dataset.json", tokenizer, max_length)
# Shuffled mini-batches of 4 samples; collate_fn pads each batch to its longest sample.
train_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)

def calc_loss_batch(input_batch, target_batch, model, device):
    """Compute the cross-entropy loss of ``model`` on one batch.

    Both tensors are moved to ``device`` and the model's logits are
    flattened over the batch and sequence dimensions. Target positions equal
    to -100 are excluded from the loss.

    Returns:
        The loss tensor returned by ``F.cross_entropy``.
    """
    inputs, targets = input_batch.to(device), target_batch.to(device)
    logits = model(inputs)
    vocab_size = logits.size(-1)
    flat_logits = logits.view(-1, vocab_size)
    flat_targets = targets.view(-1)
    return F.cross_entropy(flat_logits, flat_targets, ignore_index=-100)

In [8]:
# -------------------------------
# 6. Fine-Tuning Loop
# -------------------------------
# Every parameter of the model is passed to the optimizer (no layers are frozen).
optimizer = torch.optim.AdamW(gpt.parameters(), lr=5e-4, weight_decay=0.1)
num_epochs = 5
print("Starting fine-tuning...")
start_time = time.time()
for epoch in range(num_epochs):
    gpt.train()  # training mode: dropout layers are active
    epoch_loss = 0.0
    for input_batch, target_batch in train_loader:
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss = calc_loss_batch(input_batch, target_batch, gpt, device)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    # Mean of the per-batch losses: each batch counts equally, whatever its size.
    avg_loss = epoch_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs} -- Average Loss: {avg_loss:.4f}")
end_time = time.time()
# Wall-clock duration of the whole loop, reported in minutes.
print(f"Fine-tuning completed in {(end_time - start_time)/60:.2f} minutes.")

Starting fine-tuning...
Epoch 1/5 -- Average Loss: 0.7339
Epoch 2/5 -- Average Loss: 0.5889
Epoch 3/5 -- Average Loss: 0.5723
Epoch 4/5 -- Average Loss: 0.5570
Epoch 5/5 -- Average Loss: 0.5494
Fine-tuning completed in 177.19 minutes.


In [9]:
# -------------------------------
# 7. Generation Functions with Temperature & Top-k Sampling
# -------------------------------
def text_to_token_ids(text, tokenizer):
    """Encode ``text`` and return the token IDs as a tensor with a leading batch dimension of 1.

    The ``<|endoftext|>`` marker is allowed through as a special token.
    """
    token_list = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
    ids = torch.tensor(token_list)
    return ids.unsqueeze(0)

def token_ids_to_text(token_ids, tokenizer):
    """Decode a tensor of token IDs into text, dropping a leading dimension of size 1 first."""
    return tokenizer.decode(token_ids.squeeze(0).tolist())

def generate_text_simple(model, idx, max_new_tokens, context_size, eos_token_id=None, temperature=1.0, top_k=None):
    """Autoregressively extend ``idx`` with up to ``max_new_tokens`` tokens.

    Args:
        model: Callable mapping token IDs of shape (batch, seq_len) to logits
            of shape (batch, seq_len, vocab_size).
        idx: Tensor of token IDs of shape (batch, seq_len) holding the prompt.
        max_new_tokens: Upper bound on the number of generated tokens.
        context_size: Only the last ``context_size`` tokens are fed to the
            model at each step.
        eos_token_id: Optional end-of-sequence token ID. Generation stops once
            every row has produced it. For a single-row batch the EOS token
            itself is not appended. In a multi-row batch, rows that finish
            early are padded with ``eos_token_id`` until the remaining rows
            finish.
        temperature: Softmax temperature used for sampling. A value of 0 (or
            any non-positive value) selects greedy decoding instead of
            dividing by zero.
        top_k: If given, sampling is restricted to the ``top_k`` highest
            logits; values larger than the vocabulary size are clamped.

    Returns:
        Tensor of shape (batch, seq_len + generated) with the prompt followed
        by the generated tokens.
    """
    # Rows that have already emitted EOS (only consulted when eos_token_id is set).
    finished = torch.zeros(idx.size(0), dtype=torch.bool, device=idx.device)

    for _ in range(max_new_tokens):
        idx_cond = idx[:, -context_size:]
        with torch.no_grad():
            logits = model(idx_cond)
        logits = logits[:, -1, :]  # only the prediction for the next token matters

        # Apply top-k filtering if specified
        if top_k is not None:
            # torch.topk raises when k exceeds the vocabulary size, so clamp it.
            k = min(top_k, logits.size(-1))
            top_logits, _ = torch.topk(logits, k)
            min_topk = top_logits[:, -1].unsqueeze(1)
            logits = logits.masked_fill(logits < min_topk, float('-inf'))

        if temperature > 0:
            probas = torch.softmax(logits / temperature, dim=-1)
            idx_next = torch.multinomial(probas, num_samples=1)
        else:
            # Greedy decoding: the limit of sampling as temperature goes to 0.
            idx_next = torch.argmax(logits, dim=-1, keepdim=True)

        if eos_token_id is not None:
            # Keep already-finished rows pinned to EOS so the batch stays rectangular.
            idx_next = idx_next.masked_fill(finished.unsqueeze(1), eos_token_id)
            finished = finished | (idx_next.squeeze(1) == eos_token_id)
            if finished.all():
                break

        idx = torch.cat((idx, idx_next), dim=1)
    return idx

# Get the EOS token ID (allowing the special token)
# The first element of the encoded sequence is taken as the EOS ID.
eos_token_id = tokenizer.encode("<|endoftext|>", allowed_special={"<|endoftext|>"})[0]

In [23]:
# -------------------------------
# 8. Generate Sample Text from Fine-Tuned Model
# -------------------------------
gpt.eval()
# The prompt holds only the "Problem: ..." line (it does not include "Solution:");
# the fine-tuned model is left to produce the solution line itself.
start_context = "Problem: Multiply 3 and 4."
encoded_context = text_to_token_ids(start_context, tokenizer).to(device)
generated_ids = generate_text_simple(
    model=gpt,
    idx=encoded_context,
    max_new_tokens=50,
    context_size=NEW_CONFIG["context_length"],
    eos_token_id=eos_token_id,
    temperature=1.0,
    top_k=50
)
generated_text = token_ids_to_text(generated_ids, tokenizer)
# Strip any end-of-text marker and surrounding whitespace before printing.
generated_text = generated_text.replace("<|endoftext|>", "").strip()
print("Generated text after fine-tuning:\n", generated_text)


Generated text after fine-tuning:
 Problem: Multiply 3 and 4.
Solution: Find the product by calculating 3 times 4.


In [25]:
import random
import json

def number_to_words(n):
    """
    Convert an integer n (0 <= n <= 1000) into its word representation.
    For example, 23 -> 'twenty-three' and 105 -> 'one hundred and five'.

    Values outside the supported range (negative, or greater than 1000) are
    returned as their plain digit string, e.g. -5 -> '-5' and 1001 -> '1001'.
    """
    ones = ["zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
            "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
            "eighty", "ninety"]

    # Without the lower-bound check a negative n would pass the `n < 20` test
    # and index `ones` from the end, silently returning the wrong word.
    if n < 0 or n > 1000:
        return str(n)
    if n == 1000:
        return "one thousand"
    if n < 20:
        return ones[n]
    if n < 100:
        if n % 10 == 0:
            return tens[n // 10]
        return tens[n // 10] + "-" + ones[n % 10]
    # 100 <= n <= 999
    if n % 100 == 0:
        return ones[n // 100] + " hundred"
    return ones[n // 100] + " hundred and " + number_to_words(n % 100)

def get_representation_function():
    """
    Pick, with equal probability, how numbers are rendered and return the
    matching one-argument converter: spelled-out words or plain digits.
    A single random draw is made per call.
    """
    use_words = random.random() < 0.5

    def as_words(n):
        return number_to_words(n)

    def as_digits(n):
        return str(n)

    return as_words if use_words else as_digits

# -------------------------
# Problem Generators Below
# -------------------------

def generate_addition():
    """Build one randomly worded addition problem.

    Two addends are drawn from [1, 500] and rendered with a single shared
    representation (digits or words). One question phrasing and one
    explanation phrasing are then picked at random.

    Returns:
        dict with the string entries "question" and "explanation".
    """
    to_text = get_representation_function()
    x = random.randint(1, 500)
    y = random.randint(1, 500)
    x_disp = to_text(x)
    y_disp = to_text(y)

    questions = (
        f"What is {x_disp} plus {y_disp}?",
        f"Compute the sum of {x_disp} and {y_disp}.",
        f"Add {x_disp} and {y_disp} together.",
        f"Find the total when {x_disp} is added to {y_disp}.",
        f"Determine the result of adding {x_disp} and {y_disp}.",
        f"If you combine {x_disp} with {y_disp}, what is the sum?",
        f"Calculate {x_disp} plus {y_disp}.",
        f"What do you get when you add {x_disp} and {y_disp}?",
        f"Find out the sum of {x_disp} and {y_disp}.",
        f"Determine how much {x_disp} and {y_disp} add up to.",
        f"Can you find the total of {x_disp} and {y_disp}?",
        f"Figure out the sum of {x_disp} and {y_disp}.",
        f"Sum {x_disp} and {y_disp} to find the result.",
        f"Add up {x_disp} and {y_disp}.",
        f"Calculate the combined total of {x_disp} and {y_disp}.",
    )
    explanations = (
        f"This is an addition problem: add {x_disp} to {y_disp}.",
        f"To solve, sum {x_disp} and {y_disp}.",
        f"Simply add {x_disp} and {y_disp} together.",
        f"Add the two numbers {x_disp} and {y_disp} to get the answer.",
        f"The task is to compute {x_disp} + {y_disp}.",
        f"Determine the sum by combining {x_disp} and {y_disp}.",
        f"Calculate the result of adding {x_disp} and {y_disp}.",
        f"Add {x_disp} with {y_disp} to obtain the total.",
        f"This problem requires the addition of {x_disp} and {y_disp}.",
        f"Find the total by adding the numbers {x_disp} and {y_disp}.",
        f"Compute the sum of {x_disp} and {y_disp} for the solution.",
        f"Sum the numbers {x_disp} and {y_disp} to solve the problem.",
        f"Calculate the combined value of {x_disp} and {y_disp}.",
        f"Find the total by adding {x_disp} and {y_disp} together.",
        f"The answer is the sum of {x_disp} and {y_disp}.",
    )

    # The question is drawn before the explanation.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_subtraction():
    """Build one randomly worded subtraction problem with a non-negative result.

    The minuend is drawn from [100, 1000] and the subtrahend from
    [1, minuend]. Both are rendered with a single shared representation
    (digits or words) before one question phrasing and one explanation
    phrasing are picked at random.

    Returns:
        dict with the string entries "question" and "explanation".
    """
    to_text = get_representation_function()
    x = random.randint(100, 1000)
    y = random.randint(1, x)  # never larger than x
    x_disp = to_text(x)
    y_disp = to_text(y)

    questions = (
        f"What is {x_disp} minus {y_disp}?",
        f"Subtract {y_disp} from {x_disp} and give the answer.",
        f"Calculate the difference when {y_disp} is subtracted from {x_disp}.",
        f"Determine the result of {x_disp} less {y_disp}.",
        f"Find the difference between {x_disp} and {y_disp}.",
        f"Compute {x_disp} - {y_disp}.",
        f"If you remove {y_disp} from {x_disp}, what remains?",
        f"Can you subtract {y_disp} from {x_disp}?",
        f"What do you get when you take {y_disp} away from {x_disp}?",
        f"Figure out the result of {x_disp} minus {y_disp}.",
        f"Determine how much {x_disp} exceeds {y_disp}.",
        f"Find out what {x_disp} becomes after subtracting {y_disp}.",
        f"Subtract {y_disp} out of {x_disp} to find the answer.",
        f"Compute the result of {x_disp} with {y_disp} taken away.",
        f"Find the outcome of removing {y_disp} from {x_disp}.",
    )
    explanations = (
        f"This problem asks you to subtract {y_disp} from {x_disp}.",
        f"To solve, compute {x_disp} minus {y_disp}.",
        f"Subtract {y_disp} from {x_disp} to find the difference.",
        f"Determine the difference by removing {y_disp} from {x_disp}.",
        f"Calculate the subtraction: {x_disp} - {y_disp}.",
        f"Remove {y_disp} from {x_disp} to get the result.",
        f"Find out what remains when you take {y_disp} away from {x_disp}.",
        f"Subtract the second number from the first to solve the problem.",
        f"Compute the difference by subtracting {y_disp} from {x_disp}.",
        f"The answer is the result of {x_disp} minus {y_disp}.",
        f"Determine the value left after subtracting {y_disp} from {x_disp}.",
        f"Perform the subtraction: take {y_disp} away from {x_disp}.",
        f"Calculate the remaining value when {y_disp} is subtracted from {x_disp}.",
        f"Find the answer by subtracting {y_disp} from {x_disp}.",
        f"Remove {y_disp} from {x_disp} to determine the answer.",
    )

    # The question is drawn before the explanation.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_multiplication():
    """Build one randomly worded multiplication problem.

    Both factors are drawn from [2, 50] and rendered with a single shared
    representation (digits or words). One question phrasing and one
    explanation phrasing are then picked at random.

    Returns:
        dict with the string entries "question" and "explanation".
    """
    to_text = get_representation_function()
    x = random.randint(2, 50)
    y = random.randint(2, 50)
    x_disp = to_text(x)
    y_disp = to_text(y)

    questions = (
        f"What is {x_disp} multiplied by {y_disp}?",
        f"Multiply {x_disp} and {y_disp}.",
        f"Calculate the product of {x_disp} and {y_disp}.",
        f"Find the result when {x_disp} is multiplied by {y_disp}.",
        f"Determine the multiplication result of {x_disp} and {y_disp}.",
        f"Compute {x_disp} times {y_disp}.",
        f"If you multiply {x_disp} by {y_disp}, what do you get?",
        f"Can you find the product of {x_disp} and {y_disp}?",
        f"Multiply {x_disp} with {y_disp} and provide the answer.",
        f"Figure out the product of {x_disp} and {y_disp}.",
        f"Find out what {x_disp} times {y_disp} equals.",
        f"Determine the result of multiplying {x_disp} and {y_disp}.",
        f"Compute the multiplication of {x_disp} by {y_disp}.",
        f"Multiply {x_disp} and {y_disp} to find the product.",
        f"Calculate {x_disp} times {y_disp} for the answer.",
    )
    explanations = (
        f"This problem requires multiplying {x_disp} by {y_disp}.",
        f"To solve, compute the product of {x_disp} and {y_disp}.",
        f"Multiply {x_disp} and {y_disp} to get the result.",
        f"Find the product by calculating {x_disp} times {y_disp}.",
        f"Determine the answer by multiplying the two numbers {x_disp} and {y_disp}.",
        f"Perform the multiplication: {x_disp} * {y_disp}.",
        f"Calculate the result of {x_disp} multiplied by {y_disp}.",
        f"Multiply the factors {x_disp} and {y_disp} to obtain the product.",
        f"This involves computing {x_disp} times {y_disp}.",
        f"Find the multiplication result of {x_disp} and {y_disp}.",
        f"Compute the product of {x_disp} with {y_disp}.",
        f"Determine the multiplication outcome of {x_disp} and {y_disp}.",
        f"Calculate the product by multiplying {x_disp} and {y_disp}.",
        f"Multiply {x_disp} with {y_disp} to arrive at the answer.",
        f"Find the answer by performing the multiplication of {x_disp} and {y_disp}.",
    )

    # The question is drawn before the explanation.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_division():
    """Build one randomly worded division problem with an integer quotient.

    A divisor in [2, 20] and a quotient in [2, 50] are drawn and multiplied
    to obtain the dividend, so the division is always exact. Dividend and
    divisor are rendered with a single shared representation (digits or
    words) before the phrasings are picked at random.

    Returns:
        dict with the string entries "question" and "explanation".
    """
    to_text = get_representation_function()
    divisor = random.randint(2, 20)
    quotient = random.randint(2, 50)
    # Constructing the dividend from the quotient guarantees no remainder.
    dividend = divisor * quotient
    dividend_disp = to_text(dividend)
    divisor_disp = to_text(divisor)

    questions = (
        f"What is {dividend_disp} divided by {divisor_disp}?",
        f"Divide {dividend_disp} by {divisor_disp} and state the quotient.",
        f"Calculate the result of {dividend_disp} divided by {divisor_disp}.",
        f"Find the quotient when {dividend_disp} is divided by {divisor_disp}.",
        f"Determine the division result of {dividend_disp} over {divisor_disp}.",
        f"Compute {dividend_disp} ÷ {divisor_disp}.",
        f"If you split {dividend_disp} into {divisor_disp} equal parts, what is each part?",
        f"How many times does {divisor_disp} go into {dividend_disp}?",
        f"Find out what {dividend_disp} divided by {divisor_disp} equals.",
        f"Determine how many {divisor_disp}'s are contained in {dividend_disp}.",
        f"Calculate the quotient of {dividend_disp} and {divisor_disp}.",
        f"Divide {dividend_disp} equally by {divisor_disp} to get the answer.",
        f"Compute the division: {dividend_disp} divided by {divisor_disp}.",
        f"Figure out the result of dividing {dividend_disp} by {divisor_disp}.",
        f"What do you obtain when {dividend_disp} is divided by {divisor_disp}?",
    )
    explanations = (
        f"To solve, divide {dividend_disp} by {divisor_disp}.",
        f"This is a division problem: compute {dividend_disp} ÷ {divisor_disp}.",
        f"Find the quotient by dividing {dividend_disp} by {divisor_disp}.",
        f"Perform the division of {dividend_disp} by {divisor_disp} to get the result.",
        f"Divide {dividend_disp} by {divisor_disp} and the answer is the quotient.",
        f"Calculate how many times {divisor_disp} fits into {dividend_disp}.",
        f"Split {dividend_disp} into {divisor_disp} equal parts to find the answer.",
        f"Determine the division result by computing {dividend_disp} ÷ {divisor_disp}.",
        f"Compute the quotient of {dividend_disp} divided by {divisor_disp}.",
        f"Find the answer by performing the division {dividend_disp} over {divisor_disp}.",
        f"Calculate the result of dividing {dividend_disp} by {divisor_disp}.",
        f"Determine the number of groups when {dividend_disp} is split by {divisor_disp}.",
        f"Compute the division to get the quotient of {dividend_disp} by {divisor_disp}.",
        f"Find out how many {divisor_disp}'s make up {dividend_disp}.",
        f"Perform the calculation: {dividend_disp} divided by {divisor_disp}.",
    )

    # The question is drawn before the explanation.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_apples_word_problem():
    """Build one randomly worded addition word problem about apples.

    Two apple counts are drawn from [5, 50] and rendered with a single
    shared representation (digits or words). One question phrasing and one
    explanation phrasing are then picked at random.

    Returns:
        dict with the string entries "question" and "explanation".
    """
    to_text = get_representation_function()
    apples1 = random.randint(5, 50)
    apples2 = random.randint(5, 50)
    a1_disp = to_text(apples1)
    a2_disp = to_text(apples2)

    questions = (
        f"Anna has {a1_disp} apples and then receives {a2_disp} more. How many apples does she have in total?",
        f"If you combine {a1_disp} apples with another {a2_disp} apples, what is the overall count?",
        f"John collected {a1_disp} apples and later got {a2_disp} additional apples. Find the total.",
        f"Calculate the sum of {a1_disp} apples and {a2_disp} apples.",
        f"Determine the total number of apples when {a1_disp} apples are increased by {a2_disp}.",
        f"Add {a1_disp} apples to {a2_disp} apples to get the total.",
        f"How many apples are there if you start with {a1_disp} and then add {a2_disp}?",
        f"Combine {a1_disp} apples with {a2_disp} more apples. What is the sum?",
        f"Figure out the total when {a1_disp} apples are mixed with {a2_disp} apples.",
        f"Find the overall count by adding {a1_disp} apples and {a2_disp} apples.",
        f"If someone has {a1_disp} apples and obtains {a2_disp} extra, what is the new total?",
        f"Determine the sum of {a1_disp} and {a2_disp} apples.",
        f"Calculate the complete count by summing {a1_disp} with {a2_disp}.",
        f"Add {a1_disp} apples to {a2_disp} apples to find how many there are.",
        f"What is the total number when {a1_disp} apples are added to {a2_disp} apples?",
    )
    explanations = (
        f"To solve, add {a1_disp} and {a2_disp} to get the total number of apples.",
        f"This problem requires adding {a1_disp} apples to {a2_disp} apples.",
        f"Simply sum {a1_disp} and {a2_disp} to find the answer.",
        f"Combine the two numbers by adding {a1_disp} with {a2_disp}.",
        f"Calculate the total by performing the addition of {a1_disp} and {a2_disp}.",
        f"Determine the overall count by summing {a1_disp} and {a2_disp}.",
        f"Add the amounts {a1_disp} and {a2_disp} together to find the result.",
        f"Compute the sum of {a1_disp} and {a2_disp} for the total number of apples.",
        f"This is a straightforward addition: {a1_disp} plus {a2_disp}.",
        f"Find the answer by adding the numbers {a1_disp} and {a2_disp}.",
        f"Sum up {a1_disp} and {a2_disp} to get the complete count.",
        f"Add {a1_disp} to {a2_disp} to determine the total apples.",
        f"Calculate the result by adding {a1_disp} and {a2_disp}.",
        f"Combine {a1_disp} with {a2_disp} to obtain the final count.",
        f"Determine the sum of {a1_disp} and {a2_disp} to solve the problem.",
    )

    # The question is drawn before the explanation.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_pencil_price_problem():
    """Build one randomly worded unit-price multiplication problem about pencils.

    A unit cost in [1, 20] and a quantity in [3, 30] are drawn and rendered
    with a single shared representation (digits or words). One question
    phrasing and one explanation phrasing are then picked at random.

    The price is written as "$5" when numbers are rendered as digits and as
    "five dollars" when they are rendered as words. Prefixing a spelled-out
    number with "$" would produce malformed text such as "$five".

    Returns:
        dict with the string entries "question" and "explanation".
    """
    rep = get_representation_function()
    cost = random.randint(1, 20)
    count = random.randint(3, 30)
    cost_disp, count_disp = rep(cost), rep(count)

    # A "$" prefix only reads correctly in front of digits; a spelled-out
    # amount takes the currency as a trailing word instead.
    if cost_disp.isdigit():
        price_disp = f"${cost_disp}"
    else:
        unit = "dollar" if cost == 1 else "dollars"
        price_disp = f"{cost_disp} {unit}"

    question_templates = [
        f"If one pencil costs {price_disp} and you buy {count_disp} pencils, how much do you pay?",
        f"Calculate the total cost for {count_disp} pencils if each pencil is priced at {price_disp}.",
        f"What is the overall expense for buying {count_disp} pencils at {price_disp} each?",
        f"Determine the total amount spent when purchasing {count_disp} pencils at {price_disp} per pencil.",
        f"Find the cost for {count_disp} pencils when one pencil costs {price_disp}.",
        f"If you purchase {count_disp} pencils at {price_disp} each, what is the total cost?",
        f"Compute the expense by multiplying {count_disp} by {price_disp}.",
        f"How much money is spent on {count_disp} pencils if each costs {price_disp}?",
        f"Determine the sum spent on {count_disp} pencils given a unit cost of {price_disp}.",
        f"Add up the cost by calculating {count_disp} times {price_disp}.",
        f"What total do you get when buying {count_disp} pencils at a cost of {price_disp} each?",
        f"Calculate the overall price for {count_disp} pencils at {price_disp} per unit.",
        f"Determine the expenditure when purchasing {count_disp} pencils, each costing {price_disp}.",
        f"If each pencil is {price_disp}, what is the price for {count_disp} pencils?",
        f"Compute the total amount by multiplying the unit price {price_disp} with the quantity {count_disp}."
    ]
    explanation_templates = [
        f"Multiply {price_disp} by {count_disp} to get the total cost.",
        f"Find the product of {count_disp} and {price_disp} to determine the expenditure.",
        f"Calculate the total expense by multiplying the cost {price_disp} by the number {count_disp}.",
        f"This problem involves multiplication: {price_disp} times {count_disp}.",
        f"Determine the amount spent by multiplying {price_disp} by {count_disp}.",
        f"Compute the overall cost by calculating {count_disp} multiplied by {price_disp}.",
        f"Multiply the unit price {price_disp} with the quantity {count_disp} to find the answer.",
        f"Find the total amount by performing the multiplication {price_disp} * {count_disp}.",
        f"Calculate the expense by taking the product of {price_disp} and {count_disp}.",
        f"Determine the sum spent on pencils by multiplying {price_disp} by {count_disp}.",
        f"Compute the total cost through the multiplication of {price_disp} and {count_disp}.",
        f"Multiply {price_disp} with {count_disp} to get the final price.",
        f"Find the overall expenditure by calculating {price_disp} times {count_disp}.",
        f"Determine the spending by multiplying the cost per pencil, {price_disp}, by the quantity, {count_disp}.",
        f"Calculate the total expense as {price_disp} multiplied by {count_disp}."
    ]
    return {"question": random.choice(question_templates), "explanation": random.choice(explanation_templates)}

def generate_fraction_problem():
    """
    Build one "half of a number" problem.

    An integer between 20 and 200 (inclusive) is drawn and rendered through
    the representation function chosen for this problem.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    x = random.randint(20, 200)
    x_disp = render(x)

    questions = (
        f"What is one half of {x_disp}?",
        f"Calculate 50% of {x_disp}.",
        f"Find the value when {x_disp} is divided equally into 2 parts.",
        f"Determine one half of the number {x_disp}.",
        f"Divide {x_disp} by 2 to get one half.",
        f"How much is {x_disp} split into two equal parts?",
        f"If you split {x_disp} into 2 equal sections, what is the size of each?",
        f"Find half of {x_disp} by dividing it by 2.",
        f"Compute one half of {x_disp}.",
        f"Determine 1/2 of {x_disp}.",
        f"What do you obtain when you cut {x_disp} into 2 equal pieces?",
        f"Find the result of halving {x_disp}.",
        f"Calculate the half value of {x_disp}.",
        f"Determine the midpoint of {x_disp} by dividing by 2.",
        f"Split {x_disp} in half to get the answer.",
    )
    explanations = (
        f"To solve, divide {x_disp} by 2.",
        f"Find one half of {x_disp} by performing the division {x_disp} ÷ 2.",
        f"Divide the number {x_disp} by 2 to obtain the half value.",
        f"Calculate 50% of {x_disp} by dividing it by 2.",
        f"Simply split {x_disp} into 2 equal parts.",
        f"Divide {x_disp} equally into two parts to find the answer.",
        f"Perform the division {x_disp} / 2 to get the result.",
        f"Compute the half value by dividing {x_disp} by 2.",
        f"Determine the answer by cutting {x_disp} in half.",
        f"Calculate the midpoint of {x_disp} by a division by 2.",
        f"Find the half by dividing {x_disp} equally.",
        f"Split {x_disp} into two to get the correct answer.",
        f"Divide {x_disp} by two to determine the half.",
        f"Halve {x_disp} to arrive at the answer.",
        f"Perform the operation {x_disp} divided by 2 for the solution.",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_percentage_problem():
    """
    Build one "P% of a base value" problem.

    The base is an integer in [100, 1000] and the percentage is one of
    10, 15, 20, 25 or 30. Both are rendered through the representation
    function chosen for this problem.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    base = random.randint(100, 1000)
    percent = random.choice([10, 15, 20, 25, 30])
    base_disp = render(base)
    percent_disp = render(percent)

    questions = (
        f"What is {percent_disp}% of {base_disp}?",
        f"Calculate {percent_disp}% of {base_disp}.",
        f"Determine the value corresponding to {percent_disp}% of {base_disp}.",
        f"Find the result when {percent_disp}% is applied to {base_disp}.",
        f"Compute {percent_disp}% of the number {base_disp}.",
        f"If you take {percent_disp}% of {base_disp}, what is the value?",
        f"Find out what {percent_disp}% of {base_disp} equals.",
        f"Determine {percent_disp}% of {base_disp} by calculation.",
        f"How much is {percent_disp}% of {base_disp}?",
        f"Calculate the percentage: what is {percent_disp}% of {base_disp}?",
        f"What amount is represented by {percent_disp}% of {base_disp}?",
        f"Find the fraction of {base_disp} that is {percent_disp} percent.",
        f"Determine the part of {base_disp} corresponding to {percent_disp}%.",
        f"Calculate the share of {base_disp} that amounts to {percent_disp}%.",
        f"If {base_disp} represents the whole, what is {percent_disp}% of it?",
    )
    # NOTE(review): several explanations interpolate the raw integer `percent`
    # (or the float `percent/100`) instead of `percent_disp`, so they always
    # show digits regardless of the chosen representation -- confirm intended.
    explanations = (
        f"Convert {percent_disp}% to a decimal and multiply by {base_disp} to solve.",
        f"To find the value, compute ({percent} / 100) * {base_disp}.",
        f"Multiply {base_disp} by {percent/100} to determine {percent_disp}%.",
        f"Find the percentage by calculating {base_disp} times {percent/100}.",
        f"Determine the result by applying the percentage formula to {base_disp}.",
        f"Calculate the fraction {percent}/100 of {base_disp} to obtain the answer.",
        f"Convert the percentage into a decimal and multiply it by {base_disp}.",
        f"Use the formula (percentage/100) * base: ({percent}/100)*{base_disp}.",
        f"Multiply {base_disp} by the decimal equivalent of {percent_disp}%.",
        f"Determine the value by calculating {percent_disp}% of {base_disp}.",
        f"Find the result using the conversion {percent_disp}% = {percent/100} and multiplying by {base_disp}.",
        f"Apply the percentage formula to {base_disp} with {percent_disp}% to solve.",
        f"Calculate the product of {base_disp} and {percent/100} to find the answer.",
        f"Determine the result by converting {percent_disp}% to a decimal and multiplying by {base_disp}.",
        f"Compute {base_disp} times {percent/100} for the percentage value.",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_algebra_problem():
    """
    Build one linear-equation problem of the form "x + a = b".

    `a` and the hidden solution `x` are each drawn from [1, 50], and
    b = a + x, so the equation always has a positive integer solution.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    a = random.randint(1, 50)
    x = random.randint(1, 50)
    b = a + x
    a_disp = render(a)
    x_disp = render(x)  # rendered like the others, but no template below uses it
    b_disp = render(b)

    questions = (
        f"Solve for x: x + {a_disp} = {b_disp}.",
        f"If x + {a_disp} equals {b_disp}, what is x?",
        f"Find x given that x + {a_disp} is equal to {b_disp}.",
        f"Determine the value of x in the equation: x + {a_disp} = {b_disp}.",
        f"Calculate x when x added to {a_disp} equals {b_disp}.",
        f"In the equation x + {a_disp} = {b_disp}, solve for x.",
        f"Find the missing number x such that x + {a_disp} = {b_disp}.",
        f"Determine x from the equation: x + {a_disp} = {b_disp}.",
        f"Solve the equation: x + {a_disp} = {b_disp} for x.",
        f"Find the value of x if x plus {a_disp} gives {b_disp}.",
        f"If x and {a_disp} sum to {b_disp}, what is x?",
        f"Determine the unknown x in x + {a_disp} = {b_disp}.",
        f"Calculate the missing term x in the equation: x + {a_disp} = {b_disp}.",
        f"Find x when you know that adding {a_disp} to it results in {b_disp}.",
        f"Solve for x in the relation: x + {a_disp} equals {b_disp}.",
    )
    explanations = (
        f"Subtract {a_disp} from {b_disp} to solve for x.",
        f"To find x, subtract {a_disp} from {b_disp} (x = {b_disp} - {a_disp}).",
        f"Isolate x by computing {b_disp} - {a_disp}.",
        f"Subtract {a_disp} from {b_disp} to determine the value of x.",
        f"Perform the subtraction: x = {b_disp} - {a_disp}.",
        f"Find x by removing {a_disp} from {b_disp}.",
        f"Calculate x as the difference: {b_disp} minus {a_disp}.",
        f"Determine the unknown by subtracting {a_disp} from {b_disp}.",
        f"Find the value of x using the operation {b_disp} - {a_disp}.",
        f"Subtract {a_disp} from {b_disp} to isolate x.",
        f"Compute x by evaluating {b_disp} - {a_disp}.",
        f"Determine x through the subtraction {b_disp} minus {a_disp}.",
        f"Isolate x with the calculation: {b_disp} minus {a_disp}.",
        f"Subtract to find x: {b_disp} - {a_disp} equals x.",
        f"Find x by computing the difference between {b_disp} and {a_disp}.",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_time_problem():
    """
    Build one elapsed-time problem (departure hour to arrival hour).

    The start hour is drawn from [1, 12] and the duration from [1, 8];
    the end hour is their sum, so it can be as large as 20.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    start_hour = random.randint(1, 12)
    duration = random.randint(1, 8)
    end_hour = start_hour + duration
    # NOTE(review): each hour is followed by a literal ":00" in the templates;
    # if the renderer produces words this reads e.g. "seven:00" -- confirm
    # that is the intended surface form.
    start_disp = render(start_hour)
    end_disp = render(end_hour)

    questions = (
        f"If a train leaves at {start_disp}:00 and arrives at {end_disp}:00, how long is the journey?",
        f"Calculate the travel time for a train departing at {start_disp}:00 and arriving at {end_disp}:00.",
        f"Determine the duration of a trip that starts at {start_disp}:00 and ends at {end_disp}:00.",
        f"Find the length of the journey if it begins at {start_disp}:00 and concludes at {end_disp}:00.",
        f"What is the travel time for a trip starting at {start_disp}:00 and finishing at {end_disp}:00?",
        f"How many hours does a trip take if it starts at {start_disp}:00 and ends at {end_disp}:00?",
        f"Determine the time difference between a departure at {start_disp}:00 and an arrival at {end_disp}:00.",
        f"If a bus departs at {start_disp}:00 and reaches its destination at {end_disp}:00, what is the journey duration?",
        f"Calculate how long the trip lasts when it starts at {start_disp}:00 and ends at {end_disp}:00.",
        f"Find out the travel duration from {start_disp}:00 to {end_disp}:00.",
        f"Determine the time span of a journey that begins at {start_disp}:00 and ends at {end_disp}:00.",
        f"If a vehicle leaves at {start_disp}:00 and arrives at {end_disp}:00, compute the total travel time.",
        f"Calculate the difference in time between {start_disp}:00 and {end_disp}:00.",
        f"How long is the trip if it starts at {start_disp}:00 and finishes at {end_disp}:00?",
        f"Determine the total duration of travel between {start_disp}:00 and {end_disp}:00.",
    )
    explanations = (
        f"Subtract the departure time ({start_disp}:00) from the arrival time ({end_disp}:00) to get the journey duration.",
        f"To solve, compute the difference between {end_disp}:00 and {start_disp}:00.",
        f"Find the travel time by subtracting {start_disp}:00 from {end_disp}:00.",
        f"Determine the time span by calculating {end_disp}:00 minus {start_disp}:00.",
        "Subtract the start time from the end time to obtain the duration.",
        f"Calculate the journey length by taking the difference between {end_disp}:00 and {start_disp}:00.",
        f"Determine how many hours pass by subtracting {start_disp}:00 from {end_disp}:00.",
        f"Find the answer by computing the time gap: {end_disp}:00 - {start_disp}:00.",
        "Compute the travel duration by subtracting the departure time from the arrival time.",
        f"Determine the length of the trip using the difference between {end_disp}:00 and {start_disp}:00.",
        "Subtract the initial time from the final time to determine the journey duration.",
        f"Calculate the elapsed time by finding the difference between {end_disp}:00 and {start_disp}:00.",
        f"Determine the trip duration by subtracting {start_disp}:00 from {end_disp}:00.",
        f"Find the difference in time between {end_disp}:00 and {start_disp}:00 to get the duration.",
        f"Compute the duration by subtracting the start time ({start_disp}:00) from the end time ({end_disp}:00).",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

# Registry of every problem generator function; generate_dataset() picks
# uniformly at random from this list, so order only matters for reproducing
# a seeded run.
problem_generators = [
    generate_addition, generate_subtraction,
    generate_multiplication, generate_division,
    generate_apples_word_problem, generate_pencil_price_problem,
    generate_fraction_problem, generate_percentage_problem,
    generate_algebra_problem, generate_time_problem,
]

def generate_dataset(n):
    """
    Produce a list of `n` randomly generated problems.

    For each entry one generator is picked at random from the module-level
    `problem_generators` list and called with no arguments.

    Args:
        n: Number of entries to generate; values <= 0 yield an empty list.

    Returns:
        list: The generated entries, in generation order.
    """
    return [random.choice(problem_generators)() for _ in range(n)]

if __name__ == "__main__":
    # Generate at least 10,000 data points.
    n = 10000
    dataset = generate_dataset(n)

    # Persist the whole dataset as pretty-printed JSON (2-space indent).
    with open("diverse_math_dataset.json", "w") as f:
        f.write(json.dumps(dataset, indent=2))

    print(f"Generated {n} diverse math problem entries in 'diverse_math_dataset.json'.")


Generated 10000 diverse math problem entries in 'diverse_math_dataset.json'.


In [2]:
import json
import random

# -------------------------
# Helper Functions
# -------------------------
def number_to_words(n):
    """
    Convert an integer n (0 <= n <= 1000) into its word representation.

    For example, 23 -> 'twenty-three' and 342 -> 'three hundred and forty-two'.

    Args:
        n: Integer to convert.

    Returns:
        str: The English words for n when 0 <= n <= 1000; otherwise the
        plain digit string str(n) (this covers both n > 1000 and n < 0).
    """
    ones = ["zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
            "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
            "eighty", "ninety"]

    # Out-of-range values fall back to digits. The negative check is required:
    # without it `ones[n]` would index from the end of the list and silently
    # return a wrong word (e.g. -1 -> 'nineteen') or raise IndexError.
    if n < 0 or n > 1000:
        return str(n)

    if n < 20:
        return ones[n]
    if n < 100:
        tens_word = tens[n // 10]
        unit = n % 10
        return tens_word if unit == 0 else tens_word + "-" + ones[unit]
    if n < 1000:
        hundreds_word = ones[n // 100] + " hundred"
        remainder = n % 100
        if remainder == 0:
            return hundreds_word
        # The remainder is always < 100, so this recursion is one level deep.
        return hundreds_word + " and " + number_to_words(remainder)
    return "one thousand"

def get_representation_function():
    """
    Choose, with a single coin flip, how numbers are rendered for one problem.

    Returns:
        callable: A one-argument function that maps a number either to its
        word form (via number_to_words) or to its digit string (via str).
        Reusing the returned function for every number in a problem keeps
        them all in the same format.
    """
    def as_words(n):
        return number_to_words(n)

    def as_digits(n):
        return str(n)

    use_words = random.random() < 0.5
    return as_words if use_words else as_digits

# -------------------------
# New Shape-Specific Problem Generators with 15 Templates Each
# -------------------------

# --- Rectangle ---
def generate_rectangle_area_problem():
    """
    Build one rectangle-area problem.

    Length and width are each drawn from [2, 50] and rendered with the
    representation function chosen for this problem.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    length, width = random.randint(2, 50), random.randint(2, 50)
    length_disp = render(length)
    width_disp = render(width)

    questions = (
        f"What is the area of a rectangle with length {length_disp} and width {width_disp}?",
        f"Calculate the area of a rectangle whose sides are {length_disp} and {width_disp}.",
        f"Determine the area of a rectangle with dimensions {length_disp} by {width_disp}.",
        f"Find the area when the length is {length_disp} and the width is {width_disp}.",
        f"Compute the area of a rectangle given the length {length_disp} and width {width_disp}.",
        f"If a rectangle has a length of {length_disp} and a width of {width_disp}, what is its area?",
        f"Determine the total area of a rectangle with side lengths {length_disp} and {width_disp}.",
        f"Find out the area of a rectangle with a length of {length_disp} and a width of {width_disp}.",
        f"Calculate the product of {length_disp} and {width_disp} to get the area of the rectangle.",
        f"What is the result of multiplying {length_disp} by {width_disp} for a rectangle's area?",
        f"Compute the area by finding the product of the rectangle's length ({length_disp}) and width ({width_disp}).",
        f"Find the rectangle's area when its length is {length_disp} and its width is {width_disp}.",
        f"Determine the area by multiplying {length_disp} with {width_disp}.",
        f"Add no extra steps: the area of the rectangle is just {length_disp} multiplied by {width_disp}.",
        f"Express the area of a rectangle with sides {length_disp} and {width_disp} as their product.",
    )
    explanations = (
        f"Multiply {length_disp} by {width_disp} to obtain the area.",
        f"The area of a rectangle is calculated as length times width: {length_disp} * {width_disp}.",
        f"Area = length × width, so compute {length_disp} multiplied by {width_disp}.",
        f"To find the area, multiply the length ({length_disp}) by the width ({width_disp}).",
        f"Simply compute the product of {length_disp} and {width_disp} to get the area.",
        f"Multiply the two dimensions: {length_disp} × {width_disp} equals the area.",
        f"Find the area by calculating {length_disp} times {width_disp}.",
        f"Area is the result of multiplying the rectangle's length by its width: {length_disp} x {width_disp}.",
        f"Compute the rectangle's area by taking {length_disp} and multiplying it by {width_disp}.",
        f"The formula for area is length × width, which here is {length_disp} multiplied by {width_disp}.",
        f"To solve, multiply {length_disp} with {width_disp} and that gives the area.",
        f"Simply perform the multiplication: {length_disp} * {width_disp} equals the area.",
        f"Determine the area by computing the product {length_disp} x {width_disp}.",
        f"Multiply {length_disp} and {width_disp} together to yield the area.",
        f"The area is found by multiplying the length ({length_disp}) by the width ({width_disp}).",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_rectangle_perimeter_problem():
    """
    Build one rectangle-perimeter problem.

    Length and width are each drawn from [2, 50] and rendered with the
    representation function chosen for this problem.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    length, width = random.randint(2, 50), random.randint(2, 50)
    length_disp = render(length)
    width_disp = render(width)

    questions = (
        f"What is the perimeter of a rectangle with length {length_disp} and width {width_disp}?",
        f"Calculate the perimeter of a rectangle whose sides are {length_disp} and {width_disp}.",
        f"Determine the perimeter for a rectangle with dimensions {length_disp} by {width_disp}.",
        f"Find the perimeter when the rectangle's length is {length_disp} and its width is {width_disp}.",
        f"Compute the total boundary length of a rectangle with side lengths {length_disp} and {width_disp}.",
        f"If a rectangle has a length of {length_disp} and a width of {width_disp}, what is its perimeter?",
        f"Determine the perimeter by adding all sides of a rectangle with length {length_disp} and width {width_disp}.",
        f"Find out the perimeter of a rectangle that measures {length_disp} in length and {width_disp} in width.",
        f"Calculate the sum of all sides for a rectangle with sides {length_disp} and {width_disp}.",
        f"What is the result of 2 times the sum of {length_disp} and {width_disp}?",
        f"Compute the perimeter using the formula 2 * (length + width): 2 * ({length_disp} + {width_disp}).",
        f"Find the total distance around a rectangle with a length of {length_disp} and a width of {width_disp}.",
        f"Determine the boundary length by computing 2 multiplied by the sum of {length_disp} and {width_disp}.",
        f"Calculate the perimeter as 2({length_disp} + {width_disp}).",
        f"Add the rectangle’s length and width, double it, and you get the perimeter: 2 * ({length_disp} + {width_disp}).",
    )
    explanations = (
        f"Perimeter = 2 × (length + width), so compute 2 * ({length_disp} + {width_disp}).",
        f"Add the length and width, then multiply by 2: 2 * ({length_disp} + {width_disp}).",
        f"To find the perimeter, calculate 2 times the sum of {length_disp} and {width_disp}.",
        f"Multiply the sum of {length_disp} and {width_disp} by 2 to get the perimeter.",
        f"Compute the perimeter using the formula: 2 * (length + width), here 2 * ({length_disp} + {width_disp}).",
        f"Add {length_disp} and {width_disp} and then double the result to obtain the perimeter.",
        f"Find the perimeter by calculating 2 * ({length_disp} + {width_disp}).",
        "Determine the perimeter by using the formula: Perimeter = 2 × (length + width).",
        f"Multiply ({length_disp} + {width_disp}) by 2 to get the total perimeter.",
        f"To solve, sum {length_disp} and {width_disp} then multiply by 2.",
        f"The perimeter is obtained by 2 * ({length_disp} + {width_disp}).",
        f"Calculate the boundary length by doubling the sum of the sides: 2({length_disp} + {width_disp}).",
        f"Sum the sides and multiply by 2, i.e., 2({length_disp} + {width_disp}) equals the perimeter.",
        "Double the sum of the rectangle's length and width to determine its perimeter.",
        f"Compute 2 * ({length_disp} + {width_disp}) to find the perimeter.",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

# --- Square ---
def generate_square_area_problem():
    """
    Build one square-area problem.

    The side length is drawn from [2, 50] and rendered with the
    representation function chosen for this problem.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    side = random.randint(2, 50)
    side_disp = render(side)

    questions = (
        f"What is the area of a square with side {side_disp}?",
        f"Calculate the area of a square where each side is {side_disp}.",
        f"Determine the area for a square with side length {side_disp}.",
        f"Find the area of a square whose side is {side_disp}.",
        f"Compute the area of a square with a side measuring {side_disp}.",
        f"Express the area of a square with side {side_disp}.",
        f"If a square has a side of {side_disp}, what is its area?",
        f"Determine the total area enclosed by a square of side {side_disp}.",
        f"Find the area by squaring the side {side_disp}.",
        f"Calculate the square of {side_disp} to get the area of the square.",
        f"Compute {side_disp} multiplied by {side_disp} for the square's area.",
        f"What is {side_disp} times {side_disp} for a square's area?",
        f"Determine the area by finding the product of {side_disp} and itself.",
        f"Calculate the area of a square as the square of its side, {side_disp}².",
        f"Find the enclosed area of a square with side length {side_disp} by computing {side_disp}².",
    )
    explanations = (
        f"Area of a square is side squared: {side_disp} * {side_disp}.",
        f"Multiply {side_disp} by itself to get the area.",
        f"Square the side {side_disp} to find the area.",
        f"Compute the area by calculating {side_disp}².",
        f"Area = side × side, so here it is {side_disp} × {side_disp}.",
        f"Simply multiply {side_disp} by {side_disp} to obtain the area.",
        f"Find the area by performing {side_disp} multiplied by {side_disp}.",
        f"Calculate the square of {side_disp} to determine the area.",
        f"To solve, compute {side_disp} x {side_disp} for the area.",
        f"Area is the product of the side with itself: {side_disp} x {side_disp}.",
        f"Multiply the side length {side_disp} by itself to yield the area.",
        f"Determine the area by squaring {side_disp}.",
        f"Area is given by {side_disp}², which is {side_disp} times {side_disp}.",
        f"Find the area using the formula: area = side² = {side_disp}².",
        f"Compute the area by taking the square of {side_disp}.",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_square_perimeter_problem():
    """
    Build one square-perimeter problem.

    The side length is drawn from [2, 50] and rendered with the
    representation function chosen for this problem.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    side = random.randint(2, 50)
    side_disp = render(side)

    questions = (
        f"What is the perimeter of a square with side {side_disp}?",
        f"Calculate the perimeter of a square where each side measures {side_disp}.",
        f"Determine the perimeter for a square with side length {side_disp}.",
        f"Find the perimeter of a square whose side is {side_disp}.",
        f"Compute the total boundary length of a square with side {side_disp}.",
        f"If a square has a side length of {side_disp}, what is its perimeter?",
        f"Determine the sum of all four sides of a square with side {side_disp}.",
        f"Find out the perimeter of a square with sides equal to {side_disp}.",
        f"Calculate the boundary of a square by multiplying {side_disp} by 4.",
        f"What is 4 times {side_disp} for a square's perimeter?",
        f"Compute the perimeter by adding four sides of length {side_disp}.",
        f"Determine the total length around a square with side {side_disp}.",
        f"Find the perimeter as 4 * {side_disp}.",
        f"Add up all sides of the square: {side_disp} + {side_disp} + {side_disp} + {side_disp}.",
        f"Compute the perimeter by calculating four times the side, i.e., 4({side_disp}).",
    )
    explanations = (
        f"Perimeter of a square is 4 times the side: 4 * {side_disp}.",
        f"Multiply {side_disp} by 4 to get the perimeter.",
        f"Compute the perimeter by calculating 4 × {side_disp}.",
        f"Add the four equal sides: {side_disp} + {side_disp} + {side_disp} + {side_disp}.",
        f"Determine the perimeter as 4 multiplied by {side_disp}.",
        f"Perimeter = 4 x side, so here it is 4 x {side_disp}.",
        f"Find the perimeter by multiplying {side_disp} by 4.",
        f"Calculate 4 * {side_disp} to obtain the perimeter.",
        f"To solve, compute 4 times {side_disp}.",
        f"Perimeter equals 4 times the side length, i.e., 4({side_disp}).",
        f"Multiply {side_disp} by 4 to determine the total boundary length.",
        f"Compute the perimeter by adding up four sides of {side_disp} each.",
        f"Determine the boundary of the square: 4 * {side_disp}.",
        f"Calculate the sum of all four sides: 4({side_disp}).",
        f"Find the total perimeter by computing four times {side_disp}.",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

# --- Triangle ---
def generate_triangle_area_problem():
    """
    Build one triangle-area problem (area = base * height / 2).

    Base and height are each drawn from [2, 50] and rendered with the
    representation function chosen for this problem.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    base, height = random.randint(2, 50), random.randint(2, 50)
    base_disp = render(base)
    height_disp = render(height)

    questions = (
        f"What is the area of a triangle with base {base_disp} and height {height_disp}?",
        f"Calculate the area of a triangle given a base of {base_disp} and a height of {height_disp}.",
        f"Determine the area for a triangle with base {base_disp} and height {height_disp}.",
        f"Find the area of a triangle where the base is {base_disp} and the height is {height_disp}.",
        f"Compute the area of a triangle with base {base_disp} and height {height_disp}.",
        f"If a triangle has a base of {base_disp} and height of {height_disp}, what is its area?",
        f"Determine the total area enclosed by a triangle with base {base_disp} and height {height_disp}.",
        f"Find out the area of a triangle with measurements base {base_disp} and height {height_disp}.",
        f"Calculate the area by applying the formula 1/2 * base * height: 1/2 * {base_disp} * {height_disp}.",
        f"What is one half of {base_disp} multiplied by {height_disp} for the triangle's area?",
        f"Compute the area of the triangle using the values {base_disp} for base and {height_disp} for height.",
        f"Determine the area by finding one half of the product {base_disp} and {height_disp}.",
        f"Find the triangle's area using the formula: (base * height) / 2 with base {base_disp} and height {height_disp}.",
        f"Calculate the area by dividing the product of {base_disp} and {height_disp} by 2.",
        f"Express the area of a triangle with base {base_disp} and height {height_disp} as (1/2)({base_disp} x {height_disp}).",
    )
    explanations = (
        f"Area of a triangle = 1/2 * base * height, so compute 1/2 * {base_disp} * {height_disp}.",
        f"Multiply {base_disp} by {height_disp} and then divide by 2 to get the area.",
        f"Calculate the area using the formula: (base * height) / 2, i.e., {base_disp} * {height_disp} / 2.",
        f"Find the area by taking half of the product of {base_disp} and {height_disp}.",
        f"To solve, compute 0.5 * {base_disp} * {height_disp}.",
        f"Determine the area by multiplying {base_disp} and {height_disp} then halving the result.",
        f"Area = (base × height) / 2, so here it is: ({base_disp} × {height_disp}) / 2.",
        f"First multiply {base_disp} by {height_disp}, then divide the product by 2.",
        f"Compute the triangle's area as one half of {base_disp} times {height_disp}.",
        f"Find the area by calculating (1/2) * {base_disp} * {height_disp}.",
        f"Multiply {base_disp} and {height_disp} and then take half to get the area.",
        f"Determine the area with the formula: (base * height)/2 = ({base_disp} * {height_disp})/2.",
        f"Calculate the area by first finding the product {base_disp} * {height_disp} and then dividing by 2.",
        f"Use the formula (base * height) / 2 with {base_disp} and {height_disp} to compute the area.",
        f"Area is computed by halving the product of {base_disp} and {height_disp}.",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

def generate_triangle_perimeter_problem():
    """
    Build one triangle-perimeter problem.

    For simplicity the triangle is always equilateral, so a single side
    length (drawn from [2, 50]) fully describes it.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from 15 phrasings.
    """
    render = get_representation_function()
    side = random.randint(2, 50)
    side_disp = render(side)

    questions = (
        f"What is the perimeter of an equilateral triangle with side {side_disp}?",
        f"Calculate the perimeter of an equilateral triangle where each side is {side_disp}.",
        f"Determine the perimeter for an equilateral triangle with side length {side_disp}.",
        f"Find the perimeter of an equilateral triangle whose side is {side_disp}.",
        f"Compute the total boundary of an equilateral triangle with side {side_disp}.",
        f"If an equilateral triangle has a side of {side_disp}, what is its perimeter?",
        f"Determine the sum of the three equal sides of an equilateral triangle with side {side_disp}.",
        f"Find out the perimeter of an equilateral triangle with sides measuring {side_disp}.",
        f"Calculate the total length around an equilateral triangle with side {side_disp}.",
        f"What is three times {side_disp} for the triangle's perimeter?",
        f"Compute the perimeter by adding three equal sides of length {side_disp}.",
        f"Determine the perimeter as 3 * {side_disp} for an equilateral triangle.",
        f"Find the triangle's boundary by multiplying {side_disp} by 3.",
        f"Calculate the sum of all three sides: {side_disp} + {side_disp} + {side_disp}.",
        f"Express the perimeter of the equilateral triangle as 3({side_disp}).",
    )
    explanations = (
        f"Perimeter of an equilateral triangle = 3 × side, so compute 3 * {side_disp}.",
        f"Multiply {side_disp} by 3 to get the perimeter.",
        f"Compute the perimeter as 3 x {side_disp}.",
        f"Add the three equal sides: {side_disp} + {side_disp} + {side_disp}.",
        f"Determine the perimeter by calculating 3 * {side_disp}.",
        f"Perimeter = 3 times the side length, i.e., 3({side_disp}).",
        f"Find the perimeter by multiplying the side {side_disp} by 3.",
        f"Calculate 3 * {side_disp} to obtain the perimeter.",
        f"To solve, compute 3 multiplied by {side_disp}.",
        f"Determine the boundary by summing three sides of {side_disp} each.",
        f"Perimeter = 3({side_disp}), so add {side_disp} three times.",
        f"Multiply {side_disp} by 3 for the total perimeter.",
        f"Compute the sum of the three sides: 3 x {side_disp}.",
        f"Determine the perimeter as three times {side_disp}.",
        f"Find the equilateral triangle's perimeter by calculating 3({side_disp}).",
    )

    # The question is drawn before the explanation so the sequence of random
    # draws stays the same for a given seed.
    question = random.choice(questions)
    explanation = random.choice(explanations)
    return {"question": question, "explanation": explanation}

# --- Circle ---
def generate_circle_area_problem():
    """Build one randomly worded circle-area question plus a matching hint.

    The radius is an integer drawn from 2..20 (inclusive) and rendered with
    the formatter obtained from get_representation_function().

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below. No numeric answer is computed.
    """
    show = get_representation_function()
    r_txt = show(random.randint(2, 20))

    questions = (
        f"What is the area of a circle with radius {r_txt}?",
        f"Calculate the area of a circle given its radius is {r_txt}.",
        f"Determine the area for a circle with radius {r_txt}.",
        f"Find the area of a circle where the radius is {r_txt}.",
        f"Compute the area of a circle with a radius of {r_txt}.",
        f"If a circle has a radius of {r_txt}, what is its area?",
        f"Determine the enclosed area of a circle with radius {r_txt}.",
        f"Find out the area of a circle that has a radius of {r_txt}.",
        f"Calculate the area using the circle's radius, which is {r_txt}.",
        f"Express the area of a circle with radius {r_txt} using the area formula.",
        f"What is the result of π multiplied by {r_txt} squared for the area?",
        f"Compute the area as 3.14 * {r_txt}² for the circle.",
        f"Determine the area by applying the formula: area = π * (radius)² with radius {r_txt}.",
        f"Find the area by calculating 3.14 times {r_txt} squared.",
        f"Calculate the area of the circle using the value of {r_txt} for the radius.",
    )
    hints = (
        f"Area of a circle = π * radius². Using π ≈ 3.14, compute 3.14 * {r_txt}².",
        f"Multiply the square of {r_txt} by 3.14 to find the area.",
        f"Compute the area using the formula: area = 3.14 * (radius)^2, here 3.14 * {r_txt} * {r_txt}.",
        f"Find the area by squaring the radius {r_txt} and multiplying by 3.14.",
        f"To solve, calculate 3.14 * ({r_txt}^2) to get the area.",
        f"Determine the area by computing π times the square of {r_txt} (with π ≈ 3.14).",
        f"Area = 3.14 x {r_txt}², so square {r_txt} and multiply by 3.14.",
        f"Multiply {r_txt}² by 3.14 to obtain the area of the circle.",
        f"Calculate the area as 3.14 times the square of the radius {r_txt}.",
        f"Find the area by applying the circle area formula: 3.14 * {r_txt} * {r_txt}.",
        f"Square the radius {r_txt} and multiply by 3.14 for the area.",
        f"Compute the area using the approximation π ≈ 3.14: 3.14 * {r_txt}^2.",
        f"Determine the area by evaluating 3.14 * ({r_txt} x {r_txt}).",
        f"Multiply 3.14 by {r_txt} squared to find the area.",
        f"Use the formula area = πr² with r = {r_txt} and π ≈ 3.14.",
    )

    # The question is drawn before the explanation so that a seeded run
    # consumes random numbers in a fixed order.
    question = random.choice(questions)
    explanation = random.choice(hints)
    return {"question": question, "explanation": explanation}

def generate_circle_perimeter_problem():
    """Build one randomly worded circle-circumference question plus a hint.

    The radius is an integer drawn from 2..20 (inclusive) and rendered with
    the formatter obtained from get_representation_function().

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below. No numeric answer is computed.
    """
    show = get_representation_function()
    r_txt = show(random.randint(2, 20))

    questions = (
        f"What is the circumference of a circle with radius {r_txt}?",
        f"Calculate the circumference of a circle given its radius is {r_txt}.",
        f"Determine the perimeter (circumference) for a circle with radius {r_txt}.",
        f"Find the circumference of a circle where the radius is {r_txt}.",
        f"Compute the perimeter of a circle with a radius of {r_txt}.",
        f"If a circle has a radius of {r_txt}, what is its circumference?",
        f"Determine the boundary length of a circle with radius {r_txt}.",
        f"Find out the circumference of a circle that has a radius of {r_txt}.",
        f"Calculate the circle's perimeter using its radius {r_txt}.",
        f"Express the circumference of a circle with radius {r_txt} using the formula.",
        f"What is 2 * π * {r_txt} for the circumference?",
        f"Compute the circumference as 2 * 3.14 * {r_txt} for the circle.",
        f"Determine the perimeter by applying the formula: circumference = 2πr with r = {r_txt}.",
        f"Find the circumference by calculating 2 * 3.14 * {r_txt}.",
        f"Calculate the boundary of the circle using 2 x 3.14 x {r_txt}.",
    )
    hints = (
        f"Circumference of a circle = 2 * π * radius. Using π ≈ 3.14, compute 2 * 3.14 * {r_txt}.",
        f"Multiply 2, 3.14, and {r_txt} to get the circumference.",
        f"Compute the perimeter by calculating 2 * 3.14 * {r_txt}.",
        f"Find the circumference by multiplying {r_txt} by 2 and then by 3.14.",
        f"To solve, compute 2 * 3.14 * {r_txt} for the circle's perimeter.",
        f"Determine the circumference using the formula: 2πr ≈ 2 * 3.14 * {r_txt}.",
        f"Calculate the circle's boundary by evaluating 2 * 3.14 * {r_txt}.",
        f"Multiply {r_txt} by 2 and then by 3.14 to determine the circumference.",
        f"Find the perimeter by computing 2 * {r_txt} * 3.14.",
        f"Express the circumference as 2 * 3.14 * {r_txt}.",
        f"Compute 2 * 3.14 * {r_txt} to obtain the circumference.",
        f"Determine the circle's perimeter using 2 * 3.14 * {r_txt}.",
        f"To find the circumference, multiply {r_txt} by 2 and then by 3.14.",
        f"Calculate the boundary length as 2 * 3.14 * {r_txt}.",
        f"Use the formula circumference = 2πr with r = {r_txt} and π ≈ 3.14.",
    )

    # Question first, explanation second: keeps the random draw order fixed.
    question = random.choice(questions)
    explanation = random.choice(hints)
    return {"question": question, "explanation": explanation}

# --- Parallelogram ---
def generate_parallelogram_area_problem():
    """Build one randomly worded parallelogram-area question plus a hint.

    Base and height are independent integers drawn from 2..50 (inclusive),
    base first, and both are rendered with the same formatter obtained from
    get_representation_function().

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below. No numeric answer is computed.
    """
    show = get_representation_function()
    b = show(random.randint(2, 50))  # base
    h = show(random.randint(2, 50))  # height

    questions = (
        f"What is the area of a parallelogram with base {b} and height {h}?",
        f"Calculate the area of a parallelogram given a base of {b} and height of {h}.",
        f"Determine the area for a parallelogram with base {b} and height {h}.",
        f"Find the area of a parallelogram where the base is {b} and the height is {h}.",
        f"Compute the area of a parallelogram with base {b} and height {h}.",
        f"If a parallelogram has a base of {b} and a height of {h}, what is its area?",
        f"Determine the total area enclosed by a parallelogram with base {b} and height {h}.",
        f"Find out the area of a parallelogram with measurements base {b} and height {h}.",
        f"Calculate the area using the formula: area = base × height, that is {b} * {h}.",
        f"What is the product of {b} and {h} for the area of a parallelogram?",
        f"Compute the area by multiplying the base ({b}) by the height ({h}).",
        f"Determine the area as {b} multiplied by {h} for the parallelogram.",
        f"Find the area by calculating the product of the base {b} and the height {h}.",
        f"Express the area of the parallelogram as the product of its base and height: {b} x {h}.",
        f"Calculate the area by finding {b} times {h}.",
    )
    hints = (
        f"Area of a parallelogram = base * height, so compute {b} * {h}.",
        f"Multiply {b} by {h} to obtain the area.",
        f"Calculate the area using the formula: area = base × height, i.e., {b} x {h}.",
        f"Find the area by multiplying the base ({b}) by the height ({h}).",
        f"Simply multiply {b} and {h} to get the area.",
        f"Determine the area by computing the product {b} x {h}.",
        f"Compute the area by taking {b} multiplied by {h}.",
        f"Area is the product of the base and height: {b} x {h}.",
        f"To solve, multiply {b} with {h} and that gives the area.",
        f"Calculate the product of {b} and {h} to find the area.",
        f"Determine the area as the result of {b} multiplied by {h}.",
        f"Multiply the given base and height: {b} * {h}.",
        f"Find the area by performing the multiplication {b} x {h}.",
        f"Compute the area using base × height = {b} × {h}.",
        f"Determine the area by calculating {b} multiplied by {h}.",
    )

    # Question first, explanation second: keeps the random draw order fixed.
    question = random.choice(questions)
    explanation = random.choice(hints)
    return {"question": question, "explanation": explanation}

def generate_parallelogram_perimeter_problem():
    """Build one randomly worded parallelogram-perimeter question plus a hint.

    Base and adjacent side are independent integers drawn from 2..50
    (inclusive), base first, and both are rendered with the same formatter
    obtained from get_representation_function().

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below. No numeric answer is computed.
    """
    show = get_representation_function()
    b = show(random.randint(2, 50))  # base
    s = show(random.randint(2, 50))  # adjacent side

    questions = (
        f"What is the perimeter of a parallelogram with base {b} and side {s}?",
        f"Calculate the perimeter of a parallelogram where the base is {b} and one side is {s}.",
        f"Determine the perimeter for a parallelogram with base {b} and side length {s}.",
        f"Find the perimeter of a parallelogram with base {b} and side {s}.",
        f"Compute the total boundary of a parallelogram with base {b} and side {s}.",
        f"If a parallelogram has a base of {b} and a side of {s}, what is its perimeter?",
        f"Determine the perimeter by adding the base and side lengths appropriately: 2 * (base + side) = 2 * ({b} + {s}).",
        f"Calculate the perimeter as twice the sum of {b} and {s}.",
        f"What is the result of 2 * ({b} + {s}) for the perimeter?",
        f"Compute the perimeter using the formula: 2 * (base + side) where base is {b} and side is {s}.",
        f"Find the total length around the parallelogram as 2({b} + {s}).",
        f"Determine the boundary by calculating 2 times the sum of {b} and {s}.",
        f"Express the perimeter as 2 * ({b} + {s}).",
        f"Add {b} and {s} and then double the result to get the perimeter.",
        f"Compute the perimeter by evaluating 2({b} + {s}).",
    )
    hints = (
        f"Perimeter of a parallelogram = 2 × (base + side), so compute 2 * ({b} + {s}).",
        f"Add the base {b} and side {s} then multiply by 2.",
        f"To find the perimeter, calculate 2 * ({b} + {s}).",
        f"Multiply the sum of {b} and {s} by 2 to get the perimeter.",
        f"Compute the perimeter using the formula: 2 * (base + side) = 2({b} + {s}).",
        "Determine the perimeter by summing the base and side and then doubling the result.",
        f"Find the perimeter by calculating 2({b} + {s}).",
        f"Add {b} and {s} and multiply the result by 2.",
        f"To solve, compute 2 * ({b} + {s}).",
        f"Determine the boundary length using 2 * ({b} + {s}).",
        f"Calculate the total perimeter as 2 times the sum of {b} and {s}.",
        f"Compute 2 * ({b} + {s}) to get the perimeter.",
        f"Find the perimeter by adding {b} and {s} and then multiplying by 2.",
        f"Express the perimeter as 2({b} + {s}).",
        f"Multiply the sum ({b} + {s}) by 2 to obtain the perimeter.",
    )

    # Question first, explanation second: keeps the random draw order fixed.
    question = random.choice(questions)
    explanation = random.choice(hints)
    return {"question": question, "explanation": explanation}

# --- Trapezoid ---
def generate_trapezoid_area_problem():
    """Build one randomly worded trapezoid-area question plus a hint.

    The two bases and the height are independent integers drawn from 2..50
    (inclusive) in the order base1, base2, height, and all three are rendered
    with the same formatter obtained from get_representation_function().

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below. No numeric answer is computed.
    """
    show = get_representation_function()
    b1 = show(random.randint(2, 50))  # first base
    b2 = show(random.randint(2, 50))  # second base
    h = show(random.randint(2, 50))   # height

    questions = (
        f"What is the area of a trapezoid with bases {b1} and {b2} and height {h}?",
        f"Calculate the area of a trapezoid given bases {b1} and {b2} and a height of {h}.",
        f"Determine the area for a trapezoid with bases {b1} & {b2} and height {h}.",
        f"Find the area of a trapezoid where the two bases are {b1} and {b2} and the height is {h}.",
        f"Compute the area of a trapezoid with base lengths {b1} and {b2} and height {h}.",
        f"If a trapezoid has bases {b1} and {b2} and a height of {h}, what is its area?",
        f"Determine the area by applying the formula for a trapezoid with bases {b1} and {b2} and height {h}.",
        f"Find out the area of a trapezoid with measurements: bases {b1} & {b2}, height {h}.",
        f"Calculate the area using the formula: 1/2 * (base1 + base2) * height, with values {b1}, {b2} and {h}.",
        f"Express the area of a trapezoid as 0.5 * ({b1} + {b2}) * {h}.",
        f"What is the result of 0.5 * ({b1} + {b2}) * {h} for the trapezoid's area?",
        f"Compute the area by taking the average of the two bases ({b1} and {b2}) and multiplying by {h}.",
        f"Determine the area of the trapezoid by calculating 1/2 times the sum of {b1} and {b2} times {h}.",
        f"Find the area by evaluating (1/2) * ({b1} + {b2}) * {h}.",
        f"Calculate the trapezoid's area using the formula: area = (base1 + base2)/2 * height, i.e., ({b1} + {b2})/2 * {h}.",
    )
    hints = (
        f"Area of a trapezoid = 1/2 * (base1 + base2) * height, so compute 1/2 * ({b1} + {b2}) * {h}.",
        f"Add the two bases ({b1} and {b2}), divide by 2, then multiply by the height {h}.",
        f"To find the area, use the formula: (base1 + base2)/2 * height = ({b1} + {b2})/2 * {h}.",
        f"Compute the area by averaging the bases and then multiplying by the height: 0.5 * ({b1} + {b2}) * {h}.",
        f"Determine the area by calculating 1/2 * ({b1} + {b2}) * {h}.",
        f"Multiply the sum of the bases {b1} and {b2} by the height {h} and then take half.",
        "Calculate the trapezoid's area using the formula: area = 0.5 * (base1 + base2) * height.",
        f"Find the area by computing (1/2) * ({b1} + {b2}) * {h}.",
        f"Express the area as the product of the average of the bases and the height: ({b1} + {b2})/2 * {h}.",
        f"To solve, add {b1} and {b2}, divide by 2, and multiply by {h}.",
        f"Determine the area by performing the operation: 0.5 * ({b1} + {b2}) * {h}.",
        f"Compute the area by finding the average of the bases ({b1} and {b2}) and multiplying by the height {h}.",
        f"Multiply ({b1} + {b2}) by {h} and then divide by 2 to obtain the area.",
        f"Find the area using the formula: (base1 + base2)/2 * height = ({b1} + {b2})/2 * {h}.",
        f"Calculate the area by averaging the bases and then scaling by the height, i.e., 0.5 * ({b1} + {b2}) * {h}.",
    )

    # Question first, explanation second: keeps the random draw order fixed.
    question = random.choice(questions)
    explanation = random.choice(hints)
    return {"question": question, "explanation": explanation}

def generate_trapezoid_perimeter_problem():
    """Build one randomly worded trapezoid-perimeter question plus a hint.

    Four independent integers are drawn from 2..50 (inclusive) in the order
    base1, base2, side1, side2 and rendered with the same formatter obtained
    from get_representation_function(). In the text they always appear in
    the order base1, side1, base2, side2.

    NOTE(review): the four lengths are not checked against each other, so
    nothing here guarantees they can form an actual trapezoid.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below. No numeric answer is computed.
    """
    show = get_representation_function()
    # Draw order (bases first, then legs) is kept so seeded runs reproduce.
    b1 = show(random.randint(2, 50))
    b2 = show(random.randint(2, 50))
    s1 = show(random.randint(2, 50))
    s2 = show(random.randint(2, 50))

    # The templates only ever list the sides in four fixed ways; build each
    # listing once and splice it into the sentences.
    as_sum = f"{b1} + {s1} + {b2} + {s2}"
    with_comma_and = f"{b1}, {s1}, {b2}, and {s2}"
    with_and = f"{b1}, {s1}, {b2} and {s2}"
    commas_only = f"{b1}, {s1}, {b2}, {s2}"

    questions = (
        f"What is the perimeter of a trapezoid with sides {with_comma_and}?",
        f"Calculate the perimeter of a trapezoid with side lengths {with_and}.",
        f"Determine the perimeter for a trapezoid having sides {with_and}.",
        f"Find the perimeter of a trapezoid with side lengths {with_and}.",
        f"Compute the total boundary of a trapezoid with sides {with_comma_and}.",
        f"If a trapezoid has sides of lengths {with_comma_and}, what is its perimeter?",
        f"Determine the total perimeter by summing the sides: {with_and}.",
        f"Find out the perimeter of a trapezoid with given sides {with_comma_and}.",
        f"Calculate the sum of all four sides: {as_sum}.",
        f"What is the result of adding {with_comma_and}?",
        f"Compute the perimeter by summing up the side lengths: {as_sum}.",
        f"Determine the boundary length by adding the four sides: {commas_only}.",
        f"Find the trapezoid's perimeter by calculating the total of its sides: {as_sum}.",
        f"Express the perimeter as the sum of all four sides: {as_sum}.",
        f"Calculate the total perimeter by adding each side: {with_comma_and}.",
    )
    hints = (
        f"Perimeter is the sum of all sides: {as_sum}.",
        f"Add the four side lengths to get the perimeter: {as_sum}.",
        f"Compute the perimeter by summing up all sides: {with_comma_and}.",
        f"To find the perimeter, add the lengths of all four sides: {as_sum}.",
        f"Determine the perimeter by calculating the total of the side lengths: {as_sum}.",
        f"Sum the sides: {as_sum} to obtain the perimeter.",
        f"Find the total boundary length by adding each side: {commas_only}.",
        f"Compute the total perimeter as the sum of all sides: {as_sum}.",
        f"Add together the four sides ({with_comma_and}) to get the perimeter.",
        f"Determine the perimeter by summing the values: {as_sum}.",
        f"Calculate the sum of the side lengths: {as_sum} equals the perimeter.",
        f"Express the perimeter as the total of all sides: {as_sum}.",
        f"Find the trapezoid's perimeter by computing the sum: {as_sum}.",
        "Determine the overall boundary by adding the four sides together.",
        f"Compute the perimeter by adding up the side lengths: {as_sum}.",
    )

    # Question first, explanation second: keeps the random draw order fixed.
    question = random.choice(questions)
    explanation = random.choice(hints)
    return {"question": question, "explanation": explanation}

# --- Combined Function ---
def generate_shape_area_perimeter_problem():
    """Produce one area or perimeter problem for a randomly selected shape.

    One of the twelve shape-specific generators (area and perimeter for
    rectangle, square, triangle, circle, parallelogram and trapezoid) is
    picked with random.choice and called; its result is returned unchanged.
    """
    candidates = (
        generate_rectangle_area_problem,
        generate_rectangle_perimeter_problem,
        generate_square_area_problem,
        generate_square_perimeter_problem,
        generate_triangle_area_problem,
        generate_triangle_perimeter_problem,
        generate_circle_area_problem,
        generate_circle_perimeter_problem,
        generate_parallelogram_area_problem,
        generate_parallelogram_perimeter_problem,
        generate_trapezoid_area_problem,
        generate_trapezoid_perimeter_problem,
    )
    make_problem = random.choice(candidates)
    return make_problem()

# -------------------------
# Load Existing Dataset and Append New Data
# -------------------------
def load_dataset(file_path):
    """Read a JSON document from disk and return the decoded object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever json.load produces for the file's content.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's locale
    # encoding: the templates in this notebook contain characters such as
    # π, × and ², which a non-UTF-8 default would mis-decode if a dataset
    # file stores them unescaped.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def save_dataset(data, file_path):
    """Write `data` to `file_path` as JSON indented by two spaces.

    Args:
        data: Any JSON-serializable object.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        TypeError / ValueError: if `data` cannot be serialized. In that case
            the destination file is left untouched.
        OSError: if the file cannot be opened or written.
    """
    # Serialize first: opening with 'w' truncates the target immediately, so
    # encoding inside the `with` block would wipe an existing dataset (and
    # leave partial JSON behind) whenever one entry is not serializable.
    payload = json.dumps(data, indent=2)
    # Explicit encoding so the written bytes never depend on the locale.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(payload)

# Load the existing 10,000 entries from diverse_math_dataset.json
existing_data = load_dataset("diverse_math_dataset.json")
print(f"Loaded {len(existing_data)} existing entries.")

# Generate 5,000 new area/perimeter problems (with diverse shapes and 15+ templates each).
# A comprehension keeps the throwaway loop variable out of the notebook
# namespace; a module-level `for _ in ...` would rebind `_`, which IPython
# uses for "last cell output" and stops updating once user code assigns it.
num_new = 5000
new_data = [generate_shape_area_perimeter_problem() for _ in range(num_new)]

print(f"Generated {len(new_data)} new area/perimeter entries.")

# Append the new entries to the existing dataset (builds a new list; neither
# input list is modified)
combined_data = existing_data + new_data
print(f"Total combined entries: {len(combined_data)}.")

# Save the expanded dataset to a new file so the source dataset stays intact
save_dataset(combined_data, "expanded_math_dataset.json")
print("Expanded dataset saved as 'expanded_math_dataset.json'.")


Loaded 10000 existing entries.
Generated 5000 new area/perimeter entries.
Total combined entries: 15000.
Expanded dataset saved as 'expanded_math_dataset.json'.


In [2]:
import json
import random
import math

# -------------------------
# Helper Functions
# -------------------------
def number_to_words(n):
    """
    Convert an integer n (0 <= n <= 1000) into its word representation.
    For example, 23 -> 'twenty-three' and 342 -> 'three hundred and forty-two'.

    Any value outside 0..1000 (including negative numbers) is returned as
    its plain decimal string, e.g. 1001 -> '1001' and -5 -> '-5'.
    """
    ones = ["zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
            "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
            "eighty", "ninety"]

    # Reject out-of-range values before any table lookup. Without this guard
    # a negative n reaches ones[n], where Python's negative indexing silently
    # yields a wrong word (-1 -> "nineteen") or raises IndexError below -20.
    if n < 0 or n > 1000:
        return str(n)
    if n == 1000:
        return "one thousand"
    if n < 20:
        return ones[n]
    if n < 100:
        ten, unit = divmod(n, 10)
        return tens[ten] if unit == 0 else tens[ten] + "-" + ones[unit]

    # 100..999: "<ones> hundred", optionally followed by "and <remainder>".
    hundreds, rest = divmod(n, 100)
    head = ones[hundreds] + " hundred"
    return head if rest == 0 else head + " and " + number_to_words(rest)

def get_representation_function():
    """
    Pick a number formatter for one problem: spelled-out words or digits.

    A single random draw decides the style (words when it is below 0.5,
    digits otherwise). Callers apply the returned one-argument function to
    every number of a problem so the whole problem uses one style.
    """
    spell_out = random.random() < 0.5
    if spell_out:
        return lambda n: number_to_words(n)
    return lambda n: str(n)


# -------------------------
# Cube
# -------------------------
def generate_cube_volume_problem():
    """Build one randomly worded cube-volume question plus a generic hint.

    The side is an integer drawn from 2..20 (inclusive) and rendered with
    the formatter obtained from get_representation_function(). Only the
    question mentions the side; the explanations describe the formula
    without any value.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below.
    """
    show = get_representation_function()
    side_txt = show(random.randint(2, 20))

    questions = (
        f"What is the volume of a cube with side {side_txt}?",
        f"Calculate the volume of a cube where each side is {side_txt}.",
        f"Determine the volume of a cube having side length {side_txt}.",
        f"Find the volume of a cube with a side of {side_txt}.",
        f"Compute the volume of a cube given its side is {side_txt}.",
        f"If a cube has a side {side_txt}, what is its volume?",
        f"Determine the cube's volume by cubing the side {side_txt}.",
        f"Find the space occupied by a cube with side length {side_txt}.",
        f"Calculate the cube’s volume when its side measures {side_txt}.",
        f"Express the volume as {side_txt} raised to the power of 3.",
        f"Determine the volume by computing {side_txt}³.",
        f"Find the volume of a cube by raising {side_txt} to the third power.",
        f"Compute the internal capacity of a cube with side {side_txt}.",
        f"Express the volume using the formula: volume = side³, where side = {side_txt}.",
        f"Calculate the volume of a cube by taking {side_txt} cubed.",
    )
    hints = (
        "To find the volume of a cube, raise the side length to the power of 3.",
        "Volume is computed by cubing the side: side³.",
        "Remember: volume = side × side × side.",
        "Use the formula: volume = side³.",
        "Cube the side length to obtain the volume without revealing the numerical value.",
        "Compute the volume by raising the given side to the third power.",
        "Apply the formula for a cube's volume: side³.",
        "Volume equals the side multiplied by itself twice.",
        "Determine the volume by calculating the cube of the side.",
        "Remember that volume = side³.",
        "Use the expression (side)^3 to find the volume.",
        "Compute the volume using the cube formula, without evaluating numerically.",
        "Raise the side to the power of 3 to determine the cube’s volume.",
        "Apply the volume formula for a cube without computing the final result.",
        "Cube the side to conceptually determine the volume.",
    )

    # Question first, explanation second: keeps the random draw order fixed.
    question = random.choice(questions)
    explanation = random.choice(hints)
    return {"question": question, "explanation": explanation}

def generate_cube_surface_area_problem():
    """Build one randomly worded cube total-surface-area question plus a hint.

    The side is an integer drawn from 2..20 (inclusive) and rendered with
    the formatter obtained from get_representation_function(). Only the
    question mentions the side; the explanations describe the formula
    without any value.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below.
    """
    show = get_representation_function()
    side_txt = show(random.randint(2, 20))

    questions = (
        f"What is the total surface area of a cube with side {side_txt}?",
        f"Calculate the complete surface area of a cube where each side is {side_txt}.",
        f"Determine the total surface area of a cube with side length {side_txt}.",
        f"Find the surface area for a cube that has a side of {side_txt}.",
        f"Compute the total area covering all faces of a cube with side {side_txt}.",
        f"If a cube has sides of length {side_txt}, what is its total surface area?",
        f"Determine the area covering the entire cube with side {side_txt}.",
        f"Find the sum of the areas of all six faces of a cube with side {side_txt}.",
        f"Calculate the cube’s total surface area when its side measures {side_txt}.",
        f"Express the surface area as 6 multiplied by {side_txt} squared.",
        f"Determine the surface area by computing 6 * {side_txt}².",
        f"Find the area using the formula: total surface area = 6*(side)² for side = {side_txt}.",
        f"Compute the area by multiplying the area of one face by 6, with side {side_txt}.",
        f"Express the cube’s surface area as the sum of 6 faces, each of area {side_txt}².",
        f"Calculate the total exterior area of a cube using side {side_txt}.",
    )
    hints = (
        "Multiply the area of one face (side²) by 6 to obtain the total surface area.",
        "Surface area = 6 × side²; use this formula without computing the numeric value.",
        "Remember: total surface area = 6 * (side)².",
        "Calculate one face’s area and multiply by 6.",
        "Use the formula: total surface area = 6 * side².",
        "Determine the area covering the cube by computing 6*(side²).",
        "Compute the total area by multiplying one face's area by 6.",
        "Square the side and multiply by 6 to get the surface area.",
        "Apply the formula: surface area = 6 * side², without revealing the final number.",
        "To solve, square the side and multiply by 6.",
        "Multiply one face’s area by 6 to determine the total surface area.",
        "Determine the surface area using the relationship: total area = 6*(side²).",
        "Calculate the sum of the areas of all six faces using the cube formula.",
        "Find the total exterior area of the cube by applying 6*(side squared).",
        "Compute the surface area as 6 times the area of one face.",
    )

    # Question first, explanation second: keeps the random draw order fixed.
    question = random.choice(questions)
    explanation = random.choice(hints)
    return {"question": question, "explanation": explanation}

def generate_cube_lateral_surface_area_problem():
    """Build one randomly worded cube lateral-surface-area question plus a hint.

    "Lateral" here means the four vertical faces, i.e. excluding top and
    bottom. The side is an integer drawn from 2..20 (inclusive) and rendered
    with the formatter obtained from get_representation_function(). Only the
    question mentions the side; the explanations describe the formula
    without any value.

    Returns:
        dict: {"question": str, "explanation": str}, each picked at random
        from the phrasings below.
    """
    rep = get_representation_function()
    side = random.randint(2, 20)
    s = rep(side)
    q_templates = [
        f"What is the lateral surface area of a cube with side {s} (excluding top and bottom)?",
        f"Calculate the side surface area of a cube with side {s}, not including the top and bottom.",
        # A cube has only flat faces, so this template no longer calls its
        # lateral area "curved" (that term belongs to cylinders and cones).
        f"Determine the lateral area of a cube with side length {s} (exclude the base faces).",
        f"Find the area of the four vertical faces of a cube with side {s}.",
        f"Compute the lateral area (sum of four sides) of a cube with side {s}.",
        f"If a cube has side {s}, what is the total area of its four vertical faces?",
        f"Determine the side (lateral) surface area of a cube with side length {s}.",
        f"Find the total area of the four lateral faces of a cube with side {s}.",
        f"Calculate the area covering the sides (excluding top and bottom) of a cube with side {s}.",
        f"Express the lateral surface area as 4 multiplied by {s} squared.",
        f"Determine the area of the four side faces of a cube by computing 4 * {s}².",
        f"Find the sum of the areas of the vertical faces of a cube with side {s}.",
        f"Compute the lateral area by multiplying {s}² by 4.",
        f"Calculate the side area (excluding top and bottom) of a cube with side {s} using 4*(side)².",
        f"Determine the vertical surface area of a cube by computing 4 * {s}²."
    ]
    e_templates = [
        "To find the lateral surface area, multiply the area of one vertical face by 4.",
        "Remember: lateral surface area = 4 × side² (excluding top and bottom).",
        "Compute the area of the four vertical faces as 4 * (side)².",
        "Determine the side area by multiplying one face’s area by 4.",
        "Use the formula: lateral area = 4 * (side)² without computing the final number.",
        "Multiply the area of a single side (side²) by 4 to get the lateral surface area.",
        "Find the vertical area by computing 4 * side².",
        "To solve, use the formula: lateral area = 4 * (side)².",
        "Determine the side surface area by multiplying one face’s area by 4.",
        "Remember that lateral area equals 4 times (side)².",
        "Compute the vertical exterior area as 4 * side² without evaluating numerically.",
        "Apply the relationship: lateral area = 4 × (side)².",
        "Calculate the area of the four side faces using 4*(side²).",
        "Find the total lateral area by multiplying one face’s area by 4.",
        "Use the formula for lateral surface area: 4 * (side²) for a cube."
    ]
    # The question is drawn before the explanation (dict values are evaluated
    # left to right), which fixes the random draw order for seeded runs.
    return {"question": random.choice(q_templates), "explanation": random.choice(e_templates)}

# -------------------------
# Rectangular Prism
# -------------------------
def generate_rect_prism_volume_problem():
    """Build one randomized rectangular-prism volume question/explanation pair.

    Three integer dimensions are drawn from 2..40 and passed through the
    callable returned by ``get_representation_function()`` before being
    substituted into the question wording. The explanation wording is
    formula-only and never contains the drawn values.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    # Draw every raw value first, then format them, so calls into `random`
    # happen in the order: three draws, then three formatting calls.
    raw_dims = [random.randint(2, 40) for _ in range(3)]
    ln, wd, ht = [to_text(value) for value in raw_dims]

    question_pool = (
        f"What is the volume of a rectangular prism with length {ln}, width {wd}, and height {ht}?",
        f"Calculate the volume of a box with dimensions {ln} by {wd} by {ht}.",
        f"Determine the volume of a rectangular prism measuring {ln} × {wd} × {ht}.",
        f"Find the volume of a prism with sides {ln}, {wd}, and {ht}.",
        f"Compute the volume of a box with length {ln}, width {wd}, and height {ht}.",
        f"If a box has dimensions {ln}, {wd}, and {ht}, what is its volume?",
        f"Determine the internal capacity of a rectangular prism with dimensions {ln} × {wd} × {ht}.",
        f"Calculate the product of {ln}, {wd}, and {ht} to find the volume.",
        f"What is the result of multiplying {ln} × {wd} × {ht} for the volume?",
        f"Find the volume by computing {ln} * {wd} * {ht}.",
        f"Express the volume as {ln} multiplied by {wd} multiplied by {ht}.",
        f"Compute the volume by multiplying {ln}, {wd}, and {ht} together.",
        "Determine the volume using the formula: volume = length × width × height.",
        f"Calculate the box's volume by finding the product {ln} * {wd} * {ht}.",
        f"Find the internal capacity of a prism with dimensions {ln}, {wd}, and {ht} by multiplying them.",
    )
    explanation_pool = (
        "To find the volume of a rectangular prism, multiply its length, width, and height.",
        "Volume is computed as length × width × height.",
        "Remember: volume = length × width × height.",
        "Multiply the three dimensions to determine the volume.",
        "Use the formula: volume = length × width × height without computing the actual number.",
        "Multiply length, width, and height to get the volume.",
        "Determine the space inside by multiplying the three measurements.",
        "Volume equals the product of the three dimensions.",
        "Compute the volume by taking the product of length, width, and height.",
        "Apply the formula for volume of a prism: length * width * height.",
        "Multiply the three sides to obtain the volume.",
        "Find the volume by computing the multiplication of the dimensions.",
        "Volume equals the product of the three given numbers.",
        "Calculate the internal capacity by multiplying length, width, and height.",
        "Use the standard formula: volume = (length) × (width) × (height).",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

def generate_rect_prism_surface_area_problem():
    """Build one randomized rectangular-prism total-surface-area pair.

    Three integer dimensions are drawn from 2..40 and passed through the
    callable returned by ``get_representation_function()`` before being
    substituted into the question wording. The explanation wording is
    formula-only and never contains the drawn values.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    # Draw every raw value first, then format them, so calls into `random`
    # happen in the order: three draws, then three formatting calls.
    raw_dims = [random.randint(2, 40) for _ in range(3)]
    ln, wd, ht = [to_text(value) for value in raw_dims]

    question_pool = (
        f"What is the total surface area of a rectangular prism with length {ln}, width {wd}, and height {ht}?",
        f"Calculate the total surface area of a box with dimensions {ln} × {wd} × {ht}.",
        f"Determine the surface area for a rectangular prism measuring {ln} by {wd} by {ht}.",
        f"Find the surface area of a prism with sides {ln}, {wd}, and {ht}.",
        f"Compute the total area covering all faces of a rectangular prism with dimensions {ln}, {wd}, and {ht}.",
        f"If a box has dimensions {ln}, {wd}, and {ht}, what is its surface area?",
        f"Determine the total area of all faces of a rectangular prism with dimensions {ln}, {wd}, and {ht}.",
        f"Calculate the sum of the areas of all six faces for a box of dimensions {ln}, {wd}, and {ht}.",
        f"Find the surface area using the formula 2*(lw + lh + wh) for dimensions {ln}, {wd}, and {ht}.",
        f"Express the surface area as 2 times the sum of ({ln}×{wd}) + ({ln}×{ht}) + ({wd}×{ht}).",
        f"Compute the total area covering a rectangular prism with dimensions {ln}, {wd}, {ht} using its formula.",
        "Determine the boundary area by applying 2*(l×w + l×h + w×h).",
        f"Calculate the surface area of a box with dimensions {ln}, {wd}, and {ht} without revealing the number.",
        f"Find the total exterior area of a rectangular prism with dimensions {ln}, {wd}, {ht}.",
        f"Express the overall surface area as 2*(lw + lh + wh) for a box with dimensions {ln}, {wd}, {ht}.",
    )
    # Every explanation must agree with 2*(lw + lh + wh): the doubling applies
    # to the sum of the THREE DISTINCT faces. Summing all six faces (or three
    # pairs) and then doubling would count the surface twice.
    explanation_pool = (
        "Surface area is calculated by summing the areas of all faces: 2*(lw + lh + wh).",
        "Remember: total surface area = 2*(length×width + length×height + width×height).",
        "Compute the area by adding the areas of the three distinct faces and doubling the sum.",
        "Use the formula: area = 2*(lw + lh + wh) without computing the numeric value.",
        "Multiply the sum of the areas of the three distinct faces by 2 to get the total area.",
        "Determine the surface area by applying 2*(l×w + l×h + w×h) conceptually.",
        "To solve, add the areas of the three distinct faces and multiply by 2.",
        "Calculate the total exterior area using the standard formula for a box.",
        "Remember that you must add the areas of the three distinct faces and multiply by 2, because each face has an identical opposite face.",
        "Use the relationship: surface area = 2*(lw + lh + wh).",
        "Find the total area by summing the areas of the three distinct faces, then doubling the result.",
        "Apply the standard formula for a rectangular prism's surface area without evaluating it.",
        "Determine the surface area by computing 2*(lw + lh + wh) conceptually.",
        "Calculate the total area covering the prism by using the given formula.",
        "Sum the areas of all six faces using 2*(lw + lh + wh) without revealing the computed value.",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

def generate_rect_prism_lateral_surface_area_problem():
    """Build one randomized rectangular-prism lateral-surface-area pair.

    Three integer dimensions are drawn from 2..40 and passed through the
    callable returned by ``get_representation_function()`` before being
    substituted into the question wording. The lateral area covers the four
    vertical faces only, i.e. 2h(l+w); the explanation wording is formula-only.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    # Draw every raw value first, then format them, so calls into `random`
    # happen in the order: three draws, then three formatting calls.
    raw_dims = [random.randint(2, 40) for _ in range(3)]
    ln, wd, ht = [to_text(value) for value in raw_dims]

    question_pool = (
        f"What is the lateral surface area of a rectangular prism with length {ln}, width {wd}, and height {ht} (excluding top and bottom)?",
        f"Calculate the side surface area of a box with dimensions {ln} × {wd} × {ht}, excluding its top and bottom.",
        # A rectangular prism has only flat faces, so this asks for the
        # "side" surface area rather than a "curved" one.
        f"Determine the side surface area (lateral area) of a rectangular prism with dimensions {ln}, {wd}, {ht}, not including the base faces.",
        f"Find the area of the vertical faces of a rectangular prism with length {ln}, width {wd}, and height {ht} (excluding top and bottom).",
        f"Compute the lateral surface area of a box with dimensions {ln} by {wd} by {ht}, ignoring the top and bottom.",
        f"If a rectangular prism has dimensions {ln}, {wd}, and {ht}, what is the total area of its four vertical faces?",
        f"Determine the lateral area (sum of the vertical faces) of a rectangular prism with dimensions {ln}, {wd}, and {ht}.",
        f"Find the vertical surface area of a box with dimensions {ln} × {wd} × {ht} by excluding the top and bottom.",
        f"Calculate the sum of the areas of the four vertical faces of a prism with dimensions {ln}, {wd}, and {ht}.",
        f"Express the lateral surface area as 2h(l+w) for a rectangular prism with dimensions {ln}, {wd}, {ht}.",
        f"Determine the side area by computing 2 * {ht} * ({ln} + {wd}) for a box with dimensions {ln}, {wd}, {ht}.",
        f"Find the lateral surface area using the formula 2h(l+w) for a rectangular prism with dimensions {ln}, {wd}, {ht}.",
        f"Compute the vertical exterior area by calculating 2 * {ht} * ({ln} + {wd}) for the prism.",
        f"Determine the area of the four sides of a box using 2({ln}+{wd}) multiplied by the height {ht}.",
        "Calculate the lateral surface area (excluding top and bottom) of a rectangular prism using the formula 2h(l+w).",
    )
    explanation_pool = (
        "To find the lateral surface area, use the formula: lateral area = 2 * height * (length + width).",
        "Multiply the height by the sum of length and width, then double it to get the lateral area.",
        "Remember: lateral surface area = 2h(l+w).",
        "Calculate the side area by using the formula 2 * (length + width) * height.",
        "Use the formula: lateral area = 2h(l+w) without computing the numeric value.",
        "Determine the vertical exterior area by multiplying height by (length + width) and then by 2.",
        "Apply the formula 2h(l+w) to compute the lateral surface area conceptually.",
        "To solve, add length and width, multiply by the height, then double the result.",
        "Compute the lateral area using the relationship: 2 * height * (length + width).",
        "Remember that the side surface area is given by 2h(l+w).",
        "Multiply the height by (length + width) and double it to find the lateral area.",
        "Determine the lateral surface area with the formula: 2h(l+w) without numerical evaluation.",
        "Use the concept: lateral area = 2 * height * (length + width) without revealing the result.",
        "Calculate the vertical area by adding length and width, then multiplying by 2 and the height.",
        "Apply the formula for lateral surface area: 2h(l+w) conceptually.",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

# -------------------------
# Cuboid (same as Rectangular Prism)
# -------------------------
def generate_cuboid_volume_problem():
    """Build one randomized cuboid volume question/explanation pair.

    Three integer dimensions are drawn from 2..40 and passed through the
    callable returned by ``get_representation_function()`` before being
    substituted into the question wording. The explanation wording is
    formula-only and never contains the drawn values.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    # Draw every raw value first, then format them, so calls into `random`
    # happen in the order: three draws, then three formatting calls.
    raw_dims = [random.randint(2, 40) for _ in range(3)]
    ln, wd, ht = [to_text(value) for value in raw_dims]

    question_pool = (
        f"What is the volume of a cuboid with length {ln}, width {wd}, and height {ht}?",
        f"Calculate the volume of a cuboid measuring {ln} by {wd} by {ht}.",
        f"Determine the volume of a cuboid with dimensions {ln} × {wd} × {ht}.",
        f"Find the volume of a cuboid whose length is {ln}, width is {wd}, and height is {ht}.",
        f"Compute the volume of a cuboid with a length of {ln}, a width of {wd}, and a height of {ht}.",
        f"If a cuboid has dimensions {ln}, {wd}, and {ht}, what is its volume?",
        f"Determine the internal capacity of a cuboid with dimensions {ln} × {wd} × {ht}.",
        f"Find the volume of a cuboid by multiplying its length {ln}, width {wd}, and height {ht}.",
        f"Calculate the product of {ln}, {wd}, and {ht} to get the volume of a cuboid.",
        f"Express the volume of a cuboid as {ln} multiplied by {wd} multiplied by {ht}.",
        f"Determine the cuboid's volume by computing {ln} * {wd} * {ht}.",
        f"Compute the volume using the formula: volume = length × width × height for a cuboid with dimensions {ln}, {wd}, {ht}.",
        f"Find the internal volume of a cuboid with sides {ln}, {wd}, and {ht}.",
        f"Express the cuboid’s volume in terms of its dimensions: {ln} × {wd} × {ht}.",
        f"Calculate the volume of a cuboid by multiplying its length, width, and height: {ln} * {wd} * {ht}.",
    )
    explanation_pool = (
        "To find the volume of a cuboid, multiply its length, width, and height.",
        "Volume is computed as length × width × height for a cuboid.",
        "Remember: the volume of a cuboid equals the product of its three dimensions.",
        "Calculate the volume by multiplying the cuboid’s length, width, and height.",
        "Use the formula: volume = length × width × height for a cuboid, without computing the numeric result.",
        "Multiply the three dimensions of the cuboid to determine its volume.",
        "Determine the internal space by taking the product of the cuboid’s length, width, and height.",
        "Volume equals the product of the three sides of a cuboid; use that method.",
        "Compute the cuboid’s volume by multiplying its length, width, and height together.",
        "Apply the standard formula for volume of a cuboid: volume = length × width × height.",
        "Multiply the length, width, and height of the cuboid to get its volume.",
        "Determine the cuboid's capacity by calculating the product of its dimensions.",
        "Use the relationship: volume = (length) × (width) × (height) for a cuboid.",
        "Calculate the internal capacity by multiplying all three dimensions of the cuboid.",
        "Express the volume of a cuboid as the multiplication of its length, width, and height.",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

def generate_cuboid_lateral_surface_area_problem():
    """Build one randomized cuboid lateral-surface-area pair.

    Three integer dimensions are drawn from 2..40 and passed through the
    callable returned by ``get_representation_function()`` before being
    substituted into the question wording. The lateral (side) area excludes
    the top and bottom and equals the sum of the four vertical face areas:
    lateral area = 2h(l + w). The explanation wording is formula-only.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    # Draw every raw value first, then format them, so calls into `random`
    # happen in the order: three draws, then three formatting calls.
    raw_dims = [random.randint(2, 40) for _ in range(3)]
    ln, wd, ht = [to_text(value) for value in raw_dims]

    question_pool = (
        f"What is the lateral surface area of a cuboid with length {ln}, width {wd}, and height {ht} (excluding top and bottom)?",
        f"Calculate the side surface area of a cuboid with dimensions {ln} × {wd} × {ht}, not including its top and bottom.",
        f"Determine the lateral area of a cuboid with length {ln}, width {wd}, and height {ht}, excluding the base faces.",
        f"Find the area of the vertical faces of a cuboid with dimensions {ln}, {wd}, and {ht} (excluding top and bottom).",
        f"Compute the lateral surface area of a cuboid by calculating 2 * {ht} * ({ln} + {wd}).",
        f"If a cuboid has dimensions {ln}, {wd}, and {ht}, what is the total area of its four vertical faces?",
        f"Determine the side (lateral) surface area of a cuboid with length {ln}, width {wd}, and height {ht}.",
        f"Find the vertical surface area of a cuboid with dimensions {ln} × {wd} × {ht} (excluding the top and bottom faces).",
        f"Calculate the sum of the areas of the four vertical faces of a cuboid with dimensions {ln}, {wd}, and {ht}.",
        f"Express the lateral surface area as 2 × {ht} × ({ln} + {wd}) for a cuboid with dimensions {ln}, {wd}, and {ht}.",
        f"Determine the cuboid’s side area by computing 2 * {ht} * ({ln} + {wd}).",
        # 2h(l + w) is a sum of face AREAS, not of edge lengths, so the
        # wording refers to the four vertical faces.
        f"Find the lateral area by adding the areas of the four vertical faces: 2 * {ht} * ({ln} + {wd}).",
        f"Compute the vertical area of a cuboid using the formula: lateral area = 2h(l + w) for dimensions {ln}, {wd}, {ht}.",
        f"Determine the side surface area of a cuboid by calculating 2 * {ht} * ({ln} + {wd}).",
        f"Calculate the lateral (vertical) surface area of a cuboid with dimensions {ln}, {wd}, and {ht} using the formula 2h(l+w).",
    )
    explanation_pool = (
        "To find the lateral surface area of a cuboid, use the formula: lateral area = 2 * height * (length + width).",
        "Multiply the height by the sum of length and width, then double it to get the lateral area.",
        "Remember: lateral surface area = 2h(l+w) for a cuboid.",
        "Calculate the side area by using the formula 2 * (length + width) * height.",
        "Use the formula: lateral area = 2h(l+w) without computing the numeric value.",
        "Determine the vertical area by multiplying the height by the sum of the length and width, then doubling the result.",
        "Apply the formula 2h(l+w) to compute the lateral surface area conceptually.",
        "To solve, add the length and width, multiply by the height, then multiply by 2.",
        "Compute the lateral area using the relationship: 2 * height * (length + width).",
        "Remember that the side surface area of a cuboid is given by 2h(l+w).",
        "Multiply the height by (length + width) and then by 2 to obtain the lateral surface area.",
        "Determine the lateral area using the formula: lateral area = 2h(l+w) without numerical evaluation.",
        "Use the expression 2h(l+w) to compute the side area of the cuboid.",
        "Calculate the vertical surface area by applying the formula 2 * height * (length + width).",
        "Apply the standard formula for lateral area of a cuboid: 2h(l+w), conceptually.",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}


# -------------------------
# Cylinder
# -------------------------
def generate_cylinder_volume_problem():
    """Build one randomized cylinder volume question/explanation pair.

    An integer radius from 2..15 and an integer height from 2..40 are drawn
    and passed through the callable returned by
    ``get_representation_function()`` before being substituted into the
    question wording. The explanation wording is formula-only.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    # Both raw values are drawn before either is formatted.
    radius = random.randint(2, 15)
    height = random.randint(2, 40)
    rad = to_text(radius)
    ht = to_text(height)

    question_pool = (
        f"What is the volume of a cylinder with radius {rad} and height {ht}?",
        f"Calculate the volume of a cylinder given a radius of {rad} and a height of {ht}.",
        f"Determine the volume for a cylinder with radius {rad} and height {ht}.",
        f"Find the volume of a cylinder where the radius is {rad} and the height is {ht}.",
        f"Compute the volume of a cylinder with dimensions radius {rad} and height {ht}.",
        f"If a cylinder has a radius of {rad} and height {ht}, what is its volume?",
        f"Determine the capacity of a cylinder with radius {rad} and height {ht}.",
        f"Find the internal volume of a cylinder with radius {rad} and height {ht}.",
        f"Calculate the cylinder’s volume using its radius {rad} and height {ht}.",
        f"Express the volume as π * (radius)² * height for a cylinder with r = {rad} and h = {ht}.",
        f"Determine the volume by computing π * {rad}² * {ht} conceptually.",
        f"Find the volume using the formula: volume = πr²h with r = {rad} and h = {ht}.",
        "Compute the volume by applying the standard cylinder formula without numerical evaluation.",
        f"Determine the space inside the cylinder using π * (r)² * h for r = {rad} and h = {ht}.",
        f"Express the cylinder's volume as the product of π, the square of {rad}, and {ht}.",
    )
    explanation_pool = (
        "Volume of a cylinder is given by π * (radius)² * height; use this formula without computing the number.",
        "Square the radius, multiply by the height and by π to get the volume.",
        "Remember the formula: volume = πr²h.",
        "Compute the volume conceptually by applying π times the square of the radius times the height.",
        "Use the cylinder volume formula without revealing the final numeric value.",
        "Multiply the square of the radius by the height and then by π to determine the volume.",
        "Volume is obtained by the product: π * r² * h.",
        "Apply the standard formula for a cylinder’s volume: πr²h, without numerical evaluation.",
        "Remember that volume equals π times the square of the radius times the height.",
        "To solve, use the formula: volume = π * (radius)² * height.",
        "Determine the internal space by multiplying π, the square of the radius, and the height.",
        "Compute the volume by following the formula for a cylinder without calculating the actual number.",
        "Use the relationship: volume = πr²h, conceptually.",
        "Multiply π by the square of the radius and the height to get the volume, without evaluation.",
        "Apply the concept of cylinder volume without computing the numeric result.",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

def generate_cylinder_surface_area_problem():
    """Build one randomized cylinder total-surface-area pair.

    An integer radius from 2..15 and an integer height from 2..40 are drawn
    and passed through the callable returned by
    ``get_representation_function()`` before being substituted into the
    question wording. The explanation wording is formula-only
    (2πrh + 2πr²).

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    # Both raw values are drawn before either is formatted.
    radius = random.randint(2, 15)
    height = random.randint(2, 40)
    rad = to_text(radius)
    ht = to_text(height)

    question_pool = (
        f"What is the total surface area of a cylinder with radius {rad} and height {ht}?",
        f"Calculate the complete surface area of a cylinder given a radius of {rad} and a height of {ht}.",
        f"Determine the total surface area for a cylinder with radius {rad} and height {ht}.",
        f"Find the surface area of a cylinder where the radius is {rad} and the height is {ht}.",
        f"Compute the total area covering all faces of a cylinder with dimensions radius {rad} and height {ht}.",
        f"If a cylinder has a radius of {rad} and height {ht}, what is its total surface area?",
        f"Determine the boundary area of a cylinder with radius {rad} and height {ht}.",
        f"Find out the surface area of a cylinder with radius {rad} and height {ht}.",
        f"Calculate the exterior area of a cylinder using its radius {rad} and height {ht}.",
        f"Express the surface area as the sum of the lateral area and the areas of the two bases for a cylinder with r = {rad} and h = {ht}.",
        f"Determine the total area using the formula: 2πrh + 2πr² for a cylinder with radius {rad} and height {ht}.",
        "Compute the surface area by adding the lateral area and the base areas conceptually.",
        "Find the complete surface area of a cylinder using its dimensions without evaluating numerically.",
        f"Calculate the total exterior area using the standard cylinder surface area formula for r = {rad} and h = {ht}.",
        "Express the cylinder's total surface area as 2πrh + 2πr².",
    )
    explanation_pool = (
        "To find the total surface area, add the lateral area (2πrh) to the area of the two bases (2πr²).",
        "Remember the formula: total surface area = 2πrh + 2πr².",
        "Compute the area by calculating the curved surface and the areas of both circular bases.",
        "Use the formula 2πrh + 2πr² to determine the total surface area conceptually.",
        "Multiply the circumference of the base by the height for the lateral area and add twice the area of the base.",
        "Determine the area by summing the lateral area and the base areas using the given formula.",
        "Apply the formula for total surface area of a cylinder without revealing the computed number.",
        "Remember that total surface area = lateral area + 2*(base area).",
        "Calculate the total area by adding the curved area (2πrh) and the area of two circles (2πr²).",
        "Use the relationship: total area = 2πrh + 2πr², without numerical evaluation.",
        "Compute the total surface area by following the standard formula conceptually.",
        "Determine the full exterior area using the formula: 2πrh + 2πr².",
        "To solve, calculate the lateral area and add the area of two bases, as per the formula.",
        "Apply the standard cylinder surface area formula without computing the final result.",
        "Use the formula for a cylinder's total area conceptually: 2πrh + 2πr².",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

def generate_cylinder_lateral_surface_area_problem():
    """Build one randomized cylinder lateral (curved) surface-area pair.

    An integer radius from 2..15 and an integer height from 2..40 are drawn
    and passed through the callable returned by
    ``get_representation_function()`` before being substituted into the
    question wording. The explanation wording is formula-only (2πrh).

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    # Both raw values are drawn before either is formatted.
    radius = random.randint(2, 15)
    height = random.randint(2, 40)
    rad = to_text(radius)
    ht = to_text(height)

    question_pool = (
        f"What is the lateral (curved) surface area of a cylinder with radius {rad} and height {ht}?",
        f"Calculate the curved surface area of a cylinder given a radius of {rad} and a height of {ht} (exclude the bases).",
        f"Determine the lateral surface area for a cylinder with radius {rad} and height {ht}.",
        f"Find the area of the curved side of a cylinder where the radius is {rad} and the height is {ht}.",
        f"Compute the lateral area of a cylinder (excluding the top and bottom) with radius {rad} and height {ht}.",
        f"If a cylinder has a radius {rad} and height {ht}, what is its curved surface area?",
        f"Determine the area of the cylindrical side (lateral surface area) for a cylinder with radius {rad} and height {ht}.",
        f"Find out the curved area of a cylinder with radius {rad} and height {ht}, ignoring the bases.",
        f"Calculate the curved surface area using the formula 2πrh for a cylinder with radius {rad} and height {ht}.",
        f"Express the lateral surface area as 2π × {rad} × {ht} for a cylinder.",
        f"Determine the curved area by applying the formula: lateral area = 2πrh with r = {rad} and h = {ht}.",
        "Compute the area of just the side of a cylinder by calculating 2πrh conceptually.",
        "Find the curved area of a cylinder using the relationship: lateral area = 2πrh.",
        f"Calculate the lateral surface area by multiplying 2, π, the radius {rad}, and the height {ht}.",
        f"Determine the curved surface area of a cylinder with radius {rad} and height {ht} using 2πrh.",
    )
    explanation_pool = (
        "To find the lateral surface area, use the formula: lateral area = 2πrh.",
        "Remember that the curved surface area is given by 2π times the radius times the height.",
        "Multiply 2, π, the radius, and the height to compute the lateral area without numerical evaluation.",
        "Apply the formula 2πrh to determine the curved surface area, excluding the bases.",
        "Determine the lateral area by using 2πrh without computing the number.",
        "Compute the curved area by multiplying the given dimensions with 2π, without evaluation.",
        "Remember: lateral area = 2πrh for a cylinder.",
        "To solve, multiply 2, π, the radius, and the height conceptually.",
        "Apply the relationship: lateral area = 2πrh without numerical computation.",
        "Determine the curved surface area by multiplying the radius, height, and 2π.",
        "Use the formula 2πrh to find the lateral area without revealing the result.",
        "Compute the lateral surface area by following the formula: 2πrh.",
        "Multiply the radius by the height and 2π to obtain the curved area.",
        "To find the lateral area, use the product 2πrh conceptually.",
        "Apply the standard method for lateral area of a cylinder: 2πrh, without evaluation.",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

# -------------------------
# Sphere
# -------------------------
def generate_sphere_volume_problem():
    """Build one randomized sphere volume question/explanation pair.

    An integer radius from 2..15 is drawn and passed through the callable
    returned by ``get_representation_function()`` before being substituted
    into the question wording. The explanation wording is formula-only
    ((4/3)πr³).

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    radius = random.randint(2, 15)
    rad = to_text(radius)

    question_pool = (
        f"What is the volume of a sphere with radius {rad}?",
        f"Calculate the volume of a sphere given a radius of {rad}.",
        f"Determine the volume for a sphere with radius {rad}.",
        f"Find the volume of a sphere where the radius is {rad}.",
        f"Compute the volume of a sphere with a radius of {rad}.",
        f"If a sphere has a radius {rad}, what is its volume?",
        f"Determine the space occupied by a sphere with radius {rad}.",
        f"Find the sphere's volume using its radius {rad}.",
        f"Calculate the volume using the sphere’s radius {rad}.",
        f"Express the sphere’s volume as (4/3)π * {rad}³.",
        f"Determine the volume by computing (4/3)π * {rad}³.",
        f"Find the volume of a sphere by cubing the radius {rad} and multiplying by (4/3)π.",
        f"Compute the volume using the formula: volume = (4/3)πr³ for r = {rad}.",
        f"Express the volume conceptually as (4/3)π times the cube of {rad}.",
        "Calculate the sphere’s volume using the standard formula without numeric evaluation.",
    )
    explanation_pool = (
        "To find the volume of a sphere, use the formula: volume = (4/3)π * (radius)³.",
        "Remember that the volume of a sphere is (4/3)π times the cube of the radius.",
        "Cube the radius and multiply by (4/3)π to get the volume.",
        "Apply the formula (4/3)πr³ without computing the exact value.",
        "Use the sphere volume formula: volume = (4/3)πr³ conceptually.",
        "Multiply (4/3)π by the cube of the radius without numerical evaluation.",
        "Volume equals (4/3) times π times the cube of the radius; do not reveal the result.",
        "To solve, apply the formula: volume = (4/3)π * r³.",
        "Compute the volume by following the sphere formula without numerical computation.",
        "Remember the key formula for a sphere’s volume: (4/3)πr³.",
        "Cube the radius, multiply by π, then by 4/3, without computing the final number.",
        "Apply the standard method for a sphere’s volume without revealing the numeric answer.",
        "Determine the volume conceptually by using (4/3)π times the cube of the radius.",
        "Use the formula (4/3)πr³ to determine the sphere's volume without calculation.",
        "Follow the procedure: cube the radius and multiply by (4/3)π without evaluation.",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

def generate_sphere_surface_area_problem():
    """Build one randomized sphere surface-area question/explanation pair.

    An integer radius from 2..15 is drawn and passed through the callable
    returned by ``get_representation_function()`` before being substituted
    into the question wording. The explanation wording is formula-only
    (4πr²).

    Returns:
        dict: ``{"question": str, "explanation": str}``, each chosen at
        random from its pool of phrasings.
    """
    to_text = get_representation_function()
    radius = random.randint(2, 15)
    rad = to_text(radius)

    question_pool = (
        f"What is the total surface area of a sphere with radius {rad}?",
        f"Calculate the surface area of a sphere given a radius of {rad}.",
        f"Determine the surface area for a sphere with radius {rad}.",
        f"Find the surface area of a sphere where the radius is {rad}.",
        f"Compute the surface area of a sphere with a radius of {rad}.",
        f"If a sphere has a radius {rad}, what is its surface area?",
        f"Determine the area covering a sphere with radius {rad}.",
        f"Find the total exterior area of a sphere with radius {rad}.",
        f"Calculate the sphere's surface area using its radius {rad}.",
        f"Express the surface area as 4π * {rad}².",
        f"Determine the area by computing 4π * {rad}².",
        f"Find the total area of a sphere using the formula 4πr² for radius {rad}.",
        f"Compute the surface area by squaring the radius {rad} and multiplying by 4π.",
        f"Express the sphere’s surface area as 4 times π times {rad} squared.",
        "Calculate the surface area of a sphere using the standard formula without numerical evaluation.",
    )
    explanation_pool = (
        "To find the surface area of a sphere, use the formula: area = 4π * (radius)².",
        "Surface area is computed as 4 times π times the square of the radius.",
        "Remember the formula: surface area = 4πr².",
        "Square the radius and multiply by 4π to determine the area, without computing the number.",
        "Use the sphere surface area formula without revealing the numeric result.",
        "Determine the area by applying 4π multiplied by the square of the radius.",
        "Apply the standard formula: surface area = 4π * r².",
        "To solve, square the given radius and multiply by 4π conceptually.",
        "Follow the method: area = 4πr², without performing the multiplication.",
        "Calculate the sphere’s surface area using the formula without numerical evaluation.",
        "Remember that surface area = 4π times the square of the radius.",
        "Use the relationship: area = 4πr², conceptually.",
        "Compute the area by following the sphere surface area formula, leaving it in formula form.",
        "Square the radius and multiply by 4π to find the surface area without evaluation.",
        "Apply the formula for a sphere’s surface area: 4π * (radius)², without revealing the answer.",
    )

    # The question is picked before the explanation.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

# -------------------------
# Cone
# -------------------------
def generate_cone_volume_problem():
    """Build one random cone-volume question with a formula-only hint.

    A radius in 2..15 and a height in 2..40 (both inclusive) are drawn, passed
    through the callable returned by ``get_representation_function()``, and
    substituted into the question templates. The hint templates only describe
    the (1/3)πr²h method and never contain the drawn values.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each picked at random
        from its own pool.
    """
    fmt = get_representation_function()
    radius_value = random.randint(2, 15)
    height_value = random.randint(2, 40)
    rad = fmt(radius_value)
    hgt = fmt(height_value)

    # NOTE(review): two of the questions below do not mention the drawn
    # dimensions at all; kept as-is to preserve the generated text.
    question_pool = (
        f"What is the volume of a cone with radius {rad} and height {hgt}?",
        f"Calculate the volume of a cone given a radius of {rad} and a height of {hgt}.",
        f"Determine the volume for a cone with radius {rad} and height {hgt}.",
        f"Find the volume of a cone where the radius is {rad} and the height is {hgt}.",
        f"Compute the volume of a cone with dimensions radius {rad} and height {hgt}.",
        f"If a cone has a radius of {rad} and height {hgt}, what is its volume?",
        f"Determine the capacity of a cone with radius {rad} and height {hgt}.",
        f"Find the volume of a cone using its radius {rad} and height {hgt}.",
        f"Calculate the cone’s volume using the formula (1/3)πr²h for r = {rad} and h = {hgt}.",
        f"Express the volume as one third of π * {rad}² * {hgt}.",
        f"Determine the volume by computing (1/3)π * {rad}² * {hgt}.",
        f"Find the volume by applying the cone volume formula with r = {rad} and h = {hgt}.",
        "Compute the volume conceptually using (1/3)πr²h without revealing the numeric result.",
        f"Apply the standard cone volume formula without computing the final number for r = {rad} and h = {hgt}.",
        "Calculate the volume of a cone by cubing nothing but using (1/3)πr²h conceptually.",
    )
    hint_pool = (
        "To find the volume of a cone, use the formula: volume = (1/3)π * (radius)² * height.",
        "Remember: the volume of a cone is one third that of a cylinder with the same base and height.",
        "Cube not the radius, but square the radius, multiply by height and π, then divide by 3.",
        "Apply the formula for cone volume: (1/3)πr²h without computing the exact value.",
        "Use the cone volume formula without revealing the numerical result.",
        "Multiply the square of the radius by the height and π, then divide by 3 conceptually.",
        "Volume equals (1/3)πr²h; apply that formula without evaluation.",
        "To solve, use the formula: volume = (1/3)π * (radius)² * height.",
        "Compute the volume by following the cone formula without numerical computation.",
        "Remember the key formula for a cone’s volume: (1/3)πr²h.",
        "Square the radius, multiply by the height and π, then divide by 3 without revealing the result.",
        "Apply the standard cone volume formula conceptually: (1/3)πr²h.",
        "Determine the volume using (1/3)πr²h without performing the multiplication.",
        "Use the method: cube not the radius, but square it, then multiply by height, then by π, then divide by 3.",
        "Follow the procedure for a cone’s volume: (1/3)πr²h, without evaluating numerically.",
    )

    # The question is drawn before the explanation, matching dict-literal order.
    question = random.choice(question_pool)
    explanation = random.choice(hint_pool)
    return {"question": question, "explanation": explanation}

def generate_cone_surface_area_problem():
    """Build one random cone total-surface-area question with a formula-only hint.

    A radius in 2..15 and a height in 2..40 (both inclusive) are drawn, passed
    through the callable returned by ``get_representation_function()``, and
    substituted into the question templates. The hints describe the
    πr(r + l) / base-plus-lateral method without using the drawn values.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each picked at random
        from its own pool.
    """
    fmt = get_representation_function()
    radius_value = random.randint(2, 15)
    height_value = random.randint(2, 40)
    rad = fmt(radius_value)
    hgt = fmt(height_value)

    # NOTE(review): several questions below omit one or both dimensions;
    # kept as-is to preserve the generated text.
    question_pool = (
        f"What is the total surface area of a cone with radius {rad} and height {hgt}?",
        f"Calculate the surface area of a cone given a radius of {rad} and a height of {hgt}.",
        f"Determine the surface area for a cone with radius {rad} and height {hgt}.",
        f"Find the surface area of a cone where the radius is {rad} and the height is {hgt}.",
        f"Compute the total surface area of a cone with dimensions radius {rad} and height {hgt}.",
        f"If a cone has a radius of {rad} and height {hgt}, what is its total surface area?",
        f"Determine the area covering a cone with radius {rad} and height {hgt} (including the base).",
        f"Find out the total surface area of a cone with radius {rad} and height {hgt} including its base.",
        "Calculate the cone’s total surface area using its dimensions, where the formula is base area + lateral area.",
        f"Express the total surface area as the sum of the base area (π{rad}²) and the lateral area for a cone with r = {rad} and h = {hgt}.",
        "Determine the area covering the entire cone using the standard formula for total surface area.",
        f"Compute the total area by adding the area of the circular base and the lateral surface area for a cone with radius {rad} and height {hgt}.",
        f"Find the total surface area using the formula: area = πr(r + l) for a cone with r = {rad} and l being the slant height.",
        "Calculate the total area covering a cone (including base) conceptually using its dimensions.",
        "Determine the cone's total surface area by applying the standard formula without revealing the number.",
    )
    hint_pool = (
        "To find the total surface area of a cone, add the lateral area to the base area using: area = πr(r + l), where l is the slant height.",
        "Remember: the total surface area equals the lateral surface area plus the area of the base.",
        "Compute the total area by finding the slant height and applying πr(r + l) without numerical evaluation.",
        "Use the formula: total area = πr(r + l) for a cone without computing the final value.",
        "Determine the area by adding the base area (πr²) to the lateral area (πrl), conceptually.",
        "Apply the standard cone surface area formula without revealing the computed number.",
        "Calculate the total area by first finding the slant height then using πr(r+l).",
        "To solve, remember that total surface area = base area + lateral area.",
        "Determine the area by computing πr(r + l) without calculating the slant height numerically.",
        "Remember that the cone’s total area is the sum of its base and lateral areas, given by πr(r+l).",
        "Use the relationship: total area = πr(r+l) conceptually.",
        "Compute the total surface area by adding the area of the base to the lateral area.",
        "Apply the formula for a cone's total surface area without performing the multiplication.",
        "Determine the cone’s area by using πr(r+l) in formula form.",
        "Follow the method: calculate the base area and lateral area, then sum them using the standard formula.",
    )

    # The question is drawn before the explanation, matching dict-literal order.
    question = random.choice(question_pool)
    explanation = random.choice(hint_pool)
    return {"question": question, "explanation": explanation}

def generate_cone_lateral_surface_area_problem():
    """Build one random cone lateral-surface-area question with a formula-only hint.

    A radius in 2..15 and a height in 2..40 (both inclusive) are drawn, passed
    through the callable returned by ``get_representation_function()``, and
    substituted into the question templates. The hints describe the πr * l
    method (l being the slant height) without using the drawn values.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each picked at random
        from its own pool.
    """
    fmt = get_representation_function()
    radius_value = random.randint(2, 15)
    height_value = random.randint(2, 40)
    rad = fmt(radius_value)
    hgt = fmt(height_value)

    question_pool = (
        f"What is the lateral surface area of a cone with radius {rad} and height {hgt}?",
        f"Calculate the curved (lateral) surface area of a cone given a radius of {rad} and height {hgt} (exclude the base).",
        f"Determine the lateral area for a cone with radius {rad} and height {hgt}.",
        f"Find the curved surface area of a cone where the radius is {rad} and the height is {hgt}, not including the base.",
        f"Compute the lateral surface area of a cone with dimensions radius {rad} and height {hgt} (excluding the base).",
        f"If a cone has a radius {rad} and height {hgt}, what is its lateral surface area?",
        f"Determine the area of the cone’s side (lateral area) with radius {rad} and height {hgt}.",
        f"Find the curved area of a cone with a radius of {rad} and height {hgt}, excluding the base.",
        f"Calculate the lateral surface area using the formula πr * l for a cone with radius {rad} and height {hgt} (l is the slant height).",
        f"Express the lateral surface area as π * {rad} * l (with l as the slant height) for a cone with radius {rad} and height {hgt}.",
        "Determine the curved surface area by conceptually using the formula: lateral area = πr * l.",
        f"Compute the lateral area of a cone by multiplying π, the radius {rad}, and the slant height (computed from r and h) without numerical evaluation.",
        "Find the cone's lateral surface area by applying the formula πr(l) where l is derived from r and h.",
        f"Calculate the curved surface area (excluding the base) of a cone with radius {rad} and height {hgt} using its standard formula.",
        f"Determine the lateral area of a cone by using π times the radius {rad} times the slant height (l) without computing l.",
    )
    hint_pool = (
        "To find the lateral surface area of a cone, use the formula: lateral area = πr * l, where l is the slant height determined from r and h.",
        "Remember that the cone's curved surface area is given by π times the radius times the slant height.",
        "Compute the lateral area by multiplying π, the radius, and the slant height (found via the Pythagorean theorem) without numerical evaluation.",
        "Use the formula πr(l) to determine the lateral area of the cone (excluding the base) without computing the exact value.",
        "Determine the lateral area by first finding the slant height and then multiplying by π and the radius.",
        "Apply the cone’s lateral surface area formula without computing the actual number.",
        "Multiply π, the given radius, and the slant height (derived from r and h) to obtain the lateral area conceptually.",
        "To solve, use the relationship: lateral area = πr * l, where l is computed from the radius and height.",
        "Remember: the lateral surface area of a cone equals π times the radius times the slant height.",
        "Determine the curved area by applying πr(l) without revealing numerical details.",
        "Compute the cone’s lateral area by following the standard formula without evaluating it.",
        "Use the concept of finding the slant height first, then multiply by π and the radius to get the lateral area.",
        "Apply the method for a cone’s lateral surface area: calculate l and then use πr * l.",
        "Find the lateral surface area using the formula πr * l without calculating l numerically.",
        "Follow the procedure for a cone’s lateral area using πr(l) without computing the final number.",
    )

    # The question is drawn before the explanation, matching dict-literal order.
    question = random.choice(question_pool)
    explanation = random.choice(hint_pool)
    return {"question": question, "explanation": explanation}

# -------------------------
# Hemisphere
# -------------------------
def generate_hemisphere_volume_problem():
    """Build one random hemisphere-volume question with a formula-only hint.

    A radius in 2..15 (inclusive) is drawn, passed through the callable
    returned by ``get_representation_function()``, and substituted into the
    question templates. The hints describe the (2/3)πr³ method and never
    contain the drawn value.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each picked at random
        from its own pool.
    """
    rep = get_representation_function()
    radius = random.randint(2, 15)
    r = rep(radius)
    q_templates = [
        f"What is the volume of a hemisphere with radius {r}?",
        f"Calculate the volume of a hemisphere given a radius of {r}.",
        f"Determine the volume for a hemisphere with radius {r}.",
        f"Find the volume of a hemisphere where the radius is {r}.",
        f"Compute the volume of a hemisphere with a radius of {r}.",
        f"If a hemisphere has a radius {r}, what is its volume?",
        f"Determine the space occupied by a hemisphere with radius {r}.",
        f"Find the hemisphere's volume using its radius {r}.",
        f"Calculate the volume using the hemisphere’s radius {r}.",
        f"Express the volume of a hemisphere as (2/3)π * {r}³.",
        f"Determine the volume by computing (2/3)π * {r}³ for a hemisphere.",
        f"Find the volume of a hemisphere by cubing the radius {r} and multiplying by (2/3)π.",
        f"Compute the volume using the formula: volume = (2/3)πr³ for r = {r}.",
        f"Express the volume conceptually as (2/3)π times the cube of {r}.",
        "Calculate the hemisphere’s volume using the standard formula without numerical evaluation."
    ]
    e_templates = [
        "To find the volume of a hemisphere, use the formula: volume = (2/3)π * (radius)³.",
        # Fixed: the old text said "(2/3) of the volume of a full sphere", which
        # is false. A hemisphere is half of a sphere: (1/2)(4/3)πr³ = (2/3)πr³.
        "Remember: the volume of a hemisphere is half the volume of a full sphere, which gives (2/3)πr³.",
        "Cube the radius and multiply by (2/3)π to get the volume, without computing the number.",
        "Apply the formula (2/3)πr³ to determine the hemisphere’s volume without numerical evaluation.",
        "Use the hemisphere volume formula: volume = (2/3)πr³ conceptually.",
        "Multiply the cube of the radius by (2/3)π without revealing the numeric result.",
        "Volume equals (2/3)π times the cube of the radius; use that process without evaluation.",
        "To solve, use the formula: volume = (2/3)π * r³ for a hemisphere.",
        "Compute the volume by following the hemisphere formula without calculating the exact value.",
        "Remember the key formula: volume = (2/3)πr³ for a hemisphere.",
        "Cube the radius, multiply by π, then by 2/3, without computing the final number.",
        "Apply the standard method for a hemisphere’s volume without revealing the numeric answer.",
        "Determine the volume conceptually using (2/3)πr³ without performing the multiplication.",
        "Use the formula for a hemisphere's volume and leave it in symbolic form.",
        "Follow the procedure for a hemisphere’s volume: cube the radius and multiply by (2/3)π, without evaluation."
    ]
    return {"question": random.choice(q_templates), "explanation": random.choice(e_templates)}

def generate_hemisphere_surface_area_problem():
    """Build one random hemisphere total-surface-area question (base included).

    A radius in 2..15 (inclusive) is drawn, passed through the callable
    returned by ``get_representation_function()``, and substituted into the
    question templates. The hints describe the 3πr² method and never contain
    the drawn value.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each picked at random
        from its own pool.
    """
    fmt = get_representation_function()
    radius_value = random.randint(2, 15)
    rad = fmt(radius_value)

    question_pool = (
        f"What is the total surface area of a hemisphere with radius {rad} (including its base)?",
        f"Calculate the complete surface area of a hemisphere given a radius of {rad}, including the circular base.",
        f"Determine the total surface area for a hemisphere with radius {rad} when the base is included.",
        f"Find the overall surface area of a hemisphere with a radius of {rad}, counting its flat base.",
        f"Compute the total area covering a hemisphere with radius {rad} (base included).",
        f"If a hemisphere has a radius of {rad}, what is its total surface area, including the base?",
        f"Determine the full exterior area of a hemisphere with radius {rad}, considering the base.",
        f"Find the complete surface area of a hemisphere (with base) where the radius is {rad}.",
        f"Calculate the hemisphere’s total surface area (including base) using its radius {rad}.",
        f"Express the total surface area as 3π * {rad}² for a hemisphere with radius {rad}.",
        f"Determine the area covering both the dome and the base of a hemisphere with radius {rad}.",
        f"Compute the complete surface area of a hemisphere with radius {rad} by applying the standard formula.",
        f"Find the total area (dome plus base) of a hemisphere with radius {rad} without computing the number.",
        f"Express the overall surface area of a hemisphere as 3π * (radius)² with radius {rad}.",
        f"Calculate the full surface area of a hemisphere by using the relationship 3πr² for radius {rad}.",
    )
    hint_pool = (
        "To find the total surface area (including the base) of a hemisphere, use the formula: area = 3π * (radius)².",
        "Remember that the total surface area of a hemisphere equals the area of its curved part plus the area of the base, which sums to 3πr².",
        "Compute the total area by squaring the radius and multiplying by 3π, without revealing the numeric result.",
        "Use the formula: total surface area = 3πr² for a hemisphere (base included) without calculating the number.",
        "Determine the full surface area by applying 3π times the square of the radius conceptually.",
        "Apply the formula 3π * (radius)² to get the complete exterior area of the hemisphere, including its base.",
        "Remember: total surface area = curved area (2πr²) + base area (πr²) = 3πr².",
        "To solve, square the radius and multiply by 3π conceptually.",
        "Compute the total surface area using the relationship: area = 3πr², without numerical evaluation.",
        "Use the method: total area = 3π * (radius)² without revealing the computed value.",
        "Determine the complete exterior area by applying the standard hemisphere formula: 3πr².",
        "Remember that the hemisphere's total surface area (including base) equals 3π times the radius squared.",
        "Calculate the full surface area by squaring the radius and multiplying by 3π, leaving it in formula form.",
        "Apply the concept that total surface area = 3πr² for a hemisphere, without numerical calculation.",
        "Use the formula for complete surface area: 3π * (radius)², without performing the multiplication.",
    )

    # The question is drawn before the explanation, matching dict-literal order.
    question = random.choice(question_pool)
    explanation = random.choice(hint_pool)
    return {"question": question, "explanation": explanation}

def generate_hemisphere_curved_surface_area_problem():
    """Build one random hemisphere curved-surface-area question (base excluded).

    A radius in 2..15 (inclusive) is drawn, passed through the callable
    returned by ``get_representation_function()``, and substituted into the
    question templates. The hints describe the 2πr² method and never contain
    the drawn value.

    Returns:
        dict: ``{"question": str, "explanation": str}``, each picked at random
        from its own pool.
    """
    fmt = get_representation_function()
    radius_value = random.randint(2, 15)
    rad = fmt(radius_value)

    question_pool = (
        f"What is the curved (dome) surface area of a hemisphere with radius {rad} (excluding the base)?",
        f"Calculate the curved surface area of a hemisphere given a radius of {rad}, not including the flat circular base.",
        f"Determine the lateral area for a hemisphere with radius {rad} (exclude the base area).",
        f"Find the curved (dome) area of a hemisphere where the radius is {rad}, ignoring the base.",
        f"Compute the curved surface area of a hemisphere with radius {rad} (do not include the base).",
        f"If a hemisphere has a radius {rad}, what is its curved surface area (excluding the base)?",
        f"Determine the dome area of a hemisphere with radius {rad} without counting the circular base.",
        f"Find out the curved area of a hemisphere with radius {rad}, excluding the base.",
        f"Calculate the lateral (curved) surface area of a hemisphere with radius {rad} using the appropriate formula.",
        f"Express the curved surface area as 2π * {rad}² for a hemisphere with radius {rad}.",
        f"Determine the area of the curved portion of a hemisphere with radius {rad} (exclude the base).",
        f"Compute the dome area using the formula for a hemisphere’s curved surface area with radius {rad}.",
        f"Find the lateral area (curved) of a hemisphere with radius {rad} by applying the formula without numerical evaluation.",
        f"Calculate the curved surface area of a hemisphere with radius {rad} using the standard relationship.",
        f"Determine the curved area of a hemisphere (excluding its base) using the formula 2πr² for radius {rad}.",
    )
    hint_pool = (
        "To find the curved surface area (excluding the base) of a hemisphere, use the formula: area = 2π * (radius)².",
        "Remember: the curved (or lateral) surface area of a hemisphere equals 2πr².",
        "Compute the curved area by squaring the radius and multiplying by 2π, without revealing the number.",
        "Use the formula 2πr² to determine the dome area of a hemisphere (base excluded) without numerical evaluation.",
        "Determine the curved area by applying 2π multiplied by the square of the radius, conceptually.",
        "Apply the standard formula: curved area = 2π * (radius)² for a hemisphere (excluding the base).",
        "To solve, square the radius and multiply by 2π to get the curved surface area conceptually.",
        "Remember that the lateral (curved) area of a hemisphere (without the base) is given by 2πr².",
        "Compute the dome area by using 2πr² without numerical computation.",
        "Use the relationship: curved area = 2π * (radius)², leaving it in formula form.",
        "Determine the hemisphere's curved area (excluding the base) using the formula 2πr² without revealing the computed value.",
        "To find the curved surface area, multiply 2π by the square of the radius conceptually.",
        "Apply the method for a hemisphere’s curved area: area = 2πr², without calculating it.",
        "Use the formula for curved area (2πr²) without performing the multiplication.",
        "Compute the curved surface area of a hemisphere using 2πr² conceptually.",
    )

    # The question is drawn before the explanation, matching dict-literal order.
    question = random.choice(question_pool)
    explanation = random.choice(hint_pool)
    return {"question": question, "explanation": explanation}

# -------------------------
# Combined Generator
# -------------------------
def generate_surface_volume_problem():
    """
    Produce one problem from a randomly selected shape-specific generator.

    The pool holds the volume, total-surface-area and lateral/curved
    surface-area generators listed below; each entry is equally likely.
    Returns whatever the selected generator returns.
    """
    # NOTE(review): the pool has cuboid volume and cuboid lateral area but no
    # cuboid total-surface-area generator — confirm whether that is intended.
    shape_generators = (
        # Cube
        generate_cube_volume_problem,
        generate_cube_surface_area_problem,
        generate_cube_lateral_surface_area_problem,
        # Rectangular prism
        generate_rect_prism_volume_problem,
        generate_rect_prism_surface_area_problem,
        generate_rect_prism_lateral_surface_area_problem,
        # Cuboid
        generate_cuboid_volume_problem,
        generate_cuboid_lateral_surface_area_problem,
        # Cylinder
        generate_cylinder_volume_problem,
        generate_cylinder_surface_area_problem,
        generate_cylinder_lateral_surface_area_problem,
        # Sphere
        generate_sphere_volume_problem,
        generate_sphere_surface_area_problem,
        # Cone
        generate_cone_volume_problem,
        generate_cone_surface_area_problem,
        generate_cone_lateral_surface_area_problem,
        # Hemisphere: volume, total area (with base), curved area (no base)
        generate_hemisphere_volume_problem,
        generate_hemisphere_surface_area_problem,
        generate_hemisphere_curved_surface_area_problem,
    )
    picked = random.choice(shape_generators)
    return picked()

# -------------------------
# Dataset Merging
# -------------------------
def load_dataset(file_path):
    """Read a JSON document from ``file_path`` and return the decoded object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; without an explicit encoding, open() falls back to the
    # platform locale and can mis-decode characters such as "π" or "²".
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def save_dataset(data, file_path):
    """Write ``data`` to ``file_path`` as JSON indented by two spaces.

    Args:
        data: JSON-serialisable object to store.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` contains objects ``json.dump`` cannot serialise.
    """
    # Pin the encoding so the file does not depend on the platform locale.
    # json.dump escapes non-ASCII characters by default, so the bytes written
    # are the same as before on any ASCII-compatible locale.
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)

# Load the existing dataset (for example, a dataset with 15,000 entries).
# NOTE(review): "expanded_math_dataset.json" is read from the current working
# directory; it is presumably produced by an earlier step — confirm.
existing_data = load_dataset("expanded_math_dataset.json")
print(f"Loaded {len(existing_data)} existing entries.")

# Generate 5,000 new surface area and volume problems.
# NOTE(review): no random.seed(...) call is visible in this cell, so the sample
# presumably differs between runs unless the RNG was seeded earlier — confirm.
new_data = [generate_surface_volume_problem() for _ in range(5000)]

# Append the new entries to the existing dataset. `+` builds a new list, so
# neither `existing_data` nor `new_data` is modified.
combined_data = existing_data + new_data
print(f"Total combined entries: {len(combined_data)}.")

# Save the final combined dataset to a new file (the input file is untouched).
save_dataset(combined_data, "final_expanded_dataset.json")
print("Final expanded dataset saved as 'final_expanded_dataset.json'.")


Loaded 15000 existing entries.
Total combined entries: 20000.
Final expanded dataset saved as 'final_expanded_dataset.json'.


In [5]:
import json
import random
import math

def number_to_words(n):
    """Convert an integer n (0 <= n <= 1000) into its word representation.

    Args:
        n: Integer to spell out.

    Returns:
        str: English words for 0..1000, e.g. "twenty-one", "three hundred",
        "one hundred and fifteen", "one thousand". Any value outside that
        range, negatives included, is returned as its decimal string.
    """
    ones = ["zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
            "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
            "eighty", "ninety"]
    # Out-of-range guard. Negative values previously reached `ones[n]` and were
    # indexed from the end of the list (-1 -> "nineteen") or raised IndexError;
    # they now take the same digit fallback as values above 1000.
    if n < 0 or n > 1000:
        return str(n)
    if n == 1000:
        return "one thousand"
    if n < 20:
        return ones[n]
    if n < 100:
        ten, unit = divmod(n, 10)
        if unit == 0:
            return tens[ten]
        return tens[ten] + "-" + ones[unit]
    hundred, rest = divmod(n, 100)
    if rest == 0:
        return ones[hundred] + " hundred"
    # The remainder is below 100, so the recursion is at most one level deep.
    return ones[hundred] + " hundred and " + number_to_words(rest)

def get_representation_function():
    """Pick a number formatter at random: spelled-out words or decimal digits.

    One ``random.random()`` draw decides the style; values below 0.5 select
    words (via ``number_to_words``), anything else selects ``str``. Reusing the
    returned callable for every number in a problem keeps the style consistent.

    Returns:
        callable: One-argument function mapping a number to its text form.
    """
    use_words = random.random() < 0.5

    if use_words:
        def as_words(n):
            return number_to_words(n)
        return as_words

    def as_digits(n):
        return str(n)
    return as_digits

# -------------------------
# Triangle Area Problem Generator (Diverse)
# -------------------------
def generate_triangle_area_problem_diverse():
    """
    Generates a triangle area problem using one of several methods:
    - Right Triangle: area = 1/2 * base * height.
    - Equilateral Triangle: area = (sqrt(3)/4) * side^2.
    - General Triangle (base and height): area = 1/2 * base * height.
    - Triangle with Heron's formula.
    """
    # Randomly choose a triangle type (0: Right, 1: Equilateral, 2: General, 3: Heron)
    tri_type = random.choice([0, 1, 2, 3])
    rep = get_representation_function()

    if tri_type == 0:
        # Right Triangle: generate base and height
        base = random.randint(5, 50)
        height = random.randint(5, 50)
        b_disp, h_disp = rep(base), rep(height)
        q_templates = [
            f"What is the area of a right triangle with base {b_disp} and height {h_disp}?",
            f"Calculate the area of a right triangle given a base of {b_disp} and a height of {h_disp}.",
            f"Determine the area for a right triangle with base {b_disp} and height {h_disp}.",
            f"Find the area of a right triangle where the base is {b_disp} and the height is {h_disp}.",
            f"Compute the area of a right-angled triangle with base {b_disp} and height {h_disp}.",
            f"If a right triangle has a base of {b_disp} and height of {h_disp}, what is its area?",
            f"Determine the area of a right triangle by using base {b_disp} and height {h_disp}.",
            f"Find the area by applying the formula 1/2 * base * height to a right triangle with base {b_disp} and height {h_disp}.",
            f"Calculate the right triangle's area with base {b_disp} and height {h_disp}.",
            f"Express the area of a right triangle with base {b_disp} and height {h_disp} using 1/2 * base * height.",
            f"Determine the area by multiplying {b_disp} and {h_disp} and dividing by 2.",
            f"Compute the area of a right triangle as half the product of {b_disp} and {h_disp}.",
            f"Find the area using the formula: area = 0.5 * {b_disp} * {h_disp}.",
            f"Calculate the area of a right triangle using base {b_disp} and height {h_disp} with the formula 1/2 * base * height.",
            f"Determine the area of a right triangle by taking half the product of its base {b_disp} and height {h_disp}."
        ]
        e_templates = [
            f"Multiply the base by the height and then divide by 2.",
            f"Use the formula: area = 1/2 * base * height for a right triangle.",
            f"Compute the area by calculating half of {b_disp} multiplied by {h_disp}.",
            f"Remember that for a right triangle, area = 0.5 × base × height.",
            f"To find the area, multiply {b_disp} and {h_disp} and then take half of the result.",
            f"Apply the formula: area = 1/2 * (base × height).",
            f"Calculate the area by halving the product of the base and height.",
            f"Determine the area using the expression: 0.5 * {b_disp} * {h_disp}.",
            f"Find the area of a right triangle by multiplying the base and height, then dividing by 2.",
            f"Compute the area using the standard right triangle formula.",
            f"Area = 1/2 × base × height; here, base = {b_disp} and height = {h_disp}.",
            f"Multiply {b_disp} by {h_disp} and then divide by 2 to obtain the area.",
            f"Use half the product of the base and height to determine the area.",
            f"Determine the area with the formula 0.5*(base*height).",
            f"Calculate the area by taking half of the product of the given base and height."
        ]
    elif tri_type == 1:
        # Equilateral Triangle: generate side
        side = random.randint(5, 50)
        s_disp = rep(side)
        q_templates = [
            f"What is the area of an equilateral triangle with side {s_disp}?",
            f"Calculate the area of an equilateral triangle where each side is {s_disp}.",
            f"Determine the area for an equilateral triangle with side length {s_disp}.",
            f"Find the area of an equilateral triangle having a side of {s_disp}.",
            f"Compute the area of an equilateral triangle with side {s_disp}.",
            f"If an equilateral triangle has a side of {s_disp}, what is its area?",
            f"Determine the area of an equilateral triangle using the formula specific to it, with side {s_disp}.",
            f"Find the area by using the formula: area = (√3/4)*({s_disp})² for an equilateral triangle.",
            f"Calculate the area of an equilateral triangle by squaring the side {s_disp} and multiplying by √3/4.",
            f"Express the area as (√3/4) * {s_disp}² for an equilateral triangle with side {s_disp}.",
            f"Determine the area using the equilateral triangle formula for a side of {s_disp}.",
            f"Find the area of an equilateral triangle using the formula: area = (sqrt(3)/4)*side² with side = {s_disp}.",
            f"Compute the area of an equilateral triangle by applying (√3/4) to the square of {s_disp}.",
            f"Express the area of the triangle as (sqrt(3)/4) multiplied by {s_disp}².",
            f"Calculate the area of an equilateral triangle with side {s_disp} using its standard formula."
        ]
        e_templates = [
            f"For an equilateral triangle, square the side and multiply by √3, then divide by 4.",
            f"Use the formula: area = (√3/4) * (side)² for an equilateral triangle.",
            f"Calculate the area by taking the square of {s_disp}, multiplying by √3, and dividing by 4.",
            f"Remember: the area = (sqrt(3)/4)*side² for an equilateral triangle.",
            f"To find the area, compute (sqrt(3)/4) times {s_disp} squared.",
            f"Apply the equilateral triangle formula: area = (√3/4)*({s_disp})².",
            f"Square the side {s_disp}, multiply by √3, and then divide by 4 to get the area.",
            f"Determine the area using the formula (√3/4)*side².",
            f"Compute the area by using the equilateral triangle formula without numerical evaluation.",
            f"Area = (sqrt(3)/4) * side²; here, side = {s_disp}.",
            f"Multiply {s_disp}² by √3/4 to determine the area.",
            f"Use the relationship for equilateral triangles: area = (sqrt(3)/4) * (side squared).",
            f"Apply the formula for an equilateral triangle’s area without calculating the actual number.",
            f"Remember that the area is obtained by (√3/4) multiplied by the square of the side.",
            f"Determine the area of an equilateral triangle using the formula (√3/4)*side²."
        ]
    elif tri_type == 2:
        # General Triangle (using base & height) - similar to right triangle but with different phrasing
        base = random.randint(5, 50)
        height = random.randint(5, 50)
        b_disp, h_disp = rep(base), rep(height)
        q_templates = [
            f"What is the area of a triangle with base {b_disp} and corresponding height {h_disp}?",
            f"Calculate the area of a triangle given a base of {b_disp} and its height {h_disp}.",
            f"Determine the area for a triangle with base {b_disp} and height {h_disp}.",
            f"Find the area of a triangle where the base is {b_disp} and the height is {h_disp}.",
            f"Compute the area using the formula: area = 1/2 * base * height, with base {b_disp} and height {h_disp}.",
            f"If a triangle has a base of {b_disp} and a height of {h_disp}, what is its area?",
            f"Determine the area by multiplying {b_disp} and {h_disp} then halving the result.",
            f"Find the area by using 0.5 * {b_disp} * {h_disp}.",
            f"Calculate the triangle's area with base {b_disp} and height {h_disp}.",
            f"Express the area as 1/2 multiplied by {b_disp} and {h_disp}.",
            f"Determine the area using the relation: area = 0.5 * base * height, where base = {b_disp} and height = {h_disp}.",
            f"Find the area by computing half of the product of {b_disp} and {h_disp}.",
            f"Compute the area of a triangle as 1/2 * {b_disp} * {h_disp}.",
            f"Determine the area by taking half the product of its base {b_disp} and height {h_disp}.",
            f"Calculate the triangle's area using the formula: area = 0.5 * (base * height) with values {b_disp} and {h_disp}."
        ]
        e_templates = [
            f"Multiply the base by the height and divide by 2 to get the area.",
            f"Use the formula: area = 1/2 * base * height.",
            f"Calculate the area by taking half of the product of the base and height.",
            f"To find the area, multiply {b_disp} by {h_disp} and then divide by 2.",
            f"Remember that the area of any triangle is 0.5 times base times height.",
            f"Determine the area using the expression: 0.5 * (base × height).",
            f"Compute the area by halving the product of the base and height.",
            f"Use the relation: area = 0.5 * base * height for the triangle.",
            f"Multiply {b_disp} and {h_disp} and then take half to obtain the area.",
            f"Apply the formula for triangle area without computing the numeric value.",
            f"Area = 1/2 × base × height; here, base = {b_disp} and height = {h_disp}.",
            f"Determine the area by calculating half of {b_disp} multiplied by {h_disp}.",
            f"Compute the area using the standard triangle formula.",
            f"To solve, use 0.5 * base * height where base is {b_disp} and height is {h_disp}.",
            f"Calculate the area as half the product of {b_disp} and {h_disp}."
        ]
    else:
        # Triangle using Heron's formula
        # Generate three sides a, b, c satisfying triangle inequality.
        a = random.randint(5, 50)
        b = random.randint(5, 50)
        # For c, choose a random integer between |a-b|+1 and a+b-1
        c = random.randint(abs(a - b) + 1, a + b - 1)
        a_disp, b_disp, c_disp = rep(a), rep(b), rep(c)
        s = (a + b + c) / 2  # semiperimeter; not used in text, just for conceptual reference
        q_templates = [
            f"What is the area of a triangle with sides {a_disp}, {b_disp}, and {c_disp}?",
            f"Calculate the area of a triangle given its side lengths are {a_disp}, {b_disp}, and {c_disp}.",
            f"Determine the area for a triangle with sides {a_disp}, {b_disp}, and {c_disp} using Heron's formula.",
            f"Find the area of a triangle where the sides measure {a_disp}, {b_disp}, and {c_disp}.",
            f"Compute the area of a triangle with side lengths {a_disp}, {b_disp}, and {c_disp} using the semiperimeter method.",
            f"If a triangle has sides {a_disp}, {b_disp}, and {c_disp}, what is its area (use Heron’s formula)?",
            f"Determine the area of a triangle with sides {a_disp}, {b_disp}, and {c_disp} by first computing the semiperimeter.",
            f"Find the area of a triangle given the three sides: {a_disp}, {b_disp}, and {c_disp}, without computing the final number.",
            f"Calculate the triangle’s area using Heron’s formula for sides {a_disp}, {b_disp}, and {c_disp}.",
            f"Express the area of a triangle with sides {a_disp}, {b_disp}, and {c_disp} using the semiperimeter method.",
            f"Determine the area by applying Heron’s formula to a triangle with side lengths {a_disp}, {b_disp}, and {c_disp}.",
            f"Find the area of a triangle with sides {a_disp}, {b_disp}, and {c_disp} by first calculating the semiperimeter and then the area.",
            f"Compute the area using the concept: area = sqrt(s*(s-a)*(s-b)*(s-c)) for a triangle with sides {a_disp}, {b_disp}, and {c_disp}.",
            f"Determine the triangle's area using Heron’s formula without revealing the numerical result for sides {a_disp}, {b_disp}, and {c_disp}.",
            f"Calculate the area of a triangle with side lengths {a_disp}, {b_disp}, and {c_disp} using the standard Heron’s method."
        ]
        e_templates = [
            f"To find the area using Heron's formula, first compute the semiperimeter s = (a+b+c)/2, then calculate sqrt(s(s-a)(s-b)(s-c)).",
            f"Remember: for a triangle with sides a, b, and c, area = sqrt(s*(s-a)*(s-b)*(s-c)) where s is the semiperimeter.",
            f"Use Heron's formula: first compute s = (a+b+c)/2, then take the square root of s*(s-a)*(s-b)*(s-c).",
            f"Calculate the area by applying Heron’s formula without performing the actual numeric computation.",
            f"Determine the area by first finding the semiperimeter and then using the formula: area = sqrt(s(s-a)(s-b)(s-c)).",
            f"Apply Heron’s formula conceptually: compute s and then the square root of the product s(s-a)(s-b)(s-c).",
            f"Remember that the area of a triangle can be determined by Heron’s formula, which uses all three side lengths.",
            f"To solve, first compute the semiperimeter, then apply Heron’s formula without revealing the computed value.",
            f"Use the method of Heron’s formula to determine the area without numerical evaluation.",
            f"Find the area by computing the semiperimeter and then the square root of the product s(s-a)(s-b)(s-c).",
            f"Heron’s formula gives the area as sqrt(s*(s-a)*(s-b)*(s-c)); use that method conceptually.",
            f"Compute the area by following the steps of Heron’s formula without performing the final multiplication.",
            f"Determine the area using the triangle’s side lengths and Heron’s formula, leaving the answer in symbolic form.",
            f"Apply the standard Heron’s formula: first find s, then compute the square root of s(s-a)(s-b)(s-c).",
            f"Remember: area = sqrt(s*(s-a)*(s-b)*(s-c)); use this relationship for a triangle with sides {a_disp}, {b_disp}, and {c_disp}."
        ]
    # Randomly select a template from the chosen type
    if tri_type in [0, 1, 2]:
        question = random.choice(q_templates)
        explanation = random.choice(e_templates)
    else:
        question = random.choice(q_templates)
        explanation = random.choice(e_templates)
    
    return {"question": question, "explanation": explanation}

import json
import random

# Assume generate_triangle_area_problem_diverse() is defined elsewhere
# and returns a dictionary with "question" and "explanation" keys.

def generate_triangle_area_dataset(num_samples=5000):
    """
    Build a list of triangle-area problems.

    Calls generate_triangle_area_problem_diverse() once per requested sample
    and collects the results in call order.

    Args:
        num_samples: how many problems to generate (default 5000).

    Returns:
        A list with one entry per call to the problem generator.
    """
    return [generate_triangle_area_problem_diverse() for _ in range(num_samples)]

if __name__ == "__main__":
    # The merged result is written to a different file than the one that is read.
    # Both paths are named once so the final status message always reports the
    # file that is actually written (the old message named the input file instead).
    existing_dataset_path = "final_expanded_dataset.json"
    merged_dataset_path = "final_dataset.json"

    # Load the existing dataset
    with open(existing_dataset_path, "r") as f:
        existing_data = json.load(f)
    print(f"Existing entries: {len(existing_data)}")

    # Generate 5000 new diverse triangle area problems
    new_triangle_data = generate_triangle_area_dataset(5000)
    print(f"New triangle area entries: {len(new_triangle_data)}")

    # Append the new triangle problems to the existing dataset
    combined_data = existing_data + new_triangle_data
    print(f"Total entries after merge: {len(combined_data)}")

    # Save the combined dataset to the merged output file
    with open(merged_dataset_path, "w") as f:
        json.dump(combined_data, f, indent=2)
    print(f"New triangle area problems have been merged and saved to {merged_dataset_path}.")

    


Existing entries: 20000
New triangle area entries: 5000
Total entries after merge: 25000
New triangle area problems have been added to final_expanded_dataset.json.


In [7]:
import json
import random

# -------------------------
# Helper Functions
# -------------------------
def number_to_words(n):
    """
    Convert an integer n (0 <= n <= 1000) into its English word representation.

    For example, 23 -> 'twenty-three' and 342 -> 'three hundred and forty-two'.

    Integers outside the supported range (negative, or greater than 1000) are
    returned as their plain digit string instead, e.g. -5 -> '-5', 1001 -> '1001'.
    """
    ones = ["zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
            "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
            "eighty", "ninety"]

    if n < 0 or n > 1000:
        # Out of range. Negatives must be rejected up front: otherwise they
        # would pass the `n < 20` test below and index `ones` from the end
        # (e.g. -1 -> 'nineteen') or raise IndexError.
        return str(n)
    if n == 1000:
        return "one thousand"
    if n < 20:
        return ones[n]
    if n < 100:
        ten, unit = divmod(n, 10)
        if unit == 0:
            return tens[ten]
        return tens[ten] + "-" + ones[unit]

    # 100..999: "<digit> hundred", optionally followed by "and <remainder>".
    hundreds, remainder = divmod(n, 100)
    if remainder == 0:
        return ones[hundreds] + " hundred"
    return ones[hundreds] + " hundred and " + number_to_words(remainder)

def get_representation_function():
    """
    Choose at random how numbers are rendered and return the matching converter.

    One call to random.random() decides the style: below 0.5 the returned
    callable spells numbers out via number_to_words, otherwise it returns
    their str() digits. Calling this once per problem and reusing the result
    keeps every number in that problem in the same style.
    """
    def as_words(n):
        return number_to_words(n)

    def as_digits(n):
        return str(n)

    if random.random() < 0.5:
        return as_words
    return as_digits

# -------------------------
# LCM Problem Generator
# -------------------------
def generate_lcm_problem():
    """
    Create one least-common-multiple word problem.

    Two integers are drawn with random.randint(2, 100) and rendered with the
    converter returned by get_representation_function(), so both numbers share
    one style. A question template and an explanation template are then picked
    independently at random.

    Returns:
        dict with keys "question" (mentions both numbers) and "explanation"
        (generic method text; no numbers are substituted into it).
    """
    render = get_representation_function()
    first_num = random.randint(2, 100)
    second_num = random.randint(2, 100)
    first_txt, second_txt = render(first_num), render(second_num)

    question_pool = (
        f"What is the least common multiple (LCM) of {first_txt} and {second_txt}?",
        f"Find the LCM of {first_txt} and {second_txt}.",
        f"Compute the least common multiple of {first_txt} and {second_txt}.",
        f"Determine the LCM for the numbers {first_txt} and {second_txt}.",
        f"Calculate the least common multiple of {first_txt} and {second_txt}.",
        f"Find the smallest number that is a multiple of both {first_txt} and {second_txt}.",
        f"What is the LCM of the numbers {first_txt} and {second_txt}?",
        f"Determine the least common multiple for {first_txt} and {second_txt}.",
        f"Find the least common multiple of {first_txt} and {second_txt} using any method.",
        f"Calculate the LCM of {first_txt} and {second_txt}.",
        f"What is the smallest common multiple of {first_txt} and {second_txt}?",
        f"Determine the smallest number that both {first_txt} and {second_txt} divide into evenly.",
        f"Find the LCM for {first_txt} and {second_txt}.",
        f"Compute the smallest number divisible by both {first_txt} and {second_txt}.",
        f"What is the least common multiple of {first_txt} and {second_txt}?",
    )
    # Explanations are fixed text, so plain string literals are enough.
    explanation_pool = (
        "The LCM is the smallest number that is a multiple of both given numbers.",
        "To determine the LCM, list multiples of each number and choose the smallest common one.",
        "One method is to use the formula: LCM(a, b) = (a * b) / GCD(a, b) without computing the final number.",
        "Find the LCM by identifying the smallest number that both numbers divide evenly.",
        "The least common multiple is the smallest common multiple of the two numbers.",
        "List the multiples of each number and pick the smallest that appears in both lists.",
        "LCM is found by comparing the multiples of both numbers and selecting the smallest match.",
        "You can also calculate the LCM by using the relationship between the product and the GCD.",
        "The LCM is the minimal number into which both numbers divide exactly.",
        "Determine the LCM by analyzing the prime factors and taking the highest powers.",
        "Find the smallest number that is a multiple of both numbers.",
        "To solve, list the multiples and choose the first common one.",
        "Use the method of prime factorization to determine the LCM conceptually.",
        "Calculate the LCM by ensuring all prime factors are represented at their maximum occurrence.",
        "Determine the least common multiple by considering the factors of both numbers.",
    )

    # Question is drawn before explanation, matching the original draw order.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

# -------------------------
# HCF (GCD) Problem Generator
# -------------------------
def generate_hcf_problem():
    """
    Create one highest-common-factor (GCD) word problem.

    Two integers are drawn with random.randint(2, 100) and rendered with the
    converter returned by get_representation_function(), so both numbers share
    one style. A question template and an explanation template are then picked
    independently at random.

    Returns:
        dict with keys "question" (mentions both numbers) and "explanation"
        (generic method text; no numbers are substituted into it).
    """
    render = get_representation_function()
    first_num = random.randint(2, 100)
    second_num = random.randint(2, 100)
    first_txt, second_txt = render(first_num), render(second_num)

    question_pool = (
        f"What is the highest common factor (HCF) of {first_txt} and {second_txt}?",
        f"Find the HCF of {first_txt} and {second_txt}.",
        f"Compute the greatest common divisor (GCD) of {first_txt} and {second_txt}.",
        f"Determine the HCF for the numbers {first_txt} and {second_txt}.",
        f"Calculate the highest common factor of {first_txt} and {second_txt}.",
        f"Find the greatest common divisor of {first_txt} and {second_txt}.",
        f"What is the GCD of the numbers {first_txt} and {second_txt}?",
        f"Determine the highest common factor for {first_txt} and {second_txt}.",
        f"Find the HCF of {first_txt} and {second_txt} using any method.",
        f"Calculate the GCD of {first_txt} and {second_txt}.",
        f"What is the greatest common divisor of {first_txt} and {second_txt}?",
        f"Determine the largest number that divides both {first_txt} and {second_txt} evenly.",
        f"Find the HCF for {first_txt} and {second_txt}.",
        f"Compute the highest common factor of {first_txt} and {second_txt}.",
        f"What is the greatest common factor of {first_txt} and {second_txt}?",
    )
    # Explanations are fixed text, so plain string literals are enough.
    explanation_pool = (
        "The HCF (or GCD) is the largest number that divides both numbers without a remainder.",
        "To find the HCF, list the factors of each number and choose the largest common one.",
        "Use the Euclidean algorithm to determine the HCF conceptually.",
        "The greatest common divisor is the largest number that divides both numbers evenly.",
        "Determine the HCF by comparing the factors of both numbers.",
        "Identify the largest factor common to both numbers without computing it fully.",
        "You can use the Euclidean algorithm or list factors to find the HCF.",
        "The HCF is the maximal number that divides both numbers exactly.",
        "Find the HCF by determining the greatest divisor common to both numbers.",
        "To compute the HCF, list all factors and select the largest one common to both.",
        "Remember: the highest common factor is the greatest number that divides both numbers.",
        "Determine the HCF using the division method conceptually.",
        "Compute the HCF by repeatedly applying the Euclidean algorithm.",
        "Identify the largest number that can evenly divide both given numbers.",
        "Use prime factorization or the Euclidean algorithm to determine the HCF conceptually.",
    )

    # Question is drawn before explanation, matching the original draw order.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

# -------------------------
# Combined LCM & HCF Problem Generator
# -------------------------
def generate_lcm_hcf_problem():
    """
    Create one word problem that asks for both the LCM and the HCF of two numbers.

    Two integers are drawn with random.randint(2, 100) and rendered with the
    converter returned by get_representation_function(), so both numbers share
    one style. A question template and an explanation template are then picked
    independently at random.

    Returns:
        dict with keys "question" (mentions both numbers) and "explanation"
        (generic method text; no numbers are substituted into it).
    """
    render = get_representation_function()
    first_num = random.randint(2, 100)
    second_num = random.randint(2, 100)
    first_txt, second_txt = render(first_num), render(second_num)

    question_pool = (
        f"Find both the LCM and HCF of {first_txt} and {second_txt}.",
        f"Calculate the least common multiple and highest common factor of {first_txt} and {second_txt}.",
        f"Determine the LCM and HCF for the numbers {first_txt} and {second_txt}.",
        f"What are the LCM and GCD of {first_txt} and {second_txt}?",
        f"Find the least common multiple and greatest common divisor of {first_txt} and {second_txt}.",
        f"Compute both the LCM and HCF of {first_txt} and {second_txt}.",
        f"Determine the smallest common multiple and largest common factor for {first_txt} and {second_txt}.",
        f"Find the LCM and GCD for the numbers {first_txt} and {second_txt}.",
        f"Calculate the LCM and HCF of {first_txt} and {second_txt} using any method.",
        f"Determine the least common multiple and highest common factor of {first_txt} and {second_txt}.",
        f"Find both the smallest number that is a multiple of {first_txt} and {second_txt} and the largest number that divides them.",
        f"Compute the LCM and GCD for {first_txt} and {second_txt}.",
        f"What are the least common multiple and greatest common divisor of {first_txt} and {second_txt}?",
        f"Determine both the LCM and HCF of {first_txt} and {second_txt} using the appropriate formulas.",
        f"Find the LCM and HCF for {first_txt} and {second_txt}.",
    )
    # Explanations are fixed text, so plain string literals are enough.
    explanation_pool = (
        "The LCM is the smallest number that is a multiple of both numbers, while the HCF is the largest number that divides both evenly.",
        "To solve, first find the LCM by listing multiples and then the HCF by listing factors or using the Euclidean algorithm.",
        "Use the relationship LCM(a, b) = (a * b) / GCD(a, b) along with the Euclidean algorithm to compute the HCF.",
        "Determine the LCM by finding the smallest common multiple and the HCF by finding the largest common factor.",
        "Find the LCM as the minimal common multiple and the HCF as the maximal common divisor of the numbers.",
        "One method is to compute the HCF (using the Euclidean algorithm) and then find the LCM with the formula: LCM = (a*b)/HCF.",
        "First determine the HCF, then calculate the LCM as the product divided by the HCF.",
        "List the multiples and factors of the numbers conceptually to identify the LCM and HCF.",
        "Apply the formulas for LCM and HCF without computing the final numeric values.",
        "The LCM is the smallest number divisible by both numbers, and the HCF is the largest number that divides them.",
        "Use prime factorization to determine the HCF and then use the relationship LCM * HCF = a * b.",
        "Determine the least common multiple and greatest common divisor using standard mathematical methods.",
        "Find the LCM by comparing multiples and the HCF by comparing factors or applying the Euclidean method.",
        "Use the relationship LCM * HCF = a * b to conceptually determine both values.",
        "Find both the LCM and HCF by applying the methods for each without revealing the computed results.",
    )

    # Question is drawn before explanation, matching the original draw order.
    question = random.choice(question_pool)
    explanation = random.choice(explanation_pool)
    return {"question": question, "explanation": explanation}

# -------------------------
# Combined Dataset Generator for LCM and HCF Problems
# -------------------------
def generate_lcm_hcf_dataset(num_samples=5000):
    """
    Build a mixed list of LCM, HCF and combined LCM/HCF problems.

    For every sample one of the three problem generators is chosen with
    random.choice and called; the results are collected in generation order.

    Args:
        num_samples: how many problems to generate (default 5000).

    Returns:
        A list holding the value returned by each generator call.
    """
    def draw_problem():
        # Pick the problem type first, then let that generator build the entry.
        make_problem = random.choice(
            [generate_lcm_problem, generate_hcf_problem, generate_lcm_hcf_problem]
        )
        return make_problem()

    return [draw_problem() for _ in range(num_samples)]

if __name__ == "__main__":
    # Read the dataset built so far from "final_dataset.json".
    with open("final_dataset.json") as f:
        existing_data = json.load(f)
    print("Existing entries:", len(existing_data))

    # Create 5,000 fresh LCM/HCF problems.
    new_data = generate_lcm_hcf_dataset(num_samples=5000)
    print("New LCM/HCF entries:", len(new_data))

    # Concatenate the previously saved entries with the new ones.
    combined_data = existing_data + new_data
    print("Total entries after appending:", len(combined_data))

    # Write the merged result to "dataset_30000.json".
    with open("dataset_30000.json", mode="w") as f:
        json.dump(combined_data, f, indent=2)

    print("Final dataset saved as 'dataset_30000.json'.")

Existing entries: 25000
New LCM/HCF entries: 5000
Total entries after appending: 30000
Final dataset saved as 'dataset_30000.json'.
