In [179]:
from datasets import load_dataset
from IPython.display import display, Markdown
import re
import random
random.seed(420)

import ipywidgets as widgets
from IPython.display import Markdown, display, clear_output

In [180]:
def remove_boxed(text):
    pattern = r'\\boxed{(.*)}'
    return re.sub(pattern, r'\1', text)

def fix_align(text):
    pattern = r'\\begin{align\*}(.*?)\\end{align\*}'
    text = re.sub(
        pattern, 
        '\n$$\n\\\\begin{align*}\t\\1\\\\end{align*}\n$$\n', 
        text, flags=re.DOTALL
    )
    
    return text

def convert_latex(text):
    # Convert display math mode
    text = re.sub(r'\\\[\s*(.*?)\s*\\\]', '\n$$\n\t\\1\n$$\n', text, flags=re.DOTALL)

    # Convert inline math mode
    text = re.sub(r'\\\((.*?)\\\)', r'$\1$', text, flags=re.DOTALL)

    text = re.sub(r'\$\$\$\$', r'$$\n$$', text)

    return text

fix = lambda x: convert_latex(fix_align(remove_boxed(x)))

def blacklist(x):
    if "[asy]" in x["solution"]:
        return True
    elif "eqnarray" in x["solution"]:
        return True
    elif "tabular" in x["solution"]:
        return True
    else:
        return False

In [181]:
math = load_dataset("hendrycks/competition_math")

Found cached dataset competition_math (/home/zhangir/.cache/huggingface/datasets/hendrycks___competition_math/default/1.0.0/52c6a268ae72ef772498d27551a3f682dac50cd8befddd0326d758cb6908b5f0)


  0%|          | 0/2 [00:00<?, ?it/s]

In [182]:
print(math["train"][0].keys())

dict_keys(['problem', 'level', 'type', 'solution'])


In [183]:
data = [{
    "input": x["problem"], 
    "output": fix(x["solution"]),
    "meta": {"level": x["level"], "type": x["type"], "id": 10**8+i}
    }
    for i, x in enumerate(math["train"]) if not blacklist(x)
]

In [184]:
from collections import Counter

levels = [x["meta"]["level"] for x in data]

levels_count = Counter(levels)

print(levels_count)

Counter({'Level 5': 1888, 'Level 4': 1517, 'Level 3': 1463, 'Level 2': 1276, 'Level 1': 544, 'Level ?': 1})


In [185]:
def str_of_row(x):
    return f"---INPUT: {x['input']}\n\n---OUTPUT: {x['output']}"

In [186]:
def display_item(data, index=0):
    clear_output(wait=True)
    item = data[index]
    text_display = Markdown(str_of_row(item))

    # Creating the buttons
    next_button = widgets.Button(description="Next")
    prev_button = widgets.Button(description="Previous")

    # Navigate through the dataset
    def navigate(step):
        nonlocal index
        index = min(max(0, index + step), len(data) - 1)
        display_item(data, index)

    next_button.on_click(lambda b: navigate(1))
    prev_button.on_click(lambda b: navigate(-1))

    # Displaying the components
    button_box = widgets.HBox([prev_button, next_button])
    display(button_box)
    display(text_display)
    display(Markdown(f"ID: {item['meta']['id']}"))
    display(Markdown(f"{index}/{len(data)}"))
    display(Markdown(f"Category: {item['meta']['type']}"))
    if "raw" in item:
        display(item["raw"])

In [187]:
display_item(data, index=0)

HBox(children=(Button(description='Previous', style=ButtonStyle()), Button(description='Next', style=ButtonSty…

---INPUT: Let \[f(x) = \left\{
\begin{array}{cl} ax+3, &\text{ if }x>2, \\
x-5 &\text{ if } -2 \le x \le 2, \\
2x-b &\text{ if } x <-2.
\end{array}
\right.\]Find $a+b$ if the piecewise function is continuous (which means that its graph can be drawn without lifting your pencil from the paper).

---OUTPUT: For the piecewise function to be continuous, the cases must "meet" at $2$ and $-2$. For example, $ax+3$ and $x-5$ must be equal when $x=2$. This implies $a(2)+3=2-5$, which we solve to get $2a=-6 \Rightarrow a=-3$. Similarly, $x-5$ and $2x-b$ must be equal when $x=-2$. Substituting, we get $-2-5=2(-2)-b$, which implies $b=3$. So $a+b=-3+3=0$.

ID: 100000000

0/6689

Category: Algebra

In [188]:
level_counts = {'Level 5': 300, 'Level 4': 165, 'Level 3': 150}

data_by_level = dict()
for k,v in level_counts.items():
    level_data = [x for x in data if x['meta']['level']==k]
    data_by_level[k] = random.sample(level_data, v)

In [None]:
display_item(data_by_level['Level 4'], index=159)

HBox(children=(Button(description='Previous', style=ButtonStyle()), Button(description='Next', style=ButtonSty…

---INPUT: Compute the sum of the geometric series $-1 + 2 - 4 + 8 - \cdots + 512$.

---OUTPUT: The first term is $-1$, the common ratio is $-2$, and there are 10 terms, so the sum equals 
$$
	\frac{(-1)((-2)^{10}-1)}{-2-1} = \frac{-1023}{-3} = 341.
$$


ID: 100001454

164/165

Category: Algebra

In [192]:
print(data_by_level['Level 5'][239]['output'])

Subtract 1 from both sides to obtain $8x \equiv 4 \pmod{12}$. Add 12 to the right-hand side to get $8x \equiv 16 \pmod{12}$. Now divide both sides by 8, remembering to divide 12 by the greatest common factor of 12 and 8, thus obtaining $x \equiv 2\pmod{3}$. This gives $a+m = 2 + 3 = 5}$. Here we have used the fact that $ad \equiv bd\pmod{m}$ if and only if $a\equiv b \pmod{m/\text{gcd}(m,d)$, for integers $m\geq 2$ and $a$, $b$, and $d$.
