In [204]:
import json
from copy import deepcopy
import random

from typing import List, Dict

from tqdm import tqdm
import ipywidgets as widgets
from IPython.display import Markdown, display, clear_output

In [205]:
def str_of_row(row):
    """Format a row's "input" and "output" fields as one labelled text block.

    The result is the literal "---INPUT:" header, the input text, the
    "---OUTPUT:" header and the output text, each separated by a blank line.
    """
    sections = (
        "---INPUT:\n\n",
        row["input"],
        "\n\n---OUTPUT:\n\n",
        row["output"],
    )
    # join (like the + operator) only accepts strings, so non-string fields
    # still fail with TypeError.
    return "".join(sections)

In [206]:
def display_item(data, index=0):
    """Show one row of ``data`` in the notebook with Previous/Next navigation.

    The cell output is cleared and redrawn on every call: a button bar, the
    row rendered as Markdown via ``str_of_row``, then the row's
    ``meta["id"]``, its position, its ``meta["category"]`` and, if the row has
    a ``"raw"`` key, that value as well. Clicking a button redraws the
    neighbouring row; navigation stops at the first and last rows.

    Args:
        data: Indexable collection of row dicts supporting ``len``. Each row
            needs "input", "output" and a "meta" dict with "id" and
            "category".
        index: Position of the row to show. Negative values count from the
            end, as in normal Python indexing; positions past either end are
            clamped to the nearest valid row.

    Returns:
        None. The function only draws output.
    """
    clear_output(wait=True)

    total = len(data)
    if total == 0:
        # Nothing to index into: show a notice instead of raising IndexError
        # after the output has already been cleared.
        display(Markdown("No items to display."))
        return

    # Resolve negative positions to their real offset first, so the position
    # label and the Previous/Next arithmetic agree with the row that is
    # actually shown, then clamp so an out-of-range start cannot raise.
    if index < 0:
        index += total
    index = min(max(0, index), total - 1)

    item = data[index]
    text_display = Markdown(str_of_row(item))

    # Creating the buttons
    next_button = widgets.Button(description="Next")
    prev_button = widgets.Button(description="Previous")

    # Navigate through the dataset
    def navigate(step):
        # len(data) is re-read at click time in case the list changed since
        # this view was drawn; the call below redraws the whole output.
        display_item(data, min(max(0, index + step), len(data) - 1))

    next_button.on_click(lambda b: navigate(1))
    prev_button.on_click(lambda b: navigate(-1))

    # Displaying the components
    button_box = widgets.HBox([prev_button, next_button])
    display(button_box)
    display(text_display)
    display(Markdown(f"ID: {item['meta']['id']}"))
    # The position is 0-based on purpose: it is the value to pass as `index`
    # to reopen the viewer on this row.
    display(Markdown(f"{index}/{len(data)}"))
    display(Markdown(f"Category: {item['meta']['category']}"))
    # Debug aids, disabled:
    # display(f"Raw problem: {item['problem']}")
    # display(f"Raw hints: {item['hints']}")
    if "raw" in item:
        display(item["raw"])

# Prefiltered data

In [None]:
# Read the prefiltered rows (one JSON object per line), then open the viewer
# on the first row.
with open("khan-prefiltered/prefiltered.jsonl") as f:
    data = list(map(json.loads, f))
nav = display_item(data, index=0)

HBox(children=(Button(description='Previous', style=ButtonStyle()), Button(description='Next', style=ButtonSty…

---INPUT:

Factor the following expression: $180x^2 - 125$

---OUTPUT:

We can start by factoring a

${5}$

out of each term:

$ {5}({36x^2} - {25})$

The second term is of the form

${a^2} - {b^2}$

, which is a difference of two squares so we can factor it as

${5}({a} + {b}) ({a} - {b})$

What are the values of

$a$

and

$b$

$ a = \sqrt{36x^2} = 6x$

$ b = \sqrt{25} = 5$

Use the values we found for

$a$

and

$b$

to complete the factored expression,

${5}({a} + {b}) ({a} - {b})$

So we can factor the expression as: ${5}({6x} + {5}) ({6x} - {5})$

ID: 1618055280

1/629

Category: 373

# GPT-3.5 cleaned data

In [234]:
# Load the local data_gui module by name and rebind display_item to its
# implementation; from here on the name no longer refers to the viewer
# defined earlier in this notebook.
import importlib

data_gui = importlib.import_module('data_gui')
display_item = data_gui.display_item

In [244]:
# Dump the judgements file; the output lists one `<id>: true|false` entry per line.
!cat khan-judgements/judgements.yaml

1617960946: true
1618055280: true
1618066684: true
1618096508: true
1618096761: false
1618107624: true
1618156420: true
1618159538: true
1618189993: true
1618197312: false
1618202758: true
1618221119: true
16082493866: false
16082504271: true
16082853522: true
16082973146: false
16083303162: true
16083342301: true
16083346084: true
16083552714: true
16083635182: true
16083806253: true
16085918681: false
16086557877: true
16086936882: false


In [242]:
# Read the cleaned rows, one JSON object per line.
with open("khan-clean/clean.jsonl") as f:
    data = list(map(json.loads, f))

# Give every row two derived fields: a combined question/solution "text"
# and a top-level copy of the id stored under "meta".
for row in data:
    question, solution = row["input"], row["output"]
    row["text"] = "---Question:\n\n" + question + "\n\n---Solution:\n\n" + solution
    row["id"] = row["meta"]["id"]


In [None]:

# Call the display_item rebound from data_gui on the cleaned rows, passing index=25.
# NOTE(review): the second argument is presumably the file where accept/reject
# judgements are read and written — confirm against data_gui.
display_item(data, "khan-judgements/judgements.yaml", index=25)

HBox(children=(Button(description='Accept', style=ButtonStyle()), Button(description='Reject', style=ButtonSty…

---Question:

Write an expression to represent: Eight more than the product of two and a number $x$.



---Solution:

The expression for eight more than the product of two and a number $x$ can be written as $8 + 2x$.

Index: 361/629

Post ID: 16085733204

Accepted: 289, Judged: 361