In [None]:
import json
from copy import deepcopy
import random

from typing import List, Dict

from tqdm import tqdm
import ipywidgets as widgets
from IPython.display import Markdown, display, clear_output

In [None]:
def str_of_row(row):
    """Format a record as a labelled INPUT/OUTPUT text block.

    Args:
        row: Mapping with string values under the ``"input"`` and
            ``"output"`` keys.

    Returns:
        The input under an ``---INPUT:`` header followed by the output under
        an ``---OUTPUT:`` header, each separated by blank lines.
    """
    pieces = (
        "---INPUT:\n\n",
        row["input"],
        "\n\n---OUTPUT:\n\n",
        row["output"],
    )
    # str.join, like ``+``, rejects non-string values with a TypeError.
    return "".join(pieces)

In [None]:
def display_item(data, index=0):
    """Render one record of ``data`` with Previous/Next buttons for browsing.

    The cell output is cleared and redrawn on the initial call and on every
    button click. Each redraw shows the navigation buttons, the record
    formatted by ``str_of_row``, its ``meta.id``, the position as
    ``index/len(data)`` (``index`` is zero-based), its ``meta.category`` and,
    when the record has one, its ``"raw"`` value.

    Navigation redraws through a local closure instead of calling
    ``display_item`` by name. Button callbacks run long after this cell was
    executed, and this notebook later rebinds the module-level name
    ``display_item`` to a different function; a name-based recursive call
    would then send clicks to that other function.

    Args:
        data: Sequence of dict records. Each record needs ``"input"`` and
            ``"output"`` (used by ``str_of_row``) and a ``"meta"`` mapping
            with ``"id"`` and ``"category"``; ``"raw"`` is optional.
        index: Position of the record to show first.

    Returns:
        None. The function only produces notebook output.
    """

    def render(position):
        # Redraw the whole view for the record at ``position``.
        clear_output(wait=True)
        item = data[position]
        text_display = Markdown(str_of_row(item))

        next_button = widgets.Button(description="Next")
        prev_button = widgets.Button(description="Previous")

        def navigate(step):
            # Clamp to the valid range so the buttons stop at either end.
            render(min(max(0, position + step), len(data) - 1))

        next_button.on_click(lambda b: navigate(1))
        prev_button.on_click(lambda b: navigate(-1))

        display(widgets.HBox([prev_button, next_button]))
        display(text_display)
        display(Markdown(f"ID: {item['meta']['id']}"))
        display(Markdown(f"{position}/{len(data)}"))
        display(Markdown(f"Category: {item['meta']['category']}"))
        if "raw" in item:
            display(item["raw"])

    if len(data) == 0:
        # Nothing to index into; say so instead of failing with an IndexError.
        clear_output(wait=True)
        display(Markdown("No items to display."))
        return

    render(index)

# Prefiltered data

In [None]:
# Load the prefiltered records (one JSON document per line) and open the viewer on the first.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform locale, and
# blank lines are skipped rather than handed to json.loads, which would reject them.
with open("khan-prefiltered/prefiltered.jsonl", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]
# display_item has no return value, so `nav` is None; the name is kept only so that any
# later reference to it still resolves.
nav = display_item(data, index=0)

# GPT-3.5 cleaned data

In [None]:
# This repo is not set up as a package, so the `data_gui` module is loaded by name at run time.
# Note: the assignment below rebinds `display_item` to the data_gui version for every cell
# that runs after this one.
import importlib

data_gui = importlib.import_module("data_gui")
display_item = data_gui.display_item

In [None]:
# Load the cleaned records (one JSON document per line). The encoding is pinned to UTF-8 so
# decoding does not depend on the platform locale, and blank lines are skipped rather than
# handed to json.loads, which would reject them.
with open("khan-clean/clean.jsonl", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Add two flattened fields to every record, in place:
#   "text" - the question and solution joined under their headers
#   "id"   - a top-level copy of meta.id
for row in data:
    row["text"] = (
        "---Question:\n\n" + row["input"] + "\n\n---Solution:\n\n" + row["output"]
    )
    row["id"] = row["meta"]["id"]


In [None]:

# Open the viewer on the cleaned records, starting at index 0. At this point `display_item`
# refers to the function taken from the data_gui module, not the one defined in this notebook.
# NOTE(review): the YAML path is presumably where data_gui reads/writes judgements - confirm
# against data_gui.display_item.
display_item(data, "khan-judgements/judgements.yaml", index=0)