In [44]:
from datasets import load_dataset, concatenate_datasets
from collections import Counter


In [45]:
# Load the "train" split of the existing autoformalization dataset at revision V2B5.
ds_autof = load_dataset(
    "AI-MO/AutoformalizationV2B0",
    revision="V2B5",
    split="train",
)
ds_autof

Dataset({
    features: ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source', 'id'],
    num_rows: 44509
})

# Update the annotation dataset

In [46]:
# Fetch the "train" split of numina-math-lean4. No revision is passed, so the
# repository's default revision is used.
ds_numina = load_dataset(
    "AI-MO/numina-math-lean4",
    split="train",
)
ds_numina

Dataset({
    features: ['uuid', 'id', 'problem', 'informal_solution', 'lean4_solution', 'created_at', 'tags', 'source', 'natural_language', 'lean_code', 'theorem_names', 'has_header', 'is_valid_no_sorry', 'is_valid_with_sorry', 'text'],
    num_rows: 1793
})

In [47]:
# Keep only the rows whose `is_valid_with_sorry` flag equals True.
# The explicit comparison is intentional: only values equal to True pass.
ds_numina = ds_numina.filter(
    lambda row: row["is_valid_with_sorry"] == True
)
ds_numina

Dataset({
    features: ['uuid', 'id', 'problem', 'informal_solution', 'lean4_solution', 'created_at', 'tags', 'source', 'natural_language', 'lean_code', 'theorem_names', 'has_header', 'is_valid_no_sorry', 'is_valid_with_sorry', 'text'],
    num_rows: 1693
})

In [48]:
# Keep only the rows that list exactly one theorem name.
ds_numina = ds_numina.filter(
    lambda row: len(row["theorem_names"]) == 1
)
ds_numina

Dataset({
    features: ['uuid', 'id', 'problem', 'informal_solution', 'lean4_solution', 'created_at', 'tags', 'source', 'natural_language', 'lean_code', 'theorem_names', 'has_header', 'is_valid_no_sorry', 'is_valid_with_sorry', 'text'],
    num_rows: 1399
})

In [49]:
# Reduce ds_numina to the five columns below and append an all-None `id`
# column, giving it the same column set that ds_autof displays above.
ds_numina = ds_numina.select_columns(
    ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source']
).add_column('id', [None] * ds_numina.num_rows)

def change_source(example):
    """Prefix the example's ``source`` value with ``"numina:"``.

    The mapping is modified in place and the same object is returned, which is
    the shape ``Dataset.map`` expects from a per-row function.
    """
    tagged_source = "numina:" + example["source"]
    example["source"] = tagged_source
    return example

# Run change_source over every row, using 4 worker processes.
ds_numina = ds_numina.map(
    change_source,
    num_proc=4,
)

Map (num_proc=4):   0%|          | 0/1399 [00:00<?, ? examples/s]

In [50]:
# Show the dataset summary (features and row count) after the column changes.
ds_numina

Dataset({
    features: ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source', 'id'],
    num_rows: 1399
})

In [51]:
# Drop the rows whose source is exactly "numina", then append the freshly
# prepared ds_numina rows to form the new dataset.
ds_autof = ds_autof.filter(
    lambda row: not row["source"] == "numina"
)
ds_autof_new = concatenate_datasets(
    [ds_autof, ds_numina]
)
ds_autof_new

Dataset({
    features: ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source', 'id'],
    num_rows: 45138
})

# Update the PutnamBench data points

In [52]:
# Pull out the rows whose source is exactly "Putnam".
ds_putnam = ds_autof_new.filter(
    lambda row: row["source"] == "Putnam"
)
ds_putnam

Filter:   0%|          | 0/45138 [00:00<?, ? examples/s]

Dataset({
    features: ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source', 'id'],
    num_rows: 640
})

In [53]:
# Tally how many theorem names each Putnam row carries. Reading the single
# column avoids materialising every other field of each row.
ds_putnam_theorem_names = [
    len(names) for names in ds_putnam["theorem_names"]
]
Counter(ds_putnam_theorem_names)

Counter({1: 640})

In [29]:
!git clone https://github.com/project-numina/Putnam.git

Cloning into 'Putnam'...
remote: Enumerating objects: 648, done.[K
remote: Counting objects: 100% (648/648), done.[K
remote: Compressing objects: 100% (646/646), done.[K
remote: Total 648 (delta 0), reused 648 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (648/648), 179.69 KiB | 1.76 MiB/s, done.


In [54]:
# Collect the path of every regular file located directly inside Putnam/Putnam.
from os import listdir
from os.path import isfile, join

path = "Putnam/Putnam"
putnam_files = []
for entry in listdir(path):
    # Sub-directories and other non-file entries are skipped.
    if isfile(join(path, entry)):
        putnam_files.append(path + "/" + entry)
len(putnam_files)


640

In [55]:
def map_putnam(example):
    """Replace a Putnam example's ``lean_code`` with the text of its source file.

    The file is looked up in the module-level ``putnam_files`` list using the
    example's first theorem name:

    1. A file whose base name without extension equals the theorem name is
       preferred, so ``putnam_2006_a1`` can never pick ``putnam_2006_a10.lean``
       regardless of the order of ``putnam_files``.
    2. Otherwise, a file whose base name contains the theorem name is used,
       provided that exactly one such file exists.

    Only the file name is inspected; directory components of the path are
    ignored so they cannot produce accidental matches.

    Args:
        example: Row mapping with a non-empty ``theorem_names`` list. It is
            modified in place.

    Returns:
        The same ``example`` with ``lean_code`` set to the file contents.

    Raises:
        ValueError: If no file matches the theorem name, or if several files
            match only by substring and the choice would be ambiguous.
    """
    # Local import: the notebook imports names from `os`, not the module itself.
    import os

    theorem_name = example["theorem_names"][0]

    exact_matches = [
        file for file in putnam_files
        if os.path.splitext(os.path.basename(file))[0] == theorem_name
    ]
    if exact_matches:
        chosen = exact_matches[0]
    else:
        partial_matches = [
            file for file in putnam_files
            if theorem_name in os.path.basename(file)
        ]
        if not partial_matches:
            raise ValueError(f"Could not find file for theorem {theorem_name}")
        if len(partial_matches) > 1:
            # Picking one silently could attach another theorem's Lean code.
            raise ValueError(
                f"Ambiguous files for theorem {theorem_name}: {partial_matches}"
            )
        chosen = partial_matches[0]

    # Lean sources contain non-ASCII symbols, so do not rely on the locale's
    # default encoding.
    with open(chosen, 'r', encoding='utf-8') as f:
        example["lean_code"] = f.read()

    return example

# Refresh lean_code for every Putnam row from the files on disk (4 workers).
# Caching is disabled on purpose: the `datasets` cache key is derived from the
# mapped function and the values it references, not from the contents of the
# files it reads, so a cached result could hold stale Lean code after the
# Putnam checkout changes.
ds_putnam = ds_putnam.map(map_putnam, num_proc=4, load_from_cache_file=False)
ds_putnam

Map (num_proc=4):   0%|          | 0/640 [00:00<?, ? examples/s]

Dataset({
    features: ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source', 'id'],
    num_rows: 640
})

In [56]:
# Spot check: print the lean_code of the first Putnam row. print() is used so
# the newlines in the string render as separate lines.
print(ds_putnam[0]['lean_code'])

import Mathlib
open BigOperators

-- 6 * Real.pi ^ 2
theorem putnam_2006_a1
: ((MeasureTheory.volume {(x, y, z) : ℝ × ℝ × ℝ | (x ^ 2 + y ^ 2 + z ^ 2 + 8) ^ 2 ≤ 36 * (x ^ 2 + y ^ 2)}).toReal = ((6 * Real.pi ^ 2) : ℝ )) :=
sorry



In [57]:
# Swap in the refreshed Putnam rows: drop every row whose source is "Putnam"
# and append ds_putnam in their place.
ds_autof_new = concatenate_datasets(
    [
        ds_autof_new.filter(lambda row: row["source"] != "Putnam"),
        ds_putnam,
    ]
)
ds_autof_new

Filter:   0%|          | 0/45138 [00:00<?, ? examples/s]

Dataset({
    features: ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source', 'id'],
    num_rows: 45138
})

# Find and remove examples with 2 or more sorries

In [58]:
# Select the rows whose lean_code contains the substring 'sorry' two or more times.
ds_with_2sorry = ds_autof_new.filter(
    lambda row: row["lean_code"].count("sorry") > 1
)
ds_with_2sorry

Filter:   0%|          | 0/45138 [00:00<?, ? examples/s]

Dataset({
    features: ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source', 'id'],
    num_rows: 152
})

In [59]:
# Break the multi-sorry rows down by their source value.
Counter(ds_with_2sorry["source"])

Counter({'compfile': 100,
         'bootstrap_2': 13,
         'bootstrap_4': 10,
         'bootstrap_5': 9,
         'bootstrap_0': 7,
         'bootstrap_6': 6,
         'bootstrap_3': 3,
         'bootstrap_7': 3,
         'bootstrap_1': 1})

In [61]:
# Keep only the rows with at most one occurrence of 'sorry' in lean_code,
# i.e. drop the multi-sorry rows found above (4 worker processes).
ds_autof_new = ds_autof_new.filter(
    lambda row: row["lean_code"].count("sorry") <= 1,
    num_proc=4,
)
ds_autof_new

Filter (num_proc=4):   0%|          | 0/45138 [00:00<?, ? examples/s]

Dataset({
    features: ['natural_language', 'lean_code', 'theorem_names', 'has_header', 'source', 'id'],
    num_rows: 44986
})

In [62]:
# Final composition of the dataset: number of rows per source value.
Counter(ds_autof_new["source"])

Counter({'bootstrap_4': 8499,
         'bootstrap_7': 8090,
         'bootstrap_5': 6429,
         'bootstrap_2': 5009,
         'bootstrap_3': 4835,
         'bootstrap_6': 4331,
         'bootstrap_1': 2709,
         'bootstrap_0': 2087,
         'Putnam': 640,
         'minif2f': 485,
         'proofnet': 371,
         'numina:MATH-train': 346,
         'numina:cnk12': 340,
         'numina:MATH-test': 264,
         'numina:number-theory-books': 177,
         'numina:aops-forum': 106,
         'compfile': 102,
         'numina:aops-wiki': 85,
         'numina:unknown': 51,
         'numina:olympiads-ref': 27,
         'numina:aops': 3})

In [65]:
# Upload the updated dataset to the Hub as a private repository
# (no revision given, so the default branch is the target).
ds_autof_new.push_to_hub(
    "AI-MO/AutoformalizationV3B0",
    private=True,
)


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/45 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/AI-MO/AutoformalizationV3B0/commit/d60e7968c60964b2b065de94a2ad5daa76267e0b', commit_message='Upload dataset', commit_description='', oid='d60e7968c60964b2b065de94a2ad5daa76267e0b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/AI-MO/AutoformalizationV3B0', endpoint='https://huggingface.co', repo_type='dataset', repo_id='AI-MO/AutoformalizationV3B0'), pr_revision=None, pr_num=None)

In [66]:
# Push the same dataset again, this time targeting the "V3B0" revision of the
# private repository.
ds_autof_new.push_to_hub(
    "AI-MO/AutoformalizationV3B0",
    revision="V3B0",
    private=True,
)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/45 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/495 [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/datasets/AI-MO/AutoformalizationV3B0/commit/d60e7968c60964b2b065de94a2ad5daa76267e0b', commit_message='Upload dataset', commit_description='', oid='d60e7968c60964b2b065de94a2ad5daa76267e0b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/AI-MO/AutoformalizationV3B0', endpoint='https://huggingface.co', repo_type='dataset', repo_id='AI-MO/AutoformalizationV3B0'), pr_revision=None, pr_num=None)