In [12]:
import os
import os.path as osp
from pathlib import Path
import json
import pickle
import collections as C
import itertools as I

from common.constants import CORE_OPTIONS
from common.utils import remove_comments, remove_spaces, chunk_list
from common.pantograph.dataclasses import ProblemGenerationProcess

# Lean preamble for the exported files: the two imports followed by one
# `set_option` line per entry of CORE_OPTIONS, with every `=` in an entry
# replaced by a space.
set_option_lines = [
    'set_option ' + option.replace('=', ' ')
    for option in CORE_OPTIONS
]
header = ("""
import Mathlib
import Aesop

""" + '\n'.join(set_option_lines)).strip()
print(header)

import Mathlib
import Aesop

set_option maxHeartbeats 0
set_option maxRecDepth 100000
set_option tactic.hygienic false
set_option pp.fullNames true
set_option pp.funBinderTypes true
set_option pp.piBinderTypes true


In [2]:
# Pickle file with the generation results analysed by this notebook.
# The commented-out assignments below are alternative result files; exactly one
# assignment should be active. The directory that contains the file and the one
# above it are later parsed out of this path as the model name and agent name.
# result_pkl_path = '/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_ar_v2/Goedel-Prover-V2-8B.Numina-Lean.problem_generator/problem_generation.20250822-005458.pkl'
# result_pkl_path = '/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_ar_v2/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/problem_generation.20250828-223616.pkl'
# result_pkl_path = '/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_ar_v2/Goedel-Prover-V2-8B.Numina-Lean-reasseblmed.39509.problem_generator.nopack/problem_generation.20250903-221358.pkl'
# result_pkl_path = '/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_wg/Goedel-Prover-V2-8B.Numina-Lean.whole_statement_generatior.nopack/problem_generation.20250828-210118.pkl'
result_pkl_path = '/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_ar_v3/Goedel-Prover-V2-8B.Numina-Lean-reasseblmed.39509+FineLeanCorpus-reasseblmed.82438.problem_generator.nopack/problem_generation.20250906-021846.pkl'

In [3]:
# Split the result path into its components; the third-from-last component is
# taken as the agent name and the second-from-last as the model name.
path_chunks = result_pkl_path.strip('/').split('/')
agent, model_name = path_chunks[-3], path_chunks[-2]
print(agent, model_name)

sft_ar_v3 Goedel-Prover-V2-8B.Numina-Lean-reasseblmed.39509+FineLeanCorpus-reasseblmed.82438.problem_generator.nopack


In [5]:
# with open(result_pkl_path, 'rb') as f:
#     data = pickle.load(f)

# results_all = C.defaultdict(list)
# idx_cnt = C.Counter()
# for ((ptype, source), idx), v in data.items():
#     idx_cnt[idx] += 1
#     results_all[(ptype, source)].append(v)

# The pickle unpacks into a pair: the sampled conditions and the finished
# generation results.
with Path(result_pkl_path).open('rb') as pkl_file:
    conditions_sampled, finished = pickle.load(pkl_file)

In [9]:
# KC_cnt = C.Counter()
# for v in results_all.values():
#     for vv in v:
#         if (vv.formal_solution_draft or '') != '':
#             KC_cnt[len(remove_spaces(remove_comments(vv.formal_solution_draft)))] += 1
# print(sum((k * v) for (k, v) in KC_cnt.items()) / sum(KC_cnt.values()))

# Histogram of solution-draft lengths: for every finished item with a
# non-empty draft, the draft is passed through remove_comments and then
# remove_spaces and the length of the result is tallied.
KC_cnt = C.Counter(
    len(remove_spaces(remove_comments(item.formal_solution_draft)))
    for item in finished
    if (item.formal_solution_draft or '') != ''
)
# Mean length = sum(length * occurrences) / number of tallied drafts.
total_length = sum(length * occurrences for (length, occurrences) in KC_cnt.items())
print(total_length / sum(KC_cnt.values()))

274.141670991178


In [11]:
# Tab-separated summary of the run.
# - "#Proven" counts finished items whose formal_solution_draft is non-empty.
# - "#Typechecked" counts finished items whose formal_statement is non-empty.
# - Token totals are read from each item's JSON `metainfo`; a missing field
#   counts as 0.
# - "Avg. KC@1" is the mean key of KC_cnt weighted by its counts.
drafted_count = sum(1 for item in finished if (item.formal_solution_draft or '') != '')
stated_count = sum(1 for item in finished if (item.formal_statement or '') != '')
# Decode each metainfo once and reuse it for both token totals.
metainfos = [json.loads(item.metainfo) for item in finished]
print('''
Agent\t{}
Model\t{}
#Proven\t{}
#Typechecked\t{}
#All\t{}
#Prompt Tokens\t{}
#Completion Tokens\t{}
Avg. KC@1\t{}
'''.format(
    agent,
    model_name,
    drafted_count,
    stated_count,
    len(finished),
    sum(meta.get('prompt_tokens', 0) for meta in metainfos),
    sum(meta.get('completion_tokens', 0) for meta in metainfos),
    sum((k * v) for (k, v) in KC_cnt.items()) / sum(KC_cnt.values())
))



Agent	sft_ar_v3
Model	Goedel-Prover-V2-8B.Numina-Lean-reasseblmed.39509+FineLeanCorpus-reasseblmed.82438.problem_generator.nopack
#Proven	3854
#Typechecked	3854
#All	5000
#Prompt Tokens	62130198
#Completion Tokens	14437556
Avg. KC@1	274.141670991178



In [26]:
def format_condition(conditions: dict) -> str:
    """Render a condition dict as a one-line, human-readable tag.

    Two schemas are recognised by their keys:

    * ``problem_type`` together with ``source`` (Numina-Lean), e.g. both
      ``'unknown'``.
    * ``domain`` together with ``difficulty`` (FineLeanCorpus).

    Values are interpolated into the tag as-is via f-string formatting.

    Args:
        conditions: mapping holding the keys of one of the schemas above.

    Returns:
        The formatted tag string.

    Raises:
        AssertionError: if the companion key of a schema is missing.
        ValueError: if neither ``problem_type`` nor ``domain`` is present
            (previously this case fell through and returned ``None``).
    """
    if 'problem_type' in conditions:
        assert 'source' in conditions
        return f'Source=NuminaLean, problem_type={conditions["problem_type"]}, source={conditions["source"]}'
    if 'domain' in conditions:
        assert 'difficulty' in conditions
        # The 'FineLeanCorups' spelling in the emitted tag is kept unchanged so
        # that output stays identical to files generated earlier.
        return f'Source=FineLeanCorups, domain={conditions["domain"]}, difficulty={conditions["difficulty"]}'
    # Fail loudly instead of silently returning None for an unknown schema.
    raise ValueError(f'Unrecognised condition keys: {sorted(conditions)}')

In [34]:
# Empty tallies, keyed by `problem_type` and by `difficulty` respectively.
subject_cnt, difficulty_cnt = C.Counter(), C.Counter()

In [35]:
# Export the finished items as Lean files (chunks of 20 items per file) under a
# directory named after the model, and tally the sampled conditions.
assert len(conditions_sampled) == len(finished)
output_lean_root = osp.join('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/results', model_name)
os.makedirs(output_lean_root, exist_ok=True)

# Start from empty tallies so that re-running this cell does not double-count.
subject_cnt = C.Counter()
difficulty_cnt = C.Counter()

# Pair each condition with its result and order the pairs by condition.
results_all = sorted(zip(conditions_sampled, finished), key=lambda x: x[0])
for i_chunk, chunk in chunk_list(results_all, chunksize=20):
    # Explicit UTF-8: the written Lean source may contain non-ASCII symbols and
    # the platform default encoding is not guaranteed to handle them.
    with open(osp.join(output_lean_root, f'{i_chunk}.lean'), 'w', encoding='utf-8') as f:
        f.write(header + '\n\n')
        for (c, d) in chunk:
            # Each condition is an iterable of (key, value) pairs; turn it into
            # a dict. The name `c` is kept because a later cell displays it.
            c = dict(c)
            # Tallies cover every item in the chunk, exported or not.
            if 'problem_type' in c:
                subject_cnt[c['problem_type']] += 1
            if 'difficulty' in c:
                difficulty_cnt[c['difficulty']] += 1
            # Skip items without a solution draft. An empty string is treated
            # like None, matching the non-empty check used by the statistics
            # cells, so no theorem with an empty `by` block is written.
            if (d.formal_solution_draft or '') == '':
                continue
            f.write('-- ' + format_condition(c) + '\n')
            assert d.formal_statement.endswith(':= sorry')
            # Suffix every header line with ' in', then replace the trailing
            # ' sorry' of the statement with ' by' followed by the draft.
            scoped_header = '\n'.join(l + ' in' for l in d.header.splitlines())
            f.write(scoped_header + '\n' + d.formal_statement[:-len(' sorry')] + ' by\n' + d.formal_solution_draft + '\n\n')

In [36]:
# Display how often each `problem_type` value was tallied by the export loop.
subject_cnt

Counter({'Algebra': 755,
         'Number Theory': 387,
         'unknown': 227,
         'Inequalities': 200,
         'Calculus': 32,
         'Other': 12,
         'NaN': 9,
         'Combinatorics': 5,
         'Geometry': 3,
         'Intermediate Algebra': 1,
         'Logic and Puzzles': 1})

In [24]:
# NOTE(review): `c` is not defined in this cell; presumably it is the condition
# dict left over from the last iteration of the export loop, so this cell only
# works after that loop has run — confirm or remove.
c

{'problem_type': 'Algebra', 'source': 'olympiads'}

In [14]:
# Number of distinct sampled conditions.
len(set(conditions_sampled))

1023

In [15]:
# Display how many times each distinct sampled condition occurs.
C.Counter(conditions_sampled)

Counter({(('difficulty', '1'),
          ('domain', "['Algebra -> Intermediate Algebra -> Other']")): 420,
         (('problem_type', 'Algebra'), ('source', 'olympiads')): 407,
         (('problem_type', 'unknown'), ('source', 'unknown')): 227,
         (('problem_type', 'Number Theory'), ('source', 'olympiads')): 195,
         (('difficulty', '5'),
          ('domain',
           "['Algebra -> Intermediate Algebra -> Inequalities']")): 194,
         (('difficulty', '1'),
          ('domain',
           "['Algebra -> Intermediate Algebra -> Polynomials']")): 111,
         (('problem_type', 'Inequalities'), ('source', 'olympiads')): 97,
         (('problem_type', 'Algebra'), ('source', 'secondary_math')): 92,
         (('difficulty', '2'),
          ('domain', "['Algebra -> Intermediate Algebra -> Other']")): 70,
         (('difficulty', '2'),
          ('domain',
           "['Algebra -> Intermediate Algebra -> Inequalities']")): 64,
         (('problem_type', 'Algebra'), ('source', 'm