In [1]:

import os
import json
import pickle
import collections as C
import itertools as I
import random

from common.constants import CORE_OPTIONS
from common.pantograph.dataclasses import ProblemGenerationProcess
from common.utils import remove_spaces, remove_comments

header = ("""
import Mathlib
import Aesop

""" + '\n'.join('set_option ' + t.replace('=', ' ') for t in CORE_OPTIONS)).strip()
print(header)

import Mathlib
import Aesop

set_option maxHeartbeats 0
set_option maxRecDepth 100000
set_option tactic.hygienic false
set_option pp.fullNames true
set_option pp.funBinderTypes true
set_option pp.piBinderTypes true


In [3]:
# with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/failed.pkl', 'rb') as f:
# with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/failed-revalidated.0830.pkl', 'rb') as f:
# with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/failed-revalidated.0830.v2.pkl', 'rb') as f:
# with open('/home/ma-user/workspace/formal_problem_generation/output_tmp/output/sft_ar_v3/Goedel-Prover-V2-8B.Numina-Lean-reasseblmed.39509.problem_generator.nopack.3epoch/inf_ds.pkl', 'rb') as f:
# with open('/home/ma-user/workspace/formal_problem_generation/output_tmp/output/sft_ar_v2/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/fpg_evaluate_falsify_prove.20250904-172423.pkl', 'rb') as f:
    # data = pickle.load(f)
with open('/home/ma-user/workspace/formal_problem_generation/output_tmp/output/sft_ar_v3/Goedel-Prover-V2-8B.Numina-Lean-reasseblmed.39509.problem_generator.nopack.3epoch/fpg_evaluate_kc.20250913-004304.pkl', 'rb') as f:
    (conditions, data) = pickle.load(f)

In [4]:
len(data)

5000

In [5]:
# data = list(data.values())

In [6]:
data = [d for d in data if d is not None]
len(data)

5000

In [7]:
for d in data:
    try:
        d.metainfo = json.loads(d.metainfo)
    except:
        pass

In [8]:
data[0].metainfo

{'prompt_tokens': 34713,
 'completion_tokens': 3266,
 'time_consumption': 752.8167641162872}

In [9]:
def is_falsified(d: ProblemGenerationProcess) -> bool:
    if 'eval_old_result' in d.metainfo:
        if any(p is not None for p in d.metainfo['eval_old_result']['falsify_proofs']):
            return True
    assert 'eval_result' in d.metainfo
    # 'eval_result' may contain both falsifying and satisfying
    if any(p is not None for p in d.metainfo['eval_result'].get('satisfy_proofs', [None])):
        return False
    return any(p is not None for p in d.metainfo['eval_result']['falsify_proofs'])

def is_proven(d: ProblemGenerationProcess) -> bool:
    if len(d.trajectory) > 0:
        # Deductive Exploration
        return len(d.formal_statement) > 0 and d.metainfo.get('is_solution_validated')
    else:
        # Baselines
        if len(d.formal_solution_draft or '') > 0:
            return True
        if any(p is not None for p in d.metainfo.get('eval_result', dict()).get('proofs', [None])):
            for p in d.metainfo.get('eval_result', dict()).get('proofs', [None]):
                if p is not None:
                    d.formal_solution_draft = p
            return True
        return False

def is_valid(d: ProblemGenerationProcess) -> bool:
    return is_proven(d) and not is_falsified(d)

def count_kc(d: ProblemGenerationProcess) -> int:
    # try:
        if 'proving_results' in d.metainfo:
            return len(remove_spaces(remove_comments(d.formal_solution_draft)))
        else:
            return d.metainfo['eval_result']['KC']
            # return len(remove_spaces(remove_comments(d.formal_solution_draft)))
    # except:
    #     # return len(remove_spaces(remove_comments(d.formal_solution_draft)))
    #     return float('nan')

In [10]:
submit_list = [
    d for d in data if len(d.steps) > 0 and d.steps[-1].is_submitting
]
len(submit_list)

3314

In [11]:
stmt_valid_list = [
    d for d in submit_list if d.metainfo['is_statement_validated']
]
len(stmt_valid_list)

3209

In [12]:
proof_valid_list = [
    d for d in stmt_valid_list if d.metainfo['is_solution_validated']
]
len(proof_valid_list)

3209

In [13]:
valid_list = [d for d in data if is_valid(d)]
len(valid_list)

2726

In [14]:
valid_list[0]

ProblemGenerationProcess(informal_problem='', informal_answer='', informal_solution='', header='', formal_statement='example\n(n : ℕ)\n(h₀ : n ≠ 0)\n(h₁ : ∀ p ∈ n.primeFactors, p - 1 ∣ n - 1)\n: n = ∏ a ∈ n.primeFactors, a ^ n.factorization a\n:= sorry', formal_solution_draft='have h₂: \n    n = ∏ p ∈ n.primeFactors, p ^ n.factorization p  := by {\n  nth_rw 1 [← Nat.factorization_prod_pow_eq_self h₀]\n  rfl\n}\n\nhave h₀₀ : 1 ≤ n := by omega\n\nhave h₄ : n = ∏ a ∈ n.primeFactors, (a ^ (n.factorization a)) := by simpa using h₂\n\nexact h₄', formal_proofs='', steps=[ProblemGenerationStep(step_draft='have n : ℕ := sorry', proof=None, new_contexts=[Variable(t='ℕ', v=None, name='n', t_type=None)]), ProblemGenerationStep(step_draft='have h₀ : n ≠ 0 := sorry', proof=None, new_contexts=[Variable(t='n ≠ 0', v=None, name='h₀', t_type=None)]), ProblemGenerationStep(step_draft='have h₂: \n    n = ∏ p ∈ n.primeFactors, p ^ n.factorization p  := by {\n  nth_rw 1 [← Nat.factorization_prod_pow_eq_self

In [18]:
output_list = []
for d in data:
    if not is_valid(d):
        output_list.append({
            'exploration_steps': d.steps,
            'is_valid': False
        })
    else:
        output_list.append({
            'formal_statement': d.formal_statement,
            'formal_proof_ours': d.formal_solution_draft,
            'formal_proof_three_provers': d.metainfo['eval_result']['proofs'][-1],
            'exploration_steps': d.steps,
            'is_valid': True
        })
    
import msgspec
with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/main.json', 'wb') as f:
    f.write(msgspec.json.encode(output_list))

In [19]:
oo = json.loads(msgspec.json.encode(output_list).decode())

In [20]:
oo[0]

{'exploration_steps': [{'step_draft': 'have a : ℕ → ℝ := sorry',
   'proof': None,
   'new_contexts': [{'t': 'ℕ → ℝ',
     'v': None,
     'name': 'a',
     'raw_name': '_uniq.32',
     't_type': None}]},
  {'step_draft': 'have h₁ : a 1 = 1 := sorry',
   'proof': None,
   'new_contexts': [{'t': 'a 1 = 1',
     'v': None,
     'name': 'h₁',
     'raw_name': '_uniq.115',
     't_type': None}]},
  {'step_draft': 'have h₂ : ∀ n > 0, a n + a (n + 1) = 1 / n - 1 / (n + 1) := sorry',
   'proof': None,
   'new_contexts': [{'t': '∀ n > 0, a n + a (n + 1) = 1 / ↑n - 1 / (↑n + 1)',
     'v': None,
     'name': 'h₂',
     'raw_name': '_uniq.824',
     't_type': None}]},
  {'step_draft': 'have h3 := h₂ 1 (by norm_num)',
   'proof': [],
   'new_contexts': [{'t': 'a 1 + a (1 + 1) = 1 / ↑1 - 1 / (↑1 + 1)',
     'v': None,
     'name': 'h3',
     'raw_name': '_uniq.18805',
     't_type': None}]},
  {'step_draft': 'have h4 : a 2 = -1 / 2 := by\n  norm_num [h₁] at h3\n  linarith',
   'proof': [],
   'new

In [30]:
len(output_list), len([d for d in output_list if d['formal_proof_three_provers'] is None])

(2726, 60)

In [9]:
def is_mixed(d: ProblemGenerationProcess) -> bool:
    now_deducing = False
    for (S, s) in d.trajectory:
        if d.steps[s].is_introducing:
            if now_deducing:
                return True
            continue
        if d.steps[s].is_deducing:
            now_deducing = False
    return False

def is_reassembled(d: ProblemGenerationProcess) -> bool:
    return 'original_trajectory' in d.metainfo and [s[1] for s in d.trajectory] != [s[1] for s in d.metainfo['original_trajectory']]
    

In [10]:
type(d.metainfo['original_trajectory'][1])

KeyError: 'original_trajectory'

In [11]:
len([d for d in data if is_valid(d) and is_mixed(d)]), len([d for d in data if  is_valid(d) and not is_mixed(d)])

(0, 2726)

In [12]:
len([d for d in data if is_valid(d) and is_reassembled(d)]), len([d for d in data if  is_valid(d) and not is_reassembled(d)])

(0, 2726)

In [13]:
kc_groups = C.defaultdict(list)

for d in data:
    # try:
    #     d.metainfo = json.loads(d.metainfo)
    # except:
    #     pass
    if not (is_valid(d)):
        continue
    match count_kc(d):
        case float('nan'):
            raise
        case None:
            raise
        case x if x < 300:
            kc_groups[300].append(d)
        case x if 300 <= x < 600:
            kc_groups[600].append(d)
        case x if 600 <= x < 900:
            kc_groups[900].append(d)
        case x if 900 <= x < 1200:
            kc_groups[1200].append(d)
        case x if 1200 < x:
            kc_groups[1201].append(d)

In [14]:
for k, v in kc_groups.items():
    print(k, len(v))

1201 287
600 747
300 1126
900 374
1200 192


In [76]:
result = random.choice(kc_groups[300])

meta = result.metainfo
if 'original_trajectory' in meta.keys():
    meta.pop('original_trajectory')
    meta['is_reassembled'] = True
# print('-- ' + json.dumps(meta) + '\n')

steps = result.steps
submission_name = steps[-1].step[len('submit_answer '):]
submission_fvar = [v for v in result.trajectory[-1][0] if v.name == submission_name]
assert len(submission_fvar) == 1, f'submission_name={submission_name}, new_context={[v.name for v in steps[-1].new_contexts]}'
submission_fvar = submission_fvar[0]

# Construct statement and proof
problem_hypotheses = []
for s in steps:
    if s.is_introducing:
        step_code = s.step_code
        assert step_code.startswith('have ') and step_code.endswith(' := sorry')
        problem_hypotheses.append('(' + step_code[len('have '):-len(' := sorry')] + ')')

formal_statement = 'example\n' + '\n'.join(problem_hypotheses) + '\n: ' + submission_fvar.t + '\n:= by\n'
print('\n'.join(l + ' in' for l in result.header.splitlines()) + '\n' + formal_statement + result.formal_solution_draft + '\n\n')
print('\n---\nexample: True := by trivial\n---\n')


example
(a : ℝ)
(d : ℝ)
(h₀ : a + 6 * d = 30)
(h₁ : a + 10 * d = 60)
: a + 20 * d = 135
:= by
have hd : d = 15 / 2 := by linarith

have h₂: 
    a + 20 * d = 135  := by {
  linarith [h₁, hd]
}

exact h₂



---
example: True := by trivial
---



In [None]:
# result = random.choice(kc_groups[900])

# meta = result.metainfo
# if 'original_trajectory' in meta.keys():
#     meta.pop('original_trajectory')
#     meta['is_reassembled'] = True
# # print('-- ' + json.dumps(meta) + '\n')

# steps = result.steps
# submission_name = steps[-1].step[len('submit_answer '):]
# submission_fvar = [v for v in result.trajectory[-1][0] if v.name == submission_name]
# assert len(submission_fvar) == 1, f'submission_name={submission_name}, new_context={[v.name for v in steps[-1].new_contexts]}'
# submission_fvar = submission_fvar[0]

# # Construct statement and proof
# problem_hypotheses = []
# for s in steps:
#     if s.is_introducing:
#         step_code = s.step_code
#         assert step_code.startswith('have ') and step_code.endswith(' := sorry')
#         problem_hypotheses.append('(' + step_code[len('have '):-len(' := sorry')] + ')')

# formal_statement = 'example\n' + '\n'.join(problem_hypotheses) + '\n: ' + submission_fvar.t + '\n:= by\n'
# print('\n'.join(l + ' in' for l in result.header.splitlines()) + '\n' + formal_statement + result.formal_solution_draft + '\n\n')
# print('\n---\nexample: True := by trivial\n---\n')


example
(a :  ℝ)
(f :  ℝ → ℝ)
(ha :  0 < a ∧ a < 1)
(hf :  f = λ x => a * x)
: f (f (f (1 - a))) = a ^ 3 * (1 - a)
:= by
have f1pos : ∀ x : ℝ, 0 < x → 0 < f x := by
  intro x xpos
  simp [hf]
  apply mul_pos
  exact ha.left
  exact xpos

have f1eqx : ∀ x, f x = a * x := by
  intro x
  rw [hf]

have f1neg : ∀ x : ℝ, x < 0 → f x < 0 := by
  intro x
  intro hx
  rw [f1eqx x]
  apply mul_neg_of_pos_of_neg
  linarith
  exact hx

have hf1 : f 1 = a := by simp [hf]

have ha1 : 0 < a := ha.1

have ha2 : a < 1 := ha.2

clear f1pos f1neg

clear ha

open scoped BigOperators in
open Topology Filter Real Complex TopologicalSpace Finset in
have ha3 : 0 < 1 - a := by linarith

have ha4 : 0 < a * (1 - a) := by positivity

open Real in
have eq1 : f 0 = (0 : ℝ) := by
  rw [f1eqx]
  ring_nf

have eq2 : f (1 - a) = a * (1 - a) := by
  rw [f1eqx]
  all_goals
    ring

have eq3 : f (f (1 - a)) = a ^ 2 * (1 - a) := by
  rw [eq2]
  rw [f1eqx]
  all_goals ring_nf

have eq4 : f (f (f (1 - a))) = a^3 * (1 - a) 

In [None]:
result = random.choice(kc_groups[900])

meta = result.metainfo
if 'original_trajectory' in meta.keys():
    meta.pop('original_trajectory')
    meta['is_reassembled'] = True
# print('-- ' + json.dumps(meta) + '\n')

steps = result.steps
submission_name = steps[-1].step[len('submit_answer '):]
submission_fvar = [v for v in result.trajectory[-1][0] if v.name == submission_name]
assert len(submission_fvar) == 1, f'submission_name={submission_name}, new_context={[v.name for v in steps[-1].new_contexts]}'
submission_fvar = submission_fvar[0]

# Construct statement and proof
problem_hypotheses = []
for s in steps:
    if s.is_introducing:
        step_code = s.step_code
        assert step_code.startswith('have ') and step_code.endswith(' := sorry')
        problem_hypotheses.append('(' + step_code[len('have '):-len(' := sorry')] + ')')

formal_statement = 'example\n' + '\n'.join(problem_hypotheses) + '\n: ' + submission_fvar.t + '\n:= by\n'
print('\n'.join(l + ' in' for l in result.header.splitlines()) + '\n' + formal_statement + result.formal_solution_draft + '\n\n')
print('\n---\nexample: True := by trivial\n---\n')

-- {"is_statement_validated": true, "is_solution_validated": true, "prompt_tokens": 7073, "completion_tokens": 817, "time_consumption": 289.12916350364685, "eval_result": {"satisfy_provers": ["/home/ma-user/local_cache/AI-MO/Kimina-Prover-Distill-8B", "/home/ma-user/local_cache/deepseek-ai/DeepSeek-Prover-V2-7B"], "satisfy_proofs": [null, "\n  have h_main : \u2203 (a : \u211d) (ha : 0 < a) (b : \u211d) (h1 : 1 / (a + b) = 12) (h2 : 1 / (a - b) = 10), True := by\n    /-\n    We need to find real numbers \\( a > 0 \\), \\( b \\), and the conditions \\( \\frac{1}{a + b} = 12 \\) and \\( \\frac{1}{a - b} = 10 \\).\n    \n    First, solve for \\( a + b \\) and \\( a - b \\) from the given equations:\n    \n    1. \\( \\frac{1}{a + b} = 12 \\) implies \\( a + b = \\frac{1}{12} \\).\n    \n    2. \\( \\frac{1}{a - b} = 10 \\) implies \\( a - b = \\frac{1}{10} \\).\n    \n    Next, solve the system of equations:\n    \n    \\[ a + b = \\frac{1}{12} \\]\n    \\[ a - b = \\frac{1}{10} \\]\n    \

In [None]:
kc_groups[1201]

In [None]:
# # Goedel-Prover-V2-8B
# s1 = {43,
# 5,
# 59,
# 16,
# 51,
# 21,
# 52,
# 25,
# 50,
# 11,
# 47,
# 32,
# 54,
# 31,
# 44,
# 22,
# 58,
# 34,}

# # DeepSeek-Prover-V2-7B
# s2 = {43,
# 5,
# 59,
# 0,
# 37,
# 57,
# 21,
# 50,
# 7,
# 19,
# 25,
# 11,
# 24,
# 47,
# 17,
# 4,
# 32,
# 22,
# 31,
# 58,
# 15,
# 41,
# 27,
# 30,
# 48,
# 46,
# 23,}

# # Kimina-Prover-Distill-8B
# s3 = {5,
# 43,
# 53,
# 57,
# 51,
# 28,
# 18,
# 37,
# 3,
# 21,
# 24,
# 14,
# 52,
# 25,
# 50,
# 7,
# 46,
# 11,
# 2,
# 12,
# 45,
# 32,
# 47,
# 17,
# 1,}

In [None]:
with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/inf_64.lean', 'w') as f:
    f.write(header + '\n\n')
    for (idx, result) in enumerate(data):
        # with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator/{ptype}.{source}.lean', 'w') as f:
        if idx not in s1.intersection(s2).intersection(s3):
            continue
        meta = result.metainfo
        if 'original_trajectory' in meta.keys():
            meta.pop('original_trajectory')
            meta['is_reassembled'] = True
        f.write('-- ' + json.dumps(meta) + '\n')
        
        steps = result.steps
        submission_name = steps[-1].step[len('submit_answer '):]
        submission_fvar = [v for v in result.trajectory[-1][0] if v.name == submission_name]
        assert len(submission_fvar) == 1, f'submission_name={submission_name}, new_context={[v.name for v in steps[-1].new_contexts]}'
        submission_fvar = submission_fvar[0]
        
        # Construct statement and proof
        problem_hypotheses = []
        for s in steps:
            if s.is_introducing:
                step_code = s.step_code
                assert step_code.startswith('have ') and step_code.endswith(' := sorry')
                problem_hypotheses.append('(' + step_code[len('have '):-len(' := sorry')] + ')')
        
        formal_statement = 'example\n' + '\n'.join(problem_hypotheses) + '\n: ' + submission_fvar.t + '\n:= by\n'
        f.write('\n'.join(l + ' in' for l in result.header.splitlines()) + '\n' + formal_statement + result.formal_solution_draft + '\n\n')
        f.write('\n---\nexample: True := by trivial\n---\n')

In [None]:
stmt_failed_to_validate = []
for vv in data:
    if (vv.formal_solution_draft or '') != '' and json.loads(vv.metainfo).get('is_statement_validated', False) == False:
        stmt_failed_to_validate.append(vv)
print(len(stmt_failed_to_validate))

proof_failed_to_validate = []
for vv in data:
    meta = json.loads(vv.metainfo)
    if (vv.formal_solution_draft or '') != '' and meta.get('is_statement_validated', False) == True and meta.get('is_solution_validated', False) == False:
        proof_failed_to_validate.append(vv)
print(len(proof_failed_to_validate))

In [None]:
# Statement fail: 83 -> 69
# Proof fail: 81 -> 29 -> 25

In [None]:
for result in data:
    result : ProblemGenerationProcess
    steps = result.steps
    submission_name = steps[-1].step_code[len('submit_answer '):]
    submission_fvar_pp = [v for v in result.trajectory[-1][0] if v.name == submission_name]
    assert len(submission_fvar_pp) == 1, f'submission_name={submission_name}, new_context={[v.name for v in steps[-1].new_contexts]}'
    submission_fvar_pp = submission_fvar_pp[0]
    meta = json.loads(result.metainfo)
    if 'Tendsto (λ (n : ℕ) => f (n * x) * (Real.log (n * x) / Real.log x) * ' in result.formal_statement:
        raise

In [None]:
open_set = set()
open_scoped_set = set()
option_set = set()
for s in result.steps:
    if s.is_introducing:
        for l in s.header.splitlines():
            l = l.strip()
            if l.endswith(' in'):
                l = l[:-len('in')].strip()
            if l.startswith('open scoped'):
                for elem in l[len('open scoped'):].strip().split():
                    open_scoped_set.add(elem)
            elif l.startswith('open'):
                for elem in l[len('open'):].strip().split():
                    open_set.add(elem)
            elif l.startswith('set_option'):
                option_set.add(l[len('set_option'):].strip())

In [None]:
open_set

In [None]:
open_scoped_set

In [None]:
header = \
                    (('open scoped ' + ' '.join(open_scoped_set) + '\n') if len(open_scoped_set) > 0 else '') + \
                    (('open ' + ' '.join(open_set) + '\n') if len(open_set) > 0 else '') + \
                    '\n'.join(['set_option ' + t for t in option_set])

In [None]:
print(header)

In [None]:
result.header

In [None]:
print(result.formal_statement)

In [None]:
# with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_ar_v2/Goedel-Prover-V2-8B.Numina-Lean.problem_generator/problem_generation.20250822-005458.pkl', 'rb') as f:
# with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_ar_v2/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/problem_generation.20250828-223616.pkl', 'rb') as f:
with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_ar_v2/Goedel-Prover-V2-8B.Numina-Lean-reasseblmed.39509.problem_generator.nopack/problem_generation.20250903-221358.pkl', 'rb') as f:
# with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_wg/Goedel-Prover-V2-8B.Numina-Lean.whole_statement_generatior.nopack/problem_generation.20250828-210118.pkl', 'rb') as f:
    data = pickle.load(f)



In [None]:
results_all = C.defaultdict(list)

In [None]:
idx_cnt = C.Counter()

In [None]:
for ((ptype, source), idx), v in data.items():
    idx_cnt[idx] += 1
    results_all[(ptype, source)].append(v)

In [None]:
sum([len([vv for vv in v if (vv.formal_solution_draft or '') != '']) for v in results_all.values()]), \
sum([len([vv for vv in v if (vv.formal_statement or '') != '']) for v in results_all.values()]), \
sum([len(v) for v in results_all.values()]), \
sum([json.loads(vv.metainfo)['prompt_tokens'] for v in results_all.values() for vv in v]), \
sum([json.loads(vv.metainfo)['completion_tokens'] for v in results_all.values() for vv in v])

(2773, 2773, 3800, 87653424, 13928957)

In [None]:
# # stmt_failed_to_validate = []
# # for v in results_all.values():
# #     for vv in v:
# #         if (vv.formal_solution_draft or '') != '' and json.loads(vv.metainfo).get('is_statement_validated', False) == False:
# #             stmt_failed_to_validate.append(vv)
# # print(len(stmt_failed_to_validate))

# for (i, result) in enumerate(stmt_failed_to_validate):
#     # with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator/{ptype}.{source}.lean', 'w') as f:
#     os.makedirs('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/stmt_fail', exist_ok=True)
#     with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/stmt_fail/{i}.lean', 'w') as f:
#         f.write(header + '\n\n')
#         if result.formal_statement == '':
#             raise
#         meta = json.loads(result.metainfo)
#         if 'original_trajectory' in meta.keys():
#             meta.pop('original_trajectory')
#             meta['is_reassembled'] = True
#         f.write('-- ' + json.dumps(meta) + '\n')
        
#         # Construct statement and proof
#         f.write('-- # Problem Generation Process\nexample : False := by\n')
#         for s in result.steps:
#             if s.is_submitting:
#                 f.write('\n'.join('-- ' + l for l in s.step.splitlines() if l.strip() != '') + '\n\n')
#             else:
#                 f.write(s.step + '\n\n')
#         f.write('sorry\n\n-- # Statement & Proof\n')
        
#         assert result.formal_statement.endswith(':= sorry')
#         f.write('\n'.join(l + ' in' for l in result.header.splitlines()) + '\n' + result.formal_statement[:-len(':= sorry')] + ':= by\n' + '\nhave h_submission := by {\n' + '\n'.join('  ' + l for l in result.formal_solution_draft.splitlines() if l.strip() != '') + '\n}\nexact h_submission\n' + '\n\n')

In [None]:
# # proof_failed_to_validate = []
# # for v in results_all.values():
# #     for vv in v:
# #         meta = json.loads(vv.metainfo)
# #         if (vv.formal_solution_draft or '') != '' and meta.get('is_statement_validated', False) == True and meta.get('is_solution_validated', False) == False:
# #             proof_failed_to_validate.append(vv)
# # print(len(proof_failed_to_validate))

# for (i, result) in enumerate(proof_failed_to_validate):
#     # with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator/{ptype}.{source}.lean', 'w') as f:
#     os.makedirs('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/proof_fail', exist_ok=True)
#     with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/proof_fail/{i}.lean', 'w') as f:
#         f.write(header + '\n\n')
#         if result.formal_statement == '':
#             raise
#         meta = json.loads(result.metainfo)
#         if 'original_trajectory' in meta.keys():
#             meta.pop('original_trajectory')
#             meta['is_reassembled'] = True
#         f.write('-- ' + json.dumps(meta) + '\n')
        
#         # Construct statement and proof
#         f.write('-- # Problem Generation Process\nexample : False := by\n')
#         for s in result.steps:
#             if s.is_submitting:
#                 f.write('\n'.join('-- ' + l for l in s.step.splitlines() if l.strip() != '') + '\n\n')
#             else:
#                 f.write(s.step + '\n\n')
#         f.write('sorry\n\n-- # Statement & Proof\n')
        
#         assert result.formal_statement.endswith(':= sorry')
#         f.write('\n'.join(l + ' in' for l in result.header.splitlines()) + '\n' + result.formal_statement[:-len(':= sorry')] + ':= by\n' + '\nhave h_submission := by {\n' + '\n'.join('  ' + l for l in result.formal_solution_draft.splitlines() if l.strip() != '') + '\n}\nexact h_submission\n' + '\n\n')

In [None]:
# result.formal_statement

In [None]:
# failed = stmt_failed_to_validate + proof_failed_to_validate
# with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/failed.pkl', 'wb') as f:
#     pickle.dump(failed, f, pickle.HIGHEST_PROTOCOL)

In [None]:
print(vv.header)
print(vv.formal_statement)
print(vv.formal_solution_draft)

In [None]:
for (ptype, source), ds in results_all.items():
    with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean-reasseblmed.39509.problem_generator.nopack/{ptype}.{source}.lean', 'w') as f:
        f.write(header + '\n\n')
        for result in ds:
            if result.formal_solution_draft is not None:
                assert result.formal_statement.endswith(':= sorry')
                f.write('\n'.join(l + ' in' for l in result.header.splitlines()) + '\n' + result.formal_statement[:-len(' sorry')] + ' by\n' + result.formal_solution_draft + '\n\n')

In [None]:
for (ptype, source), ds in results_all.items():
    # with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator/{ptype}.{source}.lean', 'w') as f:
    with open(f'/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F/Goedel-Prover-V2-8B.Numina-Lean.problem_generator.nopack/{ptype}.{source}.lean', 'w') as f:
        f.write(header + '\n\n')
        for result in ds:
            if result.formal_statement == '':
                continue
            meta = json.loads(result.metainfo)
            if 'original_trajectory' in meta.keys():
                meta.pop('original_trajectory')
                meta['is_reassembled'] = True
            f.write('-- ' + json.dumps(meta) + '\n')
            
            steps = result.steps
            submission_name = steps[-1].step[len('submit_answer '):]
            submission_fvar = [v for v in result.trajectory[-1][0] if v.name == submission_name]
            assert len(submission_fvar) == 1, f'submission_name={submission_name}, new_context={[v.name for v in steps[-1].new_contexts]}'
            submission_fvar = submission_fvar[0]
            
            # Construct statement and proof
            problem_hypotheses = []
            for s in steps:
                if s.is_introducing:
                    step_code = s.step_code
                    assert step_code.startswith('have ') and step_code.endswith(' := sorry')
                    problem_hypotheses.append('(' + step_code[len('have '):-len(' := sorry')] + ')')
            
            formal_statement = 'example\n' + '\n'.join(problem_hypotheses) + '\n: ' + submission_fvar.t + '\n:= by\n'
            f.write('\n'.join(l + ' in' for l in result.header.splitlines()) + '\n' + formal_statement + result.formal_solution_draft + '\n\n')