In [1]:
import os
import json
import pickle
import collections as C
import itertools as I

from common.constants import CORE_OPTIONS, BANNED_TOKENS
from common.utils import decompose_statement, parse_idents
from common.pantograph.server import PersistentServer, Server, TacticDraft
from common.pantograph.parsing_server import PersistentParsingServer

In [2]:
PARSING_IMPORTS = [
    "Lean.Parser",
    "Lean.PrettyPrinter.Delaborator.Attributes",
    "Lean.PrettyPrinter.Delaborator.Basic",
    "Lean.PrettyPrinter.Delaborator.SubExpr",
    "Lean.PrettyPrinter.Delaborator.TopDownAnalyze",
    "Lean.Meta.CoeAttr",
    "Mathlib",
    "Aesop"
]
PARSING_EXTRA_OPTIONS = [
    "pp.numericTypes=true",
    "pp.instanceTypes=true",
    "pp.coercions.types=true",
    "pp.mvars.withType=true",
    "pp.structureInstanceTypes=true"
]

In [3]:
parsing_server = Server(
    imports=PARSING_IMPORTS,
    project_path='/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F',
    core_options=CORE_OPTIONS,
    timeout=300,
    _sync_init=False
)
# parsing_server = PersistentParsingServer(
#     max_count=32,
#     is_state_based=True,
#     tag='',
#     _sync_init=False,
#     imports=["Mathlib", "Aesop"],
#     project_path='/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F',
#     core_options=CORE_OPTIONS,
#     timeout=300
# )

In [4]:
await parsing_server.restart_async()


In [5]:
formal_statement = r'''theorem mathd_algebra_10 (m : Nat) (hm : m > 1) (hf : False) : abs ((120 : ℝ) / 100 * 30 - 130 / 100 * 20) = 10 := by
  sorry'''
condition = r'''What is the positive difference between $120\%$ of 30 and $130\%$ of 20? Show that it is 10.'''

In [6]:

init_state = (await parsing_server.load_sorry_async(formal_statement))[-1].goal_state
print(init_state)

m : ℕ
hm : m > 1
hf : False
⊢ |120 / 100 * 30 - 130 / 100 * 20| = 10


In [7]:
init_state.goals[0].variables

[Variable(t='ℕ', v=None, name='m', t_type='Type'),
 Variable(t='m > 1', v=None, name='hm', t_type='Prop'),
 Variable(t='False', v=None, name='hf', t_type='Prop')]

In [20]:
units = await parsing_server.expr_type_async('open Real in\n1 > 0')
print(units)

Prop


In [15]:
check_state.payload['message']

KeyError: 'message'

In [7]:
from agent.proof_generation import Kimina_LLMWholeProofGenerationAgent, Goedel_LLMWholeProofGenerationAgent, DeepSeek_LLMWholeProofGenerationAgent



In [8]:
kimina = Kimina_LLMWholeProofGenerationAgent()
goedel = Goedel_LLMWholeProofGenerationAgent()
deepseek = DeepSeek_LLMWholeProofGenerationAgent()

In [9]:
print(deepseek.gen_prompt(init_state, formal_statement, None)[-1]['content'])

Complete the following Lean 4 code:

```lean4
import Mathlib
import Aesop

set_option maxHeartbeats 0
set_option maxRecDepth 100000
set_option tactic.hygienic false
set_option pp.fullNames true
set_option pp.funBinderTypes true
set_option pp.piBinderTypes true

theorem mathd_algebra_10 : abs ((120 : ℝ) / 100 * 30 - 130 / 100 * 20) = 10 := by
  sorry
```

Before producing the Lean 4 code to formally prove the given theorem, provide a detailed proof plan outlining the main proof steps and strategies.
The plan should highlight key ideas, intermediate lemmas, and proof structures that will guide the construction of the final formal proof.


In [10]:
formal_statement = """
import Mathlib
import Aesop

set_option maxHeartbeats 0
set_option maxRecDepth 100000
set_option tactic.hygienic false
set_option pp.fullNames true
set_option pp.funBinderTypes true
set_option pp.piBinderTypes true

/-- What is the positive difference between $120\%$ of 30 and $130\%$ of 20? Show that it is 10.-/
theorem mathd_algebra_10 : abs ((120 : ℝ) / 100 * 30 - 130 / 100 * 20) = 10 := by
  sorry
""".strip()

prompt = """
Complete the following Lean 4 code:

```lean4
{}
```

Before producing the Lean 4 code to formally prove the given theorem, provide a detailed proof plan outlining the main proof steps and strategies.
The plan should highlight key ideas, intermediate lemmas, and proof structures that will guide the construction of the final formal proof.
""".strip()

chat = [
  {"role": "user", "content": prompt.format(formal_statement)},
]

In [11]:
formal_statement = r'''theorem mathd_algebra_10 : abs ((120 : ℝ) / 100 * 30 - 130 / 100 * 20) = 10 := by
  sorry'''
condition = r'''What is the positive difference between $120\%$ of 30 and $130\%$ of 20? Show that it is 10.'''

In [12]:
deepseek.gen_prompt(init_state, formal_statement, condition) == chat

True

In [13]:
deepseek.gen_prompt(init_state, formal_statement, condition)[-1]['content'], chat[-1]['content']

('Complete the following Lean 4 code:\n\n```lean4\nimport Mathlib\nimport Aesop\n\nset_option maxHeartbeats 0\nset_option maxRecDepth 100000\nset_option tactic.hygienic false\nset_option pp.fullNames true\nset_option pp.funBinderTypes true\nset_option pp.piBinderTypes true\n\n/-- What is the positive difference between $120\\%$ of 30 and $130\\%$ of 20? Show that it is 10.-/\ntheorem mathd_algebra_10 : abs ((120 : ℝ) / 100 * 30 - 130 / 100 * 20) = 10 := by\n  sorry\n```\n\nBefore producing the Lean 4 code to formally prove the given theorem, provide a detailed proof plan outlining the main proof steps and strategies.\nThe plan should highlight key ideas, intermediate lemmas, and proof structures that will guide the construction of the final formal proof.',
 'Complete the following Lean 4 code:\n\n```lean4\nimport Mathlib\nimport Aesop\n\nset_option maxHeartbeats 0\nset_option maxRecDepth 100000\nset_option tactic.hygienic false\nset_option pp.fullNames true\nset_option pp.funBinderType

In [14]:
print(deepseek.gen_prompt(init_state, formal_statement, condition)[-1]['content'])

Complete the following Lean 4 code:

```lean4
import Mathlib
import Aesop

set_option maxHeartbeats 0
set_option maxRecDepth 100000
set_option tactic.hygienic false
set_option pp.fullNames true
set_option pp.funBinderTypes true
set_option pp.piBinderTypes true

/-- What is the positive difference between $120\%$ of 30 and $130\%$ of 20? Show that it is 10.-/
theorem mathd_algebra_10 : abs ((120 : ℝ) / 100 * 30 - 130 / 100 * 20) = 10 := by
  sorry
```

Before producing the Lean 4 code to formally prove the given theorem, provide a detailed proof plan outlining the main proof steps and strategies.
The plan should highlight key ideas, intermediate lemmas, and proof structures that will guide the construction of the final formal proof.


In [26]:
print(deepseek.gen_prompt(init_state, None, None)[-1]['content'])

Complete the following Lean 4 code:

```lean4
import Mathlib
import Aesop

set_option maxHeartbeats 0
set_option maxRecDepth 100000
set_option tactic.hygienic false
set_option pp.fullNames true
set_option pp.funBinderTypes true
set_option pp.piBinderTypes true

example
: |120 / 100 * 30 - 130 / 100 * 20| = 10:= by
  sorry
```

Before producing the Lean 4 code to formally prove the given theorem, provide a detailed proof plan outlining the main proof steps and strategies.
The plan should highlight key ideas, intermediate lemmas, and proof structures that will guide the construction of the final formal proof.


In [None]:
parsing_state = await parsing_server.load_statement_async(r'''False''', intros=['star', 'h'])

[32m2025-08-30 15:41:03.442[0m | [34m[1mDEBUG   [0m | [36mcommon.pantograph.parsing_server[0m:[36mcheck_restart_async[0m:[36m1455[0m - [34m[1mPersistentServer(): Restarting...[0m


In [5]:
print(parsing_state)

star : (Finset ((Fin (4 : (ℕ : Type)) : Type) × (Fin (4 : (ℕ : Type)) : Type) : Type) : Type)
h : (star =
    (Insert.insert
        (((0 : (Fin (4 : (ℕ : Type)) : Type)), (0 : (Fin (4 : (ℕ : Type)) : Type))) :
          ((Fin (4 : (ℕ : Type)) : Type) × (Fin (4 : (ℕ : Type)) : Type) : Type))
        (Insert.insert
            (((0 : (Fin (4 : (ℕ : Type)) : Type)), (3 : (Fin (4 : (ℕ : Type)) : Type))) :
              ((Fin (4 : (ℕ : Type)) : Type) × (Fin (4 : (ℕ : Type)) : Type) : Type))
            (Insert.insert
                (((3 : (Fin (4 : (ℕ : Type)) : Type)), (0 : (Fin (4 : (ℕ : Type)) : Type))) :
                  ((Fin (4 : (ℕ : Type)) : Type) × (Fin (4 : (ℕ : Type)) : Type) : Type))
                ({(((3 : (Fin (4 : (ℕ : Type)) : Type)), (3 : (Fin (4 : (ℕ : Type)) : Type))) :
                      ((Fin (4 : (ℕ : Type)) : Type) × (Fin (4 : (ℕ : Type)) : Type) : Type))} :
                  (Finset ((Fin (4 : (ℕ : Type)) : Type) × (Fin (4 : (ℕ : Type)) : Type) : Type) : Type)

In [None]:
parsing_state = await parsing_server.goal_tactic_async(parsing_state, 0, 'intros star h')
print(parsing_state)

In [None]:
assert len(parsing_state.goals) == 1
g = parsing_state.goals[0]

assert all(v.v is None for v in g.variables)
context = [
    (f'({v.name} : {v.t})') for v in g.variables
]
formal_statement = 'example\n' + ('' if len(context) == 0 else '\n'.join(context) + '\n') + ': ' + g.target + '\n := sorry'
print(formal_statement)

In [2]:
server = PersistentServer(
    max_count=32,
    is_state_based=False,
    tag='',
    _sync_init=False,
    imports=["Mathlib", "Aesop"],
    project_path='/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/data/MiniF2F',
    core_options=CORE_OPTIONS,
    timeout=300
)

In [3]:
p = '/cache/data/MUSTARDSauce_lean4/WpMIDDK2Algebra911Step.lean'

with open(p, 'r') as f:
    code = f.read()

In [13]:
formal_statement = r'''open Real

#check ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ)

-- check the type of our expression
-- Let's simplify step by step
-- Step 1: simplify the expression in the parentheses (3^2*5)
example : (3 ^ 2 * 5 : ℝ) = 45 := by norm_num

-- Step 2: apply the exponent 3 to the 45
example : (45 ^ 3 : ℝ) = 91125 := by norm_num

-- Step 3: simplify the denominator, 3^4 equals 81, and the 5^3 equals 125
example : (3 ^ 4 * 5 ^ 3 : ℝ) = 10125 := by norm_num

-- Final Step: we divide the numerator by the denominator
example : ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ) = 9 := by sorry'''
print(formal_statement)

open Real

#check ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ)

-- check the type of our expression
-- Let's simplify step by step
-- Step 1: simplify the expression in the parentheses (3^2*5)
example : (3 ^ 2 * 5 : ℝ) = 45 := by norm_num

-- Step 2: apply the exponent 3 to the 45
example : (45 ^ 3 : ℝ) = 91125 := by norm_num

-- Step 3: simplify the denominator, 3^4 equals 81, and the 5^3 equals 125
example : (3 ^ 4 * 5 ^ 3 : ℝ) = 10125 := by norm_num

-- Final Step: we divide the numerator by the denominator
example : ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ) = 9 := by sorry


In [9]:
units = await server.load_code_async(formal_statement)

In [10]:
for u in units:
    print(u)

CompilationUnit(i_begin=0, i_end=11, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=11, i_end=196, messages=['(3 ^ 2 * 5) ^ 3 / (3 ^ 4 * 5 ^ 3) : ℝ\n'], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=196, i_end=287, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=287, i_end=411, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=411, i_end=525, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)


In [14]:
statement_header = formal_statement.encode()[:units[-1].i_begin].decode()
statement_body = formal_statement.encode()[units[-1].i_begin:units[-1].i_end].decode()
print(statement_header)
print('--------')
print(statement_body)

open Real

#check ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ)

-- check the type of our expression
-- Let's simplify step by step
-- Step 1: simplify the expression in the parentheses (3^2*5)
example : (3 ^ 2 * 5 : ℝ) = 45 := by norm_num

-- Step 2: apply the exponent 3 to the 45
example : (45 ^ 3 : ℝ) = 91125 := by norm_num

-- Step 3: simplify the denominator, 3^4 equals 81, and the 5^3 equals 125
example : (3 ^ 4 * 5 ^ 3 : ℝ) = 10125 := by norm_num

-- Final Step: we divide the numerator by the denominator

--------
example : ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ) = 9 := by sorry


In [4]:
units = await server.tactic_invocations_async(p)

[32m2025-09-10 15:49:09.121[0m | [34m[1mDEBUG   [0m | [36mcommon.pantograph.server[0m:[36mcheck_restart_async[0m:[36m570[0m - [34m[1mPersistentServer(): Restarting...[0m


In [5]:
for u in units:
    print(u)

CompilationUnit(i_begin=30, i_end=41, messages=[], invocations=[], goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=41, i_end=226, messages=['(3 ^ 2 * 5) ^ 3 / (3 ^ 4 * 5 ^ 3) : ℝ\n'], invocations=[], goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=226, i_end=317, messages=[], invocations=[TacticInvocation(before=['⊢ 3 ^ 2 * 5 = 45'], after=[], tactic='norm_num\n  -- Step 2: apply the exponent 3 to the 45', used_constants=['Real', 'HMul.hMul', 'Mathlib.Meta.NormNum.isNat_eq_true', 'Mathlib.Meta.NormNum.IsNatPowT.run', 'AddGroupWithOne.toAddMonoidWithOne', 'Monoid.toNatPow', 'Real.semiring', 'AddMonoidWithOne.toNatCast', 'Mathlib.Meta.NormNum.IsNatPowT.bit0', 'Real.instRing', 'instOfNatNat', 'Mathlib.Meta.NormNum.isNat_ofNat', 'Nat.instAddMonoidWithOne', 'Real.instMonoid', 'instNatAtLeastTwo', 'HPow.hPow', 'Nat', 'Real.instMul', 'eq_true', 'of_eq_true', 'Mathlib.Meta.NormNum.isNat_mul', 'Eq.refl', 'instHPow', '

In [28]:
last_ptr = 0
code_with_sorry = []
parse_results = []
for u in units:
    if len(u.invocations) > 0:
        # Add previous code
        code_with_sorry.append(code.encode()[last_ptr:u.i_begin].decode())
        last_ptr = u.i_end
        # Add theorem code and `sorry``
        theorem_and_proof = code.encode()[u.i_begin:u.i_end].decode()
        theorem, proof = theorem_and_proof.split(':=', maxsplit=1)
        theorem = theorem + ' := by\n  sorry'
        if proof.lstrip().startswith('by'):
            proof = proof.lstrip()[len('by'):]
        code_with_sorry.append(theorem)
        parse_results.append(
            dict(code=code.encode()[:u.i_begin].decode() + '\n' + theorem, proof=proof)
        )
    else:
        pass

In [29]:
idx = 3
print(parse_results[idx]['code'], parse_results[idx]['proof'], sep='\n---\n')

import Mathlib
import Aesop


open Real

#check ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ)

-- check the type of our expression
-- Let's simplify step by step
-- Step 1: simplify the expression in the parentheses (3^2*5)
example : (3 ^ 2 * 5 : ℝ) = 45 := by norm_num

-- Step 2: apply the exponent 3 to the 45
example : (45 ^ 3 : ℝ) = 91125 := by norm_num

-- Step 3: simplify the denominator, 3^4 equals 81, and the 5^3 equals 125
example : (3 ^ 4 * 5 ^ 3 : ℝ) = 10125 := by norm_num

-- Final Step: we divide the numerator by the denominator

example : ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ) = 9  := by
  sorry
---
 norm_num



In [30]:
print('\n'.join(code_with_sorry))

import Mathlib
import Aesop


open Real

#check ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ)

-- check the type of our expression
-- Let's simplify step by step
-- Step 1: simplify the expression in the parentheses (3^2*5)

example : (3 ^ 2 * 5 : ℝ) = 45  := by
  sorry

example : (45 ^ 3 : ℝ) = 91125  := by
  sorry

example : (3 ^ 4 * 5 ^ 3 : ℝ) = 10125  := by
  sorry

example : ((3 ^ 2 * 5) ^ 3 : ℝ) / (3 ^ 4 * 5 ^ 3 : ℝ) = 9  := by
  sorry


In [45]:
print(parse_results[0]['proof'])


  unfold rectangleArea
  exact by decide



In [38]:
print(theorem)
print(proof)

example : rectangleArea 7 3 = 21 := sorry
 by
  unfold rectangleArea
  exact by decide



In [33]:
code_segment = code.encode()[u.i_begin:u.i_end].decode()
print(code_segment)

example : rectangleArea 7 3 = 21 := by
  unfold rectangleArea
  exact by decide



In [7]:
init_state = await server.load_sorry_async(r'''
open Nat

-- defintions
def tomMarbles : ℕ :=
  105

def johnMarbles : ℕ :=
  98

def totalMarbles :=
  tomMarbles + johnMarbles

def estimateTom :=
  (tomMarbles + 5) / 10 * 10

def estimateJohn :=
  (johnMarbles + 5) / 10 * 10

def estimateTotal :=
  estimateTom + estimateJohn

def marblesEach :=
  totalMarbles / 2

def tomGave :=
  tomMarbles - marblesEach

-- proofs
example : estimateTom = 110 :=
  sorry

example : estimateJohn = 100 :=
  sorry

example : estimateTotal = 210 :=
  sorry

example : marblesEach = 101 :=
  sorry

example : tomGave = 4 :=
  sorry''')

In [9]:
units = init_state

In [10]:
for u in units:
    print(u)

CompilationUnit(i_begin=0, i_end=25, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=25, i_end=56, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=56, i_end=87, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=87, i_end=135, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=135, i_end=184, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=184, i_end=235, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=235, i_end=286, messages=[], invocations=None, goal_state=None, goal_src_boundaries=None, new_constants=None)
CompilationUnit(i_begin=286, i_end=325, messages=[], invocations=None,

In [18]:
for u in units:
    if u.goal_state is not None:
        final_state = await server.server.goal_tactic_async(u.goal_state, 0, 'aesop')
        assert final_state.is_solved
        
        try:
            falsifying_state = await server.server.goal_tactic_async(u.goal_state, 0, 'aesop')
            final_state = await server.server.goal_tactic_async(falsifying_state, 0, 'aesop')
            assert final_state.is_solved
        except:
            print(f'Failed to falsify state {[str(u.goal_state)]}')

Failed to falsify state ['⊢ estimateTom = 110']
Failed to falsify state ['⊢ estimateJohn = 100']
Failed to falsify state ['⊢ estimateTotal = 210']
Failed to falsify state ['⊢ marblesEach = 101']
Failed to falsify state ['⊢ tomGave = 4']


In [None]:
with open('/home/ma-user/workspace/formal_problem_generation/formal_problem_generation/output/sft_wg/Goedel-Prover-V2-8B.Numina-Lean.whole_statement_generatior.nopack/problem_generation.20250828-210118.pkl', 'rb') as f:
    data = pickle.load(f)

results_all = C.defaultdict(list)
for ((ptype, source), idx), v in data.items():
    results_all[(ptype, source)].append(v)

print(
    sum([len([vv for vv in v if (vv.formal_solution_draft or '') != '']) for v in results_all.values()]), \
    sum([len([vv for vv in v if (vv.formal_statement or '') != '']) for v in results_all.values()]), \
    sum([len(v) for v in results_all.values()])
)

In [None]:
for (ptype, source), ds in results_all.items():
    for result in ds:
        if result.formal_solution_draft is not None:
            # if '<think>' in result.formal_solution_draft:
            #     raise
            raise

In [None]:
formal_statement = result.formal_statement
load_header = result.header

proof_code = result.formal_solution_draft

In [None]:
variables = []
context, target = decompose_statement(formal_statement)
for declaration in context:
    if declaration[0] == '[':
        try:
            var_names, var_type = declaration[1:-1].split(':', 1)
        except ValueError:
            var_names = '_'
            var_type = declaration[1:-1]
        for name in var_names.strip().split():
            # print(name, var_type)
            variables.append((name.strip(), var_type))
    else:
        assert '✝' not in declaration, f'declaration: {declaration}'
        try:
            var_names, var_type = declaration[1:-1].split(':', 1)
        except ValueError:
            var_names = declaration[1:-1]
            var_type = None
        for name in var_names.strip().split():
            if '✝' in name:
                name = '_'
            variables.append((name.strip(), var_type))

In [None]:
print((('∀ ' + '\n'.join(context) + '\n, ') if len(context) > 0 else '') + target)

In [None]:
print([v[0] for v in variables])

In [None]:
print(proof_code)

In [None]:
init_state = await server.load_statement_async(
    statement=(('∀ ' + '\n'.join(context) + '\n, ') if len(context) > 0 else '') + target,
    intros=[v[0] for v in variables],
    header=load_header
)

In [None]:
print(init_state)

In [None]:
next_state = await server.goal_tactic_async(init_state, 0, r'''have h₄: 
    ∀ C C' : ℕ, C = r + b + y → C' = r + b + y → {X | X ≤ C / 2 ∧ X ≤ C' / 2}.ncard ≤ 1275  := by {
  exfalso
  have h₄ : r ≥ 50 := by omega
  have h₅ : r ≤ 49 := by omega
  omega
}''')

In [None]:
print(next_state)

In [None]:
final_state = await server.goal_tactic_async(next_state, 0, r'''exact h₄''')
print(final_state)

In [None]:
idents = set(proof_code.split()).union(parse_idents(proof_code))
# No banned token allowed
for banned_token in BANNED_TOKENS:
    assert banned_token not in idents, f'Banned token "{banned_token}" in proof "{proof_code}"'
# Goal should be solved
final_state = await server.goal_tactic_async(init_state, 0, '{\n' + proof_code + '\n}')

In [None]:
proof_code.startswith('<think>')