In [10]:
# Show every model that supports the generateContent method, so the exact
# model identifier can be copied into the initialization cell.
for model_info in genai.list_models():
    if 'generateContent' not in model_info.supported_generation_methods:
        continue
    print(f"Model: {model_info.name}")

Model: models/gemini-2.5-pro-preview-03-25
Model: models/gemini-2.5-flash-preview-05-20
Model: models/gemini-2.5-flash
Model: models/gemini-2.5-flash-lite-preview-06-17
Model: models/gemini-2.5-pro-preview-05-06
Model: models/gemini-2.5-pro-preview-06-05
Model: models/gemini-2.5-pro
Model: models/gemini-2.0-flash-exp
Model: models/gemini-2.0-flash
Model: models/gemini-2.0-flash-001
Model: models/gemini-2.0-flash-exp-image-generation
Model: models/gemini-2.0-flash-lite-001
Model: models/gemini-2.0-flash-lite
Model: models/gemini-2.0-flash-preview-image-generation
Model: models/gemini-2.0-flash-lite-preview-02-05
Model: models/gemini-2.0-flash-lite-preview
Model: models/gemini-2.0-pro-exp
Model: models/gemini-2.0-pro-exp-02-05
Model: models/gemini-exp-1206
Model: models/gemini-2.0-flash-thinking-exp-01-21
Model: models/gemini-2.0-flash-thinking-exp
Model: models/gemini-2.0-flash-thinking-exp-1219
Model: models/gemini-2.5-flash-preview-tts
Model: models/gemini-2.5-pro-preview-tts
Model: m

# HOL4 to LEAN Translation using Gemini API

This notebook translates HOL4 theorem statements to LEAN using Google's Gemini API.

## Setup and Imports

In [None]:
# Install required packages
# !pip install google-generativeai

In [1]:
import json
import google.generativeai as genai
import time
from typing import List, Dict
import os

  from .autonotebook import tqdm as notebook_tqdm


## Configuration

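Provide your Gemini API key in the cell below. You can get one from: https://makersuite.google.com/app/apikey. Treat the key as a secret: do not commit or share a notebook that contains a real key, and revoke any key that has been exposed.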

In [2]:
# Read the API key from the environment so the secret never lives in the notebook.
# Export GEMINI_API_KEY (or GOOGLE_API_KEY) before starting the kernel.
# NOTE(review): a literal key was previously stored in this cell; if that key was
# real it should be treated as leaked and revoked.
API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")

if not API_KEY:
    # Fail here with an actionable message instead of a confusing auth error later.
    raise RuntimeError(
        "No Gemini API key found. Set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable and restart the kernel."
    )

genai.configure(api_key=API_KEY)

## File Paths Configuration

In [3]:
# Input and output file paths.
# Both files live in the same directory; the output name is the input name
# with a "_lean" suffix before the extension.
_DATA_DIR = "test_file"
_BASE_NAME = "output_namespacePropsScript"

INPUT_FILE = f"{_DATA_DIR}/{_BASE_NAME}.json"
OUTPUT_FILE = f"{_DATA_DIR}/{_BASE_NAME}_lean.json"

## Initialize Gemini Model

In [11]:
# Model selection: 'gemini-2.5-flash' is the faster, cheaper option;
# 'gemini-2.5-pro' is the more capable one.
MODEL_NAME = 'gemini-2.5-flash'

# `model` is the shared client used by the translation functions below.
model = genai.GenerativeModel(MODEL_NAME)

print("Gemini model initialized successfully!")

Gemini model initialized successfully!


## Translation Functions

### Translation Strategy

This notebook now uses a **batch translation approach** where all statements are sent to the LLM at once. This has several advantages:

1. **Dependency Awareness**: The LLM can see all Datatypes, Definitions, and Theorems together, understanding how they relate to each other.

2. **Type Consistency**: When translating theorems that reference datatypes or definitions, the LLM knows exactly how those types were translated.

3. **Efficiency**: Only one API call is needed instead of multiple calls (though this may hit token limits for very large files).

4. **Ordering**: Statements are automatically sorted (Datatypes → Definitions → Theorems) to ensure dependencies are presented in the correct order.

**Note**: A single call can exceed the model's token limits on large files. For files with more than 50 items, this notebook uses `translate_json_file_chunked()` to split the work into chunks.


In [None]:
def _build_translation_prompt(ordered_items: List[Dict]) -> str:
    """Assemble the single prompt that carries every HOL4 statement, in the given order."""
    header = """You are an expert in formal theorem proving systems. Translate ALL of the following HOL4 statements to LEAN 4 syntax.

IMPORTANT: The statements are ordered by dependency - Datatypes first, then Definitions, then Theorems. Many theorems and definitions depend on the datatypes and earlier definitions. Please consider these dependencies when translating.

Instructions:
- Use LEAN 4 syntax (not LEAN 3)
- Preserve the logical structure and meaning
- Use appropriate LEAN type annotations
- Handle option types (SOME/NONE in HOL4 → some/none in LEAN)
- Convert HOL4 list notation to LEAN list notation
- Use LEAN's unicode symbols where appropriate (e.g., ∀, ∃, →, ∧, ∨)
- Pay attention to type definitions (Datatypes) as later statements may reference them
- Ensure definitions are properly typed based on earlier type definitions

Format your response as a JSON array where each element has:
{
  "name": "original_name",
  "statement": "translated LEAN 4 statement"
}

Here are the HOL4 statements to translate:

"""
    parts = [header]
    for position, item in enumerate(ordered_items, 1):
        parts.append(f"\n{position}. {item['kind']}: {item['name']}\n")
        parts.append(f"   HOL4 Statement:\n   {item['statement']}\n")
    parts.append("\n\nPlease provide the translations as a JSON array. Include ONLY the JSON array in your response, no additional text or markdown.")
    return "".join(parts)


def _strip_code_fences(text: str) -> str:
    """Remove one surrounding markdown code fence (with optional language tag) from text."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned

    body = cleaned[3:]
    first_line, newline, remainder = body.partition("\n")
    tag = first_line.strip()
    if newline and (not tag or tag.isalnum()):
        # The opening line held only the fence and an optional language tag.
        body = remainder
    elif body[:4].lower() == "json":
        # Fence and payload share a line, e.g. ```json[ ... ]```.
        body = body[4:]

    # Only drop a trailing fence that is actually there; a truncated reply keeps
    # its last line, and backticks inside the payload are never touched.
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def translate_all_statements(data: List[Dict]) -> List[Dict]:
    """
    Translate all HOL4 statements to LEAN in one API call, considering dependencies.

    The statements are sent to the module-level `model` sorted as
    Datatypes, Definitions, Theorems (any other kind last), and the reply is
    matched back to the inputs by name.

    Args:
        data: List of dictionaries with 'kind', 'name', and 'statement' fields
              (an optional 'source_file' field is copied through).

    Returns:
        One dictionary per input item, in the same order as `data`, with keys
        'kind', 'name', 'statement' (the LEAN translation, or a
        "[Translation not found for <name>]" marker when the reply has no
        usable entry for that name), 'original_hol4', and 'source_file' when
        the input had it. An empty `data` returns [] without calling the API.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        ValueError: If the reply is valid JSON but not an array.
        Exception: Any error raised by the API call is printed and re-raised.
    """
    # Nothing to translate: avoid spending an API call on an empty prompt.
    if not data:
        return []

    # Sort data to put Datatypes first, then Definitions, then Theorems.
    # This ensures dependencies are defined before they're used.
    kind_order = {'Datatype': 0, 'Definition': 1, 'Theorem': 2}
    sorted_data = sorted(data, key=lambda x: kind_order.get(x['kind'], 3))

    prompt = _build_translation_prompt(sorted_data)

    # Bound up front so the JSON error handler can always print it.
    response_text = ""
    try:
        print("Sending all statements to LLM for translation...")
        response = model.generate_content(prompt)
        response_text = _strip_code_fences(response.text)

        # Parse the JSON response
        translated_items = json.loads(response_text)
        if not isinstance(translated_items, list):
            raise ValueError(
                f"Expected a JSON array of translations, got {type(translated_items).__name__}"
            )

        # Entries missing 'name' or 'statement' are skipped; the affected items
        # then receive the "not found" marker instead of aborting the whole batch.
        name_to_translation = {
            entry['name']: entry['statement']
            for entry in translated_items
            if isinstance(entry, dict) and 'name' in entry and 'statement' in entry
        }

        # Match translations back to original items (preserving original order)
        result = []
        for item in data:
            lean_statement = name_to_translation.get(item['name'], f"[Translation not found for {item['name']}]")
            translated_item = {
                "kind": item['kind'],
                "name": item['name'],
                "statement": lean_statement,
                "original_hol4": item['statement']
            }
            if 'source_file' in item:
                translated_item['source_file'] = item['source_file']
            result.append(translated_item)

        return result

    except json.JSONDecodeError as e:
        print(f"Error parsing JSON response: {str(e)}")
        print(f"Response text: {response_text[:500]}...")
        raise
    except Exception as e:
        print(f"Error during translation: {str(e)}")
        raise


def translate_json_file(input_path: str, output_path: str) -> None:
    """
    Read a JSON file of HOL4 statements, translate them in one batch, and write the result.

    Args:
        input_path: Path to input JSON file
        output_path: Path to output JSON file
    """
    print(f"Loading input file: {input_path}")
    with open(input_path, 'r', encoding='utf-8') as source:
        data = json.load(source)

    def count_kind(kind: str) -> int:
        # Number of loaded items whose 'kind' field equals `kind`.
        return len([entry for entry in data if entry['kind'] == kind])

    # Summary of what was loaded, broken down by statement kind.
    print(f"Found {len(data)} items to translate")
    print(f"  Datatypes: {count_kind('Datatype')}")
    print(f"  Definitions: {count_kind('Definition')}")
    print(f"  Theorems: {count_kind('Theorem')}")

    # A single call translates the whole file.
    translated_data = translate_all_statements(data)

    # Write the translations as indented JSON, keeping non-ASCII symbols readable.
    print(f"\nSaving translated data to: {output_path}")
    with open(output_path, 'w', encoding='utf-8') as sink:
        json.dump(translated_data, sink, indent=2, ensure_ascii=False)

    print(f"\nTranslation complete! {len(translated_data)} items translated.")


In [None]:
def translate_json_file_chunked(input_path: str, output_path: str, chunk_size: int = 50) -> None:
    """
    Translate statements in chunks for large files.
    Useful when the file is too large to process in one API call.

    Items are sorted as Datatypes, Definitions, Theorems (any other kind last),
    split into consecutive chunks, and each chunk is translated with its own
    call to `translate_all_statements`. Each call therefore only sees the
    statements of its own chunk. The output file lists the translations in the
    same order as the input file.

    Args:
        input_path: Path to input JSON file
        output_path: Path to output JSON file
        chunk_size: Number of items to process per chunk (must be >= 1)

    Raises:
        ValueError: If `chunk_size` is less than 1.
        RuntimeError: If a chunk's translation does not contain exactly one
            result per input item.
    """
    # Validate before any I/O: a negative size would otherwise produce zero
    # chunks and silently write an empty output file.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    # Load the input JSON file
    print(f"Loading input file: {input_path}")
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    print(f"Found {len(data)} items to translate")

    # Sort positions (not items) by dependency order so every translation can be
    # written back to its original slot by index. Matching by name would merge
    # items that share a name.
    kind_order = {'Datatype': 0, 'Definition': 1, 'Theorem': 2}
    sorted_positions = sorted(range(len(data)), key=lambda pos: kind_order.get(data[pos]['kind'], 3))

    # Split into chunks
    chunks = [sorted_positions[i:i + chunk_size] for i in range(0, len(sorted_positions), chunk_size)]
    print(f"Processing in {len(chunks)} chunks of up to {chunk_size} items each")

    result = [None] * len(data)
    for i, positions in enumerate(chunks, 1):
        print(f"\nProcessing chunk {i}/{len(chunks)} ({len(positions)} items)...")
        chunk = [data[pos] for pos in positions]
        translated_chunk = translate_all_statements(chunk)

        # translate_all_statements returns one result per input, in input order;
        # the index-based write-back below depends on that.
        if len(translated_chunk) != len(chunk):
            raise RuntimeError(
                f"Chunk {i} returned {len(translated_chunk)} translations for {len(chunk)} items"
            )
        for pos, translated in zip(positions, translated_chunk):
            result[pos] = translated

        # Small delay between chunks
        if i < len(chunks):
            time.sleep(2)

    # Save the translated data (already in original input order)
    print(f"\nSaving translated data to: {output_path}")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    print(f"\nTranslation complete! {len(result)} items translated.")


## Test Translation on a Small Sample

Let's test the translation on a small subset (the first 5 items) before translating the whole file:

In [None]:
# Read the whole input file; only a small slice of it is translated below.
with open(INPUT_FILE, 'r', encoding='utf-8') as f:
    sample_data = json.load(f)

print(f"Loaded {len(sample_data)} items")
print(f"  Datatypes: {len([x for x in sample_data if x['kind'] == 'Datatype'])}")
print(f"  Definitions: {len([x for x in sample_data if x['kind'] == 'Definition'])}")
print(f"  Theorems: {len([x for x in sample_data if x['kind'] == 'Theorem'])}")

# Smoke test: translate just the first 5 items before committing to a full run.
test_sample = sample_data[:5]
print(f"\nTesting translation on first {len(test_sample)} items...")

translated_sample = translate_all_statements(test_sample)

print(f"\nTranslation Results:")
print("="*80)
for item in translated_sample:
    print(f"\n{item['kind']}: {item['name']}")

    # Statements longer than 100 characters are cut and marked with an ellipsis.
    hol4_text = item['original_hol4']
    if len(hol4_text) > 100:
        print(f"HOL4: {hol4_text[:100]}...")
    else:
        print(f"HOL4: {hol4_text}")

    lean_text = item['statement']
    if len(lean_text) > 100:
        print(f"LEAN: {lean_text[:100]}...")
    else:
        print(f"LEAN: {lean_text}")

    print("-"*80)


Testing translation for: mk_id_surj

Original HOL4 statement:
!id. ?p n. id = mk_id p n

LEAN translation:
∀ (id : Id), ∃ (p : String) (n : Nat), id = mk_id p n

LEAN translation:
∀ (id : Id), ∃ (p : String) (n : Nat), id = mk_id p n


## Translate All Statements

Choose the appropriate translation method based on file size:

- **`translate_json_file()`**: For small to medium files (≤ 50 items). Translates everything in one API call with full context.
- **`translate_json_file_chunked()`**: For large files (> 50 items). Processes in chunks to avoid token limits.

Both methods maintain dependency order (Datatypes → Definitions → Theorems) within each chunk.


In [None]:
# Largest file translated in a single API call; beyond this the file is
# processed in chunks of the same size. Kept in one place so the threshold,
# the log message, and the chunk size cannot drift apart.
SINGLE_BATCH_LIMIT = 50

# Check file size to decide which method to use
with open(INPUT_FILE, 'r', encoding='utf-8') as f:
    data = json.load(f)

num_items = len(data)
print(f"File contains {num_items} items")

if num_items <= SINGLE_BATCH_LIMIT:
    print("Using single-batch translation (all at once)...")
    translate_json_file(INPUT_FILE, OUTPUT_FILE)
else:
    print(f"Using chunked translation (chunks of {SINGLE_BATCH_LIMIT} items)...")
    translate_json_file_chunked(INPUT_FILE, OUTPUT_FILE, chunk_size=SINGLE_BATCH_LIMIT)


Loading input file: test_file/output_namespacePropsScript.json
Found 56 items to translate

Translating 1/56: mk_id_surj
  Original: !id. ?p n. id = mk_id p n...
  LEAN:     theorem mk_id_surj {IdType PType NType : Type} (mk_id : PType → NType → IdType) : ∀ (id : IdType), ∃...
  Original: !id. ?p n. id = mk_id p n...
  LEAN:     theorem mk_id_surj {IdType PType NType : Type} (mk_id : PType → NType → IdType) : ∀ (id : IdType), ∃...

Translating 2/56: mk_id_thm

Translating 2/56: mk_id_thm
  Original: !id. mk_id (id_to_mods id) (id_to_n id) = id...
  LEAN:     theorem mk_id_thm {IdType ModsType NType : Type} (mk_id : ModsType → NType → IdType) (id_to_mods : I...
  Original: !id. mk_id (id_to_mods id) (id_to_n id) = id...
  LEAN:     theorem mk_id_thm {IdType ModsType NType : Type} (mk_id : ModsType → NType → IdType) (id_to_mods : I...

Translating 3/56: nsSub_mono2

Translating 3/56: nsSub_mono2
  Original: (!x y z. nsLookup e1 x = SOME y ∧ nsLookup e2 x = SOME z ∧ R1 x y z ⇒ R2 x y z) ⇒

## Verify the Output

Let's check the translated output:

In [None]:
# Read the saved translations back from disk to confirm the file was written.
with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
    translated_data = json.load(f)

print(f"Total translated items: {len(translated_data)}")
print("\nFirst 3 translated theorems:")
print("="*80)

# Show the first three entries of the output file, numbered from 1.
for number, item in enumerate(translated_data[:3], start=1):
    print(f"\n{number}. {item['kind']}: {item['name']}")
    print(f"   HOL4: {item['original_hol4']}")
    print(f"   LEAN: {item['statement']}")
    print("-"*80)

## Export Statistics

In [None]:
# Generate statistics about the translation
with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
    translated_data = json.load(f)

print("Translation Statistics:")
print("="*50)
print(f"Total items translated: {len(translated_data)}")

# Count by kind
kind_counts = {}
for item in translated_data:
    kind = item['kind']
    kind_counts[kind] = kind_counts.get(kind, 0) + 1

print("\nBreakdown by kind:")
for kind, count in kind_counts.items():
    print(f"  {kind}: {count}")

# Check for translation errors.
# The batch translator marks a missing translation with
# "[Translation not found for <name>]"; "[Translation Error" is kept as well so
# output files that carry that marker are still recognised.
ERROR_MARKERS = ("[Translation Error", "[Translation not found")
errors = [
    item for item in translated_data
    if any(marker in item['statement'] for marker in ERROR_MARKERS)
]
print(f"\nTranslation errors: {len(errors)}")

if errors:
    print("\nItems with errors:")
    for item in errors:
        print(f"  - {item['name']}")

## Check Dependency Awareness

Let's verify that the translation properly handles dependencies between Datatypes, Definitions, and Theorems:


In [None]:
# Print one representative item of each kind from the saved output.
with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
    translated_data = json.load(f)

# Group the translated items by their 'kind' field.
datatypes = [item for item in translated_data if item['kind'] == 'Datatype']
definitions = [item for item in translated_data if item['kind'] == 'Definition']
theorems = [item for item in translated_data if item['kind'] == 'Theorem']

print("Dependency Chain Example:")
print("="*80)

# Each entry pairs a section heading with the group it illustrates; the first
# item of every non-empty group is shown, cut to 80 characters.
sections = (
    ("\n1. DATATYPE (defines types used by definitions and theorems):", datatypes),
    ("\n2. DEFINITION (may use datatypes, used by theorems):", definitions),
    ("\n3. THEOREM (may use datatypes and definitions):", theorems),
)
for heading, group in sections:
    if not group:
        continue
    example = group[0]
    print(heading)
    print(f"   Name: {example['name']}")
    print(f"   HOL4: {example['original_hol4'][:80]}...")
    print(f"   LEAN: {example['statement'][:80]}...")

print("\n" + "="*80)
print("The LLM translated all of these together, so it knows:")
print("- What types are defined in Datatypes")
print("- What functions/values are defined in Definitions")
print("- How to translate Theorems that reference these types and definitions")
