# In [None]:
import subprocess
import time
import random
import re
from openai import OpenAI
import csv
import os

# === Configuration ===

# Lean source file that every generated snippet overwrites, and the Lake
# project root from which `lake build` is run.
LEAN_FILE_PATH = r"D:\MSc Research Project\MathlibProject\MathlibProject.lean"
LEAN_PROJECT_DIR = r"D:\MSc Research Project\MathlibProject"
# Default number of counted attempts used by feedback_loop.
MAX_ATTEMPTS = 1000

# DeepSeek API Setup
# The key is taken from the DEEPSEEK_API_KEY environment variable so that a
# real credential never has to live in the source file; the placeholder is
# only the fallback used when the variable is not set.
client = OpenAI(
    api_key=os.environ.get("DEEPSEEK_API_KEY", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"),
    base_url="https://api.deepseek.com",
)

# Problem statement handed to the solver prompts.
# Raw string: the LaTeX backslashes are literal. The braces are single because
# this text is substituted INTO the prompt templates as a str.format argument;
# format arguments are not re-scanned, so a doubled "{{5}}" would reach the
# model unchanged.
problem = (
    r"If $3+\sqrt{5}$ is a root of the equation \[x^2 - 3x + b = 0,\] compute $b.$"
)

# === Prompts ===

# Template for the first, feedback-free solve attempt; `{problem}` is filled
# in with str.format.
# The backslash in "\\boxed" is escaped on purpose: an unescaped "\b" is the
# backspace character and would corrupt the requested answer format.
INITIAL_PROMPT = (
    "Given the following math word problem, provide a **concise and brief** solution: include only the minimal necessary explanation and the final answer.\n"
    "Return the answer in the format: \\boxed{{answer}}.\n"
    "Problem: {problem}"
)

# Retry template that shows the model both its previous answer and the
# interpreted error; placeholders: {problem}, {previous_answer}, {error}.
# "\\boxed" is escaped because an unescaped "\b" is the backspace character.
RETRY_SOLVE_ERROR_PROMPT = (
    "Given the following math problem:\n"
    "{problem}\n\n"
    "You previously solved it like this:\n"
    "{previous_answer}\n\n"
    "But it had the following error:\n"
    "{error}\n\n"
    "Retry from scratch\n"
    "Provide a **concise and brief** solution: include only the minimal necessary explanation and the final answer.\n"
    "Return the answer in the format: \\boxed{{answer}}.\n"
)

# Retry template that only tells the model its previous answer was wrong;
# placeholders: {problem}, {previous_answer}.
# "\\boxed" is escaped because an unescaped "\b" is the backspace character.
BINARY_CORRECT_PROMPT = (
    "Given the following math problem:\n"
    "{problem}\n\n"
    "You previously solved it like this:\n"
    "{previous_answer}\n\n"
    "This answer was incorrect. Retry the problem from scratch.\n"
    "Provide a **concise and brief** solution: include only the minimal necessary explanation and the final answer.\n"
    "Return the answer in the format: \\boxed{{answer}}.\n"
)

# Retry template that reports the interpreted error but omits the previous
# answer; placeholders: {problem}, {error}.
# "\\boxed" is escaped because an unescaped "\b" is the backspace character.
RETRY_SOLVE_WITHOUT_PREVIOUS_ANSWER_PROMPT = (
    "Given the following math problem:\n"
    "{problem}\n\n"
    "Your previous answer had the following error:\n"
    "{error}\n\n"
    "Provide a **concise and brief** solution: include only the minimal necessary explanation and the final answer.\n"
    "Return the answer in the format: \\boxed{{answer}}.\n"
)

# Few-shot template asking the model to turn a natural-language solution into
# Lean code that numerically checks whether 3 + sqrt(5) is a root for the
# extracted value of b. The only placeholder is {initial_solution}; every
# other brace is doubled so that str.format emits it literally.
# Written as a raw string so the LaTeX backslashes (\(, \sqrt, \cdot, \[ ...)
# are literal characters rather than invalid escape sequences.
LEAN_CODE_GENERATION_PROMPT = r"""
You are given a natural language solution to a math problem. 
Extract the valid values of 'b' (final boxed answer), then output Lean code using the template exactly. 
Replace the value of 'b' with floating point number.

Template:

def givenRoot : Float := 3.0 + 2.2360679775  -- 3 + √5
def input_b : Float := <INPUT_B_VALUE_IN_FLOAT>  -- Float value of b

def evalPoly (x b : Float) : Float := x*x - 3.0*x + b
def approxZero (x : Float) (eps : Float := 0.01) : Bool := Float.abs x < eps

def isRoot : Bool := approxZero (evalPoly givenRoot input_b)

#eval ("Is 3+sqrt5 a root of polynomial?", isRoot)  

Use following problems as examples to generate the Lean code with given output format.

Example 1:

Input:

**Required steps/formula:**
- For a quadratic equation \(x^2 + px + q = 0\), if \(r\) is a root, then the conjugate \(r'\) (if irrational) is also a root.
- Sum of roots: \(r + r' = -p\)
- Product of roots: \(r \cdot r' = q\)
- Here, \(p = -3\), so sum of roots = \(3\).
- Given root: \(3 + \sqrt{{5}}\), so conjugate root: \(3 - \sqrt{{5}}\).
- Compute \(b = (3 + \sqrt{{5}})(3 - \sqrt{{5}})\).

**Explanation:**
Since the coefficients are rational and \(3+\sqrt{{5}}\) is irrational, its conjugate \(3-\sqrt{{5}}\) must also be a root. The product of the roots gives \(b\).

**Calculation:**
\[
(3 + \sqrt{{5}})(3 - \sqrt{{5}}) = 3^2 - (\sqrt{{5}})^2 = 9 - 5 = 4.
\]

**Final answer:**
\[
\boxed{{4}}
\]

Output:

def givenRoot : Float := 3.0 + 2.2360679775  -- 3 + √5
def input_b : Float := 4.0  -- Float value of b

def evalPoly (x b : Float) : Float := x*x - 3.0*x + b
def approxZero (x : Float) (eps : Float := 0.01) : Bool := Float.abs x < eps

def isRoot : Bool := approxZero (evalPoly givenRoot input_b)

#eval ("Is 3+sqrt5 a root of polynomial?", isRoot) 
Example 2:

Input:

**Step 1:** Since \(3 + \sqrt{{5}}\) is a root and the coefficients are real, the conjugate \(3 - \sqrt{{5}}\) must also be a root.

**Step 2:** For a quadratic equation \(x^2 - 3x + b = 0\), the sum of the roots is \(3\) and the product is \(b\).

**Step 3:** Sum of roots:
\[
(3 + \sqrt{{5}}) + (3 - \sqrt{{5}}) = 6.
\]
But the coefficient of \(x\) is \(-3\), so the sum should be \(3\). There is a discrepancy.

**Step 4:** Re-examine the equation: it is \(x^2 - 3x + b = 0\). The sum of the roots is indeed \(3\). However, the sum we computed is \(6\), which is not \(3\). This indicates that the conjugate root is not necessarily present because the coefficients are integers? Actually, \(b\) is to be computed, and it might not be integer. But the problem states "compute \(b\)", and the equation has integer coefficients for \(x^2\) and \(x\).

Wait: The equation is \(x^2 - 3x + b = 0\). The coefficient of \(x^2\) is 1, and of \(x\) is \(-3\), both integers. Therefore, if \(3 + \sqrt{{5}}\) is a root, then its conjugate \(3 - \sqrt{{5}}\) must also be a root, because the irrational part \(\sqrt{{5}}\) must cancel out.

But then the sum would be \(6\), but it should be \(3\). This is a contradiction.

**Step 5:** Perhaps the conjugate is not required. Alternatively, maybe the equation has only one root given, and we can substitute to find \(b\).

Substitute \(x = 3 + \sqrt{{5}}\) into the equation:
\[
(3 + \sqrt{{5}})^2 - 3(3 + \sqrt{{5}}) + b = 0.
\]
Compute:
\[
(9 + 6\sqrt{{5}} + 5) - 9 - 3\sqrt{{5}} + b = 0,
\]
\[
14 + 6\sqrt{{5}} - 9 - 3\sqrt{{5}} + b = 0,
\]
\[
5 + 3\sqrt{{5}} + b = 0.
\]
So,
\[
b = -5 - 3\sqrt{{5}}.
\]

This is a valid value for \(b\), and it is not an integer. The conjugate root is not necessarily present because \(b\) is not constrained to be an integer? The problem does not state that \(b\) is integer, only to compute it.

Therefore, we can proceed with this.

**Final Answer:**
\[
\boxed{{-5 - 3\sqrt{{5}}}}
\]

Output:
def givenRoot : Float := 3.0 + 2.2360679775 
def input_b : Float := -5.0 - 3*2.2360679775  -- Float value of b= -5 - 3√5

def evalPoly (x b : Float) : Float := x*x - 3.0*x + b
def approxZero (x : Float) (eps : Float := 0.01) : Bool := Float.abs x < eps

def isRoot : Bool := approxZero (evalPoly givenRoot input_b)

#eval ("Is 3+sqrt5 a root of polynomial?", isRoot)  

Now do this for the following input:
{initial_solution}
"""

# One-shot template asking the model to explain a Lean run in one sentence.
# Placeholders: {lean_code} and {lean_output}; other braces are doubled so
# str.format emits them literally.
# The example program defines `isRoot` before evaluating it, so the sample
# is self-consistent with the sample output shown below it.
LEAN_CODE_INTERPRETATION_PROMPT = """
    Given the given Lean code input, and its output, interpret the results and provide a concise explanation of the error.
    Do not include any additional information or context, just the interpretation of the Lean output.
    
    Use the following example as a guide for your response:

    Input:

    Lean code:
    def candidateRoot : Float := 3.0 + 2.2360679775  --- 3 + √5
    def input_b : Float := 4.0 --- Float value of b=4

    def evalPoly (x b : Float) : Float := x*x - 3.0*x + b
    def approxZero (x : Float) (eps : Float := 0.01) : Bool := Float.abs x < eps

    def isRoot : Bool := approxZero (evalPoly candidateRoot input_b)

    #eval ("Is 3+sqrt5 a root of polynomial?", isRoot) 

    Lean output:

    #eval result: ("Is 3+sqrt5 a root of polynomial?", false)

    Output:
    3+sqrt{{5}} is not a root of the polynomial x^2 - 3x + b = 0 for the given value of b.

    Now do this for the following input:

    Lean code:
    {lean_code}

    Lean output:
    {lean_output}
    """
  
    

# === Helper functions ===

def call_deepseek(user_prompt: str, system_prompt: str) -> str:
    """Send one system + user exchange to the DeepSeek chat model.

    Args:
        user_prompt: Content of the user message.
        system_prompt: Content of the system message.

    Returns:
        The text of the first returned choice with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.0,
        max_tokens=1024,
        stream=False,
    )
    # The message content is optional in the chat-completions schema; fall
    # back to "" instead of raising AttributeError on None.
    content = response.choices[0].message.content
    return (content or "").strip()

def write_lean_code(code: str):
    """Replace the contents of the file at LEAN_FILE_PATH with *code*, written as UTF-8."""
    with open(LEAN_FILE_PATH, mode="w", encoding="utf-8") as lean_file:
        lean_file.write(code)

def run_lean_code() -> tuple[bool, str]:
    """Run ``lake build`` in LEAN_PROJECT_DIR and report the outcome.

    Returns:
        A ``(success, output)`` pair. ``success`` is True only when the
        process exited with status 0 and its stderr does not contain the
        word "error" (case-insensitive). ``output`` is the captured stdout
        and stderr, joined under "=== STDOUT ===" / "=== STDERR ===" headers.
    """
    result = subprocess.run(
        ["lake", "build"],
        cwd=LEAN_PROJECT_DIR,
        capture_output=True,
        text=True,
    )
    stdout, stderr = result.stdout, result.stderr

    # The exit status is the primary signal of a failed build; the stderr
    # scan is kept as a second guard so the check is never looser than the
    # text-only heuristic it extends.
    success = result.returncode == 0 and "error" not in stderr.lower()

    output = f"=== STDOUT ===\n{stdout}\n\n=== STDERR ===\n{stderr}"

    return success, output

# First fenced block in a reply: an optional language tag on the opening
# fence line (e.g. "lean") is consumed, and only the body is captured.
_FENCED_BLOCK_RE = re.compile(r"```(?:[A-Za-z0-9_+-]*[ \t]*\r?\n)?(.*?)```", re.DOTALL)


def clean_lean_code(raw_code: str) -> str:
    """Extract Lean code from an API response.

    If the response contains a complete Markdown code fence, the body of the
    first fenced block is returned without the fence markers, without the
    optional language tag on the opening fence line, and without any text
    before or after the block. Otherwise (no fence, or an unterminated one)
    the whole response is returned. The result is always stripped of leading
    and trailing whitespace.

    Args:
        raw_code: Raw text returned by the model.

    Returns:
        The cleaned Lean source text.
    """
    fenced = _FENCED_BLOCK_RE.search(raw_code)
    code = fenced.group(1) if fenced else raw_code
    return code.strip()

# === Main Feedback Loop ===

def _is_verified(build_ok: bool, lean_output: str) -> bool:
    """Return True when the build succeeded and its output contains no "false"."""
    return build_ok and "false" not in lean_output.lower()


def _write_attempt_log(log_file, detail_rows, counters):
    """Write one attempt's CSV: header row, detail rows, then the running counters."""
    with open(log_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Field", "Content"])
        writer.writerows(detail_rows)
        writer.writerows(counters.items())


def feedback_loop(problem_text: str, max_attempts: int = MAX_ATTEMPTS):
    """Run the solve / verify / retry experiment and log each attempt to CSV.

    For every counted attempt the model solves the problem, a second model
    call turns the solution into Lean code, and the code is built with
    run_lean_code. When the first solution is verified, the attempt is logged
    and the loop moves on. Otherwise the Lean output is interpreted into an
    error description and four retry prompts ("no_feedback",
    "binary_correctness", "error_feedback_without_previous_answer",
    "error_feedback") are each tried once, in random order, and verified the
    same way.

    A solution counts as verified only when the Lean build reports success
    and the build output does not contain "false"; a failed build is
    therefore never counted as a correct answer.

    Args:
        problem_text: The problem statement inserted into the prompts.
        max_attempts: Number of attempts to run. Responses without a boxed
            answer are tallied but do not consume an attempt.

    Returns:
        None. Results are printed and written to one "Attempt N.csv" file per
        attempt inside the log folder; each file ends with the running
        success counters.
    """
    # Running totals, kept in the order in which they are written to the log.
    counters = {
        "First Try Correct": 0,
        "No Feedback Correct": 0,
        "Binary Correct": 0,
        "Error Feedback Without Previous Answer": 0,
        "Error Feedback": 0,
        "Incomplete Answer": 0,
    }
    # Which counter each retry arm increments on success.
    arm_counter = {
        "no_feedback": "No Feedback Correct",
        "binary_correctness": "Binary Correct",
        "error_feedback_without_previous_answer": "Error Feedback Without Previous Answer",
        "error_feedback": "Error Feedback",
    }

    # Create logging folder:
    log_folder = r"D:\MSc Research Project\Problem-3 logs"
    os.makedirs(log_folder, exist_ok=True)

    attempt = 0
    while attempt < max_attempts:
        attempt += 1

        # Step 1: Initial solve attempt
        user_prompt = INITIAL_PROMPT.format(problem=problem_text)
        print(f"Attempt {attempt}: {user_prompt}")
        response = call_deepseek(user_prompt, system_prompt="You are a concise math problem solver.")
        print(f"Response: {response}")

        # Step 2: Require a boxed answer before spending a Lean build on it.
        # NOTE(review): the identical prompt is re-sent without any delay or
        # cap, so a model that keeps omitting the boxed answer would keep
        # this loop spinning; confirm whether a limit is wanted.
        if not re.search(r'\\boxed\{([^}]*)\}', response):
            counters["Incomplete Answer"] += 1
            attempt -= 1  # do not count incomplete answer
            continue

        # Step 3: Translate the solution into Lean code and write it out
        lean_code = call_deepseek(
            user_prompt=LEAN_CODE_GENERATION_PROMPT.format(initial_solution=response),
            system_prompt="You are a Lean code generator and verifier for math solutions."
        )
        print(f"Lean Code: {lean_code}")
        clean_code = clean_lean_code(lean_code)
        write_lean_code(clean_code)
        print(f"Clean Lean Code: {clean_code}")

        # Step 4: Run Lean build
        success, lean_output = run_lean_code()
        print(f"Lean Output: {lean_output}")

        log_file = os.path.join(log_folder, f"Attempt {attempt}.csv")
        detail_rows = [
            ["Initial Prompt", user_prompt],
            ["Initial Response", response],
            ["Lean Code", clean_code],
            ["Lean Output", lean_output],
        ]

        # Check if first try correct: log and skip the retries
        if _is_verified(success, lean_output):
            counters["First Try Correct"] += 1
            _write_attempt_log(log_file, detail_rows, counters)
            continue

        # Step 5: Interpret Lean output
        error_description = call_deepseek(
            user_prompt=LEAN_CODE_INTERPRETATION_PROMPT.format(
                lean_code=clean_code,
                lean_output=lean_output
            ),
            system_prompt="You are a Lean code interpreter and verifier for math solutions."
        )
        print(f"Error Description: {error_description}")
        detail_rows.append(["Error Description", error_description])

        # Step 6: Prepare retry arms
        arms = [
            ("no_feedback", INITIAL_PROMPT.format(problem=problem_text)),
            ("binary_correctness", BINARY_CORRECT_PROMPT.format(problem=problem_text, previous_answer=response)),
            ("error_feedback_without_previous_answer", RETRY_SOLVE_WITHOUT_PREVIOUS_ANSWER_PROMPT.format(
                problem=problem_text,
                error=error_description
            )),
            ("error_feedback", RETRY_SOLVE_ERROR_PROMPT.format(
                problem=problem_text,
                previous_answer=response,
                error=error_description
            )),
        ]

        # Randomize the order so no arm systematically runs first.
        random.shuffle(arms)

        # Step 7: Run retries (rows are logged in the shuffled order)
        for arm_name, retry_prompt in arms:
            time.sleep(random.uniform(0.2, 1.0))
            retry_response = call_deepseek(retry_prompt, system_prompt="You are a concise math problem solver.")

            retry_lean_code = call_deepseek(
                user_prompt=LEAN_CODE_GENERATION_PROMPT.format(initial_solution=retry_response),
                system_prompt="You are a Lean code generator and verifier for math solutions."
            )
            clean_retry_code = clean_lean_code(retry_lean_code)
            write_lean_code(clean_retry_code)

            success_retry, retry_lean_output = run_lean_code()

            if _is_verified(success_retry, retry_lean_output):
                counters[arm_counter[arm_name]] += 1

            detail_rows.extend([
                [f"{arm_name} Retry Prompt", retry_prompt],
                [f"{arm_name} Retry Response", retry_response],
                [f"{arm_name} Retry Lean Code", clean_retry_code],
                [f"{arm_name} Retry Lean Output", retry_lean_output],
            ])

        # Step 8: Log everything to CSV
        _write_attempt_log(log_file, detail_rows, counters)



# === Run if main ===

if __name__ == "__main__":
    # Script entry point: run the experiment on the module-level problem
    # with the default attempt budget.
    feedback_loop(problem_text=problem)
