In [None]:
from dotenv import load_dotenv
import os

load_dotenv()  # Load variables from .env file

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("API_KEY environment variable is not set")

In [1]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_mistralai import ChatMistralAI
from langchain_openai import ChatOpenAI

def get_new_chain():
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """
    **Important Directive**:
    Ignore any prior knowledge or context you may have about SMT-LIB, SAT Fusion, or related topics. Only use the information and guidelines provided in this prompt to perform the task.

    ### Task Definition
    You are an SMT expert in **SAT Fusion** — a technique for combining two satisfiable SMT-LIB formulas. Your task is to produce a new **satisfiable** and **syntactically correct** SMT-LIB formula by fusing two input formulas ω1 and ω2.

    ### SMT-LIB Syntax Guidelines:
    **The generated output must strictly conform to the SMT-LIB format:
    1. Variable Declarations: Use declare-fun to declare variables with their types: (declare-fun <var> () <type>).
    2. Function Definitions: Use define-fun to define functions: (define-fun <name> (<arg1 type1> <arg2 type2> ...) <return-type> <body>).
    3. Declare all functions in prefix notation.
    4. Assertions: Use assert to specify constraints: (assert <constraint>).
    5. Boolean Expressions:
        • Logical AND: (and <expr1> <expr2> ...)
        • Logical OR: (or <expr1> <expr2> ...)
        • Logical NOT: (not <expr>)
    6. Arithmetic Operations: Use standard operators like +, -, *, /, and div for integers.
    7. Relational Operators: Use comparison operators like >, <, >=, <=, and = for numerical expressions.
    8. Equality and Disequality: Use = for equality and distinct for disequality: (distinct <expr1> <expr2> ...).
    9. Conditionals: Use ite for if-then-else expressions: (ite <condition> <then-branch> <else-branch>).
    10. Quantifiers: Use forall and exists for quantified expressions:
        • Universal: (forall ((x <type>) ...) <body>)
        • Existential: (exists ((x <type>) ...) <body>)
    11. Constants: Declare constants directly using their types:
        • Integers: 1, -3
        • Real numbers: 1.0, -2.5
        • Booleans: true, false
    12. Bit-vectors: Use (_ bv<value> <bit-width>) for bit-vector constants.
    13. Bitwise consistency: Ensure all bitwise operations and operands have the same bit-width.
    14. Parentheses: Ensure all parentheses are balanced and correctly nested.

    ### SAT Fusion Steps:
    1. **Conjoin the Two Formulas**:
        • Conjoin the two input formulas **ω1** and **ω2** into a single formula.
        • Include all variable declarations from both formulas.
        • Store this as an intermediate result.

    2. Introduce a Fresh Variable `z` not present in either formula.

    3. **Fusion Function**:
        • Identify the free variables in both formulas.
        • Define a fusion function z = f(...,...) that takes one free variable from ω1 and one free variable from ω2 as its arguments.
        • If both formulas have only one free variable and it is the same, define the fusion function as the identity function: (define-fun f ((x <type>)) <type> x). Then, substitute the shared variable with z.
        • Before finalizing the formula, verify that the combined constraints on z from both formulas are satisfiable. If they conflict (e.g., one formula forces z = 92 while the other requires z ≥ 128), adjust numeric bounds or constants to avoid contradiction while preserving the overall structure of the constraints. 
        
    4. **Substitute Variables**:
        • Derive the inverse of the fusion function f in terms of z.
        • Randomly substitute **one free variable** in **ω1** with the expression derived from the fusion function.
        • Randomly substitute **one free variable** in **ω2** with the expression derived from the fusion function.
        
    5. **Return the Final Formula**:
        • **ONLY return the fused formula, if it is guaranteed to be satisfiable.**
        • The final formula **MUST** be syntactically correct and follow all SMT-LIB syntax rules.
        • Validate parentheses matching before returning the formula.
        • **DO NOT** include conflicting constraints on the same value in the final formula. This will lead to unsatisfiability.
        • **DO NOT** include any new assert blocks or modify existing constraints; only substitute as specified.
        • Make sure the declarations of variables and functions are consistent with their usage in the formulas.
            • Avoid combining assertions that impose incompatible constraints on the same value of z (e.g., requiring z = 92 and z ≥ 128). If a contradiction arises, fix it before returning the formula.

    ### Points to Consider:
    1. **Only return the final fused formula from step 5, without any explanation, comments, or intermediate steps.**
    2. **DO NOT** add any new assert blocks or modify existing constraints; only substitute as specified.
    3. **DO NOT** declare any new variables which are not present in the original formulas, except for `z`.
    4. The output must be a **raw SMT-LIB formula**, without any additional formatting, string delimiters, or code block markers.
    5. **Normalize Bit-Widths**:
        • Ensure all bit-vector operations involve operands of the same bit-width.
    6. **Do not wrap the output with triple quotes (`'''smt` or `'''`).** The formula should be returned exactly as it would appear in an SMT solver.
    7. **Variable names must remain unchanged**, except for `z`.
    8. For all bit-wise operations, ensure Bit-Width Consistency
    9. Before returning the final fused formula, count the number of opening '(' and closing ')' parentheses.
        •	The total number of '(' must exactly match the total number of ')'.
        •	If the counts do not match, restructure the formula to fix the mismatch before returning it.

    10. Avoid re-declaring functions or constants with conflicting signatures.
    11. Validate the SMT formula for syntax and semantic correctness before processing.

    Now, perform the described operation for these formulas paying close attention to the details mentioned above:
            """
            ),
            (
                "human",
                """
    ω1: {formula1}
    ω2: {formula2}
    """
        ),
    ]
    )

    # llm = ChatMistralAI(
    #     model="mistral-large-latest",
    #     temperature=0,
    #     max_retries=4,
    #     api_key="KM3PGpAYB10SjVRj5IH5mysWswYiMJ4N",
    #     timeout=300
    # )

    
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=300,
        max_retries=4,
        api_key=OPENAI_API_KEY
    )
    
    chain = prompt | llm
    return chain

In [2]:
import os
import random
import shutil

def get_random_formulas(source_dir, target_dir):

    """
    Extracts 100 random .smt2 files from the source directory and copies them to the target directory.
    """

    # Check if the source directory exists
    if not os.path.exists(source_dir):
        print(f"Source directory {source_dir} does not exist.")
        return
    # Ensure the target directory exists
    os.makedirs(target_dir, exist_ok=True)

    for file_name in os.listdir(target_dir):
        file_path = os.path.join(target_dir, file_name)
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)  # Remove file or symbolic link
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)  # Remove directory and its contents


    # Get a list of all .smt2 files in the source directory
    smt2_files = [f for f in os.listdir(source_dir) if f.endswith('.smt2')]

    # Check if there are enough files to select from
    if len(smt2_files) < 100:
        print(f"Not enough .smt2 files in {source_dir}. Found only {len(smt2_files)} files.")
    else:
        # Randomly select 100 files
        selected_files = random.sample(smt2_files, 100)

        # Copy the selected files to the target directory
        for file_name in selected_files:
            source_path = os.path.join(source_dir, file_name)
            target_path = os.path.join(target_dir, file_name)
            shutil.copy(source_path, target_path)

        print(f"Copied 100 .smt2 files to {target_dir}.")
get_random_formulas('./semantic-fusion-seeds/QF_BV/sat', './seeds_BV_sat')

Copied 100 .smt2 files to ./seeds_BV_sat.


In [3]:
import os
import random
from validate_SMT import validate_and_solve_smt
import csv

def fuse_formula_pairs(directory, chain):
    
    # Finding .smt2 files
    smt_files = [os.path.join(directory, f) for f in sorted(os.listdir(directory)) if f.endswith('.smt2')]

    # No. of .smt2 have to be at least 2
    if len(smt_files) < 2:
        print("Not enough SMT formulas to create pairs.")
        return

    # Randomly changing the order of the files in the directory
    random.shuffle(smt_files)

    # Ensure each formula exists in only one pair
    pairs = [(smt_files[i], smt_files[i + 1]) for i in range(0, len(smt_files) - 1, 2)]

    # If there is an odd number of files, we pair the last file with the first file
    if len(smt_files) % 2 != 0:
        pairs.append((smt_files[-1], smt_files[0]))

    valid_formula_counter = 0
    satisfiable_formula_counter = 0
    unsatisfiable_formula_counter = 0
    error_formula_counter = 0

    csv_file = "fused_results_sat.csv"    
    
    with open(csv_file, mode="w", newline="") as csvfile:
        csv_writer = csv.writer(csvfile)
        # Write header
        csv_writer.writerow(["Formula Pair", "Error", "Satisfiability", "Validity"])

        for file1, file2 in pairs:
            # Read in formulas from files
            with open(file1, "r") as f1, open(file2, "r") as f2:
                formula1 = f1.read()
                formula2 = f2.read()

            print(f"Processing pair:\nFormula 1 (from {file1}):\n")
            print(f"Formula 2 (from {file2}):\n")

            # Fuse formulas
            response = chain.invoke({"formula1": formula1, "formula2": formula2}, timeout=120)

            # Fusion result
            print(f"Fusion Result:\n{response.content}\n")

            # Validation result
            validation_result = validate_and_solve_smt(response.content)
           
            error = None
            satisfiability = None
            validity = None

            if validation_result.startswith("SMT formula is valid"):
                validity = "Valid"
                valid_formula_counter += 1
            if "Satisfiability result: sat" in validation_result:
                satisfiability = "SAT"
                satisfiable_formula_counter += 1
            elif "Satisfiability result: unsat" in validation_result:
                satisfiability = "UNSAT"
                unsatisfiable_formula_counter += 1
                # shutil.copy(file1, os.path.join(unsat_directory, os.path.basename(file1)))
                # shutil.copy(file2, os.path.join(unsat_directory, os.path.basename(file2)))
            if "Error" in validation_result or "error" in validation_result:
                error = validation_result
                error_formula_counter += 1
                # shutil.copy(file1, os.path.join(unsat_directory, os.path.basename(file1)))
                # shutil.copy(file2, os.path.join(unsat_directory, os.path.basename(file2)))

            # Write to CSV
            csv_writer.writerow([f"{file1} & {file2}", error, satisfiability, validity])
    
    # If there are files in the unsat_directory, shuffle and process them again
    # if os.listdir(unsat_directory):  # Check if the directory is not empty
    #     print(f"Reprocessing UNSAT/Error pairs in {unsat_directory}...")
    #     extract_and_fuse_smt_pairs(unsat_directory, chain)        
            
    print("\nValid Fused Formulas: " + f"{valid_formula_counter}\n")
    print("SAT Fused Formulas: " + f"{satisfiable_formula_counter}\n")
    print("UNSAT Fused Formulas: " + f"{unsatisfiable_formula_counter}\n")
    print("Fused Formulas with Errors: " + f"{error_formula_counter}\n")
    print(f"Results saved to {csv_file}")

fuse_formula_pairs("./seeds_BV_sat", get_new_chain())

SMT formula is valid.
Satisfiability result: unsat
Processing pair:
Formula 1 (from ./seeds_BV_sat/bench_3347.smt2):

Formula 2 (from ./seeds_BV_sat/bench_446.smt2):

Fusion Result:
(set-info :smt-lib-version 2.6)
(set-logic QF_BV)
(set-info :source |
 Patrice Godefroid, SAGE (systematic dynamic test generation)
 For more information: http://research.microsoft.com/en-us/um/people/pg/public_psfiles/ndss2008.pdf
|)
(set-info :category "industrial")
(set-info :status sat)
(declare-fun T1_10428 () (_ BitVec 8))
(declare-fun T2_486 () (_ BitVec 16))
(declare-fun T2_476 () (_ BitVec 16))
(declare-fun T1_486 () (_ BitVec 8))
(declare-fun T1_487 () (_ BitVec 8))
(declare-fun T1_476 () (_ BitVec 8))
(declare-fun T1_477 () (_ BitVec 8))
(declare-fun z () (_ BitVec 16))
(define-fun f ((x (_ BitVec 8)) (y (_ BitVec 16))) (_ BitVec 16) (bvor ((_ zero_extend 8) x) y))
(assert (let ((?v_0 ((_ sign_extend 8) T1_10428))) 
  (and true 
    (= ?v_0 (_ bv47 16)) 
    (not (= T1_10428 (_ bv92 8))) 
    (no

Exception ignored in: <function Solver.__del__ at 0x11ecf7910>
Traceback (most recent call last):
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3.py", line 6881, in __del__
    Z3_solver_dec_ref(self.ctx.ref(), self.solver)
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3core.py", line 3881, in Z3_solver_dec_ref
    _elems.Check(a0)
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3core.py", line 1459, in Check
    raise self.Exception(self.get_error_message(ctx, err))
z3.z3types.Z3Exception: b'(error "line 1 column 1: unexpected character")\n(error "line 1 column 2: unexpected character")\n(error "line 1 column 3: unexpected character")\n'


Fusion Result:
```
(set-info :smt-lib-version 2.6)
(set-logic QF_BV)
(set-info :source |
 Patrice Godefroid, SAGE (systematic dynamic test generation)
 For more information: http://research.microsoft.com/en-us/um/people/pg/public_psfiles/ndss2008.pdf
|)
(set-info :category "industrial")
(set-info :status sat)
(declare-fun T1_1451 () (_ BitVec 8))
(declare-fun T1_1452 () (_ BitVec 8))
(declare-fun T1_1453 () (_ BitVec 8))
(declare-fun T1_1454 () (_ BitVec 8))
(declare-fun T1_1993 () (_ BitVec 8))
(declare-fun T1_1994 () (_ BitVec 8))
(declare-fun T4_11113 () (_ BitVec 32))
(declare-fun T4_9717 () (_ BitVec 32))
(declare-fun T1_11113 () (_ BitVec 8))
(declare-fun T1_11114 () (_ BitVec 8))
(declare-fun T1_11115 () (_ BitVec 8))
(declare-fun T1_11116 () (_ BitVec 8))
(declare-fun T1_9717 () (_ BitVec 8))
(declare-fun T1_9718 () (_ BitVec 8))
(declare-fun T1_9719 () (_ BitVec 8))
(declare-fun T1_9720 () (_ BitVec 8))
(declare-fun z () (_ BitVec 32))
(define-fun f ((x (_ BitVec 8)) (y (_ Bit

Exception ignored in: <function Solver.__del__ at 0x11ecf7910>
Traceback (most recent call last):
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3.py", line 6881, in __del__
    Z3_solver_dec_ref(self.ctx.ref(), self.solver)
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3core.py", line 3881, in Z3_solver_dec_ref
    _elems.Check(a0)
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3core.py", line 1459, in Check
    raise self.Exception(self.get_error_message(ctx, err))
z3.z3types.Z3Exception: b'(error "line 1 column 1: unexpected character")\n(error "line 1 column 2: unexpected character")\n(error "line 1 column 3: unexpected character")\n'


Fusion Result:
```
(set-info :smt-lib-version 2.6)
(set-logic QF_BV)
(set-info :source |
 Patrice Godefroid, SAGE (systematic dynamic test generation)
 For more information: http://research.microsoft.com/en-us/um/people/pg/public_psfiles/ndss2008.pdf
|)
(set-info :category "industrial")
(set-info :status sat)
(declare-fun T1_31159 () (_ BitVec 8))
(declare-fun T1_1457 () (_ BitVec 8))
(declare-fun z () (_ BitVec 8))
(define-fun f ((x (_ BitVec 8)) (y (_ BitVec 8))) (_ BitVec 8) (bvadd x y))
(assert (let ((?v_1 ((_ zero_extend 8) T1_31159)) (?v_0 (bvadd ((_ zero_extend 24) T1_31159) (_ bv4294967248 32)))) (and true (= (_ bv101 16) ?v_1) (bvult ((_ zero_extend 16) (_ bv9 16)) (bvadd ((_ zero_extend 24) ((_ extract 7 0) ?v_0)) (bvshl ((_ zero_extend 24) ((_ extract 15 8) ?v_0)) (_ bv8 32)))) (not (= (_ bv46 16) ?v_1)))))
(assert (let ((?v_0 ((_ zero_extend 24) T1_1457))) (and true (= ?v_0 (_ bv35 32)) (not (= ?v_0 (_ bv4294967295 32))))))
(assert (= z (f T1_31159 T1_1457)))
(check-sat)
(e

Exception ignored in: <function Solver.__del__ at 0x11ecf7910>
Traceback (most recent call last):
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3.py", line 6881, in __del__
    Z3_solver_dec_ref(self.ctx.ref(), self.solver)
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3core.py", line 3881, in Z3_solver_dec_ref
    _elems.Check(a0)
  File "/Users/dara/.local/lib/python3.10/site-packages/z3/z3core.py", line 1459, in Check
    raise self.Exception(self.get_error_message(ctx, err))
z3.z3types.Z3Exception: b'(error "line 1 column 1: unexpected character")\n(error "line 1 column 2: unexpected character")\n(error "line 1 column 3: unexpected character")\n'


Fusion Result:
```
(set-info :smt-lib-version 2.6)
(set-logic QF_BV)
(set-info :source |
 Patrice Godefroid, SAGE (systematic dynamic test generation)
 For more information: http://research.microsoft.com/en-us/um/people/pg/public_psfiles/ndss2008.pdf
|)
(set-info :category "industrial")
(set-info :status sat)
(declare-fun T2_10455 () (_ BitVec 16))
(declare-fun T1_10455 () (_ BitVec 8))
(declare-fun T1_10456 () (_ BitVec 8))
(declare-fun T1_123000 () (_ BitVec 8))
(declare-fun z () (_ BitVec 8))
(define-fun f ((x (_ BitVec 8)) (y (_ BitVec 8))) (_ BitVec 8) (bvadd x y))
(assert (let ((?v_1 ((_ sign_extend 16) (bvashr T2_10455 ((_ zero_extend 8) (_ bv11 8))))) (?v_0 ((_ zero_extend 24) (_ bv60 8)))) (and true (= T2_10455 (bvor (bvshl ((_ zero_extend 8) T1_10456) (_ bv8 16)) ((_ zero_extend 8) T1_10455))) (bvslt (bvadd (bvmul (bvmul (bvmul ?v_1 ?v_0) ?v_0) (_ bv1000 32)) (_ bv2817447936 32)) (_ bv0 32)) (bvule ?v_1 (_ bv23 32)))))
(assert (and true (not (= (bvsub z T1_10455) (_ bv102 8))

In [None]:
from validate_SMT import validate_and_solve_smt
formula1 = """

"""


formula2 = """

"""

response = get_new_chain().invoke({
    "formula1": formula1,
    "formula2": formula2
})

print(response.content + '\n')
print(validate_and_solve_smt(response.content))