## demo for parsing

In [1]:
%load_ext autoreload
%autoreload 2
from tree_sitter_languages import get_parser
parser = get_parser("java")

with open("mbjp/MBJP_1.java", "r") as file:
    source_code = file.read()
print(source_code)

import utils
tree = parser.parse(bytes(source_code, "utf8"))
print(utils.format_code(tree.root_node.sexp()))

import java.io.*;
import java.lang.*;
import java.util.*;
import java.math.*;


class MinCost {
    /**
     * * Write a function to find the minimum cost path to reach (m, n) from (0, 0) for the given cost matrix cost[][] and a position (m, n) in cost[][].
     *
     * > minCost([[1, 2, 3], [4, 8, 2], [1, 5, 3]], 2, 2)
     * 8
     * > minCost([[2, 3, 4], [5, 9, 3], [2, 6, 4]], 2, 2)
     * 12
     * > minCost([[3, 4, 5], [6, 10, 4], [3, 7, 5]], 2, 2)
     * 16
     */
    public static int minCost(List<List<Integer>> cost, int m, int n) {
        int[][] T = new int[m+1][n+1];
        for(int i = 0 ; i < m+1; i++){
            for(int j = 0; j < n+1; j++){
                if(i == 0 && j == 0) T[i][j] = cost.get(i).get(j);
                else if(i == 0) T[i][j] = T[i][j-1] + cost.get(i).get(j);
                else if(j == 0) T[i][j] = T[i-1][j] + cost.get(i).get(j);
                else T[i][j] = Math.min(T[i-1][j-1], Math.min(T[i-1][j], T[i][j-1])) + cost.get(i).get(j);
         

## Parse all solutions

In [5]:
# get program model from a java file
def getProgram(file_path):
    with open(file_path, "r") as file:
        source_code = file.read()
        utils.source_code_line = source_code.split("\n")
    tree = parser.parse(bytes(source_code, "utf8"))
    return utils.visit_program(tree.root_node)

# print program as a java file
def printJava(program):
    return utils.addIndentation(program.toString())

# validate the parser and the java generation
def validateJava(file_path,isMBJP=True):
    try:
        program = getProgram(file_path)
        javaCode= printJava(program)
        if isMBJP: # validating a mbjp benchmark solution
            from mxeval.execution import check_correctness_java as check
            import json
            with open("mbjp.json", "r") as file:
                mbjp = json.load(file)
            mbjp_name=file_path.split("/")[-1].split(".")[0].replace("_","/") #MBJP/1
            for data in mbjp:
                if data['task_id']==mbjp_name:
                    result=check(data,javaCode,solution_complete=True)
                    if result['passed']:
                        return True
                    else:
                        print(f"\nError in parsing java file: {file_path}")
                        print(javaCode)
                        print(result['result'])
                        return False
            return False
        else:
            parser.parse(bytes(javaCode, "utf8"))
            return True
    except Exception as e:
        print(f"\nError in parsing java file: {file_path}")
        print(e)
        return False
    
print(validateJava("mbjp/MBJP_1.java"))


Error in parsing java file: mbjp/MBJP_1.java
No module named 'mxeval.execution'
False


In [6]:
import os
import subprocess
# Path to the solutions folder
folder_path = "mbjp"

# Get the list of Java code files in the folder
java_files = [file for file in os.listdir(folder_path) if file.endswith(".java")]

java_files.sort()
error_cnt=0
# Loop through each Java code file
for file_name in java_files[:0]:
    file_path = os.path.join(folder_path, file_name)
    
    # using the parser to validate the java file
    if not validateJava(file_path):
        error_cnt+=1
print(f"Total errors: {error_cnt} out of {len(java_files)} files.")

Total errors: 0 out of 862 files.


## Translating Program Model into Coq Proof

In [4]:
import shutil
import os
# coq proof test
def validateCoqProof(file_path):
    program=getProgram(file_path)
    coqProof=program.toCoq().toString()
    prefix="""From PLF Require Import Syntax.
Open Scope string_scope.

Example prog_well_typed : exists p, program_well_typed p.
Proof.
unfold program_well_typed.
eexists.
eexists."""
    suffix="""  Unshelve.
  all: apply STyVoid.
Defined.
Definition prog := the_exists_term (prog_well_typed).
Print prog."""
    coqProof=f"{prefix}\n{coqProof}\n{suffix}"
    with open("../coq_code/test.v", "w") as file:
        file.write(coqProof)
    res = subprocess.run(
                # coqc -Q ../coq_code PLF ../coq_code/test.v
                ["coqc", "-Q","../coq_code", "PLF", "../coq_code/test.v"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=60,
            )
    if res.returncode != 0:
        def copy_and_rename_test_v(file_path):
            # Get the directory path and file name
            directory = os.path.dirname(file_path)
            file_name = os.path.basename(file_path)
            
            # Create the new file name with the same prefix as the original file
            new_file_name = file_name.split(".")[0] + ".v"
            
            # Create the new file path
            new_file_path = os.path.join("../coq_code", new_file_name)
            
            # Copy the test.v file to the new file path
            shutil.copyfile("../coq_code/test.v", new_file_path)
            shutil.copyfile(file_path, os.path.join("../coq_code", file_name))
            
            return new_file_path

        copy_and_rename_test_v(file_path)
        
        print(f"\nError in parsing coq proof: {file_path}")
        print(res.stderr.decode("utf-8"))
        return False
    else:
        file_name = file_path.split("/")[-1].split(".")[0]
        with open(f"mbjp/{file_name}.v", "w") as file:
            file.write(coqProof)
        return True

validateOne=1
if validateOne:
    validateCoqProof("mbjp/MBJP_937.java")
else:   
    error_cnt=0
    # Loop through each Java code file
    for file_name in java_files[:]:
        file_path = os.path.join(folder_path, file_name)
        coq_file_path = os.path.join(folder_path, file_name.split(".")[0] + ".v")
        if os.path.exists(coq_file_path): #already validated
            continue
        # using the parser to validate the java file
        if not validateJava(file_path):
            error_cnt+=1
            continue

        # using coqc to validate the coq proof
        if not validateCoqProof(file_path):
            error_cnt+=1
    print(f"Total errors: {error_cnt} out of {len(java_files)} files.")


Error in parsing coq proof: mbjp/MBJP_937.java
File "../coq_code/test.v", line 1, characters 0-31:
Error: File /data_ssd/hzc/Proof2Prog/coq_code/Syntax.vo has bad version
number 81999 (expected 81500). It is corrupted or was compiled with another
version of Coq.




In [6]:
def remove_all_mbjp_files_in_coq_folder():
    folder_path = "../coq_code"
    # Get the list of Java code files in the folder
    java_files = [file for file in os.listdir(folder_path) if file.startswith("MBJP")]

    # Loop through each Java code file
    for file_name in java_files:
        file_path = os.path.join(folder_path, file_name)
        os.remove(file_path)
remove_all_mbjp_files_in_coq_folder()