In [1]:
import subprocess
import re
from math import isclose

def is_float(value):
    # 주어진 값이 부동소수점 수인지 확인
    if isinstance(value, bytes):  # value가 bytes 객체인지 확인
        value = value.decode('utf-8')  # bytes를 문자열로 디코딩
    return bool(re.match(r'^-?\d+(\.\d+)?$', value))

def compare_outputs(actual, expected, partial=False):
    if partial:
        expected = expected[:len(actual)]
    if len(actual) > len(expected):
        return False  # 실제 출력이 예상 출력보다 길면, 일치하지 않습니다.
    for output, gold in zip(actual, expected):
        if is_float(output) and is_float(gold):
            if not isclose(float(output), float(gold), rel_tol=1e-9):
                return False
        elif output != gold:
            return False
    return True

def run_test_cases(file, input_data_list, expected_output_list):
    Test_Failure_List = []
    # 테스트 케이스를 실행하고, 정확도를 계산
    correct_tests = 0

    for index, cpp_input in enumerate(input_data_list):
        partial = False
        try:
            process = subprocess.run([f'./source_compile_folder/{file[:-4]}'], input=cpp_input, text=True, capture_output=True, timeout=10)
            actual_output = process.stdout.strip().split()
        except subprocess.TimeoutExpired as e:
            actual_output = e.stdout.strip().split() if e.stdout else []
            partial = True
        if compare_outputs(actual_output, expected_output_list[index], partial=partial):
            correct_tests += 1
        else:
            expected_output = ' '.join(expected_output_list[index])
            actual_output = [output.decode('utf-8') if isinstance(output, bytes) else output for output in actual_output]
            actual_output_joined = ' '.join(actual_output)
            with open(f'./source_test_case_error_log/{file[:-4]}.txt', 'a') as err_log_file:
                err_log_file.write(f"Test Case {index + 1} Failed: {expected_output}||| {actual_output_joined}'")
            Test_Failure_List.append(index)

    test_case_length = len(input_data_list)
    logical_error_rate = 1 - correct_tests / test_case_length
    print(f'Logical error rate: {logical_error_rate}')

    return [logical_error_rate] + Test_Failure_List   

In [2]:
import os
import json
import subprocess
from tqdm import tqdm

path = './CodeT5_LineNumber_cpp_source/'
file_list = os.listdir(path)
file_list.sort()
compile_error = []
output_logical_error_rate = {}
test_case_error = []
time_out_error = []

for cpp_file in tqdm(file_list):
    filepath = path + '/' + cpp_file
    test_case_txt_number = cpp_file.split('Problem_')[1].split('_')[0]
    test_cases_file_path = './cpp_test_case/' + test_case_txt_number + '.txt'
    
    compile_error_log_file = "./source_test_result/compile_error_log.txt"

    compile_result = subprocess.run(['g++', '-o', f'./source_compile_folder/{cpp_file[:-4]}', filepath], capture_output=True)
    if compile_result.returncode != 0:
        compile_error.append(f'{cpp_file[:-4]}')
        with open(compile_error_log_file, 'a') as compile_error_file:
            compile_error_file.write(f'{cpp_file[:-4]}\n')
            compile_error_file.write(f'{compile_result.stderr}\n\n')
        continue
        
    with open(test_cases_file_path, 'r') as test_file:
        test_cases = test_file.read().split(', ')  # 각 테스트 케이스 분리
    
    if len(test_cases) < 2:
        print(f'Test Case Error - {test_case_txt_number}.txt')
        test_case_error.append(test_case_txt_number)
        continue
        
    test_data_list = []
    output_list = []

    for index, test_case in enumerate(test_cases):
        if not test_case.strip():
            continue
        input_part, output_part = test_case.split('\noutput: ')
        input_data = input_part.split('input: ')[1]
        expected_output = output_part.strip()
        test_data_list.append(input_data)
        output_list.append(expected_output)

        if index == len(test_cases) - 1:
            output_list[index] = output_list[index].split('\n')[0]

    test_input_list = []
    for in_data in test_data_list:
        in_data = in_data.replace("\\n", "\n")
        test_input_list.append(in_data[1:-1].strip())

    test_output_list = []
    for out_data in output_list:
        out_data = out_data.replace("\\n", '\n')
        out_data = out_data.replace("\"", "")
        out_data = out_data.split()
        test_output_list.append(out_data)

    error_line_number = []
    test_case_length = len(test_output_list)
    correct_test = len(test_output_list)
    test_result = run_test_cases(cpp_file, test_input_list, test_output_list)
    output_logical_error_rate[cpp_file[:-4]] = test_result
    print(f'{cpp_file[:-4]} Error rate = {test_result[0]}')

pretty_print = json.dumps(output_logical_error_rate)
print(pretty_print)

print(f'Compile Error {len(compile_error)}')
print(f'Test case error {len(test_case_error)}')

100%|█████████▉| 859/861 [2:37:02<04:27, 133.96s/it]

Logical error rate: 0.275
Problem_9_source_22 Error rate = 0.275


100%|█████████▉| 860/861 [2:37:04<01:34, 94.31s/it] 

Logical error rate: 0.125
Problem_9_source_23 Error rate = 0.125


100%|██████████| 861/861 [2:37:06<00:00, 10.95s/it]

Logical error rate: 0.125
Problem_9_source_24 Error rate = 0.125
{"Problem_0_source_0": [0.17000000000000004, 1, 3, 5, 6, 7, 8, 9, 12, 13, 16, 17, 32, 34, 47, 85, 86, 94, 102, 103, 108, 109, 113, 119, 122, 143, 144, 145, 146, 158, 159, 167, 169, 170, 197], "Problem_0_source_1": [0.010000000000000009, 11, 100], "Problem_0_source_2": [1.0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 




In [10]:
import json

with open('./source_test_result/source_logical_error_rate.json', 'w') as f:
    json.dump(output_logical_error_rate, f, indent=4)

In [4]:
import pickle
with open("./source_test_result/timeout_error.pkl", "wb") as f1:
    pickle.dump(time_out_error, f1)

In [5]:
with open("./source_test_result/compile_error.pkl", "wb") as f2:
    pickle.dump(compile_error, f2)

In [6]:
with open("./source_test_result/timeout_error.pkl", "rb") as f3:
    timeout_list = pickle.load(f3)
print(timeout_list)

[]


In [7]:
with open("./source_test_result/compile_error.pkl", "rb") as f4:
    error_list = pickle.load(f4)
print(error_list)

['Problem_133_source_636', 'Problem_133_source_637', 'Problem_156_source_801', 'Problem_72_source_375', 'Problem_72_source_376', 'Problem_76_source_399']


In [11]:
with open('./source_test_result/source_logical_error_rate.json', 'r') as f:
    data = json.load(f)

In [12]:
perfect_cpp = []
error_cpp = []
unstable_cpp = []
unstable_ratio = []

for key, value in data.items():
    if value[0] == 0.0:
        perfect_cpp.append(key)
    elif value[0] == 1.0:
        error_cpp.append(key)
    else:
        unstable_cpp.append(key)
        unstable_ratio.append(value[0])

print(f'perfect_code {len(perfect_cpp)}')
print(f'Error_code {len(error_cpp)}')
print(f'Unstable_code {len(unstable_cpp)}')
print(f'compile_error {len(compile_error)}')
print(f'runtime_error {len(timeout_list)}')
print(f'test_case_error {len(test_case_error)}')
print(f'total sum {len(perfect_cpp) + len(error_cpp) + len(unstable_cpp) + len(compile_error) + len(timeout_list) + len(test_case_error)}')

perfect_code 210
Error_code 110
Unstable_code 528
compile_error 6
runtime_error 0
test_case_error 7
total sum 861


In [14]:
print(test_case_error)

['132', '132', '132', '132', '132', '132', '132']
