In [1]:
import json
import os
import time
import subprocess
from openai import OpenAI

In [2]:
# Get dataset directory
directory = os.getcwd()
dataset_directory = os.path.join(directory, 'problems_dataset')

In [3]:
#Function to get all the filenames of dataset problems
def get_problem_filenames():
    filenames = []
    for filename in os.listdir(dataset_directory):
        if filename.endswith('.json'):
            filenames.append(filename)
    return filenames

In [4]:
# Function to load a problem file from dataset directory
def load_problem(problem_filename):
    path = os.path.join(dataset_directory, problem_filename)
    with open(path, 'r') as json_file:
        problem = json.load(json_file)
    return problem

In [5]:
# Custom exception classes with error messages
class CompilationError():
    def __init__(self, message):
        self.message = message

class CompilationSuccess():
    pass

class RuntimeError():
    def __init__(self, message):
        self.message = message

class RunSuccess():
    def __init__(self, output):
        self.output = output

class TestFailed():
    def __init__(self, message, input, expected_output, output, number_of_tests, tests_passed):
        self.message = message
        self.input = input
        self.expected_output = expected_output
        self.output = output
        self.number_of_tests = number_of_tests
        self.tests_passed = tests_passed

class TestSuccess():
    def __init__(self, message, number_of_tests):
        self.message = message
        self.number_of_tests = number_of_tests

In [6]:
# Function to compile the C++ program
def compile_cpp(source_file, exec_file):
    compile_command = ["g++", source_file, "-o", exec_file]
    result = subprocess.run(compile_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        return CompilationError(f"Compilation failed: {result.stderr.decode('utf-8')}")
    return CompilationSuccess()

In [7]:
# Function to run the compiled program with the provided input and capture the output
def run_program(executable, input_data):
    process = subprocess.Popen([f"./{executable}"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = process.communicate(input=input_data.encode())
    if process.returncode != 0:
        return RuntimeError(f"Runtime error: {error.decode('utf-8')}")
    return RunSuccess(output.decode('utf-8'))

In [8]:
# Function to run tests and compare output
def run_tests(test_cases, executable):
    f = open('tmp_output.txt', 'w')
    for i, test in enumerate(test_cases):
        input_data = test["input"]
        expected_output = test["output"]
        output = run_program(executable, input_data)
        if output.__class__.__name__ != "RunSuccess":
            return output
        
        actual_output = output.output

        # Compare outputs
        if actual_output.strip() == expected_output.strip():
            f.write(f"Test case {i+1}: PASSED\n")
        else:
            f.write(f"Test case {i+1}: FAILED\n")
            f.write(f"Expected output:\n{expected_output}\n")
            f.write(f"Actual output:\n{actual_output}\n")
            return TestFailed(f"Test case {i+1} failed", input_data, expected_output, actual_output, len(test_cases), i+1)
    f.close()
    return TestSuccess("All test cases passed", len(test_cases)) 

In [9]:
# Function to parse test cases from the problem JSON
def parse_test_cases(problem):
    test_cases = []
    for test_case in problem["public_test"]:
        test_cases.append({
            "input": test_case["input"],
            "output": test_case["output"]
        })
    for test_case in problem["generated_test"]:
        test_cases.append({
            "input": test_case["input"],
            "output": test_case["output"]
        })
    return test_cases

In [10]:
# Function to test solution
def test_solution(problem, solution_code):
    # Write the solution to a file
    source_file = "solution.cpp"
    with open(source_file, "w") as file:
        file.write(solution_code)
    
    # Compile the solution
    executable = "solution"
    r = compile_cpp(source_file, executable)
    if r.__class__.__name__ != "CompilationSuccess":
        return r
    
    # Parse test cases
    test_cases = parse_test_cases(problem)
    
    # Run the tests
    r = run_tests(test_cases, executable)

    # Clean up
    os.remove(source_file)
    os.remove(executable)

    return r
        

In [11]:
# # Mock solution
# solution_code = "#include<iostream> \nusing namespace std; \nint main() { \nint a, b; \ncin >> a >> b; \ncout << a + b << endl; \nreturn 0; \n} \n"
# # Load addition.json problem
# with open('addition.json', 'r') as json_file:
#     problem = json.load(json_file)

# result = test_solution(problem, solution_code)

# if result.__class__.__name__ == "CompilationError":
#     print(result.message)
# elif result.__class__.__name__ == "RuntimeError":
#     print(result.message)
# elif result.__class__.__name__ == "TestFailed":
#     print(result.message)
#     print(f"Input: {result.input}")
#     print(f"Expected output: {result.expected_output}")
#     print(f"Actual output: {result.output}")
# else:
#     print(result.message)


In [12]:
client = OpenAI(api_key='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')

# Function to generate a solution using OpenAI's GPT-3.5
def generate_solution(problem, previous_code=None, previous_error=None, failed_input=None, failed_output=None, expected_output=None):
    title = problem['name']
    description = problem['description']
    cf_tags = problem['tags']
    dificulty = problem['difficulty']
    rating = problem['rating']
    public_test = problem['public_test']

    # Create a prompt based on the problem description
    prompt = f"You need to solve the following codeforces problem.\
                \nProblem: {title}\
                \nDescription: {description}\
                \nCodefoce tags: {cf_tags}\
                \nDifuclty: {dificulty}\
                \nRating: {rating}\
                \nExample of test cases:\
                \nInput: {public_test[0]['input']}\
                \nOutput: {public_test[0]['output']}\
                \nWrite a C++ solution to solve the problem."
    if (previous_code != None):
        prompt += f"\nModify and correct this code:"
        prompt += f"\nPrevious code: {previous_code}"
    if (previous_error != None):
        prompt += f"\nError for the provided code: {previous_error}"
    if (failed_input != None):
        prompt += f"\nYour code failed for the following input/output:"
        prompt += f"\nFailed input: {failed_input}"
        prompt += f"\nFailed output: {failed_output}"
        prompt += f"\nExpected output: {expected_output}"

    # Call OpenAI API to generate the solution
    response = client.chat.completions.create(model="gpt-3.5-turbo",
    messages=[
            {
                "role": "system", 
                "content": "You are a C++ programing contest participant. You are given a problem statement and you need to write a C++ program to solve it."
            },
            {
                "role": "system", 
                "content": "You SHOULD NOT use any external libraries or functions. You can only use the standard C++ library."
            },
            {
                "role": "system", 
                "content": "You SHOULD ONLY provide the solution code. Do not include any input/output code or function signature."
            },
            {
                "role": "system", 
                "content": "You MUST NOT provide any other comments or explanations. Only the code is required."
            },
            {"role": "user", "content": prompt}
    ])

    # Extract the solution from the API response
    solution_code = response.choices[0].message.content.strip()
    solution_code = solution_code.replace("```cpp", "")
    solution_code = solution_code.replace("```", "")
    return solution_code

In [15]:
# Solve each problem in the dataset
output_file = open("direct_prompting_and_corrections_out.txt", "w")
problem_filenames = get_problem_filenames()
problems_solved = 0
iter = 0
numeber_of_corrections = 5
for problem_filename in problem_filenames:
    iter += 1
    problem = load_problem(problem_filename)
    output_file.write(f"Solving problem: {problem['name']}...\n")
    
    # Generate a solution
    solution_code = generate_solution(problem)

    # Test the solution
    result = test_solution(problem, solution_code)

    correction = 0
    while (result.__class__.__name__ == "CompilationError" or result.__class__.__name__ == "RuntimeError") and correction < numeber_of_corrections:
        print(result.message)
        # Generate a solution
        solution_code = generate_solution(problem, solution_code, result.message)

        # Test the solution
        result = test_solution(problem, solution_code)

        correction += 1

    # Check result
    if result.__class__.__name__ == "CompilationError":
        print(result.message)
        output_file.write("Compilation error\n")
    elif result.__class__.__name__ == "RuntimeError":
        print(result.message)
        output_file.write("Runtime error\n")
    if result.__class__.__name__ == "TestFailed":
        print(result.message)
        output_file.write("Test passed: " + str(result.tests_passed) + "/" + str(result.number_of_tests) + "\n")
    elif result.__class__.__name__ == "TestSuccess":
        print(result.message)
        output_file.write("Test passed: " + str(result.number_of_tests) + "/" + str(result.number_of_tests) + "\n")
        problems_solved += 1
    output_file.write("\n")

    # Wait 20 seconds before solving the next problem
    time.sleep(1)

    if iter == 10:
        break


output_file.write(f"Problems solved: {problems_solved}/{len(problem_filenames)}")
    

Test case 2 failed
Compilation failed: solution.cpp: In function ‘int main()’:
solution.cpp:3:5: error: ‘cin’ was not declared in this scope
    3 |     cin >> n >> m;
      |     ^~~
solution.cpp:5:5: error: ‘vector’ was not declared in this scope
    5 |     vector<set<int>> boxes(n);
      |     ^~~~~~
solution.cpp:5:12: error: ‘set’ was not declared in this scope
    5 |     vector<set<int>> boxes(n);
      |            ^~~
solution.cpp:5:16: error: expected primary-expression before ‘int’
    5 |     vector<set<int>> boxes(n);
      |                ^~~
solution.cpp:6:12: error: expected primary-expression before ‘int’
    6 |     vector<int> presents_count(n);
      |            ^~~
solution.cpp:7:12: error: ‘pair’ was not declared in this scope
    7 |     vector<pair<int, int>> presents_kind;
      |            ^~~~
solution.cpp:7:17: error: expected primary-expression before ‘int’
    7 |     vector<pair<int, int>> presents_kind;
      |                 ^~~
solution.cpp:12:9: 

21