In [2]:
import pandas as pd
import os
import re

In [3]:
# Read the problems file
problems = pd.read_json('code4bench_no_loops.json')
loop_problems = pd.read_json('code4bench_loops.json')

In [4]:
# For each of the problems make a folder with the index
for i, problem in problems.iterrows():
    # define the range of problems to extract here
    if i == 976:
        # Get the context of the problem
        context = problem['context']
        
        # Get the path, start with the current directory and add the folder name
        path = os.path.join(os.getcwd(), "..", "tmp", f"{i}")

        # Create the folder if it does not exist
        if not os.path.exists(path):
            os.makedirs(path)
        
        # Write the context to a file
        with open(os.path.join(path, 'context.txt'), 'w', encoding="utf-8") as f:
            f.write(context)

In [None]:
index = 976
print(problems.loc[index])
print(problems.loc[index]['sourceCode'])
problems.loc[index]['sourceCode'].replace("\r", " ").replace("\n", " ").replace("\t", " ")

id                                                         1575
sourceCode    #include<stdio.h>\nint main()\n{\n\tint n;\n\t...
fullname                                                   199A
name                                                          A
countline                                                     8
context       A. Hexadecimal's theorem time limit per test 2...
Name: 976, dtype: object
#include<stdio.h>
int main()
{
	int n;
	scanf("%d",&n);
	printf("0 0 %d",n);
	return 0;
}


IndexError: single positional indexer is out-of-bounds

In [11]:
# Select a random problem from the loop problems
problem = loop_problems.sample()

# Get the index of the problem
i = problem.index[0]

# If the problem does not have a loop then generate a new one. Do this in a loop
while not problem['sourceCode'].str.contains('for|while|do').any():
    problem = loop_problems.sample()

# Get the context of the problem
context = problem['context'].values[0]

# Get the path, start with the current directory and add the folder name
path = os.path.join(os.getcwd(), "..", "loop_problems", f"{i}")

# Create the folder if it does not exist
if not os.path.exists(path):
    os.makedirs(path)

# Write the context to a file
with open(os.path.join(path, 'context.txt'), 'w', encoding="utf-8") as f:
    f.write(context)
    
# Also write the context commented to the source code
with open(os.path.join(path, 'solution.c'), 'w', encoding="utf-8") as f:
    f.write(f"/* {context} */\n\n")
    f.write(problem['sourceCode'].values[0].replace("\r", " ").replace("\t", " "))
    

In [6]:
# Get problem 690A1 from filtering the problems
problem = problems[problems['fullname'] == '690A1']
print(problem['sourceCode'].values[0].replace("\r", " ").replace("\t", " "))

#include<stdio.h>  
#include<conio.h>  
  
int main()  
{  
    long long n;  
    scanf("%I64d", &n);  
    long long result;  
    result = n / 2;  
    if(n%2 == 0)  
    {  
        printf("%I64d", result);  
    }  
    else  
        printf("%I64d", result + 1);  
}


In [28]:
# Get the problem 581A
problem = problems[problems['fullname'] == '581A']

print(problem['sourceCode'].values[0])

# Function to transform the source code to a function instead. This is done by:
# 1. remove any imports
# 2. for each input variable (scanf) we introduce a function parameter
# 3. for each output variable (printf) we introduce a output parameter (a pointer)
# 4. Create a function name
# 5. return void

def transform_source_code(source_code):
    # Step 1: Remove any imports
    lines = source_code.split('\n')
    
    # Remove any lines that start with #include
    lines_no_include = [line for line in lines if not re.match(r'^\s*#include', line)]
    code = '\n'.join(lines_no_include)

    # Initialize lists to store input and output variables
    input_vars = []
    output_vars = []

    # Patterns to identify scanf and printf statements
    scanf_pattern = r'scanf\s*\(.*?\);'
    printf_pattern = r'printf\s*\(.*?\);'
    
    # Find all scanf statements
    scanf_statements = re.findall(scanf_pattern, code, flags=re.DOTALL)
    
    # Extract all variables
    for scanf_statement in scanf_statements:
        vars_in_scanf = re.findall(r'&\s*(\w+)', scanf_statement)
        
        # Find the data type of the variable
        for var in vars_in_scanf:
            data_type = re.search(r'int|float|double|char', code).group()
            input_vars.append((data_type, var))
        
    # Create the parameters
    parameters = ', '.join([f'{data_type} {var}' for data_type, var in input_vars])
            
    # Remove the scanf statements
    code = re.sub(scanf_pattern, '', code, flags=re.DOTALL)
    
    # Replace the int main() with void transformed_function(parameters)
    code  = re.sub(r'int\s+main\s*\(\s*\)', f"void transformed_function({parameters})", code)
    
    



    # code_without_scanf = re.sub(scanf_pattern, process_scanf, code_without_imports, flags=re.DOTALL)

    # # Step 3: Process printf statements to extract output variables
    # def process_printf(match):
    #     nonlocal output_vars
    #     printf_str = match.group()
    #     # Extract variable names in printf
    #     vars_in_printf = re.findall(r'[^%]%\w\s*,\s*(\w+)', printf_str)
    #     output_vars.extend(vars_in_printf)
    #     # Remove the printf statement from code
    #     return ''

    # code_without_printf = re.sub(printf_pattern, process_printf, code_without_scanf, flags=re.DOTALL)

    # # Step 4: Create a function name
    # function_name = 'transformed_function'

    # # Prepare function parameters
    # input_params = ', '.join(f'int {var}' for var in input_vars)
    # output_params = ', '.join(f'int *{var}' for var in output_vars)
    # all_params = ', '.join(filter(None, [input_params, output_params]))

    # # Construct the function definition
    # function_def = f'void {function_name}({all_params}) {{\n'

    # # Prepare output assignments
    # output_assignments = ''
    # for var in output_vars:
    #     output_assignments += f'    *{var} = {var};\n'

    # # Combine the transformed code
    # transformed_code = function_def + code_without_printf + '\n' + output_assignments + '}\n'

    # return transformed_code.strip()
    return code


print(transform_source_code(problem['sourceCode'].values[0]))

#include<stdio.h>
int main()
{
    int a,b,d;
    scanf("%d %d",&a,&b);
    if(a>b)
        printf("%d",b);
    else
        printf("%d",a);
    d=abs(a-b);

    printf(" %d\n",d/2);
    return 0;

}
void transformed_function(int a, int b)
{
    int a,b,d;
    
    if(a>b)
        printf("%d",b);
    else
        printf("%d",a);
    d=abs(a-b);

    printf(" %d\n",d/2);
    return 0;

}
