# Process data format to make it compatible with OpenAI

In [2]:
import os
import json

## Convert API docs into JSON Lines (JSONL) format

In [3]:
# Collect every API doc stored as a .txt file under `directory` into `data`,
# a list of {"function_name": ..., "api_doc": ...} records.
directory = "api_docs"

# One record is appended per .txt file found in `directory`
data = []

# os.listdir() returns entries in arbitrary order; sort them so the records
# (and any JSONL file written from them) come out identically on every run.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".txt"):
        continue
    # The function name is the file name without its .txt extension
    function_name = os.path.splitext(filename)[0]
    with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
        api_doc = f.read().strip()  # leading/trailing whitespace is dropped
    data.append({"function_name": function_name, "api_doc": api_doc})

# Export step (currently disabled): one JSON object per line.
# with open("../our_proptest_data/api_docs.jsonl", "w", encoding="utf-8") as json_file:
#     for item in data:
#         json_file.write(json.dumps(item) + "\n")

## Convert JSONL records into .txt files

In [5]:
# Read a JSONL file (one JSON object per line) and derive, for each record,
# the .txt path under `output_dir` where its properties text would be stored.
output_dir = '../our_proptest_data/properties'

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open('../our_proptest_data/output/property_1_2.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at end of file) is not a
            # record; json.loads('') would raise on it.
            continue
        data = json.loads(line)
        function_name = data['function_name']
        # Only the first entry of the 'properties' list is used
        properties = data['properties'][0]

        txt_file_name = function_name + '.txt'
        txt_file_path = os.path.join(output_dir, txt_file_name)

        # Write step (currently disabled):
        # with open(txt_file_path, 'w', encoding='utf-8') as txt_file:
        #     txt_file.write(properties)

## Write PBT entries from the JSONL file into .py files

In [4]:
# Read a JSONL file whose records carry a 'function_name' and a list 'pbt' of
# code strings, and prepare one pbt_<k>.py path per entry (k starts at 1)
# inside a per-function directory under `output_dir`.
output_dir = '../our_proptest_data/proptest'

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open('../our_proptest_data/output_jsonl/pbt_0_1.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines; json.loads('') would raise on them.
            continue
        data = json.loads(line)
        function_name = data['function_name']
        pbt = data['pbt']

        # NOTE: the directory is created even while the write below is disabled.
        function_dir = os.path.join(output_dir, function_name)
        os.makedirs(function_dir, exist_ok=True)

        for j, pbt_data in enumerate(pbt, start=1):
            # Remove a Markdown code fence wrapped around the code, if present.
            # The opening fence is matched only in the exact form '```python\n'.
            if pbt_data.startswith('```python\n'):
                pbt_data = pbt_data[len('```python\n'):].strip()
            if pbt_data.endswith('```'):
                pbt_data = pbt_data[:-len('```')].strip()
            txt_file_name = f'pbt_{j}.py'
            txt_file_path = os.path.join(function_dir, txt_file_name)

            # Write step (currently disabled):
            # with open(txt_file_path, 'w', encoding='utf-8') as txt_file:
            #     txt_file.write(pbt_data)

## Write mutants from the JSONL file into .py files

In [4]:
# Read a JSONL file whose records carry a 'function_name' and a list 'mutants'
# of code strings, and write each entry to
# <output_dir>/<function_name>/mutant_<k>.py (k starts at 1).
output_dir = '../our_proptest_data/mutants'

# Read and write explicitly as UTF-8 so source text containing non-ASCII
# characters round-trips regardless of the platform's default locale encoding.
with open('../our_proptest_data/output_jsonl/mutants/statistics_variance.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines; json.loads('') would raise on them.
            continue
        data = json.loads(line)
        function_name = data['function_name']
        mutants = data['mutants']

        function_dir = os.path.join(output_dir, function_name)
        os.makedirs(function_dir, exist_ok=True)

        for j, mutant_data in enumerate(mutants, start=1):
            # Remove a Markdown code fence wrapped around the code, if present.
            # The opening fence is matched only in the exact form '```python\n'.
            if mutant_data.startswith('```python\n'):
                mutant_data = mutant_data[len('```python\n'):].strip()
            if mutant_data.endswith('```'):
                mutant_data = mutant_data[:-len('```')].strip()
            txt_file_name = f'mutant_{j}.py'
            txt_file_path = os.path.join(function_dir, txt_file_name)

            # An existing file with the same name is overwritten ('w' mode).
            with open(txt_file_path, 'w', encoding='utf-8') as txt_file:
                txt_file.write(mutant_data)