In [5]:
import os
import json
import shutil

# Split every multi-query JSON file under "Vanilla/<category>/" into one file
# per query under "Original/<category>/query_<n>.json", forcing each API entry's
# 'category_name' to match the name of the category folder it was found in.
#
# NOTE: output files are overwritten by name on every run, but nothing here
# deletes files left behind by an earlier run that produced more queries.

# Define the main directory containing the "Vanilla" and "Original" folders
main_directory = r"D:\Projects\ToolsForTheJobBenchmark\benchmark\queries\single_category\independent"
vanilla_folder = os.path.join(main_directory, "Vanilla")
original_folder = os.path.join(main_directory, "Original")

# Log file to store any issues encountered
error_log = os.path.join(main_directory, "error_log.txt")


def log_issue(message):
    """Append a single line to the error log.

    The log is opened as UTF-8 so that paths or parser messages containing
    non-ASCII characters cannot fail under a locale-dependent default encoding.
    """
    with open(error_log, 'a', encoding='utf-8') as log_f:
        log_f.write(message + "\n")


# os.listdir() returns entries in arbitrary order; sorting both listings makes
# the query_<n>.json numbering reproducible from one run to the next.
for category_folder in sorted(os.listdir(vanilla_folder)):
    category_path = os.path.join(vanilla_folder, category_folder)

    # Only directories are categories; skip stray files.
    if not os.path.isdir(category_path):
        continue

    # Create the same category folder in "Original"
    new_category_path = os.path.join(original_folder, category_folder)
    os.makedirs(new_category_path, exist_ok=True)

    file_count = 1  # Output numbering restarts at 1 for each category
    for json_file in sorted(os.listdir(category_path)):
        if not json_file.endswith(".json"):
            continue

        json_file_path = os.path.join(category_path, json_file)

        # Only the read/parse step can raise the decode errors handled here.
        try:
            with open(json_file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except json.JSONDecodeError as e:
            log_issue(f"JSON decode error in file {json_file_path}: {str(e)}")
            print(f"Skipping file {json_file_path} due to JSON decode error.")
            continue
        except UnicodeDecodeError as e:
            log_issue(f"Unicode decode error in file {json_file_path}: {str(e)}")
            print(f"Skipping file {json_file_path} due to Unicode decode error.")
            continue

        # The loop below iterates over queries, so the top level must be a list.
        # Anything else is logged and skipped instead of aborting the whole run.
        if not isinstance(data, list):
            log_issue(
                f"Unexpected JSON structure in file {json_file_path}: "
                f"expected a list of queries, got {type(data).__name__}"
            )
            print(f"Skipping file {json_file_path} due to unexpected JSON structure.")
            continue

        for query_index, query in enumerate(data):
            api_list = query.get('api_list') if isinstance(query, dict) else None

            # A query without a list of dict-like API entries cannot be fixed up;
            # log it and move on to the next query.
            if not isinstance(api_list, list) or not all(isinstance(api, dict) for api in api_list):
                log_issue(f"Missing or invalid 'api_list' in query {query_index} of file {json_file_path}")
                print(f"Skipping query {query_index} in {json_file_path} due to missing or invalid 'api_list'.")
                continue

            # Assigning unconditionally covers both the "missing" and the
            # "wrong value" case and is a no-op when the value is already right.
            for api in api_list:
                api['category_name'] = category_folder

            # Write the individual query (still wrapped in a list) to its own file
            new_file_name = f"query_{file_count}.json"
            new_file_path = os.path.join(new_category_path, new_file_name)
            with open(new_file_path, 'w', encoding='utf-8') as new_f:
                json.dump([query], new_f, indent=4)

            file_count += 1  # Only advance the counter for queries actually written

print("Process completed with error handling!")

Process completed with error handling!


In [None]:
import os
import json

# Root of the restructured, one-query-per-file output tree
original_folder = r"D:\Projects\ToolsForTheJobBenchmark\benchmark\queries\single_category\independent\Original"


def count_readable_json_files(folder_path):
    """Return the number of ``.json`` files directly inside ``folder_path`` that parse.

    Each file that loads as JSON counts as one query; the loaded content is not
    inspected. Files that cannot be read or parsed are reported on stdout and
    left out of the total.
    """
    readable = 0
    for entry in os.listdir(folder_path):
        if not entry.endswith(".json"):
            continue

        json_file_path = os.path.join(folder_path, entry)
        try:
            with open(json_file_path, 'r', encoding='utf-8') as f:
                json.load(f)
        except Exception as e:
            print(f"Error reading file {json_file_path}: {str(e)}")
        else:
            # One successfully parsed file is counted as one query
            readable += 1
    return readable


# Maps category folder name -> number of query files that loaded successfully
category_query_counts = {}

for category_folder in os.listdir(original_folder):
    category_path = os.path.join(original_folder, category_folder)

    # Anything that is not a directory is not a category
    if not os.path.isdir(category_path):
        continue

    category_query_counts[category_folder] = count_readable_json_files(category_path)

# Report the per-category totals
for category, count in category_query_counts.items():
    print(f"Category: {category}, Number of Queries: {count}")
