In [4]:
import os
import json

def collect_queries(folder_name):
    """
    Collect every query stored in the JSON files under ``folder_name``.

    The folder is walked recursively. Each ``.json`` file may hold either a
    list of query dicts or a single query dict. Every collected query has its
    'query_id' overwritten with a sequential number starting at 1, so ids are
    unique across the whole folder.

    Directories and files are visited in sorted order so the assigned ids are
    reproducible across runs and platforms. The file
    ``all_<folder>_queries.json`` directly inside ``folder_name`` is skipped:
    that is where main() writes the aggregate, and reading it back would
    duplicate every query on a re-run.

    Args:
        folder_name: Path of the folder to scan.

    Returns:
        A list of query dicts (the loaded dicts themselves, with 'query_id'
        replaced). Files that cannot be decoded, list items that are not
        dicts, and files whose top-level value is neither a list nor a dict
        are reported on stdout and skipped.
    """
    queries = []
    query_id_counter = 1

    top_level = os.path.normpath(folder_name)
    aggregate_name = f"all_{os.path.basename(top_level)}_queries.json"

    for root, dirs, files in os.walk(folder_name):
        # Sorting ``dirs`` in place steers os.walk's descent order, which
        # otherwise depends on the filesystem.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.json'):
                continue
            # Do not feed a previously written aggregate back into itself.
            if file == aggregate_name and os.path.normpath(root) == top_level:
                continue

            file_path = os.path.join(root, file)
            try:
                # 'utf-8-sig' reads plain UTF-8 and also strips a leading BOM,
                # which json.load would otherwise reject.
                with open(file_path, 'r', encoding='utf-8-sig') as f:
                    data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                print(f"Error decoding JSON from file {file_path}: {e}")
                continue

            if isinstance(data, dict):
                # A lone dict is treated as a single query.
                entries = [data]
            elif isinstance(data, list):
                entries = data
            else:
                print(f"Unexpected data type in file {file_path}: {type(data)}")
                continue

            for query in entries:
                if not isinstance(query, dict):
                    print(f"Expected a dict in file {file_path}, got {type(query)}")
                    continue
                query['query_id'] = query_id_counter
                query_id_counter += 1
                queries.append(query)

    return queries

def main():
    """
    Build one aggregate JSON file per query-category folder.

    For each known folder name that exists as a directory in the current
    working directory, gathers its queries with collect_queries() and writes
    them to ``<folder>/all_<folder>_queries.json``. Names with no matching
    directory are reported on stdout and skipped.

    The aggregate is written inside the scanned folder itself, so it sits
    among the input files on any later run.
    """
    folder_types = ["Original", "No-tools", "Replaceable", "Non-replaceable", "Underspecified"]

    for folder_name in folder_types:
        # isdir rather than exists: a regular file with this name can be
        # neither scanned nor written into.
        if not os.path.isdir(folder_name):
            print(f"Folder {folder_name} does not exist.")
            continue

        print(f"Processing folder: {folder_name}")
        queries = collect_queries(folder_name)

        # Write the concatenated queries to all_{folder_name}_queries.json
        output_file = os.path.join(folder_name, f"all_{folder_name}_queries.json")
        # Explicit encoding keeps the output independent of the platform default.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(queries, f, indent=4)
        print(f"Written {len(queries)} queries to {output_file}")

# Run the aggregation only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


Processing folder: Underspecified
Written 555 queries to Underspecified\all_Underspecified_queries.json
