In [2]:
import json

def read_json_file(file_path):
    """Load and return the JSON document stored at *file_path*.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform default encoding, so non-ASCII text is read the same way on
    every system.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized Python object produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def write_json_file(data, file_path):
    """Serialize *data* as JSON (4-space indent) and write it to *file_path*.

    Args:
        data: Object to serialize with ``json.dumps``.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If *data* contains a value that is not JSON serializable.
            The destination file is left untouched in that case.
        OSError: If the file cannot be opened for writing.
    """
    # Serialize before opening the file: opening in 'w' mode truncates it, so
    # a serialization failure must happen first to avoid clobbering an
    # existing file with partial output.
    serialized = json.dumps(data, indent=4)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(serialized)

def build_parent_map(categories):
    """Index category records by id for parent lookups.

    Each record is read positionally: ``category[0]`` is the name,
    ``category[1]`` the id, ``category[2]`` a comma-separated string of
    parent ids and ``category[3]`` the primary parent id.

    Args:
        categories: Iterable of category records as described above.

    Returns:
        A dict mapping each category id to a dict with the keys ``"name"``,
        ``"parent_ids"`` (list of ints) and ``"primary_parent_id"``. The
        root entry (id 1, "Philosophy") is always present.

    Raises:
        ValueError: If a non-blank parent id token is not an integer.
    """
    parent_map = {1: {"name": "Philosophy", "parent_ids": [], "primary_parent_id": None}}  # Include root category
    for category in categories:
        parent_map[category[1]] = {
            "name": category[0],
            # Skip blank tokens so an empty field or a trailing comma yields
            # no parent instead of failing on int('').
            "parent_ids": [
                int(token) for token in category[2].split(',') if token.strip()
            ],
            "primary_parent_id": category[3],
        }
    return parent_map

def determine_category_level(current_id, parent_map):
    """Return the depth of a category along its primary-parent chain.

    A category whose primary parent is the root (id 1) has level 1; each
    additional primary-parent hop adds one. Passing the root id itself also
    returns 1, because the walk never starts.

    Args:
        current_id: Id of the category to measure.
        parent_map: Mapping of id to a dict containing ``"primary_parent_id"``.

    Returns:
        The level as an int (>= 1).

    Raises:
        KeyError: If an id on the chain is missing from *parent_map*.
        ValueError: If the primary-parent chain loops back on itself.
    """
    level = 1
    visited = set()
    while current_id != 1:
        # Guard against malformed data: without this, a cyclic chain would
        # spin forever.
        if current_id in visited:
            raise ValueError(
                f"Cycle detected in primary-parent chain at category {current_id!r}"
            )
        visited.add(current_id)

        primary_parent_id = parent_map[current_id]["primary_parent_id"]
        if primary_parent_id == 1:
            break
        current_id = primary_parent_id
        level += 1
    return level

def level_to_name(level):
    """Translate a numeric taxonomy level into its label.

    Levels 0 through 5 map to "Root", "Clusters", "Areas", "Subareas",
    "Topics" and "Subtopics"; any other value yields "Unknown".
    """
    labels = dict(enumerate((
        "Root",
        "Clusters",
        "Areas",
        "Subareas",
        "Topics",
        "Subtopics",
    )))
    try:
        return labels[level]
    except KeyError:
        return "Unknown"

def update_categories_with_full_paths(categories):
    """Build a per-category summary with level and ancestor paths.

    Each record is read positionally: ``category[0]`` is the title,
    ``category[1]`` the id and ``category[3]`` the primary parent id
    (``category[2]`` is consumed by ``build_parent_map``).

    Args:
        categories: Sequence of category records as described above.

    Returns:
        A dict holding a fixed root entry under the string key ``"1"`` plus
        one entry per record keyed by ``category[1]``. Every entry has the
        keys ``"title"``, ``"level"`` (``"<n> <label>"``),
        ``"primary_parent"``, ``"path_primary_parents"`` and
        ``"path_all_parents"``. Ancestors are stored as the string form of
        an ``(id, name)`` tuple, ordered from the root side down to the
        category.
    """
    lookup = build_parent_map(categories)

    def label(node_id):
        # Ancestors are rendered as the string form of an (id, name) tuple.
        return str((node_id, lookup[node_id]['name']))

    result = {
        "1": {
            "title": "Philosophy",
            "level": "0 Root",
            "primary_parent": [],
            "path_primary_parents": [],
            "path_all_parents": [],
        }
    }

    for entry in categories:
        entry_id = entry[1]
        depth = determine_category_level(entry_id, lookup)

        # Walk up the primary-parent chain, nearest ancestor first.
        # The root (id 1) itself is not listed.
        nearest_first = []
        node = entry_id
        while node != 1:
            node = lookup[node]['primary_parent_id']
            if node != 1:
                nearest_first.append(label(node))

        # For each of the `depth` nodes on the primary chain (starting at
        # the category itself), collect every known parent of that node.
        per_step_parents = []
        node = entry_id
        for _ in range(depth):
            per_step_parents.append(
                [label(pid) for pid in lookup[node]['parent_ids'] if pid in lookup]
            )
            node = lookup[node]['primary_parent_id']

        # Both paths were gathered bottom-up; flip them so they read from
        # the root side down to the category.
        result[entry_id] = {
            "title": entry[0],
            "level": f"{depth} {level_to_name(depth)}",
            "primary_parent": label(entry[3]),
            "path_primary_parents": nearest_first[::-1],
            "path_all_parents": per_step_parents[::-1],
        }

    return result

def main():
    """Convert the input taxonomy file into the full-path taxonomy file.

    Reads 'input_taxonomy.json', enriches every category with its level and
    ancestor paths, writes the result to 'philpaper_taxonomy_full_path.json'
    and prints a confirmation message.
    """
    source_path = 'input_taxonomy.json'
    output_file = 'philpaper_taxonomy_full_path.json'

    enriched = update_categories_with_full_paths(read_json_file(source_path))
    write_json_file(enriched, output_file)

    print(f'Updated categories have been written to {output_file}')

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Updated categories have been written to philpaper_taxonomy_full_path.json
