get evolution

In [None]:
import os
import json
from datetime import datetime

time_name = 'time'


def sort_nodes(nodes):
    """Order nodes chronologically, with undated nodes last.

    Nodes whose ``time_name`` field holds an ISO-8601 timestamp come first,
    oldest to newest. Nodes whose timestamp is missing, ``None`` or the
    literal ``"Not available"`` follow, ordered case-insensitively by their
    ``'name'`` (a missing name sorts as the empty string).

    Args:
        nodes: Iterable of dicts describing the nodes.

    Returns:
        A new list holding the same node objects in sorted order; the input
        is not modified.

    Raises:
        ValueError: If a timestamp is present but is not valid ISO-8601.
    """
    from datetime import timezone

    def published_at(node):
        # A trailing "Z" is rewritten because datetime.fromisoformat only
        # accepts it from Python 3.11 onwards.
        stamp = datetime.fromisoformat(node[time_name].replace("Z", "+00:00"))
        # Naive and aware datetimes cannot be ordered against each other, so
        # timestamps without an offset are interpreted as UTC.
        if stamp.tzinfo is None:
            stamp = stamp.replace(tzinfo=timezone.utc)
        return stamp

    def display_name(node):
        return str(node.get('name') or '').lower()

    available_nodes = []
    not_available_nodes = []
    for node in nodes:
        stamp = node.get(time_name)
        if stamp is None or stamp == "Not available":
            not_available_nodes.append(node)
        else:
            available_nodes.append(node)

    available_nodes.sort(key=published_at)
    not_available_nodes.sort(key=display_name)
    return available_nodes + not_available_nodes

def process_json_files_in_folder(folder_path):
    """Rewrite each ``.json`` file in ``folder_path`` with its nodes sorted.

    Every matching file is loaded, its ``'nodes'`` entry (when present) is
    replaced by the result of ``sort_nodes``, and the document is written
    back in place as UTF-8 with a four-space indent. One confirmation line
    is printed per file.
    """
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.json'):
            continue

        file_path = os.path.join(folder_path, file_name)

        with open(file_path, 'r', encoding='utf-8') as source:
            document = json.load(source)

        if 'nodes' in document:
            document['nodes'] = sort_nodes(document['nodes'])

        # The file is rewritten even when it has no 'nodes' entry.
        with open(file_path, 'w', encoding='utf-8') as target:
            json.dump(document, target, ensure_ascii=False, indent=4)

        print(f"Processed and sorted nodes in {file_name}")

def process_multiple_folders(folder_paths):
    """Run ``process_json_files_in_folder`` on each folder that exists.

    Paths that do not exist are reported on standard output and skipped.
    """
    for candidate in folder_paths:
        if not os.path.exists(candidate):
            print(f"Folder {candidate} does not exist.")
            continue
        process_json_files_in_folder(candidate)

# Folders whose JSON files are sorted in place. The single empty string is
# presumably a placeholder to be replaced with real paths: os.path.exists('')
# is false, so as written the call below only reports a missing folder.
folder_paths = [
''
]

process_multiple_folders(folder_paths)


In [None]:
import os
import json
import pandas as pd
import math

def replace_nan_with_none(data):
    """Return ``data`` with every float NaN replaced by ``None``.

    Dicts and lists are rebuilt recursively (dict keys are left untouched);
    any other value is returned unchanged.
    """
    if isinstance(data, list):
        return [replace_nan_with_none(element) for element in data]

    if isinstance(data, dict):
        cleaned = {}
        for field, value in data.items():
            cleaned[field] = replace_nan_with_none(value)
        return cleaned

    if isinstance(data, float) and math.isnan(data):
        return None

    return data
    
# Initialize the global change counters. compare_nodes increments them once
# per pair of consecutive nodes that it inspects.
total_name_change = 0  # pairs whose 'name' differs
total_version_change = 0  # pairs with the same 'name' but a different 'version'
total_description_change = 0  # pairs that both carry 'SHA-256' and differ in 'description'
total_dependencies_change = 0  # pairs that both carry 'SHA-256' and differ in 'dependencies'
total_code_change = 0  # pairs that both carry 'SHA-256' and whose hashes differ
# Not assigned by any function below; process_multiple_folders works with a
# local variable of the same name.
total_change_num = 0

def compare_nodes(prev_node, curr_node):
    """Update the global change counters for one pair of consecutive nodes.

    A pair counts as a name change when the ``'name'`` values differ, and
    otherwise as a version change when the ``'version'`` values differ.
    Description, code (``'SHA-256'``) and dependency changes are only
    counted when both nodes carry a ``'SHA-256'`` entry.

    Args:
        prev_node: Dict describing the earlier node; must contain ``'name'``.
        curr_node: Dict describing the later node; must contain ``'name'``.

    Returns:
        None. The module-level ``total_*_change`` counters are incremented.

    Raises:
        KeyError: If either node has no ``'name'`` key.
    """
    global total_name_change, total_version_change, total_description_change
    global total_dependencies_change, total_code_change

    # Normalise NaN values to None so that two missing values compare equal
    # (NaN != NaN would otherwise register as a change).
    prev_node = replace_nan_with_none(prev_node)
    curr_node = replace_nan_with_none(curr_node)

    if prev_node['name'] != curr_node['name']:
        total_name_change += 1
    elif prev_node.get('version', 'none') != curr_node.get('version', 'none'):
        total_version_change += 1

    # NOTE(review): only an absent key (or the literal string 'none') counts
    # as "no hash"; a null/NaN hash becomes None and is treated as present.
    # Confirm that this is intended.
    prev_sha = prev_node.get('SHA-256', 'none')
    curr_sha = curr_node.get('SHA-256', 'none')
    if prev_sha == 'none' or curr_sha == 'none':
        return

    # .get() keeps a node without 'description' or 'dependencies' from
    # raising KeyError, matching how 'version' and 'SHA-256' are read.
    if prev_node.get('description') != curr_node.get('description'):
        total_description_change += 1
    if prev_sha != curr_sha:
        total_code_change += 1
    if prev_node.get('dependencies') != curr_node.get('dependencies'):
        total_dependencies_change += 1

def process_json_files_in_folder(folder_path):
    """Pass every consecutive node pair of each ``.json`` file to ``compare_nodes``.

    Each matching file in ``folder_path`` is loaded, its ``'nodes'`` list
    (empty when the key is absent) is walked in order, and every node is
    compared with its immediate predecessor. Files are only read, never
    written.
    """
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.json'):
            continue

        with open(os.path.join(folder_path, file_name), 'r', encoding='utf-8') as handle:
            document = json.load(handle)

        node_list = document.get('nodes', [])

        # Stop one short of the end so that index + 1 is always valid.
        for index in range(len(node_list) - 1):
            compare_nodes(node_list[index], node_list[index + 1])

def process_multiple_folders(folder_paths, output_file=''):
    """Aggregate the change counters over several folders and report them.

    Each existing folder is handed to ``process_json_files_in_folder``;
    missing folders are reported and skipped. Every global ``total_*_change``
    counter is then divided by the number of name changes plus version
    changes, and the resulting shares are printed and optionally exported.

    Args:
        folder_paths: Iterable of folder paths to analyse.
        output_file: Path of the Excel file to write. When empty (the
            default) the export is skipped and the results are only printed.

    Returns:
        None.
    """
    for folder_path in folder_paths:
        if os.path.exists(folder_path):
            process_json_files_in_folder(folder_path)
        else:
            print(f"Folder {folder_path} does not exist.")

    # Local on purpose: the module-level variable of the same name is left
    # untouched, as in the original code.
    total_change_num = total_name_change + total_version_change

    def share(count):
        # With nothing counted every share is reported as 0.0 rather than
        # dividing by zero.
        return count / total_change_num if total_change_num else 0.0

    final_results = {
        "CN": share(total_name_change),
        "CV": share(total_version_change),
        "CD": share(total_description_change),
        "CDep": share(total_dependencies_change),
        "CC": share(total_code_change),
    }

    if output_file:
        # One row per change type, ready for Excel.
        df = pd.DataFrame(list(final_results.items()), columns=['Change Type', 'Value'])
        df.to_excel(output_file, index=False)
        print(f"Final aggregated results saved to {output_file}")
    else:
        print("No output file given; skipping Excel export.")

    print("Final aggregated results across all folders:", final_results)

# Call the function to process multiple folders.
# The list is empty as written, so no folder is visited; add the folder
# paths to analyse here.
folder_paths = [

]

process_multiple_folders(folder_paths)
