In [2]:
# Overlap: count how often each pair of data sources is connected by a link in the knowledge-graph files
import json
import os
from collections import defaultdict

def load_json_files(file_paths):
    """Parse every path in *file_paths* as JSON and return the parsed documents.

    Files are opened as UTF-8 text. A file whose content is not valid JSON is
    reported on stdout and left out of the result, so the returned list may be
    shorter than *file_paths*. Errors raised while opening a file are not
    handled here and propagate to the caller.
    """
    parsed_documents = []
    for path in file_paths:
        with open(path, 'r', encoding='utf-8') as handle:
            try:
                document = json.load(handle)
            except json.JSONDecodeError:
                print(f"Json Parsing Error: File {path} not a valid json format")
                continue
            parsed_documents.append(document)
    return parsed_documents

def count_source_pairs(json_data):
    """Count links in *json_data* grouped by the sources of their two endpoints.

    Each entry of ``json_data['nodes']`` is read for its ``'id'`` and
    ``'Source'`` values, and each entry of ``json_data['links']`` for its
    ``'source'`` and ``'target'`` node ids. A link is counted only when both
    endpoint ids resolve to a truthy ``'Source'`` value.

    Returns a ``defaultdict(int)`` keyed by the sorted 2-tuple of the two
    source values, so ``(a, b)`` and ``(b, a)`` share one counter.
    """
    # Map node id -> its 'Source' value; a later duplicate id overwrites an earlier one.
    source_by_id = {
        entry['id']: entry['Source'] for entry in json_data.get('nodes', [])
    }

    tally = defaultdict(int)
    for edge in json_data.get('links', []):
        from_id, to_id = edge['source'], edge['target']
        from_source = source_by_id.get(from_id)
        to_source = source_by_id.get(to_id)

        # Skip links with an unknown endpoint or an empty source value.
        if not (from_source and to_source):
            continue

        # Sorting makes the pair order-independent, so symmetric links merge.
        key = tuple(sorted((from_source, to_source)))
        tally[key] += 1

    return tally

def main(file_paths):
    """Print per-file and overall source-pair counts for *file_paths*.

    Each path is loaded with ``load_json_files`` and counted with
    ``count_source_pairs``. The counts for each file are printed under the
    file's base name and then added to a running total, which is printed
    after all files have been processed.

    A file that ``load_json_files`` could not parse is skipped: that helper
    has already printed the parse error, and the remaining files are still
    processed. Returns ``None``.
    """
    total_pair_counts = defaultdict(int)

    for file_path in file_paths:
        loaded = load_json_files([file_path])
        if not loaded:
            # load_json_files drops files with invalid JSON, so the list can
            # be empty; indexing [0] here would raise IndexError and abort
            # the whole run because of a single bad file.
            continue
        pair_counts = count_source_pairs(loaded[0])

        print(f"\n{os.path.basename(file_path)}  source pairs Num.:")
        for pair, count in pair_counts.items():
            print(f"{pair}: {count}")
            total_pair_counts[pair] += count

    print("\nTotal Count Of All Files")
    for pair, count in total_pair_counts.items():
        print(f"{pair}: {count}")

# Script entry point: runs the analysis only when this code is executed
# directly, not when it is imported.
if __name__ == "__main__":
    # Knowledge-graph JSON files to analyse (npm, pypi and ruby graphs).
    # The paths are relative, so they resolve against the current working directory.
    file_paths = [
        "KG/Duplicated_Edge/duplicate/npm_graph_name.json",
        "KG/Duplicated_Edge/duplicate/pypi_graph_name.json",
        "KG/Duplicated_Edge/duplicate/ruby_graph_name.json"
    ]
    main(file_paths)


npm_graph_name.json  source pairs Num.:
('Backstabber-Knife', 'Maloss'): 115
('Backstabber-Knife', 'Phylum'): 48
('Backstabber-Knife', 'Socket'): 1
('Backstabber-Knife', 'Blogs'): 36
('Blogs', 'Maloss'): 6
('Backstabber-Knife', 'Snyk.io'): 1
('Maloss', 'Snyk.io'): 1
('Backstabber-Knife', 'Tianwen'): 1
('Maloss', 'Tianwen'): 61
('Maloss', 'Socket'): 3
('GitHub Advisory', 'Maloss'): 6
('Blogs', 'GitHub Advisory'): 1
('Phylum', 'Snyk.io'): 9
('Phylum', 'Tianwen'): 269
('Socket', 'Tianwen'): 2

pypi_graph_name.json  source pairs Num.:
('Backstabber-Knife', 'Mal-PyPI Dataset'): 2897
('Backstabber-Knife', 'Maloss'): 216
('Mal-PyPI Dataset', 'Maloss'): 201
('Backstabber-Knife', 'Phylum'): 918
('Mal-PyPI Dataset', 'Phylum'): 918
('Backstabber-Knife', 'Tianwen'): 35
('Mal-PyPI Dataset', 'Tianwen'): 32
('Maloss', 'Tianwen'): 8
('Backstabber-Knife', 'DataDog'): 7
('DataDog', 'Mal-PyPI Dataset'): 7
('DataDog', 'Phylum'): 15
('Backstabber-Knife', 'Snyk.io'): 2
('Snyk.io', 'Tianwen'): 106
('Phylum'

In [None]:
# Result

# npm_graph_name.json  source pairs Num.:
# ('Backstabber-Knife', 'Maloss'): 115
# ('Backstabber-Knife', 'Phylum'): 48
# ('Backstabber-Knife', 'Socket'): 1
# ('Backstabber-Knife', 'Blogs'): 36
# ('Blogs', 'Maloss'): 6
# ('Backstabber-Knife', 'Snyk.io'): 1
# ('Maloss', 'Snyk.io'): 1
# ('Backstabber-Knife', 'Tianwen'): 1
# ('Maloss', 'Tianwen'): 61
# ('Maloss', 'Socket'): 3
# ('GitHub Advisory', 'Maloss'): 6
# ('Blogs', 'GitHub Advisory'): 1
# ('Phylum', 'Snyk.io'): 9
# ('Phylum', 'Tianwen'): 269
# ('Socket', 'Tianwen'): 2

# pypi_graph_name.json  source pairs Num.:
# ('Backstabber-Knife', 'Mal-PyPI Dataset'): 2897
# ('Backstabber-Knife', 'Maloss'): 216
# ('Mal-PyPI Dataset', 'Maloss'): 201
# ('Backstabber-Knife', 'Phylum'): 918
# ('Mal-PyPI Dataset', 'Phylum'): 918
# ('Backstabber-Knife', 'Tianwen'): 35
# ('Mal-PyPI Dataset', 'Tianwen'): 32
# ('Maloss', 'Tianwen'): 8
# ('Backstabber-Knife', 'DataDog'): 7
# ('DataDog', 'Mal-PyPI Dataset'): 7
# ('DataDog', 'Phylum'): 15
# ('Backstabber-Knife', 'Snyk.io'): 2
# ('Snyk.io', 'Tianwen'): 106
# ('Phylum', 'Snyk.io'): 8
# ('Phylum', 'Tianwen'): 3
# ('Blogs', 'DataDog'): 1

# ruby_graph_name.json  source pairs Num.:
# ('Backstabber-Knife', 'Maloss'): 37

# Total Count Of All Files
# ('Backstabber-Knife', 'Maloss'): 368
# ('Backstabber-Knife', 'Phylum'): 966
# ('Backstabber-Knife', 'Socket'): 1
# ('Backstabber-Knife', 'Blogs'): 36
# ('Blogs', 'Maloss'): 6
# ('Backstabber-Knife', 'Snyk.io'): 3
# ('Maloss', 'Snyk.io'): 1
# ('Backstabber-Knife', 'Tianwen'): 36
# ('Maloss', 'Tianwen'): 69
# ('Maloss', 'Socket'): 3
# ('GitHub Advisory', 'Maloss'): 6
# ('Blogs', 'GitHub Advisory'): 1
# ('Phylum', 'Snyk.io'): 17
# ('Phylum', 'Tianwen'): 272
# ('Socket', 'Tianwen'): 2
# ('Backstabber-Knife', 'Mal-PyPI Dataset'): 2897
# ('Mal-PyPI Dataset', 'Maloss'): 201
# ('Mal-PyPI Dataset', 'Phylum'): 918
# ('Mal-PyPI Dataset', 'Tianwen'): 32
# ('Backstabber-Knife', 'DataDog'): 7
# ('DataDog', 'Mal-PyPI Dataset'): 7
# ('DataDog', 'Phylum'): 15
# ('Snyk.io', 'Tianwen'): 106
# ('Blogs', 'DataDog'): 1 
