In [None]:
import networkx as nx
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Read the two anonymised graphs from edge-list files in the working directory.
# nx.read_edgelist is called with its defaults, once per file, in G1-then-G2 order.
unseed_G1, unseed_G2 = (
    nx.read_edgelist(edgelist_path)
    for edgelist_path in ("unseed_G1.edgelist", "unseed_G2.edgelist")
)

# Function to assign features to nodes (modify as per your data)
def assign_features(graph, dim=10, rng=None):
    """Attach a random placeholder ``'feature'`` vector to every node of ``graph``.

    The vectors are uniform samples from [0, 1) and carry no information about
    the graph; replace this with real node attributes for a meaningful analysis.

    Parameters
    ----------
    graph : object exposing ``graph.nodes``
        Iterating ``graph.nodes`` must yield node keys and ``graph.nodes[node]``
        must be a mutable mapping (as with a NetworkX graph). Modified in place.
    dim : int, optional
        Length of each feature vector. Defaults to 10, the original fixed size.
    rng : None, int, or numpy.random.Generator, optional
        ``None`` (default) draws from NumPy's global RNG via ``np.random.rand``,
        exactly as before, so ``np.random.seed`` still controls the result.
        Any other value is passed to ``np.random.default_rng`` and gives a
        reproducible stream that does not touch the global RNG state.

    Returns
    -------
    None
    """
    # Pick the sampler once instead of branching inside the loop.
    draw = np.random.rand if rng is None else np.random.default_rng(rng).random
    for node in graph.nodes:
        graph.nodes[node]['feature'] = draw(dim)

# Assign features to nodes in both unseeded graphs.
# assign_features draws from NumPy's global RNG, so seed it first: without a
# seed the features -- and every match printed below -- change on each run.
np.random.seed(42)
assign_features(unseed_G1)
assign_features(unseed_G2)

# Matching nodes in unseed_G1 to unseed_G2 based on cosine similarity of features.
# Collect the G2 candidates and stack their features ONCE. Nodes without a
# 'feature' attribute are left out: they could only ever score -1 and a match
# requires a strictly positive similarity, so they can never be selected.
g2_candidates = [n2 for n2 in unseed_G2.nodes if 'feature' in unseed_G2.nodes[n2]]
same_pairs = []
if g2_candidates:  # np.argmax would raise on an empty candidate list
    g2_features = np.vstack([unseed_G2.nodes[n2]['feature'] for n2 in g2_candidates])
    for n1 in unseed_G1.nodes:
        if 'feature' not in unseed_G1.nodes[n1]:  # Skip nodes without a 'feature' attribute
            continue
        feature1 = unseed_G1.nodes[n1]['feature']
        # One vectorised call per G1 node (1 x |G2| similarities) instead of one
        # cosine_similarity call per node pair; memory stays at O(|G2|) per row.
        similarities = cosine_similarity([feature1], g2_features)[0]
        perfect_matched_index = int(np.argmax(similarities))  # first maximum, in G2 node order
        if similarities[perfect_matched_index] > 0:  # Only accept a strictly positive similarity
            same_pairs.append((n1, g2_candidates[perfect_matched_index]))

# Output the matched pairs
for pair in same_pairs:
    print(f"Matched: {pair[0]} in unseed_G1 with {pair[1]} in unseed_G2")


KeyboardInterrupt: ignored

In [None]:
import networkx as nx
import numpy as np
from google.colab import files

# Upload unseed_G1.edgelist (the file is read back by name below)
uploaded_G1 = files.upload()

# Upload unseed_G2.edgelist
uploaded_G2 = files.upload()


# Load unseed_G1 and unseed_G2 from the uploaded edge-list files
unseed_G1 = nx.read_edgelist("unseed_G1.edgelist")
unseed_G2 = nx.read_edgelist("unseed_G2.edgelist")

# Build G2's node order and neighbour sets ONCE; they do not depend on the G1
# node being matched, so recomputing them inside the loop is wasted work.
g2_nodes = list(unseed_G2.nodes)
g2_neighbor_sets = [set(unseed_G2.neighbors(node2)) for node2 in g2_nodes]

# De-anonymization by matching nodes in unseed_G1 to unseed_G2 based on Jaccard similarity of node neighborhoods.
# Each G1 node independently takes its best-scoring G2 node, so the mapping is
# not forced to be one-to-one: several G1 nodes may select the same G2 node.
matched_pairs = []
if g2_nodes:  # np.argmax would raise on an empty similarity list
    for node1 in unseed_G1.nodes:
        neighbors_1 = set(unseed_G1.neighbors(node1))
        similarities = []
        for neighbors_2 in g2_neighbor_sets:
            shared = len(neighbors_1 & neighbors_2)
            # |A union B| = |A| + |B| - |A intersect B|; avoids building the union set.
            combined = len(neighbors_1) + len(neighbors_2) - shared
            # Two empty neighbourhoods have no defined Jaccard index; score them 0
            # instead of dividing by zero.
            similarities.append(shared / combined if combined else 0.0)
        best_match_index = int(np.argmax(similarities))  # first maximum, in G2 node order
        best_similarity = similarities[best_match_index]
        if best_similarity > 0:  # Only consider non-zero similarity matches
            matched_pairs.append((node1, g2_nodes[best_match_index]))

# Output the matched pairs
for pair in matched_pairs:
    print(f"De-anonymized: {pair[0]} in unseed_G1 matches with {pair[1]} in unseed_G2")


Saving unseed_G1.edgelist to unseed_G1.edgelist


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
De-anonymized: 169 in unseed_G1 matches with 7108 in unseed_G2
De-anonymized: 171 in unseed_G1 matches with 7212 in unseed_G2
De-anonymized: 212 in unseed_G1 matches with 3780 in unseed_G2
De-anonymized: 241 in unseed_G1 matches with 7560 in unseed_G2
De-anonymized: 379 in unseed_G1 matches with 7896 in unseed_G2
De-anonymized: 417 in unseed_G1 matches with 5714 in unseed_G2
De-anonymized: 420 in unseed_G1 matches with 2828 in unseed_G2
De-anonymized: 539 in unseed_G1 matches with 1155 in unseed_G2
De-anonymized: 583 in unseed_G1 matches with 4963 in unseed_G2
De-anonymized: 662 in unseed_G1 matches with 7557 in unseed_G2
De-anonymized: 787 in unseed_G1 matches with 7237 in unseed_G2
De-anonymized: 810 in unseed_G1 matches with 7278 in unseed_G2
De-anonymized: 842 in unseed_G1 matches with 8068 in unseed_G2
De-anonymized: 844 in unseed_G1 matches with 3931 in unseed_G2
De-anonymized: 892 in unseed_G1 matches with 16 in un

In [1]:
# Split the (G1 node, G2 node) pairs into their two columns.
# zip(*[]) produces nothing to unpack, so fall back to two empty tuples when
# no pair was matched instead of raising ValueError.
list_A, list_B = zip(*matched_pairs) if matched_pairs else ((), ())

set_A = set(list_A)
set_B = set(list_B)

# Number of distinct nodes on each side of the matching. If the B count is
# smaller than the A count, some G2 node was chosen by more than one G1 node,
# i.e. the matching is not one-to-one.
print("Set from A:", len(set_A))
print("Set from B:", len(set_B))


Set from A: 8561
Set from B: 4442


In [None]:
# Count the distinct node IDs found in the FIRST column of unseed_G1.edgelist.
# Only the first field of each line is read, so a node that appears solely as
# the second endpoint of its edges is not counted here.
n = set()

with open('unseed_G1.edgelist', 'r') as f:
    for line in f:
        # Drop '#' comments and skip blank lines (both are accepted by
        # nx.read_edgelist) rather than crashing on them.
        fields = line.split('#', 1)[0].split()
        if not fields:
            continue
        n.add(int(fields[0]))

print(len(n))




8561


In [None]:
# Count the distinct node IDs found in the FIRST column of unseed_G2.edgelist.
# Only the first field of each line is read, so a node that appears solely as
# the second endpoint of its edges is not counted here.
n = set()

with open('unseed_G2.edgelist', 'r') as f:
    for line in f:
        # Drop '#' comments and skip blank lines (both are accepted by
        # nx.read_edgelist) rather than crashing on them.
        fields = line.split('#', 1)[0].split()
        if not fields:
            continue
        n.add(int(fields[0]))

print(len(n))


8547
