In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
def find_structural_equivalent_nodes(graph):
    """Group nodes that have exactly the same set of neighbours.

    Args:
        graph: Any object exposing ``nodes()`` and ``neighbors(node)`` in the
            style of a networkx graph. Node labels must be hashable.

    Returns:
        A list of groups (lists of nodes). Only groups with at least two
        members are returned. Groups appear in the order in which their first
        member occurs in ``graph.nodes()``, and members keep that order too.
    """
    # Bucket nodes by their neighbour set in one pass. Dicts preserve
    # insertion order, so the buckets come out in first-appearance order.
    groups_by_neighbourhood = {}
    for node in graph.nodes():
        neighbourhood = frozenset(graph.neighbors(node))
        groups_by_neighbourhood.setdefault(neighbourhood, []).append(node)

    # A node whose neighbourhood is unique is not equivalent to anything.
    return [members for members in groups_by_neighbourhood.values() if len(members) > 1]

def find_common_neighbor_equivalent_nodes(graph):
    """Grow groups of nodes linked by the size of their shared neighbourhood.

    Starting from a seed node, a candidate joins the group when the number of
    neighbours it shares with the group member currently being examined equals
    the seed's degree. Newly added members are examined in turn, so a group
    can grow through chains of such overlaps.

    Note that a seed without neighbours has a required overlap of zero, so
    every candidate sharing no neighbour with it is absorbed into its group.

    Args:
        graph: Any object exposing ``nodes()`` and ``neighbors(node)`` in the
            style of a networkx graph.

    Returns:
        A list of groups (lists of nodes) with at least two members each, in
        the order their seed occurs in ``graph.nodes()``.
    """
    adjacency = {vertex: set(graph.neighbors(vertex)) for vertex in graph.nodes()}
    unassigned = list(adjacency)
    groups = []

    while unassigned:
        seed = unassigned.pop(0)
        required_overlap = len(adjacency[seed])
        cluster = [seed]
        frontier = [seed]

        # Examine members in the order they joined the cluster.
        while frontier:
            reference = adjacency[frontier.pop(0)]
            leftover = []
            for candidate in unassigned:
                if len(reference & adjacency[candidate]) == required_overlap:
                    cluster.append(candidate)
                    frontier.append(candidate)
                else:
                    leftover.append(candidate)
            unassigned = leftover

        if len(cluster) > 1:
            groups.append(cluster)

    return groups

def find_automorphic_equivalent_nodes(graph):
    """Group nodes whose neighbour sets are identical.

    The test applied here is equality of neighbourhoods only; no search over
    graph automorphisms is performed, so nodes that are interchangeable under
    a symmetry of the graph but have different neighbours are not grouped.

    Neighbourhoods are compared as sets, so node labels only need to be
    hashable; they do not have to be sortable against each other.

    Args:
        graph: Any object exposing ``nodes()`` and ``neighbors(node)`` in the
            style of a networkx graph.

    Returns:
        A list of groups (lists of nodes) with at least two members each.
        Groups appear in the order in which their first member occurs in
        ``graph.nodes()``, and members keep that order too.
    """
    # Set equality is order-free, so no sorting is needed, and hashing the
    # neighbourhood lets all nodes be bucketed in a single pass.
    groups_by_neighbourhood = {}
    for node in graph.nodes():
        neighbourhood = frozenset(graph.neighbors(node))
        groups_by_neighbourhood.setdefault(neighbourhood, []).append(node)

    return [members for members in groups_by_neighbourhood.values() if len(members) > 1]

def find_edge_equivalent_nodes(graph):
    """Group edges whose endpoints cover every node collected so far.

    Starting from a seed edge, each later edge joins the group when it
    contains all nodes gathered by the group up to that point; its endpoints
    are then added to the gathered set. If every endpoint pair occurs only
    once and there are no self-loops, no edge can contain both endpoints of a
    different edge, so no group forms.

    Args:
        graph: Any object exposing ``edges()`` that yields node tuples, in the
            style of a networkx graph.

    Returns:
        A list of groups (lists of edge tuples) with at least two members
        each, in the order their seed edge occurs in ``graph.edges()``.
    """
    groups = []
    pending = list(graph.edges())

    while pending:
        seed, *candidates = pending
        members = [seed]
        covered = set(seed)
        pending = []

        for edge in candidates:
            # Same test as "set(edge) & covered == covered".
            if covered.issubset(edge):
                members.append(edge)
                covered.update(edge)
            else:
                pending.append(edge)

        if len(members) > 1:
            groups.append(members)

    return groups

In [None]:
def equivalence_pipeline(graph):
    """Plot the graph once per equivalence finder, coloured by group.

    For each of the four ``find_*`` functions this computes the groups,
    assigns every grouped node the index of its group as colour value, gives
    all ungrouped nodes one shared extra colour value, and shows a figure.

    Args:
        graph: The networkx graph to analyse and draw.

    Returns:
        None. The figures are shown via ``plt.show()``.
    """
    equivalence_functions = [
        find_structural_equivalent_nodes,
        find_common_neighbor_equivalent_nodes,
        find_automorphic_equivalent_nodes,
        find_edge_equivalent_nodes
    ]

    # The layout is seeded and does not depend on the grouping, so compute it
    # once and reuse it; all figures then share identical node positions.
    pos = nx.spring_layout(graph, seed=42)

    for func in equivalence_functions:
        equivalent_nodes = func(graph)

        color_map = {}
        for i, nodes in enumerate(equivalent_nodes):
            for node in nodes:
                if isinstance(node, tuple):
                    # Tuple members (edges from the edge-based finder) colour
                    # each of their endpoints.
                    for n in node:
                        color_map[n] = i
                else:
                    color_map[node] = i

        # Nodes outside every group share one colour value past the last group index.
        default_color = len(equivalent_nodes)
        node_colors = [color_map.get(node, default_color) for node in graph.nodes()]

        plt.figure(figsize=(25, 25))
        nx.draw(graph, pos, node_color=node_colors, with_labels=False, cmap=plt.cm.tab10)
        plt.title(f'Graph with Nodes Colored by {func.__name__.replace("_", " ").title()} Equivalence Groups')
        plt.show()

In [None]:
# Read the two semicolon-separated posting exports (judging by the file
# names, the first and second half of May 2019).
df1 = pd.read_csv('data/Postings_01052019_15052019.csv', sep=';')
df2 = pd.read_csv('data/Postings_16052019_31052019.csv', sep=';')

# Stack both exports with a fresh row index and keep only the author id,
# the posting id and the id of the posting being replied to.
posting_columns = ['ID_CommunityIdentity', 'ID_Posting', 'ID_Posting_Parent']
df = pd.concat([df1, df2], ignore_index=True)[posting_columns]

In [None]:
# Lookup table: posting id -> id of the user who wrote that posting.
comment_user_mapping = (
    df[['ID_Posting', 'ID_CommunityIdentity']]
    .drop_duplicates()
    .rename(columns={'ID_Posting': 'Id_posting', 'ID_CommunityIdentity': 'ID_ParentIdentity'})
)

# Attach the parent posting's author to every posting. The left join keeps
# postings whose parent is not in the lookup; they get NaN as parent author.
result_df = df.merge(comment_user_mapping, left_on='ID_Posting_Parent', right_on='Id_posting', how='left')
result_df = result_df[['ID_CommunityIdentity', 'ID_ParentIdentity']]

# Count replies per (author, parent author) pair. groupby skips rows with a
# NaN key by default, so postings without a known parent author drop out.
reply_counts = (
    result_df
    .groupby(['ID_CommunityIdentity', 'ID_ParentIdentity'])
    .size()
    .reset_index(name='counts')
)

Unnamed: 0,ID_CommunityIdentity,ID_ParentIdentity
0,671476,233191.0
1,566938,640123.0
2,669286,680772.0
3,671476,51817.0
4,671476,


In [None]:

# Upper bound on the number of (author, parent author) pairs that go into the
# graph. The rows kept are simply the first MAX_EDGES rows of reply_counts in
# their current order; they are not sampled or ranked by reply count.
MAX_EDGES = 500
reply_counts = reply_counts.iloc[:MAX_EDGES]

In [None]:
# Build an undirected graph with one edge per (author, parent author) row and
# the reply count stored as the edge attribute 'counts'.
# NOTE(review): an undirected graph merges A->B and B->A into one edge;
# presumably 'counts' then holds only the value of the later row - confirm.
G = nx.from_pandas_edgelist(
    reply_counts,
    source='ID_CommunityIdentity',
    target='ID_ParentIdentity',
    edge_attr='counts',
    create_using=nx.Graph(),
)

In [None]:
# Overview plot of the whole reply graph on one large canvas.
# Node labels are switched off. No positions are passed, so draw_networkx
# chooses the node layout itself.
fig = plt.figure(figsize=(42,42))
nx.draw_networkx(G, with_labels=False)
plt.show()