In [None]:
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import cv2 
import networkx as nx 
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

In [None]:
def read_edges(filename,is_directed=False):
    """Load a whitespace-separated edge list file into a networkx graph.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. Every other line must start with two integer node ids
    (source, target); any further columns on the line are ignored.

    Parameters
    ----------
    filename : str or path-like
        Path of the UTF-8 text file to read.
    is_directed : bool, default False
        Build an ``nx.DiGraph`` when true, otherwise an ``nx.Graph``.

    Returns
    -------
    nx.Graph or nx.DiGraph
        Graph containing one edge per data line.

    Raises
    ------
    ValueError
        If a data line has fewer than two columns or a node id that is not
        an integer.
    """
    graph = nx.DiGraph() if is_directed else nx.Graph()

    # The context manager guarantees the handle is closed even when a
    # malformed line raises part-way through the file.
    with open(filename, 'r', encoding='utf-8') as file:
        for line_number, raw_line in enumerate(file, start=1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            fields = line.split()
            if len(fields) < 2:
                raise ValueError(
                    "{0}:{1}: expected at least two columns, got {2!r}".format(
                        filename, line_number, line
                    )
                )

            # Only the first two columns are node ids; extra columns
            # (e.g. a weight or timestamp) are deliberately ignored.
            graph.add_edge(int(fields[0]), int(fields[1]))

    return graph


# Load the edge list as an undirected graph (is_directed=False); the result is
# the module-level `graph` used by the later cells.
graph = read_edges("./wiki-Vote.txt/Wiki-Vote.txt",False)

In [3]:
"""
def create_adjacency_matrix(graph):
    n = graph.number_of_nodes()
    adj_matrix = np.zeros((n,n))
    lookup_table = dict()
    index = 0
    for edge in graph.edges:
        if edge[0] not in lookup_table:
            lookup_table[edge[0]] = index
            index += 1
        if edge[1] not in lookup_table:
            lookup_table[edge[1]] = index
            index += 1  
        
        adj_matrix[lookup_table[edge[0]]][lookup_table[edge[1]]] = 1
        adj_matrix[lookup_table[edge[1]]][lookup_table[edge[0]]] = 1

    return adj_matrix,lookup_table

def create_degree_matrix(adj_matrix):
    n = adj_matrix.shape[0]
    degree_matrix = np.zeros((n,n))
    for i in range(n):
        degree_matrix[i][i] = np.sum(adj_matrix[i])
    return degree_matrix


adj_mat,lt = create_adjacency_matrix(graph)
degree_mat = create_degree_matrix(adj_mat)
"""


In [4]:
"""
def calculate_modularity(graph,partitions,adj_mat,lt):
    clusters = np.zeros(adj_mat.shape[0])
    _keys = list(lt.keys())

    for i in range(adj_mat.shape[0]):
        for j in range(len(partitions)):
            if _keys[i] in partitions[j]:
                clusters[i] = j
                break
            
    n = adj_mat.shape[0]
    m = int(np.sum(adj_mat.flatten())/2)
    degree_mat = create_degree_matrix(adj_mat)
    modularity = 0
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            a_ij = 1 if adj_mat[i][j] == 1 else 0

            k_i = degree_mat[i,i]
            k_j = degree_mat[j,j]
            modularity += (a_ij - (k_i*k_j)/(2*m))*1 if clusters[i] == clusters[j] else 0

    return modularity/(2*m)

   
def calculate_modularity_matrix_B(graph,adj_mat):
    m = int(np.sum(adj_mat.flatten())/2)
    n = adj_mat.shape[0]
    degree_mat = create_degree_matrix(adj_mat)
    B = np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            B[i][j] = adj_mat[i][j] - (degree_mat[i][i]*degree_mat[j][j])/(2*m)

    return B
"""

In [5]:
def power_method_eigenvalue_and_eigen_vector(B, num_iterations=1000):
    """Approximate an eigenvector of ``B`` by power iteration.

    Starting from a random vector with entries in [0, 1), the vector is
    repeatedly multiplied by ``B`` and rescaled to unit Euclidean norm.
    Despite the function name, only the vector is returned (no eigenvalue).

    Parameters
    ----------
    B : array-like with ``.shape``
        Square matrix of shape (n, n).
    num_iterations : int, default 1000
        Number of multiply-and-normalise steps to perform.

    Returns
    -------
    Column vector of shape (n, 1) with unit norm (empty when n == 0).

    Notes
    -----
    The start vector comes from ``np.random.rand``, so results depend on
    NumPy's global random state; seed it beforehand for reproducible output.
    """
    n = B.shape[0]
    x = np.random.rand(n).reshape(n, 1)
    x = x/np.linalg.norm(x)

    for _ in range(num_iterations):
        product = np.dot(B,x)
        norm = np.linalg.norm(product)
        if norm == 0:
            # B maps the current vector to zero, so rescaling would be 0/0
            # and fill the result with NaNs. Keep the last unit vector and
            # stop: further iterations cannot change it.
            break
        x = product/norm

    return x
import networkx.algorithms.community as nx_comm

def _leading_eigenvector_split(graph):
    """Bisect ``graph`` by the sign of the leading eigenvector of its modularity matrix.

    Returns ``(pos_cluster, neg_cluster)`` as numpy arrays of node labels.
    One of the two arrays is empty when no two-sided split exists.
    """
    nodes = np.array(graph.nodes())

    if graph.number_of_edges() == 0:
        # B_ij = A_ij - k_i*k_j/(2m) is undefined for m == 0, so a sub-graph
        # without edges is reported as a one-sided (i.e. impossible) split
        # instead of being handed to the eigen-solver.
        return nodes, nodes[:0]

    # np.asarray makes the code independent of whether networkx hands back an
    # ndarray or an np.matrix.
    B = np.asarray(nx.modularity_matrix(graph,weight=None), dtype=float)

    # The modularity matrix of an undirected graph is symmetric, so eigh
    # applies and guarantees real eigenvalues/eigenvectors. The overall sign
    # of an eigenvector is arbitrary, hence so is which side is "positive".
    eigen_values, eigen_vectors = np.linalg.eigh(B)
    eigen_vector = eigen_vectors[:, np.argmax(eigen_values)]

    is_positive = eigen_vector >= 0
    return nodes[is_positive], nodes[~is_positive]


def _save_modularity_history_plot(modularity_history, community_id):
    """Save the modularity history of one finished community as a PNG file."""
    sns.set_style("darkgrid")
    fig, ax = plt.subplots(figsize=(8,6))
    ax.plot(modularity_history)
    ax.set_title("Changes of modularity until this sub-graph - #"+str(community_id))
    ax.set_xlabel("Number of devide until this sub-graph")
    ax.set_ylabel("Modularity")
    ax.set_xticks(list(range(len(modularity_history))))
    fig.savefig("P4 - Modularity Changes-{0}.png".format(community_id))
    # Close (not just clear) the figure so figures do not pile up in memory
    # and in the notebook output, one per finished community.
    plt.close(fig)


def modularity_optimization(MainGraph):
    """Detect communities by repeated leading-eigenvector bisection.

    The node set of ``MainGraph`` is kept on a stack of candidate communities.
    Each candidate is split in two by the sign of the leading eigenvector of
    its sub-graph's modularity matrix. The split is kept unless the modularity
    of the resulting partition of ``MainGraph`` is lower than the best value
    so far; otherwise the candidate is final. Candidates that cannot be split
    (no internal edges, or all nodes on one side) are final as well.

    Side effects: prints a progress tree to stdout and, for every final
    community, writes ``P4 - Modularity Changes-<id>.png`` to the current
    working directory.

    Parameters
    ----------
    MainGraph : networkx graph
        Graph to partition. It is passed to ``nx.modularity_matrix``, so it
        is expected to be undirected.

    Returns
    -------
    list of set
        The final communities, each a set of node labels.
    """
    Community = [set(np.array(MainGraph.nodes()))]
    CommunityModularity = [[0]]
    Current = 0
    Passed = []
    CommunitySpaces = ["\t"]
    I = 1

    print("********************> Tree of Processing <********************\n")
    while len(Community) != 0:
        print()
        # The three stacks are kept in lock-step: candidate, its indentation
        # prefix, and the modularity history along its branch of the tree.
        current_community = Community.pop(-1)
        current_space = CommunitySpaces.pop(-1)
        current_space = current_space.replace("\t","--------") + "> "
        current_modularity = CommunityModularity.pop(-1)

        print(current_space+"Community #",I)
        I += 1

        graph = MainGraph.subgraph(current_community)
        print(current_space+"Length of current sub-graph : ",len(graph.nodes()))

        pos_cluster, neg_cluster = _leading_eigenvector_split(graph)
        print(current_space+"Length of positive cluster : ",len(pos_cluster))
        print(current_space+"Length of negative cluster : ",len(neg_cluster))

        if len(pos_cluster) == 0 or len(neg_cluster) == 0:
            # A one-sided split would push the very same community back on
            # the stack with unchanged modularity, so it must be finalised
            # here or the loop would never terminate.
            print(current_space+"Result: This sub-graph cannot be split into two non-empty parts. So, we will not devide this sub-graph.")
            _save_modularity_history_plot(current_modularity, I-1)
            Passed.append(current_community)
            continue

        tmp_community = Community.copy()
        tmp_community.append(set(neg_cluster))
        tmp_community.append(set(pos_cluster))

        tmp_spaces = CommunitySpaces.copy()
        tmp_spaces.append(current_space+"\t")
        tmp_spaces.append(current_space+"\t")

        # Pending candidates + the two new halves + finished communities
        # together form a full partition of MainGraph.
        new_modularity = nx_comm.modularity(MainGraph,tmp_community+Passed,weight=None)
        current_modularity.append(new_modularity)

        print(current_space+"Current modularity:",round(Current,4),"New modularity if sub-graph devide:",round(new_modularity,4))

        if new_modularity < Current:
            print(current_space+"Result: Current modularity is better than new modularity if sub-graph devide. So, we will not devide this sub-graph.")
            _save_modularity_history_plot(current_modularity, I-1)
            Passed.append(current_community)
            continue

        print(current_space+"Result: New modularity is better than current modularity. So, we will devide this sub-graph.")
        print(current_space+"Length of each Community after devide : ",[len(x) for x in tmp_community])
        print(current_space+"New IDs of each Community after devide : ",[I,I+1])
        Current = new_modularity
        Community = tmp_community
        CommunitySpaces = tmp_spaces
        # Each half gets its own copy of the history so the branches do not
        # share (and mutate) one list.
        CommunityModularity.append(current_modularity.copy())
        CommunityModularity.append(current_modularity.copy())

    return Passed

    
# Partition the loaded graph, then persist the result: one .npy file per
# community plus a single text file mapping every node to its community.
communities = modularity_optimization(graph)

print("Members of each Community: ")
# The context manager closes the membership file even if np.save or a write
# fails part-way through.
with open("Memberships of each node in graph.txt","w") as out:
    for community_id, members in enumerate(communities, start=1):
        print("Community #",community_id,": ",len(members))
        tmp_com = np.array(list(members))
        np.save("P4 - Members of Community-{0}.npy".format(community_id),tmp_com)
        for node in tmp_com:
            out.write("Node "+str(node)+" -> Community #"+str(community_id)+"\n")


********************> Tree of Processing <********************


--------> Community # 1
--------> Length of current sub-graph :  7115
--------> Length of positive cluster :  3347
--------> Length of negative cluster :  3768
--------> Current modularity: 0 New modularity if sub-graph devide: 0.3378
--------> Result: New modularity is better than current modularity. So, we will devide this sub-graph.
--------> Length of each Community after devide :  [3768, 3347]
--------> New IDs of each Community after devide :  [2, 3]

--------> --------> Community # 2
--------> --------> Length of current sub-graph :  3347
--------> --------> Length of positive cluster :  1729
--------> --------> Length of negative cluster :  1618
--------> --------> Current modularity: 0.3378 New modularity if sub-graph devide: 0.3546
--------> --------> Result: New modularity is better than current modularity. So, we will devide this sub-graph.
--------> --------> Length of each Community after devide :  [3768, 16

<Figure size 576x432 with 0 Axes>

<Figure size 576x432 with 0 Axes>

<Figure size 576x432 with 0 Axes>

<Figure size 576x432 with 0 Axes>