# Igraph clusters

In [1]:
import igraph as ig
import leidenalg as la
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from itertools import chain

The graph data come from the data-cleaning process used for the spectral_clustering analysis.

In [2]:
# Project-local helpers from get_graph_from_data.
from get_graph_from_data import draw, get_graph

# Read the chicago_taxi_trips_2016_11 CSV and hand it to get_graph, which
# returns two graph/matrix pairs.
# NOTE(review): "di"/"un" presumably mean directed/undirected and A_* the
# matching adjacency matrices -- confirm against get_graph_from_data.
G_di, A_di, G_un, A_un = get_graph(
    head=False,
    data=pd.read_csv('../data/chicago_taxi_trips_2016_11.csv'),
)

# Optional visual checks, left disabled:
# draw(G_di)
# draw(G_un)

In [3]:
# Convert G_un (a networkx graph) to an igraph Graph object; the community
# detection cells below all operate on this igraph copy.
G_un_ig = ig.Graph.from_networkx(G_un)


Leiden clusters

In [4]:
# Leiden community detection optimising modularity.
# The optimiser is randomised; a fixed seed makes a fresh
# Restart & Run All produce the same partition every time.
partition = la.find_partition(
    G_un_ig,
    la.ModularityVertexPartition,
    seed=42,  # arbitrary but fixed, for reproducibility
)

In [7]:
# Leiden community detection with the CPM (Constant Potts Model) objective at
# a single resolution of 0.05.
# The optimiser is randomised; a fixed seed makes a fresh
# Restart & Run All produce the same partition every time.
partition2 = la.find_partition(
    G_un_ig,
    la.CPMVertexPartition,
    resolution_parameter=0.05,
    seed=42,  # arbitrary but fixed, for reproducibility
)

In [67]:
# Sweep the CPM resolution parameter (gamma) to get clusters of different
# sizes, and stack the results into one long-format table with one row per
# (gamma, node) pair.
SEED = 42  # Leiden is randomised; a fixed seed keeps the sweep reproducible

# Nine evenly spaced gammas: 0.02, 0.04, ..., 0.18. linspace pins the number of
# values explicitly; np.arange with a float step can gain or lose the final
# value through rounding.
gammas = np.linspace(0.02, 0.18, 9)

per_gamma_rows = []  # one (n_nodes, 4) array per gamma
for gamma in gammas:
    # Use a dedicated name so the modularity result stored in `partition`
    # by the earlier cell is not overwritten by this loop.
    cpm_partition = la.find_partition(G_un_ig, la.CPMVertexPartition,
                                      resolution_parameter=gamma,
                                      seed=SEED)

    membership = np.asarray(cpm_partition.membership)  # cluster id per node
    n_nodes = membership.size
    n_clust = np.unique(membership).size  # number of clusters for this gamma

    per_gamma_rows.append(np.column_stack((
        np.full(n_nodes, gamma),    # value of gamma
        np.full(n_nodes, n_clust),  # number of clusters from this gamma
        np.arange(n_nodes),         # node number
        membership,                 # cluster membership of the node
    )))

# ndarray with columns: gamma, cluster count, node number, node membership
node_mem_gamma = np.vstack(per_gamma_rows)

# Bounded preview of the table (first 10 rows only)
print(node_mem_gamma[:10])

[[0.02 1.   0.   0.  ]
 [0.02 1.   1.   0.  ]
 [0.02 1.   2.   0.  ]
 [0.02 1.   3.   0.  ]
 [0.02 1.   4.   0.  ]
 [0.02 1.   5.   0.  ]
 [0.02 1.   6.   0.  ]
 [0.02 1.   7.   0.  ]
 [0.02 1.   8.   0.  ]
 [0.02 1.   9.   0.  ]]


Community Walktrap (based on random walks)

In [11]:
# Walktrap community detection on the igraph copy of the graph.
wc = G_un_ig.community_walktrap()

# Creates an object of type VertexClustering, with optimal number of clusters
wc_cl = wc.as_clustering()
print(wc_cl.summary())

node_mem = wc_cl.membership  # cluster membership for each node
# Number the nodes from the membership length instead of a hard-coded 77, so
# the two columns stay the same length if the graph size ever changes.
node_num = list(range(len(node_mem)))

# Two-column array: node number, cluster membership
wc_node_mem = np.column_stack((node_num, node_mem))

Clustering with 77 elements and 3 clusters


In [12]:
# Node-number / cluster-membership table built in the walktrap cell.
# The print is left disabled; uncomment to dump every row of the table.
#print(wc_node_mem)

[[ 0  0]
 [ 1  0]
 [ 2  1]
 [ 3  1]
 [ 4  1]
 [ 5  1]
 [ 6  0]
 [ 7  0]
 [ 8  0]
 [ 9  1]
 [10  1]
 [11  1]
 [12  1]
 [13  1]
 [14  1]
 [15  1]
 [16  1]
 [17  1]
 [18  1]
 [19  1]
 [20  0]
 [21  0]
 [22  0]
 [23  0]
 [24  0]
 [25  0]
 [26  0]
 [27  0]
 [28  0]
 [29  0]
 [30  0]
 [31  0]
 [32  0]
 [33  0]
 [34  0]
 [35  0]
 [36  0]
 [37  0]
 [38  0]
 [39  0]
 [40  0]
 [41  0]
 [42  0]
 [43  0]
 [44  0]
 [45  0]
 [46  0]
 [47  0]
 [48  0]
 [49  0]
 [50  1]
 [51  0]
 [52  1]
 [53  1]
 [54  1]
 [55  1]
 [56  1]
 [57  0]
 [58  0]
 [59  0]
 [60  0]
 [61  0]
 [62  0]
 [63  1]
 [64  1]
 [65  1]
 [66  1]
 [67  1]
 [68  1]
 [69  0]
 [70  0]
 [71  0]
 [72  0]
 [73  1]
 [74  1]
 [75  2]
 [76  1]]
