# Igraph clusters

In [1]:
from itertools import chain

import igraph as ig
import leidenalg as la
import networkx as nx
import pandas as pd, numpy as np, matplotlib.pyplot as plt

from get_graph_from_data import get_graph, draw

Load the trip data and build the graphs using the data-cleaning process from spectral_clustering.

In [2]:
# Location of the raw trip records, relative to this notebook.
TAXI_TRIPS_CSV = '../data/chicago_taxi_trips_2016_11.csv'

# get_graph returns four objects. G_un is a networkx graph (it is converted with
# ig.Graph.from_networkx further down); presumably G_di/G_un are the directed and
# undirected graphs and A_di/A_un their adjacency matrices -- TODO confirm against
# get_graph_from_data.
G_di, A_di, G_un, A_un = get_graph(data=pd.read_csv(TAXI_TRIPS_CSV), head=False)

# Uncomment to draw the graphs with the project helper:
# draw(G_di)
# draw(G_un)

In [3]:
# Convert the undirected networkx graph to an igraph Graph; the Leiden and
# walktrap community detection below run on this igraph object.
G_un_ig = ig.Graph.from_networkx(G_un)


Leiden clusters

In [4]:
# Leiden partition optimising modularity. The optimiser is randomised, so a fixed
# seed is passed to get the same partition on every Restart & Run All.
partition = la.find_partition(G_un_ig, la.ModularityVertexPartition, seed=42)

In [5]:
# Leiden partition using the Constant Potts Model (CPM) quality function at a
# single resolution (gamma = 0.05). A fixed seed makes the randomised optimiser
# repeatable across runs.
partition2 = la.find_partition(G_un_ig, la.CPMVertexPartition,
                               resolution_parameter=0.05, seed=42)

In [6]:
# Sweep the CPM resolution parameter (gamma) and record, for every node, the
# cluster it is assigned to at each gamma. Different gammas give clusterings of
# different sizes.

# Gammas 0.02, 0.04, ..., 0.18. They are built from an integer range because
# np.arange with a float step can gain or lose its last element through rounding.
gammas = np.round(np.arange(1, 10) * 0.02, 2)

# The Leiden optimiser is randomised; a fixed seed makes the sweep repeatable.
LEIDEN_SEED = 42

g_ls = []       # gamma, repeated once per node
cl_num_ls = []  # number of clusters found at that gamma, repeated once per node
nodes = []      # node numbers
node_mems = []  # cluster membership of each node

for gamma in gammas:
    # Use a dedicated name so the modularity result held in `partition` is not
    # overwritten by the sweep.
    cpm_partition = la.find_partition(G_un_ig, la.CPMVertexPartition,
                                      resolution_parameter=gamma,
                                      seed=LEIDEN_SEED)
    membership = cpm_partition.membership
    n_nodes = len(membership)

    # Number of distinct cluster labels at this gamma.
    n_clust = len(set(membership))

    g_ls.append([gamma] * n_nodes)
    cl_num_ls.append([n_clust] * n_nodes)

    # Nodes are numbered by their position in the membership list, i.e. the
    # igraph vertex index 0..n-1.
    # NOTE(review): this is not necessarily the original networkx node label --
    # confirm which one consumers of the exported table expect.
    nodes.append(list(range(n_nodes)))
    node_mems.append(membership)

# One row per (gamma, node). Columns: gamma, number of clusters, node number,
# cluster membership. All values are stored as floats because gamma is a float.
node_mem_gamma = np.column_stack((list(chain(*g_ls)),
                                  list(chain(*cl_num_ls)),
                                  list(chain(*nodes)),
                                  list(chain(*node_mems))))

In [7]:
# Write the (gamma, cluster count, node number, membership) table to disk as CSV.
np.savetxt(fname="leiden.csv", X=node_mem_gamma, delimiter=",")

Use the networkx graph (`G_un`) to draw the graph, since pycairo is not working on Zoe's computer. Nodes can be colored using the list of cluster memberships produced by the Leiden algorithm. **TODO:** this is still not working.

In [25]:
# Cluster counts that occurred anywhere in the gamma sweep.
print(set(node_mem_gamma[:,1]))

# Retrieve the cluster membership corresponding to 3 clusters.
# A boolean row mask is used instead of np.where(...): indexing with the tuple
# that np.where returns yields a 2-D array of shape (1, k), not a flat vector.
TARGET_N_CLUSTERS = 3
target_rows = node_mem_gamma[node_mem_gamma[:, 1] == TARGET_N_CLUSTERS]

# Several gammas can produce the same number of clusters. Keep only the rows of
# the first such gamma so the result has one entry per node.
if len(target_rows):
    target_rows = target_rows[target_rows[:, 0] == target_rows[0, 0]]

# 1-D array of integer cluster labels, ordered by node number; empty when no
# gamma produced TARGET_N_CLUSTERS clusters.
n3_node_mem = target_rows[:, 3].astype(int)

# TODO: colour the nodes with this list when drawing G_un, e.g.
# node_color=n3_node_mem, pos=nx.spring_layout(G_un).
# NOTE(review): this assumes node numbers follow the order of list(G_un.nodes) --
# confirm before plotting.

{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}


[77.0,
 76.0,
 73.0,
 72.0,
 67.0,
 61.0,
 60.0,
 59.0,
 56.0,
 49.0,
 44.0,
 43.0,
 42.0,
 41.0,
 40.0,
 39.0,
 38.0,
 37.0,
 36.0,
 35.0,
 34.0,
 33.0,
 32.0,
 31.0,
 30.0,
 29.0,
 28.0,
 25.0,
 24.0,
 23.0,
 22.0,
 21.0,
 20.0,
 19.0,
 17.0,
 16.0,
 15.0,
 14.0,
 13.0,
 12.0,
 11.0,
 10.0,
 8.0,
 7.0,
 6.0,
 5.0,
 4.0,
 3.0,
 2.0,
 1.0,
 75.0,
 74.0,
 71.0,
 70.0,
 69.0,
 68.0,
 66.0,
 65.0,
 64.0,
 63.0,
 62.0,
 58.0,
 57.0,
 53.0,
 52.0,
 50.0,
 48.0,
 46.0,
 45.0,
 27.0,
 26.0,
 18.0,
 9.0,
 51.0,
 55.0,
 54.0,
 47.0]

Community Walktrap (based on random walks)

In [9]:
# Walktrap community detection on the igraph graph.
wc = G_un_ig.community_walktrap()

# Creates an object of type VertexClustering, with the optimal number of clusters.
wc_cl = wc.as_clustering()
print(wc_cl.summary())

node_mem = wc_cl.membership  # cluster membership for each node

# Number each node by its position in the membership list. The count is derived
# from the data rather than hard-coded, so a graph of another size cannot cause a
# length mismatch in column_stack.
node_num = list(range(len(node_mem)))

# Two columns: node number, cluster membership.
wc_node_mem = np.column_stack((node_num, node_mem))

Clustering with 77 elements and 3 clusters


In [13]:
#print(wc_node_mem)

In [10]:
# Save the walktrap (node number, cluster membership) table as CSV.
np.savetxt(fname="walktrap.csv", X=wc_node_mem, delimiter=",")