## A3. Community detection

### Imports & Settings

In [None]:
## get the community module 
!pip3 install -qq python-louvain
!pip3 install -qq networkx

In [None]:
## Load IPython's autoreload extension and switch it to mode 2, so that
## edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Imports

In [None]:
## Mount Google Drive at /content/drive (Colab only); the working
## directory used by the rest of the notebook lives inside this mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
## change to the work dir
import os

WORK_DIR = "./drive/MyDrive/Cursos/3_Community_detection"
## Drive is mounted under /content (see drive.mount above). Anchoring the
## relative WORK_DIR there keeps this cell re-runnable: a plain
## os.chdir(WORK_DIR) executed a second time would be resolved against the
## already-changed working directory and fail.
os.chdir(os.path.normpath(os.path.join("/content", WORK_DIR)))

In [None]:
## import libraries 
import networkx as nx 
import matplotlib.pyplot as plt
from community import community_louvain

In [None]:
## helper functions 
from src.helpers.community import NetworkXCommunityAlgs

from src.helpers.helpers import read_clu,lol2idx,dict_vals_to_list,load_graph_coords
from src.helpers.metrics import (nmi,
                                 jaccard_index,
                                 rand_index,
                                 nvi_from_nmi)
from src.helpers.plotters import plot_graph_partition_original

### Defining Paths & Variables

In [None]:
## Configuration helpers from the project package
from src.helpers.config import config_dict,make_net_file_dict
## Directories (relative to the working directory) holding the input
## networks and the generated images
DATA_DIR = './data'
IMG_DIR = './imgs'
## Build the config dictionary for the data directory
CONFIG = config_dict(dir=DATA_DIR)
## Derive the net files and the file dictionary from the config.
## FILE_DICT is indexed by network type below ("toy", "model", "real").
NET_FILES, FILE_DICT = make_net_file_dict(CONFIG)

### Plotting Original Graphs & Partitions

In [None]:
## Plot settings shared by every reference figure
FIGURE_SIZE = (20, 10)
## Passed through as `visualize`; change it to control whether plots are
## shown while they are being generated.
VISUALIZE = True

## One call per network type; drawing is done with NetworkX on matplotlib.
for net_type, net_type_files in FILE_DICT.items():
    plot_graph_partition_original(
        data=net_type_files,
        net_type=net_type,
        data_dir=DATA_DIR,
        figure_size=FIGURE_SIZE,
        save_dir=IMG_DIR,
        visualize=VISUALIZE,
    )

In [None]:
!pip3 install python-igraph==0.8.3
!apt install libcairo2-dev pkg-config python3-dev
!pip3 install python-igraph leidenalg cairocffi

In [None]:
import igraph as ig

In [None]:

## Grid 6x6: read the toy grid network with igraph and render it to a PNG
dd = './data/toy/grid-p-6x6.net'
save_dir = "./imgs/toy/network_GRID_P_6x6_.png"
g = ig.read(dd)
## Drawing options forwarded to ig.plot as keyword arguments
visual_style = {
    "edge_width": 0.05,    ## edge width
    "vertex_size": 3,      ## size of the nodes
    "bbox": (300, 300),    ## size of the drawing
    "margin": 10,          ## margin around the graph
}
ig.plot(g, save_dir, **visual_style)


In [None]:

## AIRPORTS UW: read the airports network with igraph and save its image
dy = "./data/real/airports_UW.net"
save_dir = "./imgs/real/network_AIRPORTS_UW_.png"
g = ig.read(dy)
## Drawing options forwarded to ig.plot as keyword arguments
visual_style = {
    "edge_width": 0.05,    ## edge width
    "vertex_size": 3,      ## size of the nodes
    "bbox": (720, 480),    ## size of the drawing
    "margin": 10,          ## margin around the graph
}
ig.plot(g, save_dir, **visual_style)

### Calculating Partitions for each graph

In [None]:
from src.helpers.partitions import make_best_partition
from src.helpers.metrics import calculate_metrics
from src.helpers.helpers import clean_community


In [None]:
## Column layout of one result row. Key order matters: make_schema pairs
## the keys positionally with the values it is given. Every field starts
## out as None.
SCHEMA = dict.fromkeys(
    (
        "MODEL_TYPE",
        "FILE_NAME",
        "NUM_NODES",
        "PARTITION_ID",
        "METHOD",
        "NUM_PARTITIONS",
        "GEN_PARTITION",
        "NX_NVI",
        "NX_NMI",
        "NX_RAND_IDX",
        "IG_NVI",
        "IG_NMI",
        "IG_RAND_IDX",
    )
)

def make_schema(SCHEMA, update_vals):
    """Build one result row by pairing schema keys with values positionally.

    Args:
        SCHEMA: mapping whose key order defines the columns of a row.
        update_vals: iterable of values, one per schema key, in the same order.

    Returns:
        A new dict mapping each schema key to the value at the same position.

    Raises:
        ValueError: if the number of values differs from the number of keys.
            A bare ``zip`` would silently drop the surplus and produce a row
            with missing or shifted columns.
    """
    keys = list(SCHEMA.keys())
    values = list(update_vals)
    if len(keys) != len(values):
        raise ValueError(
            f"make_schema expected {len(keys)} values, got {len(values)}"
        )
    return dict(zip(keys, values))

In [1]:
import igraph as ig 
def load_igraph(file_name):
  """Read `file_name` with igraph and return the result of `simplify()` on it.

  `simplify()` is called with its default arguments (per the igraph docs
  these remove self-loops and multi-edges).
  """
  return ig.read(file_name).simplify()

def get_igraph_greedy(graph):
  """Community membership from igraph's fast-greedy modularity method.

  Args:
      graph: an igraph graph exposing `community_fastgreedy()`.

  Returns:
      list with one community id per vertex, in vertex order.
  """
  dendrogram = graph.community_fastgreedy()
  clusters = dendrogram.as_clustering()
  ## Return the membership vector itself. Routing it through a dict keyed by
  ## vertex name would collapse vertices that share a name (giving a result
  ## shorter than the vertex count) and would fail when the graph carries no
  ## 'name' attribute.
  return list(clusters.membership)

## LABEL PROPAGATION
def get_igraph_label_prop(graph):
  """Community membership from igraph's label-propagation method.

  Args:
      graph: an igraph graph exposing `community_label_propagation()`.

  Returns:
      list with one community id per vertex, in vertex order.
  """
  clustering = graph.community_label_propagation()
  ## Return the membership vector itself. A dict keyed by vertex name would
  ## collapse vertices sharing a name and needs a 'name' attribute.
  return list(clustering.membership)
## NEWMAN LEADING EIGENVECTOR
def get_igraph_newmann(graph):
  """Community membership from igraph's `community_leading_eigenvector()`.

  Note that this is the leading-eigenvector method, not the Girvan-Newman
  edge-betweenness algorithm.

  Args:
      graph: an igraph graph exposing `community_leading_eigenvector()`.

  Returns:
      list with one community id per vertex, in vertex order.
  """
  clustering = graph.community_leading_eigenvector()
  ## Return the membership vector itself. A dict keyed by vertex name would
  ## collapse vertices sharing a name and needs a 'name' attribute.
  return list(clustering.membership)
def get_igraph_metrics(ofn):
  """Load the network at `ofn` with igraph and run the three detectors on it.

  Returns:
      tuple `(greedy, label_prop, newman)` holding the outputs of
      get_igraph_greedy, get_igraph_label_prop and get_igraph_newmann,
      in that order.

  NOTE(review): callers index this tuple with the same position they use
  for the `algorithms` label list, whose order is different - confirm that
  the pairing is intended.
  """
  graph = load_igraph(ofn)
  return (
      get_igraph_greedy(graph),
      get_igraph_label_prop(graph),
      get_igraph_newmann(graph),
  )

In [None]:
## Display labels for the community-detection methods. get_payload stores
## algorithms[idx] as the METHOD of a result row, and the plotting loop uses
## the same label in the panel titles.
## NOTE(review): "Garvin-Newman" is presumably a misspelling of
## Girvan-Newman; left as is because the label is written to the outputs.
algorithms = ["Garvin-Newman","Greedy","Label-Propagation"]

In [None]:
## make the above for loop into a function 
def get_payload(data, idx, alg, num_nodes, SCHEMA, MODEL, ofn, p_id, algorithms, v, HAS_ORIG_PART, igraph_data, verbose=True):
    """Score one algorithm against a reference partition and build its result row.

    `calculate_metrics` is called twice with `data` as the reference: once
    with `alg[0]` and once with `igraph_data[idx]`. The six resulting values
    are combined with the descriptive fields and passed to `make_schema`.

    Args:
        data: reference partition handed to `calculate_metrics`.
        idx: position of the algorithm; indexes `algorithms` and `igraph_data`.
        alg: algorithm output; only `alg[0]` is scored.
        num_nodes, MODEL, ofn, p_id, HAS_ORIG_PART: stored in the row as given.
        SCHEMA: key layout forwarded to `make_schema`.
        algorithms: label list; `algorithms[idx]` is stored in the row.
        v: sized collection; only `len(v)` is stored.
        igraph_data: indexable igraph results, one per algorithm position.
        verbose: when true, the row is also printed as a markdown table.

    Returns:
        The dict produced by `make_schema`.
    """
    first_nvi, first_nmi, first_rand = calculate_metrics(data=data, community_alg=alg[0])
    second_nvi, second_nmi, second_rand = calculate_metrics(data=data, community_alg=igraph_data[idx])
    row_values = [
        MODEL, ofn, num_nodes, p_id, algorithms[idx], len(v), HAS_ORIG_PART,
        first_nvi, first_nmi, first_rand,
        second_nvi, second_nmi, second_rand,
    ]
    payload = make_schema(SCHEMA, row_values)
    if not verbose:
        return payload
    ## One-row frame, used only to pretty-print the payload
    table = pd.DataFrame.from_records(payload, index=[0], columns=payload.keys())
    print(table.to_markdown(), '\n')
    return payload

In [None]:
from tqdm import tqdm
import pandas as pd
import time
import os
from src.helpers.partitions import calculate_partitions
## 
## When True, get_payload also prints every result row as a markdown table
VERBOSITY = True

## Directory the partition figures are written to; created up front so
## plt.savefig does not fail on a fresh checkout.
os.makedirs("./imgs/partitions", exist_ok=True)


def _score_and_plot(graph, layout, reference, communities, ig_data, rows, *,
                    model, net_name, net_path, num_nodes, partitions, p_id,
                    has_orig_part, reference_title, figure_title, file_suffix=""):
    """Score every algorithm against one reference partition and draw the figure.

    Draws a 1x4 figure: the reference partition in the first panel, then one
    panel per entry of `communities` (panel idx+1, so at most three entries
    fit). For each entry a result row is built with get_payload and appended
    to `rows`. The figure is saved, shown and then closed.

    Args:
        graph, layout: NetworkX graph and node positions passed to nx.draw.
        reference: node colouring / partition used as the comparison baseline.
        communities: iterable of algorithm outputs; entry[0] colours the nodes.
        ig_data: igraph results for this network, indexed by algorithm position.
        rows: list that receives the result rows (mutated in place).
        model, net_name, net_path, num_nodes, partitions, p_id, has_orig_part:
            descriptive values forwarded to get_payload and used in file names.
        reference_title, figure_title: titles of the first panel and the figure.
        file_suffix: appended to the image file name before ".png".
    """
    fig, axs = plt.subplots(1, 4, figsize=(40, 12))
    axs = axs.ravel()
    nx.draw(graph, pos=layout, node_color=reference, ax=axs[0])
    axs[0].set_title(reference_title)
    for idx, alg in enumerate(communities):
        payload = get_payload(reference, idx, alg, num_nodes, SCHEMA, model, net_path, p_id,
                              algorithms, partitions, has_orig_part, ig_data, verbose=VERBOSITY)
        rows.append(payload)
        nx.draw(graph, pos=layout, node_color=alg[0], ax=axs[idx + 1])
        axs[idx + 1].set_title(f"Partition for {algorithms[idx]}")
    plt.suptitle(figure_title)
    ## The file name carries the label of the last algorithm drawn; kept
    ## unchanged so previously generated image paths stay valid.
    name_to_save = f"./imgs/partitions/{model}_{net_name}_{algorithms[idx]}{file_suffix}.png"
    plt.savefig(name_to_save, bbox_inches='tight')
    plt.show()
    ## Release the figure; otherwise every figure of the run stays in memory.
    plt.close(fig)


holder = []
for MODEL in ["toy", "model", "real"]:
    for k, v in FILE_DICT[MODEL].items():
        ## The airports network is not processed in this loop.
        if "airports" in k:
            continue
        ofn = f"{DATA_DIR}/{MODEL}/{k}.net"
        ## load the graph and position
        g, pos = load_graph_coords(ofn)
        num_nodes = len(g.nodes())
        metrics = calculate_partitions(g, pos)  ## communities
        c_metrics = clean_community(metrics)    ## cleaning
        ## igraph detection depends only on the network file, so run it once
        ## per network instead of once per algorithm row.
        ## NOTE(review): get_igraph_metrics returns (greedy, label propagation,
        ## leading eigenvector) while `algorithms` is labelled in a different
        ## order; get_payload pairs both by position, so the IG_* columns may
        ## not describe the METHOD named on the row - confirm.
        ig_data = get_igraph_metrics(ofn)
        shared = dict(model=MODEL, net_name=k, net_path=ofn,
                      num_nodes=num_nodes, partitions=v)

        if len(v) == 0:
            ## No reference partition on disk: compare against a generated one.
            _score_and_plot(g, pos, make_best_partition(g), c_metrics, ig_data, holder,
                            p_id=1, has_orig_part=False,
                            reference_title="Original Partition",
                            figure_title=f"{MODEL}-{k}", **shared)
        elif len(v) == 1:
            _score_and_plot(g, pos, read_clu(v[0]), c_metrics, ig_data, holder,
                            p_id=1, has_orig_part=True,
                            reference_title="Original Partition",
                            figure_title=f"{MODEL}-{k}", **shared)
        else:
            ## Several reference partitions: one figure and one row set per partition.
            for part_idx, part_file in enumerate(v):
                p_id = part_idx + 1
                _score_and_plot(g, pos, read_clu(part_file), c_metrics, ig_data, holder,
                                p_id=p_id, has_orig_part=True,
                                reference_title=f"Original Partition - {p_id}",
                                figure_title=f"{MODEL}-{k} Partition: {p_id}",
                                file_suffix=f"_{p_id}", **shared)

### iGraph Algorithms

In [None]:
## make the above for loop into a function 
def get_payload(data, idx, alg, num_nodes, SCHEMA, MODEL, ofn, p_id, algorithms, v, HAS_ORIG_PART, igraph_data, verbose=True):
    """Build one result row for an igraph run (redefines the earlier get_payload).

    Differs from the earlier definition of the same name only in how `alg`
    is scored: it is converted with `list(alg)` instead of taking `alg[0]`.
    Running this cell replaces the earlier function for the rest of the
    session.

    Args:
        data: reference partition handed to `calculate_metrics`.
        idx: position of the algorithm; indexes `algorithms` and `igraph_data`.
        alg: iterable membership; scored as `list(alg)`.
        num_nodes, MODEL, ofn, p_id, HAS_ORIG_PART: stored in the row as given.
        SCHEMA: key layout forwarded to `make_schema`.
        algorithms: label list; `algorithms[idx]` is stored in the row.
        v: sized collection; only `len(v)` is stored.
        igraph_data: indexable igraph results, one per algorithm position.
        verbose: when true, the row is also printed as a markdown table.

    Returns:
        The dict produced by `make_schema`.
    """
    membership = list(alg)
    nvi, nmi, rand_idx = calculate_metrics(data=data, community_alg=membership)
    ignvi, ignmi, igrand_idx = calculate_metrics(data=data, community_alg=igraph_data[idx])
    descriptive = [MODEL, ofn, num_nodes, p_id, algorithms[idx], len(v), HAS_ORIG_PART]
    scores = [nvi, nmi, rand_idx, ignvi, ignmi, igrand_idx]
    payload = make_schema(SCHEMA, descriptive + scores)
    if verbose:
        as_table = pd.DataFrame.from_records(payload, index=[0], columns=payload.keys())
        print(as_table.to_markdown(), '\n')
    return payload

In [None]:
## Airports network, handled with igraph only: the loop above skips it.
hold1 = []
MODEL = 'real'
AIRPORT = "./data/real/airports_UW.net"
## load the graph with igraph
g = load_igraph(AIRPORT)
num_nodes = len(g.vs())
## first igraph run: one membership per algorithm
metrics = get_igraph_metrics(AIRPORT)
## reference partition to compare against (igraph multilevel communities)
best_partition = g.community_multilevel()
members_best = best_partition.membership
##
v = [0]
HAS_ORIG_PART=False
## get_igraph_metrics returns (greedy, label propagation, leading
## eigenvector), whereas `algorithms` lists the labels as
## (Garvin-Newman, Greedy, Label-Propagation). Reorder the labels to the
## igraph order so each row's METHOD names the algorithm that produced it.
ig_algorithms = [algorithms[1], algorithms[2], algorithms[0]]
## second igraph run, used for the IG_* columns; it does not depend on the
## loop variable, so it is computed once instead of once per algorithm.
ig_data = get_igraph_metrics(AIRPORT)
for idx, alg in enumerate(metrics):
    ## sanity output: both memberships should cover the same number of nodes
    print(len(members_best),len(alg))
    payload = get_payload(members_best,idx,alg,num_nodes,SCHEMA,MODEL, AIRPORT, 1,ig_algorithms,v,HAS_ORIG_PART,ig_data,verbose=False)
    hold1.append(payload)

In [None]:
## final DataFrame with all the data
df = pd.DataFrame(holder)  ## rows from the NetworkX/igraph loop over the toy, model and real networks
dff = pd.DataFrame(hold1)  ## rows from the igraph-only airports run
## ignore_index gives the combined frame one continuous index; without it
## the airports rows would reuse index values 0..n already present in df.
dii = pd.concat([df, dff], ignore_index=True)
## make sure the target directory exists before writing
os.makedirs("./data/model_metrics", exist_ok=True)
dii.to_csv("./data/model_metrics/all_models.csv")  ## final dataframe