In [106]:
import os 
import pickle
import directories as dir
import networkx as nx
import community.community_louvain
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.cluster import *
from functions import labels_to_clusters, draw_graph

In [107]:
# Read the weighted projected graph from its edge list; node ids are parsed as strings.
fname = os.path.join(
    dir.graphs_dir,
    "projected_graph_with_integer_labels.edgelist",
)
G = nx.read_weighted_edgelist(path=fname, nodetype=str)

In [108]:
# Load the pickled ground-truth clusters (an iterable of node collections).
# NOTE(review): pickle.load can execute arbitrary code -- only use it on trusted files.
fname = os.path.join(dir.data_dir, "ground_truth_clusters_pickle")
with open(fname, 'rb') as fp:
    clusters = pickle.load(fp)

# Map each node to the index of the cluster that contains it
# (if a node appears in several clusters, the last one wins).
ground_truth_label = {
    member: cluster_id
    for cluster_id, members in enumerate(clusters)
    for member in members
}

# Dense list of labels ordered by integer node id 0..233.
ground_truth_labels = [ground_truth_label[node_id] for node_id in range(234)]

In [109]:
# Convert the flat label list with the project helper, then record the size of each cluster.
ground_truth_clusters = labels_to_clusters(ground_truth_labels)
ground_truth_clusters_size = list(map(len, ground_truth_clusters))


Different Resolution Values Comparison

In [110]:
# Resolution grid for the Louvain sweep: 0.00, 0.05, ..., 1.00.
res = np.arange(start=0, stop=1.05, step=0.05)
# Number of resolution values in the grid.
n = res.size

In [111]:
# One float slot per resolution value for every score; filled in by the sweep loop.
modularity, mi, nmi, ami, rand, hom, comp = (np.zeros(n) for _ in range(7))
# The number of detected communities is an integer count.
clusters = np.zeros(n, dtype=int)

In [112]:
# Sweep the Louvain resolution parameter and score each partition against the ground truth.
LOUVAIN_SEED = 42  # Louvain is randomised; a fixed seed makes the table reproducible on re-run
n_nodes = len(ground_truth_labels)  # nodes are keyed "0" .. str(n_nodes - 1) in the partition

for i in range(n):
    partition = community.community_louvain.best_partition(
        G, resolution=res[i], random_state=LOUVAIN_SEED)

    # Predicted community of every node, in the same node order as ground_truth_labels.
    labels_pred = [partition[str(node)] for node in range(n_nodes)]
    clusters[i] = len(set(labels_pred))

    # metrics
    modularity[i] = community.community_louvain.modularity(partition, G, weight='weight')

    # scikit-learn's signature is (labels_true, labels_pred). MI/NMI/AMI/Rand are
    # symmetric, but homogeneity and completeness swap when the arguments are
    # exchanged, so the ground truth must come first.
    mi[i] = mutual_info_score(ground_truth_labels, labels_pred)
    nmi[i] = normalized_mutual_info_score(ground_truth_labels, labels_pred)
    ami[i] = adjusted_mutual_info_score(ground_truth_labels, labels_pred)
    rand[i] = rand_score(ground_truth_labels, labels_pred)
    hom[i] = homogeneity_score(ground_truth_labels, labels_pred)
    comp[i] = completeness_score(ground_truth_labels, labels_pred)

In [113]:
# Assemble one row per resolution value and persist the table as CSV.
column_names = ['Resolution', 'Clusters', 'Modularity', 'MI', 'NMI', 'AMI', 'Rand', 'Hom', 'Comp']
rows = list(zip(res, clusters, modularity, mi, nmi, ami, rand, hom, comp))
df = pd.DataFrame(rows, columns=column_names)

fname = os.path.join(dir.data_dir, "louvain_metrics.csv")
df.to_csv(fname)

In [None]:
# Number of detected communities as a function of the resolution parameter.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df, x="Resolution", y="Clusters", marker="o", ax=ax)
ax.set_title('Louvain Number of Clusters for different Resolutions')

fname = os.path.join(dir.plots_dir, "louvain_clusters_number.png")
fig.savefig(fname, bbox_inches='tight', pad_inches=0)

In [None]:
# Long format: one row per (resolution, metric) pair so seaborn can colour by metric.
# The value column is named ' ' on purpose so the y-axis label renders blank.
df_temp = df[['Resolution', 'MI', 'NMI', 'AMI']].melt('Resolution', var_name='metrics',
                                                      value_name=' ')

# Draw on an explicit, fresh figure. Relying on the implicit "current figure" only
# works with the inline backend (which closes figures after each cell); with any
# other backend the lines would be painted onto the previous cell's figure.
fig, ax = plt.subplots()
sns.lineplot(x="Resolution", y=" ", hue='metrics', data=df_temp, marker='o', ax=ax)
ax.set_title('Louvain Metrics for different Resolutions')
fname = os.path.join(dir.plots_dir, "louvain_metrics_1.png")
fig.savefig(fname, bbox_inches='tight', pad_inches=0)

In [None]:
# Long format: one row per (resolution, metric) pair so seaborn can colour by metric.
# The value column is named ' ' on purpose so the y-axis label renders blank.
df_temp = df[['Resolution', 'Rand', 'Hom', 'Comp']].melt('Resolution', var_name='metrics',
                                                         value_name=' ')

# Draw on an explicit, fresh figure. Relying on the implicit "current figure" only
# works with the inline backend (which closes figures after each cell); with any
# other backend the lines would be painted onto the previous cell's figure.
fig, ax = plt.subplots()
sns.lineplot(x="Resolution", y=" ", hue='metrics', data=df_temp, marker='o', ax=ax)
ax.set_title('Louvain Metrics for different Resolutions')
fname = os.path.join(dir.plots_dir, "louvain_metrics_2.png")
fig.savefig(fname, bbox_inches='tight', pad_inches=0)

Resolution = 0.95

In [118]:
# Louvain partition at the chosen resolution. The algorithm is randomised, so the
# seed is fixed to keep community ids and sizes stable across re-runs.
LOUVAIN_SEED = 42
partition = community.community_louvain.best_partition(
    G, resolution=0.95, random_state=LOUVAIN_SEED)

# Predicted community per node, aligned with ground_truth_labels (node ids "0", "1", ...).
labels = [partition[str(node)] for node in range(len(ground_truth_labels))]
louvain_clusters = labels_to_clusters(labels)
louvain_clusters_size = [len(cluster) for cluster in louvain_clusters]

In [119]:
# Write the two cluster-size lists side by side for comparison.
# (The file has a .csv extension but holds two plain-text lines.)
fname = os.path.join(dir.tables_dir, "clusters_size_comparison.csv")
with open(fname, mode='w') as f:
    f.writelines([
        f'Ground truth clusters: {ground_truth_clusters_size}\n',
        f'Louvain clusters: {louvain_clusters_size}',
    ])

In [120]:
# Scores of the resolution-0.95 partition.
# NOTE: these scalars reuse the names of the per-resolution arrays from the sweep;
# re-create the arrays before re-running the sweep loop.
modularity = community.community_louvain.modularity(partition, G, weight='weight')

# scikit-learn's signature is (labels_true, labels_pred). MI/NMI/AMI/Rand are
# symmetric, but homogeneity and completeness swap when the arguments are
# exchanged, so the ground truth must come first.
mi = mutual_info_score(ground_truth_labels, labels)
nmi = normalized_mutual_info_score(ground_truth_labels, labels)
ami = adjusted_mutual_info_score(ground_truth_labels, labels)
rand = rand_score(ground_truth_labels, labels)
hom = homogeneity_score(ground_truth_labels, labels)
comp = completeness_score(ground_truth_labels, labels)

Graph Visualisation

In [121]:
# Size of every community in the partition, keyed by community id in ascending order.
# The ids are taken from the partition itself rather than assuming exactly four
# communities, and the nodes are counted in a single pass.
party_length = {community_id: 0 for community_id in sorted(set(partition.values()))}
for community_id in partition.values():
    party_length[community_id] += 1

{0: 131, 1: 22, 2: 9, 3: 72}

In [123]:
# Assign one colour per community by size rank instead of by exact size.
# The original keyed on the sizes 9 / 22 / 72 of one particular run; any other
# partition would have coloured (almost) everything blue. For sizes
# 9 < 22 < 72 < 131 this yields the same mapping: k, g, r and b for the largest.
LARGEST_COLOR = 'b'
SMALLER_COLORS = ['k', 'g', 'r', 'c', 'm', 'y']  # smallest community first; cycles if exhausted

# Non-empty communities ordered from smallest to largest (ties keep dict order).
ranked = sorted(
    (community_id for community_id, size in party_length.items() if size > 0),
    key=party_length.get,
)

# Default every community (including empty ones) to the largest-community colour,
# then override all but the largest according to their size rank.
colors = {community_id: LARGEST_COLOR for community_id in party_length}
for rank, community_id in enumerate(ranked[:-1]):
    colors[community_id] = SMALLER_COLORS[rank % len(SMALLER_COLORS)]

{0: 'b', 1: 'g', 2: 'k', 3: 'r'}

In [None]:
# Label each node with its ground-truth cluster and colour it by Louvain community.
labels = {}
color_map = []
for node in G.nodes():
    labels[node] = ground_truth_label[int(node)]  # ground truth is keyed by integer node id
    color_map.append(colors[partition[node]])

fname = os.path.join(dir.plots_dir, "louvain_graph.png")
draw_graph(G, labels, color_map, fname)