In [97]:
%matplotlib inline
import os
import igraph
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt

## Analysis of Genetic Algorithms for Community Detection in Complex Networks

In [85]:
# Read sqlite query results into a pandas DataFrame
con = sqlite3.connect("ClusterResults.db")
df = pd.read_sql_query("SELECT * from ClusterResults", con)

# Verify that result of SQL query is stored in the dataframe
con.close()
df.head()

Unnamed: 0,algo,seed,file,average_scores,best_scores,final_score,params_file,membership,nmi,rand,vi
0,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.22118123372395834\n0.3141703287760417\n0.362...,0.4798583984375\n0.5032958984375\n0.5032958984...,0.65234375,impl_GACD/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0
1,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.21961181640625\n0.3170951334635417\n0.374086...,0.46240234375\n0.516357421875\n0.5650634765625...,0.65234375,impl_GACD/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0
2,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.22058553059895833\n0.30173177083333336\n0.34...,0.4945068359375\n0.4945068359375\n0.5831298828...,0.65234375,impl_GACD/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0
3,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.21824951171875\n0.306690673828125\n0.3638411...,0.4951171875\n0.4951171875\n0.4951171875\n0.52...,0.65234375,impl_GACD/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0
4,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.21831787109375\n0.31358642578125\n0.37324422...,0.4573974609375\n0.467529296875\n0.50354003906...,0.65234375,impl_GACD/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0


In [162]:
df['membership'] = df['membership'].apply(lambda s: map(int, s.split(',')))
df['best_scores'] = df['best_scores'].apply(lambda s: map(float, s.split('\n')))
df['average_scores'] = df['average_scores'].apply(lambda s: map(float, s.split('\n')))

AttributeError: 'list' object has no attribute 'split'

In [163]:
df['filename'] = df['file'].apply(lambda s: os.path.basename(s))

In [143]:
def getTrueComs(file):
    true_file = file + ".coms"
    if os.path.exists(true_file):
        data = pd.read_csv(true_file, sep='\t', header=None)
        return list(data[1].as_matrix())
    else:
        return []
   

In [144]:
df['true_membership'] = df['file'].apply(lambda s: getTrueComs(s))

## Comparison Metrics

In [152]:
df['nmi'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        igraph.compare_communities(s[0], s[1], method="nmi"), axis=1)

df['vi'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        igraph.compare_communities(s[0], s[1], method="vi"), axis=1)

df['rand'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        igraph.compare_communities(s[0], s[1], method="rand"), axis=1)

df['adjusted_rand'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        igraph.compare_communities(s[0], s[1], method="adjusted_rand"), axis=1)

df['split_join'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        igraph.compare_communities(s[0], s[1], method="split-join"), axis=1)

In [151]:
df.head()

Unnamed: 0,algo,seed,file,average_scores,best_scores,final_score,params_file,membership,nmi,rand,vi,true_membership,adjusted_rand
0,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.221181233724, 0.314170328776, 0.36223754882...","[0.479858398438, 0.503295898438, 0.50329589843...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0
1,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.219611816406, 0.317095133464, 0.37408650716...","[0.46240234375, 0.516357421875, 0.565063476562...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0
2,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.220585530599, 0.301731770833, 0.34991210937...","[0.494506835938, 0.494506835938, 0.58312988281...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0
3,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.218249511719, 0.306690673828, 0.36384114583...","[0.4951171875, 0.4951171875, 0.4951171875, 0.5...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0
4,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.218317871094, 0.313586425781, 0.37324422200...","[0.457397460938, 0.467529296875, 0.50354003906...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0


In [154]:
def identifyConverge(vals):
    """find the earliest occurance of the maximum value in a list
    """
    best = vals[-1]
    if vals[0] == best:
        return 0
    for i in range(len(vals)):
        if vals[i] == best:
            return i
        

In [157]:
df['converge_gen'] = df['best_scores'].apply(identifyConverge)
df['gens'] = df['best_scores'].apply(len)

In [158]:
df.head()

Unnamed: 0,algo,seed,file,average_scores,best_scores,final_score,params_file,membership,nmi,rand,vi,true_membership,adjusted_rand,split_join,converge_gen,gens
0,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.221181233724, 0.314170328776, 0.36223754882...","[0.479858398438, 0.503295898438, 0.50329589843...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0,0.0,14,301
1,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.219611816406, 0.317095133464, 0.37408650716...","[0.46240234375, 0.516357421875, 0.565063476562...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0,0.0,20,301
2,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.220585530599, 0.301731770833, 0.34991210937...","[0.494506835938, 0.494506835938, 0.58312988281...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0,0.0,13,301
3,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.218249511719, 0.306690673828, 0.36384114583...","[0.4951171875, 0.4951171875, 0.4951171875, 0.5...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0,0.0,12,301
4,gacd,176034018033913,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.218317871094, 0.313586425781, 0.37324422200...","[0.457397460938, 0.467529296875, 0.50354003906...",0.65234375,impl_GACD/default.properties,"[1, 2, 3, 4, 2, 3, 3, 4, 2, 1, 2, 4, 2, 1, 3, ...",1.0,1.0,0.0,"[3, 2, 4, 1, 2, 4, 4, 1, 2, 3, 2, 1, 2, 3, 4, ...",1.0,0.0,10,301


In [169]:
gals = df.loc[lambda dd: dd['algo'] == 'gals']
gacd = df.loc[lambda dd: dd['algo'] == 'gacd']
tbga = df.loc[lambda dd: dd['algo'] == 'tasgin']
ganet = df.loc[lambda dd: dd['algo'] == 'ganet']

In [178]:
gals.loc[lambda dd: dd['filename'] == 'lfr_5000_mu6_s_0.gml'].head()

Unnamed: 0,algo,seed,file,average_scores,best_scores,final_score,params_file,membership,nmi,rand,vi,true_membership,adjusted_rand,split_join,converge_gen,gens,filename
1577,gals,106857048018389,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.101096142319, 0.135559745759, 0.14740539522...","[0.1065285776, 0.14316763691, 0.160901115028, ...",0.3494824541231462,impl_GALS/default.properties,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",0.94698,0.99591,0.551511,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.708007,1075.0,68,101,lfr_5000_mu6_s_0.gml
1578,gals,106857048018389,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.101694373436, 0.136393811127, 0.14709994738...","[0.108459178886, 0.143082109333, 0.15884096552...",0.3494611458347529,impl_GALS/default.properties,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 4, 10, 11, 12, 13,...",0.93903,0.99591,0.630192,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.70627,1286.0,95,101,lfr_5000_mu6_s_0.gml
1579,gals,106857048018389,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.100960348994, 0.136867057142, 0.14808944333...","[0.107715911336, 0.142596624619, 0.16343923671...",0.3456865541623581,impl_GALS/default.properties,"[1, 2, 3, 4, 5, 6, 7, 3, 8, 9, 3, 10, 11, 12, ...",0.909912,0.990383,0.908516,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.50053,1830.0,89,101,lfr_5000_mu6_s_0.gml
1580,gals,106857048018389,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.100990306954, 0.136288084886, 0.14790596644...","[0.106984328824, 0.141311480375, 0.15777883890...",0.3397340281423426,impl_GALS/default.properties,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",0.930664,0.995306,0.71659,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.667779,1499.0,74,101,lfr_5000_mu6_s_0.gml
1581,gals,106857048018389,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[0.101736645299, 0.135965136598, 0.14750796183...","[0.108177368867, 0.141854139389, 0.16304526961...",0.3518613307005093,impl_GALS/default.properties,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",0.936761,0.994999,0.649971,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.66488,1388.0,96,101,lfr_5000_mu6_s_0.gml


In [179]:
ganet.loc[lambda dd: dd['filename'] == 'lfr_5000_mu6_s_0.gml'].head()

Unnamed: 0,algo,seed,file,average_scores,best_scores,final_score,params_file,membership,nmi,rand,vi,true_membership,adjusted_rand,split_join,converge_gen,gens,filename
917,ganet,16982857664273,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[667.712393303, 686.213375904, 694.923208276, ...","[783.938342224, 836.416438684, 836.416438684, ...",1048.8220274860007,impl_GANET/default.properties,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",0.42519,0.974767,5.571345,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.024739,8053.0,30,31,lfr_5000_mu6_s_0.gml
918,ganet,17032264871055,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[667.407777029, 685.068958478, 695.323983067, ...","[791.45974868, 797.525535816, 800.920947931, 8...",946.8047512717663,impl_GANET/default.properties,"[1, 2, 3, 4, 5, 6, 3, 7, 8, 9, 10, 11, 12, 3, ...",0.396172,0.957953,5.654423,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.015433,7996.0,30,31,lfr_5000_mu6_s_0.gml
919,ganet,17084283557520,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[668.203332686, 687.311701771, 695.260475932, ...","[776.908281825, 809.931648718, 809.931648718, ...",960.6455645336456,impl_GANET/default.properties,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 2,...",0.410155,0.965681,5.615173,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.018045,8020.0,28,31,lfr_5000_mu6_s_0.gml
920,ganet,17134984099131,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[673.614606343, 687.434007993, 696.88605066, 7...","[837.030612174, 837.030612174, 837.030612174, ...",965.9424027260876,impl_GANET/default.properties,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",0.417352,0.971843,5.62388,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.020349,8099.0,26,31,lfr_5000_mu6_s_0.gml
921,ganet,17186274447294,D:/alien-pineapple/benchmark_gen/gml_files/ben...,"[670.259709049, 688.16860686, 699.335595563, 7...","[777.122164551, 834.636820622, 834.636820622, ...",977.0404258294428,impl_GANET/default.properties,"[1, 2, 3, 4, 5, 6, 7, 5, 2, 8, 9, 10, 11, 12, ...",0.414284,0.968549,5.613624,"[51, 44, 35, 31, 139, 59, 22, 192, 50, 68, 172...",0.019325,8046.0,29,31,lfr_5000_mu6_s_0.gml
