In [1]:
%matplotlib inline
import os
import igraph as ig
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt

## Analysis of Genetic Algorithms for Community Detection in Complex Networks

In [2]:
# Read sqlite query results into a pandas DataFrame.
con = sqlite3.connect("ClusterResults.db")
try:
    df = pd.read_sql_query("SELECT * from ClusterResults", con)
finally:
    # Release the database handle even when the query raises, so a failed
    # run does not leave the connection open in the kernel.
    con.close()

# Verify that result of SQL query is stored in the dataframe
df.head()

Unnamed: 0,algo,seed,file,average_scores,best_scores,final_score,params_file,membership,nmi,rand,vi
0,gals,12785326941766,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.22289794921875\n0.36231892903645835\n0.44061...,0.512451171875\n0.5643310546875\n0.57702636718...,0.65234375,impl_GALS/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0
1,gals,12785326941766,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.22473347981770833\n0.3542476399739583\n0.427...,0.4285888671875\n0.5560302734375\n0.5732421875...,0.65234375,impl_GALS/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0
2,gals,12785326941766,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.228812255859375\n0.365767822265625\n0.446151...,0.4886474609375\n0.50537109375\n0.60400390625\...,0.65234375,impl_GALS/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0
3,gals,12785326941766,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.22015625\n0.34891927083333335\n0.42802083333...,0.4190673828125\n0.551513671875\n0.59521484375...,0.65234375,impl_GALS/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0
4,gals,12785326941766,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.22197306315104168\n0.34857096354166667\n0.43...,0.4366455078125\n0.5635986328125\n0.6212158203...,0.65234375,impl_GALS/default.properties,"1,2,3,4,2,3,3,4,2,1,2,4,2,1,3,2,4,2,3,2,4,1,3,...",0.0,0.0,0.0


In [3]:
# Decode the text-serialised columns into real Python lists.
# List comprehensions are used instead of a bare map(): on Python 3 map()
# returns a one-shot iterator, which has no len(), cannot be indexed and is
# exhausted after a single pass, so later cells that call len() or index
# these values would fail.
# membership: comma-separated integer labels.
df['membership'] = df['membership'].apply(
    lambda s: [int(label) for label in s.split(',')])
# best_scores / average_scores: newline-separated floats.
df['best_scores'] = df['best_scores'].apply(
    lambda s: [float(score) for score in s.split('\n')])
df['average_scores'] = df['average_scores'].apply(
    lambda s: [float(score) for score in s.split('\n')])

In [4]:
# Keep only the last path component of each graph file path.
df['filename'] = df['file'].apply(os.path.basename)

In [5]:
def getTrueComs(file):
    """Load the labels stored in the ``.coms`` file that accompanies ``file``.

    The companion file is expected at ``file + ".coms"`` and is read as a
    tab-separated table without a header row.

    Parameters
    ----------
    file : str
        Path of the graph file; ``".coms"`` is appended to locate the labels.

    Returns
    -------
    list
        Values of the second column (index 1) in file order, or an empty
        list when the companion file does not exist.
    """
    true_file = file + ".coms"
    if not os.path.exists(true_file):
        return []
    data = pd.read_csv(true_file, sep='\t', header=None)
    # Series.as_matrix() was removed in pandas 1.0; tolist() works on old and
    # new pandas alike and yields plain Python scalars.
    return data[1].tolist()
   

In [6]:
# Attach the labels loaded by getTrueComs for each graph file
# (an empty list when no companion .coms file exists).
df['true_membership'] = df['file'].apply(getTrueComs)

## Comparison Metrics

@article{rand1971,
  added-at = {2006-03-21T11:09:44.000+0100},
  author = {Rand, W.M.},
  biburl = {https://www.bibsonomy.org/bibtex/2fd52548cb4bcd8e83dd27e4b55eff1f3/hotho},
  interhash = {1afaf0170bc705a9e49b625f67679ee2},
  intrahash = {fd52548cb4bcd8e83dd27e4b55eff1f3},
  journal = {Journal of the American Statistical Association},
  keywords = {cluster clustering criteria evaluation index rand},
  number = 336,
  pages = {846-850},
  timestamp = {2007-09-18T14:44:34.000+0200},
  title = {Objective criteria for the evaluation of clustering methods},
  volume = 66,
  year = 1971
}



In [7]:
def _compare_to_truth(row, method):
    """Compare one row's detected partition with its ground-truth partition.

    Parameters
    ----------
    row : pandas.Series
        A row holding the 'membership' and 'true_membership' lists.
    method : str
        Method name forwarded to ``ig.compare_communities``.

    Returns
    -------
    The value returned by ``ig.compare_communities``, or None when the row
    has no ground truth (empty 'true_membership').
    """
    # Label-based access: integer keys on a label-indexed row (s[0], s[1])
    # rely on a positional fallback that pandas has deprecated.
    detected = row['membership']
    truth = row['true_membership']
    if not truth:
        return None
    return ig.compare_communities(detected, truth, method=method)


# (result column, igraph method name) pairs; the names differ only for
# split-join, whose column name uses an underscore.
for column, method in (('nmi', 'nmi'),
                       ('vi', 'vi'),
                       ('rand', 'rand'),
                       ('adjusted_rand', 'adjusted_rand'),
                       ('split_join', 'split-join')):
    df[column] = df[['membership', 'true_membership']].apply(
        _compare_to_truth, axis=1, args=(method,))

In [8]:
# NOTE(review): this call is not the last expression in its cell (a function
# definition follows it), so its result is not rendered as cell output.
df.head()
def highlight_max(s):
    '''
    Return one CSS string per element of a Series: a yellow background for
    every element equal to the Series maximum, an empty string otherwise.
    '''
    yellow = 'background-color: yellow'
    peak = s.max()
    styles = []
    for at_peak in (s == peak):
        styles.append(yellow if at_peak else '')
    return styles

In [9]:
def identifyConverge(vals):
    """Find the earliest position whose value equals the final value.

    The reference value is the LAST element of ``vals`` (not the maximum),
    so the result is the first index at which the sequence already held the
    value it ends with.

    Parameters
    ----------
    vals : iterable
        Per-generation scores; any iterable is accepted and is materialised
        into a list first.

    Returns
    -------
    int or None
        Index of the first element equal to the last element. None when
        ``vals`` is empty, or when no element compares equal to the last
        one (e.g. the last value is NaN).
    """
    scores = list(vals)
    if not scores:
        # No generations recorded: nothing to converge to.
        return None
    final = scores[-1]
    for generation, score in enumerate(scores):
        if score == final:
            return generation
    # Reached only when the final value does not equal itself (NaN).
    return None
        

In [10]:
# Index returned by identifyConverge for each run's best-score sequence.
df['converge_gen'] = df['best_scores'].map(identifyConverge)
# Number of entries recorded in each run's best-score sequence.
df['gens'] = df['best_scores'].map(len)

In [12]:
# One sub-frame per algorithm, selected by the value of the 'algo' column.
gals = df[df['algo'] == 'gals']
gacd = df[df['algo'] == 'gacd']
# Rows stored under the 'tasgin' label are kept in the `tbga` variable.
tbga = df[df['algo'] == 'tasgin']
ganet = df[df['algo'] == 'ganet']

In [22]:
def highlight_max(s):
    """
    Map each element of a Series to a CSS string: yellow background where
    the element equals the Series maximum, empty string elsewhere.
    """
    css_for = {True: 'background-color: yellow', False: ''}
    return [css_for[bool(flag)] for flag in s == s.max()]

In [25]:
# NMI per (benchmark file, parameter file) for the GALS runs, reshaped to one
# row per file and one column per parameter file.
# NOTE(review): this table aggregates with max() while the GANET / GACD /
# TasginGA tables use mean() - confirm the difference is intentional.
# pivot() is called with keyword arguments because its parameters are
# keyword-only from pandas 2.0 onwards.
gals_valtable = (
    gals.groupby(['filename', 'params_file'], sort=False)['nmi']
    .max()
    .reset_index()
    .pivot(index='filename', columns='params_file', values='nmi')
    .reset_index()
)
gals_valtable

params_file,filename,default.properties,impl_GALS/default.properties,impl_GALS/large.properties
0,girvan_mu1_0.gml,,1.0,1.0
1,girvan_mu2_0.gml,,1.0,1.0
2,girvan_mu3_0.gml,,1.0,1.0
3,girvan_mu4_0.gml,,1.0,1.0
4,girvan_mu5_0.gml,,0.366633,0.581218
5,girvan_mu6_0.gml,,0.149342,0.144851
6,girvan_mu7_0.gml,,0.086256,0.097474
7,girvan_mu8_0.gml,,0.062592,0.048741
8,lfr_10000_mu3_s_size_0.gml,0.468401,0.469977,0.468929
9,lfr_1000_mu1_b_0.gml,,0.976939,0.992912


In [15]:
# Save the GALS table as LaTeX source.
with open('gals.tex', 'w') as tex_out:
    latex_source = gals_valtable.to_latex()
    tex_out.write(latex_source)

In [16]:
# Mean NMI per (benchmark file, parameter file) for the GANET runs, reshaped
# to one row per file and one column per parameter file.
# pivot() is called with keyword arguments because its parameters are
# keyword-only from pandas 2.0 onwards.
ganet_valtable = (
    ganet.groupby(['filename', 'params_file'], sort=False)['nmi']
    .mean()
    .reset_index()
    .pivot(index='filename', columns='params_file', values='nmi')
)
ganet_valtable

params_file,impl_GANET/balanced_c_m.properties,impl_GANET/default.properties,impl_GANET/high_elite.properties,impl_GANET/large.properties,impl_GANET/low_cross_high_m.properties
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dolphins.gml,,,,,
football.gml,,0.923358,,0.930813,
girvan_mu1_0.gml,,1.0,,1.0,
girvan_mu2_0.gml,,1.0,,1.0,
girvan_mu3_0.gml,,0.035957,,0.241103,
girvan_mu4_0.gml,,0.0,,0.0,
girvan_mu5_0.gml,,0.0,,0.0,
girvan_mu6_0.gml,,0.0,,0.0,
girvan_mu7_0.gml,,0.0,,0.0,
girvan_mu8_0.gml,,0.0,,0.0,


In [17]:
# Save the GANET table as LaTeX source.
with open('ganet.tex', 'w') as tex_out:
    latex_source = ganet_valtable.to_latex()
    tex_out.write(latex_source)

In [18]:
# Mean NMI per (benchmark file, parameter file) for the GACD runs, reshaped
# to one row per file and one column per parameter file.
# pivot() is called with keyword arguments because its parameters are
# keyword-only from pandas 2.0 onwards.
gacd_valtable = (
    gacd.groupby(['filename', 'params_file'], sort=False)['nmi']
    .mean()
    .reset_index()
    .pivot(index='filename', columns='params_file', values='nmi')
)
gacd_valtable

params_file,impl_GACD/default.properties,impl_GACD/large.properties,impl_GACD/low_mut_high_cross.properties
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
dolphins.gml,,,
football.gml,0.777141,0.830657,0.812103
girvan_mu1_0.gml,1.0,1.0,1.0
girvan_mu2_0.gml,0.979866,0.971664,0.989942
girvan_mu3_0.gml,0.805293,0.798257,0.824674
girvan_mu4_0.gml,0.52348,0.523709,0.55138
girvan_mu5_0.gml,0.152782,0.198201,0.172146
girvan_mu6_0.gml,0.07853,0.089735,0.092851
girvan_mu7_0.gml,0.061151,0.040563,0.045473
girvan_mu8_0.gml,0.042013,0.028933,0.030697


In [19]:
# Save the GACD table as LaTeX source.
# Fix: the cell previously wrote ganet_valtable into gacd.tex (copy-paste
# slip), so the GACD file contained the GANET results.
with open('gacd.tex', 'w') as tf:
    tf.write(gacd_valtable.to_latex())

In [20]:
# Mean NMI per (benchmark file, parameter file) for the runs held in `tbga`,
# reshaped to one row per file and one column per parameter file.
# pivot() is called with keyword arguments because its parameters are
# keyword-only from pandas 2.0 onwards.
tbga_valtable = (
    tbga.groupby(['filename', 'params_file'], sort=False)['nmi']
    .mean()
    .reset_index()
    .pivot(index='filename', columns='params_file', values='nmi')
)
tbga_valtable

params_file,impl_TasginGA/default.properties,impl_TasginGA/high_elite.properties,impl_TasginGA/high_init.properties,impl_TasginGA/high_mut.properties,impl_TasginGA/large.properties
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dolphins.gml,,,,,
football.gml,0.741184,0.715898,0.739433,0.741707,0.791795
girvan_mu1_0.gml,1.0,1.0,1.0,1.0,1.0
girvan_mu2_0.gml,0.990121,0.938123,0.977949,0.964862,1.0
girvan_mu3_0.gml,0.901963,0.78012,0.879055,0.896621,1.0
girvan_mu4_0.gml,0.624702,0.506629,0.602239,0.578908,0.830322
girvan_mu5_0.gml,0.147785,0.160739,0.200079,0.17954,0.26086
girvan_mu6_0.gml,0.076576,0.061667,0.076489,0.085948,0.083643
girvan_mu7_0.gml,0.044218,0.050555,0.028483,0.031372,0.031204
girvan_mu8_0.gml,0.02661,0.037776,0.026474,0.029247,0.019529


In [21]:
# Save the table built from the `tbga` runs as LaTeX source.
with open('tasgin.tex', 'w') as tex_out:
    latex_source = tbga_valtable.to_latex()
    tex_out.write(latex_source)

In [None]:
def 