In [141]:
%matplotlib inline
import os
import igraph as ig
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt

## Analysis of Genetic Algorithms for Community Detection in Complex Networks

In [142]:
# Read sqlite query results into a pandas DataFrame
con = sqlite3.connect("ClusterResults.db")
df = pd.read_sql_query("SELECT * from ClusterResults", con)

# Verify that result of SQL query is stored in the dataframe
con.close()
df.head()

Unnamed: 0,algo,seed,file,average_scores,best_scores,final_score,params_file,membership,nmi,rand,vi
0,gals,13369360549290,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.44009612959866123\n0.5461974760784992\n0.605...,0.5421854674251742\n0.6269896726720868\n0.6793...,0.8174163816764717,impl_GALS/default.properties,"1,2,3,1,2,4,5,6,2,6,7,8,9,10,11,5,12,6,13,14,1...",0.0,0.0,0.0
1,gals,13369360549290,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.44102171729926565\n0.5457098792826734\n0.609...,0.5605900188715776\n0.6435192651567331\n0.6994...,0.8183975072639841,impl_GALS/default.properties,"1,2,3,1,2,4,5,6,2,6,7,8,9,10,11,5,12,6,13,14,1...",0.0,0.0,0.0
2,gals,13369360549290,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.4402410463825556\n0.5448017280194865\n0.6035...,0.5558795911697045\n0.6299240468040519\n0.6866...,0.817829138322348,impl_GALS/default.properties,"1,2,3,1,2,4,5,6,2,6,7,8,9,10,11,5,12,6,13,14,1...",0.0,0.0,0.0
3,gals,13369360549290,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.4390068762765545\n0.545385871564504\n0.60710...,0.5386790824759569\n0.6246539036437487\n0.6933...,0.8119665565380684,impl_GALS/default.properties,"1,2,3,1,2,4,5,6,2,6,7,8,9,10,11,5,12,6,13,14,1...",0.0,0.0,0.0
4,gals,13369360549290,D:/alien-pineapple/benchmark_gen/gml_files/ben...,0.43838356561266134\n0.5446626617100198\n0.605...,0.5284876212325604\n0.6271894762815291\n0.7049...,0.8169301644245596,impl_GALS/default.properties,"1,2,3,1,2,4,5,6,2,6,7,8,9,10,11,5,12,6,13,14,1...",0.0,0.0,0.0


In [143]:
df['membership'] = df['membership'].apply(lambda s: map(int, s.split(',')))
df['best_scores'] = df['best_scores'].apply(lambda s: map(float, s.split('\n')))
df['average_scores'] = df['average_scores'].apply(lambda s: map(float, s.split('\n')))

In [144]:
df['filename'] = df['file'].apply(lambda s: os.path.basename(s))

In [145]:
def getTrueComs(file):
    true_file = file + ".coms"
    if os.path.exists(true_file):
        data = pd.read_csv(true_file, sep='\t', header=None)
        return list(data[1].as_matrix())
    else:
        return []
   

In [146]:
df['true_membership'] = df['file'].apply(lambda s: getTrueComs(s))

## Comparison Metrics

@article{rand1971,
  added-at = {2006-03-21T11:09:44.000+0100},
  author = {Rand, W.M.},
  biburl = {https://www.bibsonomy.org/bibtex/2fd52548cb4bcd8e83dd27e4b55eff1f3/hotho},
  interhash = {1afaf0170bc705a9e49b625f67679ee2},
  intrahash = {fd52548cb4bcd8e83dd27e4b55eff1f3},
  journal = {Journal of the American Statistical Association },
  keywords = {cluster clustering criteria evaluation index rand},
  number = 336,
  pages = {846-850},
  timestamp = {2007-09-18T14:44:34.000+0200},
  title = {Objective criteria for the evaluation of clustering methods},
  volume = 66,
  year = 1971
}



In [147]:
df['nmi'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        ig.compare_communities(s[0], s[1], method="nmi"), axis=1)

df['vi'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        ig.compare_communities(s[0], s[1], method="vi"), axis=1)

df['rand'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        ig.compare_communities(s[0], s[1], method="rand"), axis=1)

df['adjusted_rand'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        ig.compare_communities(s[0], s[1], method="adjusted_rand"), axis=1)

df['split_join'] = df[['membership', 'true_membership']].apply(lambda s: 
                                                        None if s[1] == [] else 
                                                        ig.compare_communities(s[0], s[1], method="split-join"), axis=1)

In [148]:
df.head()
def highlight_max(s):
    '''
    highlight the maximum in a Series yellow.
    '''
    is_max = s == s.max()
    return ['background-color: yellow' if v else '' for v in is_max]

In [149]:
def identifyConverge(vals):
    """find the earliest occurance of the maximum value in a list
    """
    best = vals[-1]
    if vals[0] == best:
        return 0
    for i in range(len(vals)):
        if vals[i] == best:
            return i
        

In [150]:
df['converge_gen'] = df['best_scores'].apply(identifyConverge)
df['gens'] = df['best_scores'].apply(len)

In [151]:
gals = df.loc[lambda dd: dd['algo'] == 'gals']
gacd = df.loc[lambda dd: dd['algo'] == 'gacd']
tbga = df.loc[lambda dd: dd['algo'] == 'tasgin']
ganet = df.loc[lambda dd: dd['algo'] == 'ganet']

In [152]:
def highlight_max(s):
    """
    highlight the maximum in a Series yellow.
    """
    is_max = s == s.max()
    return ['background-color: yellow' if v else '' for v in is_max]

In [153]:
gals_valtable = gals.groupby(['filename','params_file'], sort=False)['nmi'].max().reset_index().pivot('filename', 'params_file', 'nmi').reset_index()
gals_valtable

params_file,filename,default.properties,impl_GALS/default.properties,impl_GALS/large.properties
0,girvan_kout_10_0.gml,,0.104429,0.100112
1,girvan_kout_11_0.gml,,0.122744,0.084893
2,girvan_kout_12_0.gml,,0.105214,0.079567
3,girvan_kout_13_0.gml,,0.065108,0.060205
4,girvan_kout_14_0.gml,,0.06138,0.045216
5,girvan_kout_15_0.gml,,0.058146,0.046489
6,girvan_kout_1_0.gml,,1.0,1.0
7,girvan_kout_2_0.gml,,1.0,1.0
8,girvan_kout_3_0.gml,,1.0,1.0
9,girvan_kout_4_0.gml,,1.0,1.0


In [154]:
with open('gals.tex','w') as tf:
    tf.write(gals_valtable.to_latex())

In [155]:
ganet_valtable = ganet.groupby(['filename','params_file'], sort=False)['nmi'].mean().reset_index().pivot('filename', 'params_file', 'nmi')
ganet_valtable

params_file,impl_GANET/balanced_c_m.properties,impl_GANET/default.properties,impl_GANET/high_elite.properties,impl_GANET/high_r.properties,impl_GANET/higher_r.properties,impl_GANET/large.properties,impl_GANET/large_high_r.properties,impl_GANET/low_cross_high_m.properties
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
dolphins.gml,,,,,,,,
football.gml,,0.923358,,,,0.930813,,
girvan_kout_10_0.gml,0.0,0.0,0.0,0.01012,0.039699,0.0,0.040699,0.0
girvan_kout_11_0.gml,0.0,0.0,0.0,0.006953,0.030228,0.0,0.032533,0.0
girvan_kout_12_0.gml,0.0,0.0,0.0,0.01066,0.026523,0.0,0.026896,0.0
girvan_kout_13_0.gml,0.0,0.0,0.0,0.004973,0.027051,0.0,0.016928,0.0
girvan_kout_14_0.gml,0.0,0.0,0.0,0.008069,0.015506,0.0,0.012643,0.0
girvan_kout_15_0.gml,0.0,0.0,0.0,0.006643,0.016396,0.0,0.013813,0.0
girvan_kout_1_0.gml,0.997483,1.0,1.0,1.0,1.0,1.0,1.0,0.966106
girvan_kout_2_0.gml,0.92269,1.0,1.0,1.0,1.0,1.0,1.0,0.796509


In [156]:
with open('ganet.tex','w') as tf:
    tf.write(ganet_valtable.to_latex())

In [157]:
gacd_valtable = gacd.groupby(['filename','params_file'], sort=False)['nmi'].mean().reset_index().pivot('filename', 'params_file', 'nmi')
gacd_valtable

params_file,impl_GACD/default.properties,impl_GACD/large.properties,impl_GACD/low_mut_high_cross.properties
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
dolphins.gml,,,
football.gml,0.777141,0.830657,0.812103
girvan_kout_10_0.gml,0.072171,0.066901,0.070786
girvan_kout_11_0.gml,0.063717,0.053934,0.058126
girvan_kout_12_0.gml,0.048616,0.050932,0.047021
girvan_kout_13_0.gml,0.041322,0.033183,0.036745
girvan_kout_14_0.gml,0.028722,0.02952,0.03277
girvan_kout_15_0.gml,0.028905,0.020916,0.029972
girvan_kout_1_0.gml,1.0,1.0,1.0
girvan_kout_2_0.gml,1.0,1.0,1.0


In [158]:
with open('gacd.tex','w') as tf:
    tf.write(ganet_valtable.to_latex())

In [185]:
tbga_valtable = tbga.groupby(['filename','params_file'], sort=False)['nmi'].mean().reset_index().pivot('filename', 'params_file', 'nmi')
tbga_valtable

params_file,impl_TasginGA/default.properties,impl_TasginGA/high_elite.properties,impl_TasginGA/high_init.properties,impl_TasginGA/high_mut.properties,impl_TasginGA/large.properties
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dolphins.gml,,,,,
football.gml,0.741184,0.715898,0.739433,0.741707,0.791795
girvan_kout_10_0.gml,0.058013,0.053976,0.057011,0.062516,0.038257
girvan_kout_11_0.gml,0.046586,0.043523,0.036791,0.041119,0.038737
girvan_kout_12_0.gml,0.037238,0.039944,0.032293,0.044231,0.028585
girvan_kout_13_0.gml,0.034454,0.03144,0.028707,0.035418,0.018297
girvan_kout_14_0.gml,0.031786,0.031933,0.026173,0.029513,0.015523
girvan_kout_15_0.gml,0.022816,0.030573,0.023768,0.021258,0.012853
girvan_kout_1_0.gml,1.0,1.0,1.0,1.0,1.0
girvan_kout_2_0.gml,1.0,0.997523,1.0,1.0,1.0


In [160]:
with open('tasgin.tex','w') as tf:
    tf.write(tbga_valtable.to_latex())

In [161]:
plot_data = df[['file', 'true_membership']]
plot_data['true_membership'] = plot_data['true_membership'].apply(lambda s: ' '.join(str(e) for e in s))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [162]:
plot_data = plot_data.drop_duplicates()

In [163]:
for x, y in zip(plot_data['file'], plot_data['true_membership']):
    if "lfr" not in x and "dolphins" not in x:
        print(x)
        y = [int(i) for i in y.split(" ") if i != '']
        print(len(y))
        g = ig.Graph.Read(x, format='gml')
        coms = ig.VertexClustering(g, membership=y)
        plt = ig.plot(coms, mark_groups = True, vertex_size=2)
        plt.save(x.split('/')[-1]+".png")
        print("saved")

D:/alien-pineapple/benchmark_gen/gml_files/real_networks/football.gml
115
saved
D:/alien-pineapple/benchmark_gen/gml_files/real_networks/karate.gml
34
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_10_0.gml
128
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_11_0.gml
128
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_12_0.gml
128
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_13_0.gml
128
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_14_0.gml
128
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_15_0.gml
128
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_1_0.gml
128
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_2_0.gml
128
saved
D:/alien-pineapple/benchmark_gen/gml_files/benchmarks/gn_kout/girvan_kout_3_0.gml
128
saved
D:/alien-

In [179]:
def plot_graph(filename):
    g = ig.Graph.Read(filename, format='gml')
    membership = getTrueComs(filename)
    coms = ig.VertexClustering(g, membership=membership)
    plt = ig.plot(coms, mark_groups = True, vertex_size=0, bbox=(20000,20000))
    imagename = filename.split("\\")[-1]
    imagename = imagename.split(".")[0]
    print(imagename)
    plt.save(imagename+".png")

In [181]:
plot_graph('D:\\alien-pineapple\\benchmark_gen\\gml_files\\benchmarks\\lfr2\\n1000b\\lfr_1000_mu3_b_0.gml')

lfr_1000_mu3_b_0


In [182]:
plot_graph('D:\\alien-pineapple\\benchmark_gen\\gml_files\\benchmarks\\lfr2\\n1000b\\lfr_1000_mu4_b_0.gml')

lfr_1000_mu4_b_0


In [183]:
plot_graph('D:\\alien-pineapple\\benchmark_gen\\gml_files\\benchmarks\\lfr2\\n1000b\\lfr_1000_mu5_b_0.gml')

lfr_1000_mu5_b_0


In [184]:
plot_graph('D:\\alien-pineapple\\benchmark_gen\\gml_files\\benchmarks\\lfr2\\n1000b\\lfr_1000_mu6_b_0.gml')

lfr_1000_mu6_b_0
