In [148]:
import pandas as pd
import numpy as np
import scipy.io 

%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [149]:
%%R

# Attach the plotting and data-wrangling packages used by the R cells, hiding
# their start-up banners so the notebook output stays clean. The attach order
# is kept as-is because later packages can mask names from earlier ones.
suppressPackageStartupMessages({
    library(cowplot)
    library(LaCroixColoR)
    library(pals)
    library(see)
    library(tidyverse)
})

# Make the cowplot theme the default for every ggplot produced in this session.
theme_set(theme_cowplot())

# Print all columns of a tibble rather than truncating to the console width.
options(dplyr.width = Inf)

In [150]:
# One column label per algorithm/parameter setting, grouped here by algorithm family.
method_columns = [
    'Infomap', 'SLPA',
    'OSLOM_10', 'OSLOM_20', 'OSLOM_30', 'OSLOM_40', 'OSLOM_50',
    'OSLOM_60', 'OSLOM_70', 'OSLOM_80', 'OSLOM_90', 'OSLOM_100',
    'Clique_3', 'Clique_4', 'Clique_5', 'Clique_6', 'Clique_7', 'Clique_9',
    'NNMF_10', 'NNMF_20', 'NNMF_30', 'NNMF_40',
    'Louvain',
]

# Read the stored timing matrix and transpose it so that each column lines up
# with one entry of method_columns (rows are treated as networks downstream).
computation_times_mat = scipy.io.loadmat("../data/ENMI_Results/all_OCDA_computation_times.mat")
all_OCDA_computation_times = computation_times_mat['all_OCDA_computation_times'].T

all_OCDA_computation_times_df = pd.DataFrame(data=all_OCDA_computation_times,
                                             columns=method_columns)

In [151]:
# Map every algorithm/parameter column onto its algorithm family. The order
# mirrors method_columns: Infomap, SLPA, 10 x OSLOM, 6 x Clique, 4 x NNMF, Louvain.
method_categories = ['Infomap', 'SLPA'] + ['OSLOM'] * 10 + ['Clique'] * 6 + ['NNMF'] * 4 + ['Louvain']
method_category_df = pd.DataFrame({'method': method_columns,
                                   'method_category': method_categories})


def _summarise_computation_times(group_cols):
    """Summarise computation time by median and inter-quartile range per group.

    The wide timing table (one row per network, one column per method) is
    reshaped to long format, annotated with each method's category, and then
    aggregated over ``group_cols``.

    Parameters
    ----------
    group_cols : list of str
        Column names to group by; any of ``'method'`` and ``'method_category'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``index``, ``*group_cols``, ``Median_CompTime`` and
        ``IQR_CompTime`` (Q3 - Q1), one row per group, sorted by group key.
    """
    return (all_OCDA_computation_times_df
            .assign(network_number=range(1, len(all_OCDA_computation_times_df) + 1))
            .melt(id_vars='network_number', value_vars=method_columns,
                  var_name='method', value_name='Computation_Time')
            .merge(method_category_df, on='method', how='left')
            .groupby(group_cols)['Computation_Time']
            .describe()
            .reset_index()
            .rename(columns={'25%': 'Q1', '50%': 'Median_CompTime', '75%': 'Q3'})
            .assign(IQR_CompTime=lambda x: x['Q3'] - x['Q1'])
            [group_cols + ['Median_CompTime', 'IQR_CompTime']]
            # Kept deliberately: it adds the positional 'index' column that
            # downstream cells expect (and explicitly drop before merging).
            .reset_index()
    )


# NOTE: despite the "mean_" prefix, both tables hold medians and IQRs; the
# names are kept unchanged because later cells refer to them.
mean_method_category_computation_times = _summarise_computation_times(['method_category'])

mean_method_computation_times = _summarise_computation_times(['method', 'method_category'])



In [152]:
# Show the per-category timing summary, rounded to 3 decimals for display and
# ordered from the fastest to the slowest algorithm family.
(
    mean_method_category_computation_times
    .round({'Median_CompTime': 3, 'IQR_CompTime': 3})
    .sort_values('Median_CompTime')
)

Unnamed: 0,index,method_category,Median_CompTime,IQR_CompTime
2,2,Louvain,0.002,0.001
5,5,SLPA,0.042,0.006
1,1,Infomap,0.384,0.117
0,0,Clique,0.674,0.401
3,3,NNMF,1.149,0.091
4,4,OSLOM,12.555,5.994


In [158]:
# Load the ENMI benchmark results and transpose so that each column lines up
# with one entry of method_columns.
ENMI_res = scipy.io.loadmat("../data/ENMI_Results/all_benchmark_OCDA_ENMI.mat")['all_ENMI_res'].T

# method_columns is intentionally NOT redefined here: the list from the
# computation-time cell is reused so that the labels stay in sync with
# method_category_df, which is merged on 'method' below.
ENMI_res_df = pd.DataFrame(ENMI_res, columns=method_columns)

# Compute the median and IQR (Q3 - Q1) for each column of ENMI_res_df, then
# attach each method's category; rows are ordered by decreasing median ENMI.
ENMI_summary = (ENMI_res_df
                .describe()
                .T[['25%', '50%', '75%']]
                .reset_index()
                .rename(columns={'index': 'method', '25%': 'Q1', '50%': 'Median_ENMI', '75%': 'Q3'})
                .assign(IQR_ENMI = lambda x: x['Q3'] - x['Q1'])
                [['method', 'Median_ENMI', 'IQR_ENMI']]
                .sort_values(by='Median_ENMI', ascending=False)
                .merge(method_category_df, on='method', how='left')
                # Adds a positional 'index' column (the rank by median ENMI).
                .reset_index()
                )

# Combine timing and ENMI summaries per method; the positional 'index' helper
# columns are dropped from both sides so they do not collide in the merge.
ENMI_comptime_merged = pd.merge(mean_method_computation_times.drop(columns=['index']), 
                                ENMI_summary.drop(columns=['index']), 
                                on=['method', 'method_category'], how='left')

ENMI_comptime_merged.head()

Unnamed: 0,method,method_category,Median_CompTime,IQR_CompTime,Median_ENMI,IQR_ENMI
0,Clique_3,Clique,0.74437,0.467748,0.546586,0.372767
1,Clique_4,Clique,0.678397,0.411051,0.58824,0.315384
2,Clique_5,Clique,0.657044,0.404896,0.635982,0.210669
3,Clique_6,Clique,0.653352,0.382673,0.674559,0.193023
4,Clique_7,Clique,0.652044,0.386204,0.688819,0.189702


In [161]:
%%R -i ENMI_comptime_merged

lacroix_colors <- lacroix_palette("PassionFruit", n=6, type = "discrete")

# Scatter of median ENMI against median computation time (log10 seconds), one
# point per method, with bars spanning median +/- IQR along both axes.
ENMI_comptime_merged %>%
          # The horizontal bar endpoints are formed in seconds and only then
          # log10-transformed. Taking log10 of the IQR itself and adding it to
          # log10(median) is not a spread on the log axis (it is negative
          # whenever IQR < 1 s), so the IQR column is left in seconds.
          mutate(CompTime_lower = Median_CompTime - IQR_CompTime,
                 CompTime_upper = Median_CompTime + IQR_CompTime,
                 # log10 is undefined for non-positive values; NA makes ggplot
                 # drop that bar (with a warning) instead of drawing nonsense.
                 CompTime_lower = log10(if_else(CompTime_lower > 0, CompTime_lower, NA_real_)),
                 CompTime_upper = log10(CompTime_upper),
                 Median_CompTime = log10(Median_CompTime)) %>%
    ggplot(data=., mapping=aes(x=Median_CompTime, y=Median_ENMI, color=method_category)) +
      # Horizontal bar (spread in computation time)
      geom_segment(aes(x = CompTime_lower, xend = CompTime_upper, 
                       y = Median_ENMI, yend = Median_ENMI),
                   linewidth = 1, alpha=0.3) +
      # Vertical bar (spread in ENMI)
      geom_segment(aes(x = Median_CompTime, xend = Median_CompTime, 
                       y = Median_ENMI - IQR_ENMI, yend = Median_ENMI + IQR_ENMI), 
                   linewidth = 1, alpha=0.3) +
      # Central point (median)
      geom_point(aes(x = Median_CompTime, y = Median_ENMI), size = 2.5) +
      ylab("ENMI across simulated networks") +
      xlab("log10(Computation time in seconds)") +
      theme(legend.position='bottom') +
      scale_color_manual(values=c("OSLOM"=lacroix_colors[1], 
                            "Clique"=lacroix_colors[3],
                            "NNMF"=lacroix_colors[4],
                            "Louvain" =lacroix_colors[2],
                            "Infomap"=lacroix_colors[5],
                            "SLPA"=lacroix_colors[6])) 
# ggsave("../plots/benchmark_evaluation/OCDA_computation_time_vs_ENMI.svg", width=4.25, height=4, units='in', dpi=300)