In [None]:
import os
import glob
from scipy.stats import pearsonr, spearmanr, kendalltau
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("ticks")

In [None]:
def scatterplots(plots_dir:str,results_file,x:str,y:str,x_lab:str,y_lab:str,plot_suf:str,xy = False,corrs = True):
    """Scatter-plot column ``y`` against column ``x`` for every graph/embedding pair.

    One figure per (graph, embedding) combination is saved as
    ``{plots_dir}/{graph}_{emb}_{plot_suf}.eps``. Points are coloured by the
    ``param`` column and sized by the ``dim`` column, and the Pearson
    correlation of the plotted points is written onto the figure.

    Parameters
    ----------
    plots_dir : str
        Output directory; created if it does not exist.
    results_file : str, or list/tuple of two str
        Path of a results CSV. When two paths are given, the two files are
        merged on ``graph``, ``emb``, ``dim`` and ``param``; columns present
        in both files then carry pandas' default ``_x`` / ``_y`` suffixes.
    x, y : str
        Columns plotted on the horizontal / vertical axis.
    x_lab, y_lab : str
        Axis labels.
    plot_suf : str
        Suffix of the output file names.
    xy : bool, default False
        If true, draw a dashed grey ``y = x`` line spanning the x-axis limits.
    corrs : bool, default True
        If true, print Pearson, Spearman and Kendall-tau coefficients of
        ``x`` vs. ``y`` for every graph/embedding pair.

    Returns
    -------
    pandas.DataFrame
        The loaded (and, for two files, merged) frame with relabelled
        ``param`` values.
    """
    os.makedirs(plots_dir, exist_ok = True)
    if isinstance(results_file, (list, tuple)):
        df = pd.read_csv(results_file[0])
        df_to_merge = pd.read_csv(results_file[1])
        df = df.merge(df_to_merge,on=["graph","emb","dim","param"])
    else:
        df = pd.read_csv(results_file)

    # Bare numeric param values (with or without a trailing ".0") are replaced
    # by the "p=...,q=..." labels used in the legends. The column is cast to
    # object first so that writing strings into a numeric column is well
    # defined; every value that is not one of the codes is kept untouched.
    param_labels = {
        '1': 'p=1.0,q=1.0',
        '1.0': 'p=1.0,q=1.0',
        '9': 'p=9.0,q=0.11',
        '9.0': 'p=9.0,q=0.11',
        '0.11': 'p=0.11,q=9.0',
    }
    param_text = df["param"].astype(str)
    is_code = param_text.isin(list(param_labels))
    df["param"] = df["param"].astype(object).where(~is_code, param_text.map(param_labels))

    graphs = ["sbm10k","lfr10k","nlfr10k","email"]
    # Legend order of the "param" hue levels for each embedding.
    hue_orders = {
        "hope": ["ppr","katz","aa"],
        "n2v": ["p=0.11,q=9.0","p=1.0,q=1.0","p=9.0,q=0.11"],
    }

    for graph in graphs:
        for emb, order in hue_orders.items():
            mask = (df.graph == graph) & (df.emb == emb)
            if mask.sum() < 2:
                # Neither the legend slicing nor pearsonr below can cope with
                # fewer than two points, so report the gap and move on.
                print(f"Skipping {graph}-{emb}: fewer than two rows in the results.")
                continue
            plt.clf()
            ax = sns.scatterplot(x=x,y=y,hue="param",hue_order=order,size="dim",data=df[mask])
            if xy:
                xp = ax.get_xlim()
                ax.plot(xp,xp, color="gray",linestyle='--')
            display_graph = "noisy LFR10k" if graph == "nlfr10k" else graph.upper()
            ax.set_title(f"{display_graph} graph, {emb.upper()} embeddings")
            handles, labels = ax.get_legend_handles_labels()
            # Keep the first six legend entries plus the last one and relabel
            # them by hand. NOTE(review): this presumes seaborn emits
            # [hue title, 3 hue levels, size title, size entries...]; the dim
            # labels 2 and 32 are hard-coded - confirm against the data.
            ax.legend(handles=handles[:6]+[handles[-1]], labels=["Param",*order,"Dim",2,32])
            ax.set_xlabel(x_lab)
            ax.set_ylabel(y_lab)
            corr_val = round(pearsonr(df[x][mask],df[y][mask])[0],2)
            ax.get_figure().text(0.64, 0.15, f"Pearson's corr: {corr_val}", ha ='left', fontsize = 12)
            ax.get_figure().savefig(f'{plots_dir}/{graph}_{emb}_{plot_suf}.eps' , dpi=400)

    if corrs:
        print("Graph-Embedding Pearson Spearman Kendall-Tau")
        for graph in graphs:
            for emb in hue_orders:
                mask = (df.graph == graph) & (df.emb == emb)
                if mask.sum() < 2:
                    # Already reported by the plotting loop above.
                    continue
                x1 = df[x][mask]
                x2 = df[y][mask]
                print(graph.upper()+"-"+emb.upper(),
                      round(pearsonr(x1,x2)[0],2),
                      round(spearmanr(x1,x2)[0],2),
                      round(kendalltau(x1,x2)[0],2))
    return df

In [None]:
def normalized_plots(plots_dir:str,results_file,size:list,title_suf:str,plot_suf:str,alpha:float):
    """Plot per-graph scatter plots of the normalized local vs. global score.

    ``best_auc`` and ``best_div`` are each divided by their minimum over all
    rows of the same graph. One figure per graph is saved as
    ``{plots_dir}/{graph}_{plot_suf}.pdf``, coloured by embedding and with the
    marker size driven by the column named in ``size[0]``.

    Parameters
    ----------
    plots_dir : str
        Output directory; created if it does not exist.
    results_file : str, or list/tuple of two str
        Path of a results CSV. When two paths are given, the two files are
        merged on ``graph``, ``emb``, ``dim`` and ``param``.
    size : sequence of two items
        ``size[0]`` is the column mapped to marker size, ``size[1]`` the title
        shown for it in the legend.
    title_suf : str
        Text appended to the figure title.
    plot_suf : str
        Suffix of the output file names.
    alpha : float
        Marker opacity passed to seaborn.
    """
    os.makedirs(plots_dir, exist_ok = True)
    if isinstance(results_file, (list, tuple)):
        df = pd.read_csv(results_file[0])
        df_to_merge = pd.read_csv(results_file[1])
        df = df.merge(df_to_merge,on=["graph","emb","dim","param"])
    else:
        df = pd.read_csv(results_file)

    # Everything below works on the frame loaded from ``results_file``; the
    # function must not depend on a notebook-level ``merged`` variable.
    # Bare numeric param values are replaced by their "p=...,q=..." labels;
    # the column is cast to object so strings can be stored in it safely.
    param_labels = {
        '1': 'p=1.0,q=1.0',
        '1.0': 'p=1.0,q=1.0',
        '9': 'p=9.0,q=0.11',
        '9.0': 'p=9.0,q=0.11',
        '0.11': 'p=0.11,q=9.0',
    }
    param_text = df["param"].astype(str)
    is_code = param_text.isin(list(param_labels))
    df["param"] = df["param"].astype(object).where(~is_code, param_text.map(param_labels))

    # Per-graph minima used as the normalization denominators.
    min_scores = (df[['graph','best_div','best_auc']]
                  .groupby(['graph'])
                  .min()
                  .reset_index()
                  .rename(columns={'best_div':'min_div','best_auc':'min_auc'}))
    scored = df.merge(min_scores,on=["graph"])
    scored['normalized_div'] = scored.best_div/scored.min_div
    scored['normalized_auc'] = scored.best_auc/scored.min_auc

    for graph in ["sbm10k","lfr10k","nlfr10k","email"]:
        mask = (scored.graph == graph)
        if not mask.any():
            # Nothing to draw, and the legend slicing below needs entries.
            print(f"Skipping {graph}: no rows in the results.")
            continue
        plt.clf()
        ax = sns.scatterplot(x='normalized_auc',y='normalized_div',hue="emb",hue_order=["hope","n2v"],
                             size=size[0],sizes=(5, 150), alpha=alpha,data=scored[mask])
        display_graph = "noisy LFR10k" if graph == "nlfr10k" else graph.upper()
        ax.set_title(f"{display_graph} graph{title_suf}")
        ax.set_xlabel("Normalized local score")
        ax.set_ylabel("Normalized global score")
        handles, labels = ax.get_legend_handles_labels()
        # Keep the first five legend entries plus the last one. The fifth
        # entry keeps its own label (labels[4]) so marker and text always
        # belong together, however many size entries seaborn generates.
        ax.legend(handles=handles[:5]+[handles[-1]],
                  labels=["Embedding","HOPE","N2V",size[1],labels[4],labels[-1]])
        ax.get_figure().savefig(f'{plots_dir}/{graph}_{plot_suf}.pdf' , dpi=400)

In [None]:
def correlations(merged:pd.DataFrame, x:str):
    """Print mean correlations between column ``x`` and the two score columns.

    For every graph/embedding pair, the Pearson, Spearman and Kendall-tau
    coefficients of ``x`` against ``best_div`` and against ``best_auc`` are
    computed separately for each of the embedding's three ``param`` values
    and then averaged. One line is printed per pair, with the columns ordered
    as in the printed header (div first, auc second, for each statistic).

    Parameters
    ----------
    merged : pandas.DataFrame
        Frame with ``graph``, ``emb``, ``param``, ``best_div``, ``best_auc``
        and the column named by ``x``.
    x : str
        Name of the column correlated with the scores.
    """
    params_by_emb = {
        "hope": ["ppr","katz","aa"],
        "n2v": ['p=1.0,q=1.0','p=9.0,q=0.11','p=0.11,q=9.0'],
    }
    statistics = (pearsonr, spearmanr, kendalltau)

    print("Graph-Embedding Pearson Pearson Spearman Spearman Kendall-Tau Kendall-Tau")
    for graph in ["sbm10k","lfr10k","nlfr10k","email"]:
        in_graph = merged.graph == graph
        for emb in ["hope","n2v"]:
            # One row of six coefficients per param value.
            per_param = []
            for param in params_by_emb[emb]:
                rows = in_graph & (merged.emb == emb) & (merged.param == param)
                values = merged[x][rows]
                div_scores = merged.best_div[rows]
                auc_scores = merged.best_auc[rows]
                per_param.append([
                    stat(values, scores)[0]
                    for stat in statistics
                    for scores in (div_scores, auc_scores)
                ])
            # Transpose so each tuple collects one coefficient across params.
            means = [round(np.mean(column),2) for column in zip(*per_param)]
            print(graph.upper()+"-"+emb.upper(), *means)

# Exact scores

In [None]:
# Exact global score (best_div) against exact local score (best_auc).
df = scatterplots(
    plots_dir="exp_exact",
    results_file='results/exact_scores.csv',
    x="best_div",
    y="best_auc",
    x_lab="Global score",
    y_lab="Local score",
    plot_suf="div_auc",
)

# Approximate and exact scores

In [None]:
# Exact vs. approximate global score. Both CSVs contain best_div, so after
# the merge the columns are best_div_x (first file) and best_div_y (second).
# xy=True adds the y = x reference line.
df = scatterplots(
    plots_dir="exp_approx_div",
    results_file=['results/exact_scores.csv','results/approximate_scores.csv'],
    x="best_div_x",
    y="best_div_y",
    x_lab="Exact global score",
    y_lab="Approx. global score",
    plot_suf="div",
    xy=True,
)

In [None]:
# Exact vs. approximate local score (best_auc_x from the first file,
# best_auc_y from the second), with the y = x reference line.
df = scatterplots(
    plots_dir="exp_approx_auc",
    results_file=['results/exact_scores.csv','results/approximate_scores.csv'],
    x="best_auc_x",
    y="best_auc_y",
    x_lab="Exact local score",
    y_lab="Approx. local score",
    plot_suf="auc",
    xy=True,
)

# Correlation and ratio

In [None]:
def _landmark_correlations(frame, x_col, y_col):
    """Correlate ``x_col`` with ``y_col`` within each (graph, emb, landmarks_y) group.

    Returns a frame with the group keys as columns and the coefficient in
    ``Corr_Coef``.
    """
    return (frame[[x_col, 'graph','landmarks_y','emb']]
            .groupby(['graph','emb','landmarks_y'])
            .corrwith(frame[y_col])
            .rename(columns={x_col : 'Corr_Coef'})
            .reset_index())


def _landmark_lineplot(frame, out_dir, graph, emb, y_col, y_label, file_suffix):
    """Draw ``y_col`` against ``landmarks_y`` for one graph/embedding and save it as EPS."""
    plt.clf()
    mask = (frame.graph == graph) & (frame.emb == emb)
    ax = sns.lineplot(x="landmarks_y",y=y_col,data=frame[mask])
    display_graph = "noisy LFR10k" if graph == "nlfr10k" else graph.upper()
    ax.set_title(f"{display_graph} graph, {emb.upper()} embeddings")
    ax.set_xlabel("No. landmarks")
    ax.set_ylabel(y_label)
    # Dashed reference line at 1 across the current x-range.
    ax.plot(ax.get_xlim(),(1,1),color="gray",linestyle="--")
    ax.get_figure().savefig(f'{out_dir}/{graph}_{emb}_{file_suffix}.eps' , dpi=400)


plots_dir = "exp_landmarks_corr"
os.makedirs(plots_dir,exist_ok=True)
df = pd.read_csv('results/exact_scores.csv')
df['param'] = df['param'].astype(str)
exp_land_df = pd.read_csv("results/correlation_ratio.csv")
# Columns present in both files get the suffixes _x (exact_scores.csv) and
# _y (correlation_ratio.csv).
merged = df.merge(exp_land_df,on=["graph","emb","dim","param"])
# Symmetric ratio: the smaller of a/b and b/a for the two score versions.
merged['auc_ratio'] = np.minimum(merged.best_auc_x/merged.best_auc_y,merged.best_auc_y/merged.best_auc_x)
merged['div_ratio'] = np.minimum(merged.best_div_x/merged.best_div_y,merged.best_div_y/merged.best_div_x)
# Same relabelling as in scatterplots(), including the "1.0" / "9.0" spellings.
for code, label in {'1': 'p=1.0,q=1.0',
                    '1.0': 'p=1.0,q=1.0',
                    '9': 'p=9.0,q=0.11',
                    '9.0': 'p=9.0,q=0.11',
                    '0.11': 'p=0.11,q=9.0'}.items():
    merged.loc[merged.param == code,'param'] = label

corrs_auc = _landmark_correlations(merged, 'best_auc_x', 'best_auc_y')
corrs_div = _landmark_correlations(merged, 'best_div_x', 'best_div_y')

for graph in ["sbm10k","lfr10k","nlfr10k"]:
    for emb in ["hope","n2v"]:
        ## Ratio div
        _landmark_lineplot(merged, plots_dir, graph, emb, "div_ratio", "Global score ratio", "ratio_div")
        ## Ratio AUC
        _landmark_lineplot(merged, plots_dir, graph, emb, "auc_ratio", "Local score ratio", "ratio_auc")
        ## Correlation div
        _landmark_lineplot(corrs_div, plots_dir, graph, emb, "Corr_Coef", "Correlation of global score", "corr_div")
        ## Correlation AUC
        _landmark_lineplot(corrs_auc, plots_dir, graph, emb, "Corr_Coef", "Correlation of local score", "corr_auc")

In [None]:
# One figure per embedding and score type, with one line per graph.
# The level order is fixed explicitly so it always matches the hand-written
# legend labels below.
graph_order = ["lfr10k","nlfr10k","sbm10k"]
graph_labels = ["LFR10k","nLFR10k","SBM10k"]
for emb in ["hope","n2v"]:
    for corr_frame, y_label, file_suffix in (
        (corrs_div, "Correlation of global score", "corr_div_per_emb"),
        (corrs_auc, "Correlation of local score", "corr_auc_per_emb"),
    ):
        plt.clf()
        # Filter, colour and style from the same frame that is plotted.
        rows = corr_frame[corr_frame.emb == emb]
        ax = sns.lineplot(x="landmarks_y",y="Corr_Coef",hue="graph",style="graph",
                          hue_order=graph_order,style_order=graph_order,
                          markers=["h"]*3, dashes=False,data=rows)
        ax.set_title(f"{emb.upper()} embeddings")
        ax.set_xlabel("No. landmarks")
        ax.set_ylabel(y_label)
        # Dashed reference line at 1 across the current x-range.
        ax.plot(ax.get_xlim(),(1,1),color="gray",linestyle="--")
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles=handles, labels=graph_labels)
        # The file name depends on the embedding only: these figures combine
        # all graphs, so no graph name belongs in it.
        ax.get_figure().savefig(f'{plots_dir}/{emb}_{file_suffix}.eps' , dpi=400)

# Community detection

In [None]:
# Community detection: accuracy against the exact local score.
merged = scatterplots(
    plots_dir="exp_comm_detect",
    results_file=['results/exact_scores.csv','results/comm_detection.csv'],
    x="best_auc",
    y="accuracy",
    x_lab="Exact local score",
    y_lab="Accuracy",
    plot_suf="auc",
)

In [None]:
# Community detection: accuracy against the exact global score.
merged = scatterplots(
    plots_dir="exp_comm_detect",
    results_file=['results/exact_scores.csv','results/comm_detection.csv'],
    x="best_div",
    y="accuracy",
    x_lab="Exact global score",
    y_lab="Accuracy",
    plot_suf="div",
)

In [None]:
# Mean per-param correlations of accuracy with both scores.
correlations(merged, x="accuracy")

In [None]:
# Normalized score plots with marker size driven by the accuracy column.
normalized_plots(
    plots_dir="exp_normalized_scores",
    results_file=['results/exact_scores.csv','results/comm_detection.csv'],
    size=['accuracy','Accuracy'],
    title_suf=", accuracy in community detection",
    plot_suf="acc",
    alpha=0.1,
)

# Clustering

In [None]:
# Clustering: AMI against the exact local score (no y = x line).
merged = scatterplots(
    plots_dir="exp_clustering",
    results_file=['results/exact_scores.csv','results/clustering.csv'],
    x="best_auc",
    y="ami",
    x_lab="Exact local score",
    y_lab="AMI",
    plot_suf="auc",
    xy=False,
)

In [None]:
# Clustering: AMI against the exact global score (no y = x line).
merged = scatterplots(
    plots_dir="exp_clustering",
    results_file=['results/exact_scores.csv','results/clustering.csv'],
    x="best_div",
    y="ami",
    x_lab="Exact global score",
    y_lab="AMI",
    plot_suf="div",
    xy=False,
)

In [None]:
# Mean per-param correlations of AMI with both scores.
correlations(merged, x="ami")

In [None]:
# Normalized score plots with marker size driven by the ami column.
normalized_plots(
    plots_dir="exp_normalized_scores",
    results_file=['results/exact_scores.csv','results/clustering.csv'],
    size=['ami','AMI'],
    title_suf=", AMI in clustering",
    plot_suf="ami",
    alpha=0.1,
)

# Link prediction

In [None]:
# Concatenate every file under linkpred_results/ into a single CSV.
# glob returns paths in arbitrary order, so sort them to make the row order
# of the combined file reproducible between runs and machines.
ls = sorted(glob.glob('linkpred_results/*'))
dfs = [pd.read_csv(path) for path in ls]
df = pd.concat(dfs)
df.to_csv("results/link_prediction.csv",index=False)

In [None]:
# Link prediction: auc column against the exact local score.
merged = scatterplots(
    plots_dir="exp_link_prediction",
    results_file='results/link_prediction.csv',
    x="best_auc",
    y="auc",
    x_lab="Exact local score",
    y_lab="AUC",
    plot_suf="auc",
)

In [None]:
# Link prediction: auc column against the exact global score.
merged = scatterplots(
    plots_dir="exp_link_prediction",
    results_file='results/link_prediction.csv',
    x="best_div",
    y="auc",
    x_lab="Exact global score",
    y_lab="AUC",
    plot_suf="div",
)

In [None]:
# Mean per-param correlations of the auc column with both scores.
correlations(merged, x="auc")

In [None]:
# Link prediction: auc_swap column against the exact local score.
merged = scatterplots(
    plots_dir="exp_link_prediction",
    results_file='results/link_prediction.csv',
    x="best_auc",
    y="auc_swap",
    x_lab="Exact local score",
    y_lab="AUC",
    plot_suf="auc_swap",
)

In [None]:
# Link prediction: auc_swap column against the exact global score.
merged = scatterplots(
    plots_dir="exp_link_prediction",
    results_file='results/link_prediction.csv',
    x="best_div",
    y="auc_swap",
    x_lab="Exact global score",
    y_lab="AUC",
    plot_suf="div_swap",
)

In [None]:
# Mean per-param correlations of the auc_swap column with both scores.
correlations(merged, x="auc_swap")

In [None]:
# Link prediction: auc_deg column against the exact local score.
merged = scatterplots(
    plots_dir="exp_link_prediction",
    results_file='results/link_prediction.csv',
    x="best_auc",
    y="auc_deg",
    x_lab="Exact local score",
    y_lab="AUC",
    plot_suf="auc_deg",
)

In [None]:
# Link prediction: auc_deg column against the exact global score.
# The suffix is "div_deg" (matching "div_swap" for the auc_swap plots) so
# these figures do not overwrite the "auc_deg" files of the best_auc plot.
merged = scatterplots("exp_link_prediction",'results/link_prediction.csv',
             "best_div","auc_deg","Exact global score","AUC","div_deg")

In [None]:
# Mean per-param correlations of the auc_deg column with both scores.
correlations(merged, x="auc_deg")

In [None]:
# Normalized score plots with marker size driven by the auc column.
normalized_plots(
    plots_dir="exp_normalized_scores",
    results_file='results/link_prediction.csv',
    size=['auc','AUC'],
    title_suf=", AUC in link prediction",
    plot_suf="auc",
    alpha=0.7,
)