In [None]:
%matplotlib inline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import subprocess
from scipy.stats import variation
from time import time
from collections import defaultdict
from line_plots import plot_line

from scale_to_latex import get_columnwidth, get_textwidth, get_figsize

In [None]:
# --- Input locations ---------------------------------------------------------
# Directory with the per-dataset "<dataset>.node_info" files.
nif = "node_info/"
# Directory with the "<algorithm>_<dataset>_<experiment>.npy" result files.
rf = "results/"

# --- Experiment grid (raw identifiers as they appear in the file names) ------
algorithms = ["line", "node2vec", "sdne", "hope", "graphsage"]
datasets = ["facebook", "protein", "blogcatalog", "wikipedia", "cora"]
experiments = ["procrustes_cossim", "20nn_overlap", "20nn_jaccard", "linproc_cossim"]

# Window sizes handed to plot_line (presumably for its rolling filter -- confirm
# against line_plots.plot_line).
windows = [25, 5]

# --- Presentation ------------------------------------------------------------
# Raw identifier -> label shown in the figures (applied with DataFrame.replace).
replace_dict = {
    # algorithms
    "line": "LINE",
    "hope": "HOPE",
    "sdne": "SDNE",
    "graphsage": "GraphSAGE",
    "node2vec": "node2vec",
    # experiments
    "procrustes_cossim": "Aligned Cosine Similarity",
    "20nn_overlap": "20NN-Overlap",
    "20nn_jaccard": "20NN Jaccard Similarity Coefficient",
    # datasets
    "facebook": "Facebook",
    "protein": "Protein",
    "blogcatalog": "BlogCatalog",
    "wikipedia": "Wikipedia",
    "cora": "Cora",
    # node statistics / remaining experiment
    "page_rank": "PageRank",
    "coreness": "Coreness",
    "linproc_cossim": "LinProc",
}

# Line colour per algorithm, keyed by the display label from replace_dict.
color_dict = {
    "LINE": "#E30066",
    "HOPE": "#612158",
    "SDNE": "#F6A800",
    "GraphSAGE": "#00549F",
    "node2vec": "#57AB27",
}

In [None]:
"""Compute a DataFrame that can be used for plotting"""
# Build one long DataFrame holding the plot data of every (experiment, dataset)
# pair. Each plot_line call receives the result files of all algorithms for
# that pair and returns a frame; the frames are tagged with their experiment
# and dataset and stacked into ``df``.
start_time = time()
df_dict = defaultdict(list)  # not used in this cell; kept in case other cells reference it
frames = []
for experiment in experiments:
    for dataset in datasets:
        print(experiment, dataset)
        # One "<algorithm>_<dataset>_<experiment>.npy" result file per algorithm.
        data = [rf + "_".join([algo, dataset, experiment]) + ".npy" for algo in algorithms]
        mode, k, emb_dir = None, None, None
        info_file = nif + dataset + ".node_info"
        xscale = "log"
        filter_type = "mean"  # kind of rolling filter over y values
        window_sizes = windows
        ylabel = "mean of means"
        ylims = [0]  # fresh list per call, in case plot_line modifies it
        save_path = "tmp/" + dataset + experiment
        mf_frame = plot_line(data, mode, info_file, xscale, window_sizes, k, emb_dir, filter_type, ylabel, ylims, save_path)
        # insert(0, ...) twice leaves the columns ordered: experiment, dataset, ...
        mf_frame.insert(0, 'dataset', dataset)
        mf_frame.insert(0, 'experiment', experiment)
        frames.append(mf_frame)

# DataFrame.append was removed in pandas 2.0 (and re-copied the accumulated
# frame on every iteration), so concatenate all frames once instead. An empty
# experiment grid still yields an empty DataFrame, as before.
df = pd.concat(frames) if frames else pd.DataFrame()

print(f"Computing the data frame took {time()-start_time} seconds.")

In [None]:
# Swap the raw identifiers (algorithm, dataset, experiment and statistic names)
# for their display labels wherever they occur as cell values, then preview.
df = df.replace(replace_dict)
df.head()

In [None]:
# Figure: mean 20-NN Jaccard similarity against PageRank, one panel per dataset
# and one line per algorithm, saved as a PDF.

# Page geometry reported by the LaTeX helpers (columnwidth is not used below).
columnwidth = get_columnwidth()
textwidth = get_textwidth()

# Grey levels for the axes frame and the tick marks.
light_gray = ".8"
dark_gray = ".15"

# Global seaborn/matplotlib styling: small fonts and thin lines.
sns.set(
    context="notebook",
    style="ticks",
    font_scale=1,
    rc={
        "axes.edgecolor": light_gray,
        "xtick.color": dark_gray,
        "ytick.color": dark_gray,
        "xtick.bottom": True,
        "font.size": 8,
        "axes.titlesize": 6,
        "axes.labelsize": 6,
        "xtick.labelsize": 6,
        "legend.fontsize": 6,
        "ytick.labelsize": 6,
        "axes.linewidth": 1,
        "xtick.minor.width": 0.5,
        "xtick.major.width": 0.5,
        "ytick.minor.width": 0.5,
        "ytick.major.width": 0.5,
        "lines.linewidth": 0.7,
        "xtick.major.size": 3,
        "ytick.major.size": 3,
        "xtick.minor.size": 2,
        "ytick.minor.size": 2,
    },
)

# Size of a single panel; wf=1/5 presumably means one fifth of the text width
# (confirm in scale_to_latex). Only ``width`` is used for the grid below.
width, height, aspect = get_figsize(textwidth, wf=1/5)
print(width, height, aspect)
legend_yoffset = -0.5

# Fix the algorithm order so sorting below follows it.
# NOTE(review): this writes the categorical column back into ``df`` itself,
# so the frame from the earlier cells is modified as well.
df["algorithm"] = pd.Categorical(df["algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"])
plot_df = df.sort_values(by=["dataset", "algorithm"])
display(plot_df.head())
plot_df = plot_df.rename(columns={"algorithm": "Algorithm"})

# Keep only the rows of the Jaccard experiment measured against PageRank.
jaccard_pagerank_rows = (
    (plot_df.experiment == "20NN Jaccard Similarity Coefficient")
    & (plot_df.statistic == "PageRank")
)
g = sns.FacetGrid(
    data=plot_df.loc[jaccard_pagerank_rows],
    hue="Algorithm",
    col="dataset",
    palette=color_dict,
    sharex=False,
    height=width,
    aspect=1,
)
g.map(plt.plot, "x", "y")

# A single legend, attached to the fourth panel and shifted left/down so that
# it sits below the row of panels.
g.axes[0, 3].legend(
    loc='upper center',
    bbox_to_anchor=(-0.8, legend_yoffset),
    fancybox=False,
    shadow=False,
    ncol=5,
)

g.set_ylabels("Mean of Mean\n20-NN Jaccard Similarity")
g.set_titles("{col_name}")
g.set_xlabels("PageRank")

# Log-scaled x axis and y ticks every 0.2 from 0 to 1 on each panel.
for ax in g.axes[0]:
    ax.set_xscale("log")
    ax.set_yticks(np.arange(0, 1.1, 0.2))

g.savefig("plots/jaccard_pagerank_all.pdf")