In [None]:
%matplotlib inline
%load_ext autoreload
import subprocess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from grouped_boxplots_graph_x_similarity import run
from scale_to_latex import get_columnwidth, get_textwidth, get_figsize
import patch_lvplot

# Hex colour per algorithm display name; passed as `palette=` to every
# sns.catplot call in this notebook so each algorithm keeps the same colour.
color_dict = {
    "LINE": "#E30066",
    "HOPE": "#612158",
    "SDNE": "#F6A800",
    "GraphSAGE": "#00549F",
    "node2vec": "#57AB27",
}

### Loading data

In [None]:
# Call the project's `run` helper for the "knn" measure and keep the result as
# knn_df. The k-NN overlap plot cell reads its "graph", "algorithm",
# "similarity" and "experiment" columns.
# NOTE(review): "results/" and "boxplots/test" look like an input directory and
# an output path prefix — confirm against grouped_boxplots_graph_x_similarity.run.
knn_df = run("knn", "results/", "boxplots/test")

In [None]:
# Call the project's `run` helper for the "jaccard" measure and keep the result
# as jaccard_df. The Jaccard plot cell reads its "graph", "algorithm",
# "similarity" and "experiment" columns.
# NOTE(review): "results/" and "boxplots/test" look like an input directory and
# an output path prefix — confirm against grouped_boxplots_graph_x_similarity.run.
jaccard_df = run("jaccard", "results/", "boxplots/test")

In [None]:
# Call the project's `run` helper for the "ranks" measure and keep the result as
# rank_df. The ranking-invariance plot cell reads its "graph", "algorithm",
# "similarity" and "experiment" columns.
# NOTE(review): "results/" and "boxplots/test" look like an input directory and
# an output path prefix — confirm against grouped_boxplots_graph_x_similarity.run.
rank_df = run("ranks", "results/", "boxplots/test")

In [None]:
# Call the project's `run` helper for the "rns" measure and keep the result as
# rns_df. The ranked-neighborhood-stability plot cell reads its "graph",
# "algorithm", "similarity" and "experiment" columns.
# NOTE(review): "results/" and "boxplots/test" look like an input directory and
# an output path prefix — confirm against grouped_boxplots_graph_x_similarity.run.
rns_df = run("rns", "results/", "boxplots/test")

In [None]:
# Call the project's `run` helper for the "angdiv" measure and keep the result
# as angdiv_df. The angle-divergence plot cell reads its "graph", "algorithm",
# "similarity" and "experiment" columns.
# NOTE(review): "results/" and "boxplots/test" look like an input directory and
# an output path prefix — confirm against grouped_boxplots_graph_x_similarity.run.
angdiv_df = run("angdiv", "results/", "boxplots/test")

In [None]:
# Call the project's `run` helper for the "procrustes" measure and keep the
# result as proc_df. The aligned-cosine-similarity plot cell reads its "graph",
# "algorithm", "similarity" and "experiment" columns.
# NOTE(review): "results/" and "boxplots/test" look like an input directory and
# an output path prefix — confirm against grouped_boxplots_graph_x_similarity.run.
proc_df = run("procrustes", "results/", "boxplots/test")

#### k-NN and aligned cosine similarity plots

In [4]:
# Raw identifier -> display name. Every plot cell applies this with
# DataFrame.replace before plotting, so these values are what appear on the
# axes and in the legend. "node2vec" has no entry and is left as it is.
replace_dict = {
    # algorithms
    "line": "LINE",
    "hope": "HOPE",
    "sdne": "SDNE",
    "graphsage": "GraphSAGE",
    # similarity measures
    "procrustes_cossim": "Aligned Cosine Similarity",
    "20nn_overlap": "20-NN Neighborhood Overlap",
    "knn": "20-NN Neighborhood Overlap",
    "20nn_rank": "20-NN Ranking Invariance",
    "rns": "Ranked Neighborhood Stability",
    "procrustes": "Aligned Cosine Similarity",
    "jaccard": "20-NN Jaccard Similarity",
    "angdiv": "20-NN Angle Divergence",
    # graphs
    "cora": "Cora",
    "facebook": "Facebook",
    "blogcatalog": "BlogCatalog",
    "protein": "Protein",
    "wikipedia": "Wikipedia",
}

In [None]:
# Layout widths from the project's scale_to_latex helpers.
# NOTE(review): presumably the LaTeX \columnwidth / \textwidth of the target
# document — confirm the units against scale_to_latex.
columnwidth = get_columnwidth()
textwidth = get_textwidth()

# Gray levels written as matplotlib grayscale strings.
light_gray = ".8"
dark_gray = ".15"

# rcParams overrides layered on top of seaborn's "whitegrid" style.
rc_overrides = {
    # colours and tick visibility
    "axes.edgecolor": light_gray,
    "xtick.color": dark_gray,
    "ytick.color": dark_gray,
    "xtick.bottom": True,
    # font sizes
    "font.size": 8,
    "axes.titlesize": 6,
    "axes.labelsize": 6,
    "xtick.labelsize": 6,
    "ytick.labelsize": 6,
    "legend.fontsize": 6,
    # line widths
    "axes.linewidth": 1,
    "lines.linewidth": 0.7,
    "grid.linewidth": 0.5,
    "xtick.major.width": 0.5,
    "xtick.minor.width": 0.5,
    "ytick.major.width": 0.5,
    "ytick.minor.width": 0.5,
    # tick lengths
    "xtick.major.size": 3,
    "xtick.minor.size": 2,
    "ytick.major.size": 3,
    "ytick.minor.size": 2,
}
sns.set(context="notebook", style="whitegrid", font_scale=1, rc=rc_overrides)

# Figure geometry from the project's get_figsize helper; `width` is reused by the
# plot cells as `height=width/2` in their sns.catplot calls.
# NOTE(review): wf=1/2 presumably means "half of textwidth" — confirm against
# scale_to_latex.get_figsize.
width, height, aspect = get_figsize(textwidth, wf=1/2)
print(width, height, aspect)

In [None]:
# Plot aligned cosine similarity
#
# Boxen plot of proc_df: one group of boxes per graph, one box per algorithm,
# one facet column per "experiment" value. Saved as a PDF.

# Map raw identifiers to display names, give "algorithm" a fixed category order
# (which also drives the sort), then sort rows by graph and algorithm.
df = (
    proc_df
    .replace(to_replace=replace_dict)
    .assign(algorithm=lambda frame: pd.Categorical(
        frame["algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"]))
    .sort_values(by=["graph", "algorithm"])
)

g = sns.catplot(
    data=df,
    x="graph",
    y="similarity",
    hue="algorithm",
    col="experiment",
    kind="boxen",
    palette=color_dict,
    legend=False,  # the legend is placed by hand below
    height=width/2,
    aspect=2,
)
g.set_ylabels("Aligned Cosine Similarity").set_xlabels("")

# Horizontal legend (5 columns) anchored below the first facet.
g.axes[0,0].legend(
    loc='upper center',
    bbox_to_anchor=(0.45, -0.16),
    fancybox=False,
    shadow=False,
    ncol=5,
)
# Lower limit is below zero; cosine similarity can be negative.
g.axes[0,0].set_ylim(-0.2, 1)
g.set_titles("")
g.savefig("boxplots/cossim_boxen.pdf", bbox_inches="tight")

In [None]:
# Plot k-NN overlap
#
# Boxen plot of knn_df: one group of boxes per graph, one box per algorithm,
# one facet column per "experiment" value. Saved as a PDF.

# Map raw identifiers to display names, give "algorithm" a fixed category order
# (which also drives the sort), then sort rows by graph and algorithm.
df = (
    knn_df
    .replace(to_replace=replace_dict)
    .assign(algorithm=lambda frame: pd.Categorical(
        frame["algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"]))
    .sort_values(by=["graph", "algorithm"])
)

g = sns.catplot(
    data=df,
    x="graph",
    y="similarity",
    hue="algorithm",
    col="experiment",
    kind="boxen",
    palette=color_dict,
    legend=False,  # the legend is placed by hand below
    height=width/2,
    aspect=2,
)
g.set_ylabels("20-NN Neighborhood Overlap")
g.set_xlabels("")
# Ticks at 0.0, 0.2, ..., 1.0. Set through the grid so every facet is covered;
# plt.yticks would only touch whichever axes happens to be "current".
g.set(yticks=np.arange(0, 1.1, 0.2))
# Horizontal legend (5 columns) anchored below the first facet.
g.axes[0,0].legend(loc='upper center', bbox_to_anchor=(0.45, -0.16), fancybox=False, shadow=False, ncol=5)
g.set_titles("")
g.savefig("boxplots/knn_boxen.pdf", bbox_inches="tight")

In [None]:
# Plot k-NN Jaccard similarity
#
# Boxen plot of jaccard_df: one group of boxes per graph, one box per algorithm,
# one facet column per "experiment" value. Saved as a PDF.

# Map raw identifiers to display names, give "algorithm" a fixed category order
# (which also drives the sort), then sort rows by graph and algorithm.
df = (
    jaccard_df
    .replace(to_replace=replace_dict)
    .assign(algorithm=lambda frame: pd.Categorical(
        frame["algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"]))
    .sort_values(by=["graph", "algorithm"])
)

# Drop rows for two graph identifiers before plotting.
# NOTE(review): replace_dict renames "cora"/"blogcatalog" to "Cora"/"BlogCatalog"
# and has no entry for "subelj_cora" or "blog_catalog", so this filter only has
# an effect if those exact raw names occur in jaccard_df — confirm it is still needed.
g = sns.catplot(
    data=df.loc[~df["graph"].isin(["subelj_cora", "blog_catalog"])],
    x="graph",
    y="similarity",
    hue="algorithm",
    col="experiment",
    kind="boxen",
    palette=color_dict,
    legend=False,  # the legend is placed by hand below
    height=width/2,
    aspect=2,
)
g.set_ylabels("20-NN Jaccard Similarity")
g.set_xlabels("")
# Ticks at 0.0, 0.2, ..., 1.0. Set through the grid so every facet is covered;
# plt.yticks would only touch whichever axes happens to be "current".
g.set(yticks=np.arange(0, 1.1, 0.2))
# Horizontal legend (5 columns) anchored below the first facet.
g.axes[0,0].legend(loc='upper center', bbox_to_anchor=(0.45, -0.16), fancybox=False, shadow=False, ncol=5)
g.set_titles("")
g.savefig("boxplots/jaccard_boxen.pdf", bbox_inches="tight")

In [None]:
# Plot k-NN ranking invariance
#
# Boxen plot of rank_df: one group of boxes per graph, one box per algorithm,
# one facet column per "experiment" value. Saved as a PDF.

# Map raw identifiers to display names, give "algorithm" a fixed category order
# (which also drives the sort), then sort rows by graph and algorithm.
df = (
    rank_df
    .replace(to_replace=replace_dict)
    .assign(algorithm=lambda frame: pd.Categorical(
        frame["algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"]))
    .sort_values(by=["graph", "algorithm"])
)

g = sns.catplot(
    data=df,
    x="graph",
    y="similarity",
    hue="algorithm",
    col="experiment",
    kind="boxen",
    palette=color_dict,
    legend=False,  # the legend is placed by hand below
    height=width/2,
    aspect=2,
)
g.set_ylabels("20-NN Ranking Invariance")
g.set_xlabels("")
# Ticks at 0.0, 0.2, ..., 1.0. Set through the grid so every facet is covered;
# plt.yticks would only touch whichever axes happens to be "current".
g.set(yticks=np.arange(0, 1.1, 0.2))
# Horizontal legend (5 columns) anchored below the first facet.
g.axes[0,0].legend(loc='upper center', bbox_to_anchor=(0.45, -0.16), fancybox=False, shadow=False, ncol=5)
g.set_titles("")
# NOTE(review): earlier plot cells in this notebook save under "boxplots/";
# confirm that "plots/" is intended here and that the directory exists.
g.savefig("plots/ranks_boxen.pdf", bbox_inches="tight")

In [None]:
# Plot ranked neighborhood stability
#
# Boxen plot of rns_df: one group of boxes per graph (in the explicit `order`
# below), one box per algorithm, one facet column per "experiment" value.
# Saved as a PDF.

# Map raw identifiers to display names, give "algorithm" a fixed category order
# (which also drives the sort), then sort rows by graph and algorithm.
df = (
    rns_df
    .replace(to_replace=replace_dict)
    .assign(algorithm=lambda frame: pd.Categorical(
        frame["algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"]))
    .sort_values(by=["graph", "algorithm"])
)

g = sns.catplot(
    data=df,
    x="graph",
    y="similarity",
    hue="algorithm",
    col="experiment",
    kind="boxen",
    order=["BlogCatalog", "Cora", "Facebook", "Protein", "Wikipedia"],
    palette=color_dict,
    legend=False,  # the legend is placed by hand below
    height=width/2,
    aspect=2,
)
g.set_ylabels("Ranked Neighborhood Stability")
g.set_xlabels("")
# Ticks at 0.0, 0.2, ..., 1.0. Set through the grid so every facet is covered;
# plt.yticks would only touch whichever axes happens to be "current".
g.set(yticks=np.arange(0, 1.1, 0.2))
# Horizontal legend (5 columns) anchored below the first facet.
g.axes[0,0].legend(loc='upper center', bbox_to_anchor=(0.45, -0.16), fancybox=False, shadow=False, ncol=5)

g.set_titles("")
# NOTE(review): earlier plot cells in this notebook save under "boxplots/";
# confirm that "plots/" is intended here and that the directory exists.
g.savefig("plots/rns_boxen.pdf", bbox_inches="tight")

In [None]:
# Plot k-NN angle divergence
#
# Boxen plot of angdiv_df: one group of boxes per graph (in the explicit `order`
# below), one box per algorithm, one facet column per "experiment" value.
# Saved as a PDF.

# Map raw identifiers to display names, give "algorithm" a fixed category order
# (which also drives the sort), then sort rows by graph and algorithm.
df = (
    angdiv_df
    .replace(to_replace=replace_dict)
    .assign(algorithm=lambda frame: pd.Categorical(
        frame["algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"]))
    .sort_values(by=["graph", "algorithm"])
)

g = sns.catplot(
    data=df,
    x="graph",
    y="similarity",
    hue="algorithm",
    col="experiment",
    kind="boxen",
    order=["BlogCatalog", "Cora", "Facebook", "Protein", "Wikipedia"],
    palette=color_dict,
    legend=False,  # the legend is placed by hand below
    height=width/2,
    aspect=2,
)
g.set_ylabels("20-NN Angle Divergence")
g.set_xlabels("")
# Restrict the visible y-range to 0-30 on every facet; anything above 30 is
# cut off from view. Set through the grid rather than plt.ylim, which would
# only touch whichever axes happens to be "current".
g.set(ylim=(0, 30))
# Horizontal legend (5 columns) anchored below the first facet.
g.axes[0,0].legend(loc='upper center', bbox_to_anchor=(0.45, -0.16), fancybox=False, shadow=False, ncol=5)
g.set_titles("")
# NOTE(review): earlier plot cells in this notebook save under "boxplots/";
# confirm that "plots/" is intended here and that the directory exists.
g.savefig("plots/angdiv_boxen.pdf", bbox_inches="tight")

#### Use the following cells to see results for second-order cosine similarity and linear-shift Procrustes