In [None]:
%matplotlib inline
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from collections import defaultdict

from scale_to_latex import get_figsize, get_textwidth, get_columnwidth

In [None]:
# Embedding algorithms covered by this notebook (lower-case identifiers).
algorithms = ["line", "hope", "sdne", "graphsage", "node2vec"]

# Experiments that are currently active. Other experiment names that have been
# used here and are switched off at the moment:
#   "20nn_2nd_order_cossim", "20nn_overlap", "linproc_cossim"
experiments = ["aligned_cossim"]

### Plots for constant density

In [None]:
""" Load data and store in DataFrame """

# Graph sizes (number of nodes) as they appear in the result file names.
sizes = [
    "1000",
    "2000",
    "4000",
    "8000",
    "16000",
    "32000",
    "64000",
    # "128000",
]

# Not read by any of the visible cells; kept in case another cell relies on it.
info_frames = {}


def load_size_results(graph_code, graph_name):
    """Load the d01 result arrays of one graph type and summarise them.

    For every active experiment, size and algorithm the file
    ``results/{algorithm}_{graph_code}_N{size}_d01_{experiment}.npy`` is read
    and reduced to a single number ``y``.

    Parameters
    ----------
    graph_code : str
        Graph tag used inside the file names (e.g. "ba" or "ws").
    graph_name : str
        Label written to the "Type" column of the returned frame.

    Returns
    -------
    frame : pandas.DataFrame
        One row per loaded file with the columns "Nodes", "Algorithm", "y",
        "Experiment" and "Type".
    arrays_by_experiment : dict
        ``{experiment: {"<algorithm>_<size>": ndarray}}`` holding the raw arrays.

    Notes
    -----
    ``np.load`` raises if an expected result file is missing or unreadable.
    """
    # Experiments whose per-column means are turned into `1 - mean` before the
    # final average is taken.
    one_minus_experiments = ("aligned_cossim", "linproc_cossim")
    # For 64000 nodes only these algorithms have a file loaded.
    algorithms_at_64k = ("line", "hope", "sdne", "node2vec")

    arrays_by_experiment = {}
    records = []
    for experiment in experiments:
        arrays = {}
        for size in sizes:
            for algorithm in algorithms:
                if size == "64000" and algorithm not in algorithms_at_64k:
                    continue
                values = np.load(f"results/{algorithm}_{graph_code}_N{size}_d01_{experiment}.npy")
                arrays[f"{algorithm}_{size}"] = values

                column_means = values.mean(axis=0)
                if experiment in one_minus_experiments:
                    y = np.mean(1 - column_means)
                else:
                    y = np.mean(column_means)

                # Rows are built directly from the loop variables instead of
                # re-parsing the dictionary key, so algorithm names containing
                # "_" cannot break the bookkeeping.
                records.append({
                    "Nodes": int(size),
                    "Algorithm": algorithm,
                    "y": y,
                    "Experiment": experiment,
                    "Type": graph_name,
                })
        arrays_by_experiment[experiment] = arrays

    frame = pd.DataFrame(records, columns=["Nodes", "Algorithm", "y", "Experiment", "Type"])
    return frame, arrays_by_experiment


# Barabasi-Albert graphs
ba_df, _ = load_size_results("ba", "Barabasi-Albert")

# Watts-Strogatz graphs; `experiment_data` ends up holding the WS arrays.
ws_df, experiment_data = load_size_results("ws", "Watts-Strogatz")

# DataFrame.append was removed in pandas 2.0. pd.concat stacks the two frames
# and, like append without ignore_index, keeps each frame's own row labels.
df = pd.concat([ba_df, ws_df])

In [None]:
""" Change colors for algorithms """
# Raw identifiers -> display names used in the figures. Applied to the whole
# frame, so it covers both the "Algorithm" and the "Experiment" column.
replace_dict = {
    "line": "LINE",
    "hope": "HOPE",
    "sdne": "SDNE",
    "graphsage": "GraphSAGE",
    "node2vec": "node2vec",
    "aligned_cossim": "Aligned Cosine Similarity",
    "20nn_overlap": "20-NN Overlap",
}

# Colour per algorithm display name (passed as `palette` to seaborn).
# NOTE(review): the HOPE entry previously had a second hex code (#612158) fused
# into it across a line break, which left the string literal unterminated.
# "#7A6FAC" is assumed to be the intended colour -- confirm.
color_dict = {
    "LINE": "#E30066",
    "HOPE": "#7A6FAC",
    "SDNE": "#F6A800",
    "GraphSAGE": "#00549F",
    "node2vec": "#57AB27",
}

rdf = df.replace(to_replace=replace_dict)
# Categorical with an explicit category list, so sorting by "Algorithm"
# follows this order instead of alphabetical order.
rdf["Algorithm"] = pd.Categorical(rdf["Algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"])
display(rdf.head())

In [None]:
# Widths come from the scale_to_latex helpers imported at the top.
columnwidth = get_columnwidth()
textwidth = get_textwidth()

# Gray levels used for the axes edges and the tick colours.
light_gray = ".8"
dark_gray = ".15"

# Small fonts and thin ticks for the figure.
size_plot_rc = {
    # colours
    "axes.edgecolor": light_gray,
    "xtick.color": dark_gray,
    "ytick.color": dark_gray,
    "xtick.bottom": True,
    # font sizes
    "font.size": 8,
    "axes.titlesize": 6,
    "axes.labelsize": 6,
    "xtick.labelsize": 6,
    "ytick.labelsize": 6,
    "legend.fontsize": 5.7,
    # line widths
    "axes.linewidth": 1,
    "lines.linewidth": 1.25,
    "xtick.minor.width": 0.5,
    "xtick.major.width": 0.5,
    "ytick.minor.width": 0.5,
    "ytick.major.width": 0.5,
    # tick lengths
    "xtick.major.size": 3,
    "ytick.major.size": 3,
    "xtick.minor.size": 2,
    "ytick.minor.size": 2,
}
sns.set(context="notebook", style="ticks", font_scale=1, rc=size_plot_rc)

width, height, aspect = get_figsize(textwidth, wf=0.5 / 2)
print(width, height, aspect)

# Rotation of the x tick labels, in degrees.
angle = 40
rdf = rdf.sort_values(by=["Algorithm", "Type"])

# One panel per graph type, one line per algorithm.
cossim_rows = rdf[rdf.Experiment == "Aligned Cosine Similarity"]
g = sns.catplot(
    data=cossim_rows,
    x="Nodes",
    y="y",
    hue="Algorithm",
    col="Type",
    kind="point",
    palette=color_dict,
    scale=0.5,
    height=width,
    aspect=1,
    legend=False,
)
g.set_titles("{col_name}")
g.set_xlabels("Number of Nodes")
g.set_ylabels("Mean of Mean Similarity")

# A single legend, attached to the second panel and pushed below the axes.
g.axes[0, 1].legend(
    loc='upper center',
    bbox_to_anchor=(-0.2, -0.45),
    fancybox=False,
    shadow=False,
    ncol=5,
)

for ax in g.axes[0]:
    ax.set_ylim(0)  # y axis starts at zero in every panel
    ax.set_xticklabels(ax.get_xticklabels(), rotation=angle, horizontalalignment='right')

g.savefig("synth_plots/sizes_max64k_cossim.pdf")

### Plots for changing density

In [None]:
# Density tags as they appear in the result file names.
densities = [
    "00025",
    "0005",
    "001",
    "002",
    "005",
    "01",
    "02",
    "05",
    "1",
]
# File-name tag -> numeric density value.
d_to_float = {
    "00025": 0.00025,
    "0005": 0.0005,
    "001": 0.001,
    "002": 0.002,
    "005": 0.005,
    "01": 0.01,
    "02": 0.02,
    "05": 0.05,
    "1": 0.1,
}

# Not read by any of the visible cells; kept in case another cell relies on it.
info_frames = {}


def load_density_results(graph_code, graph_name):
    """Load the N8000 result arrays of one graph type and summarise them.

    For every active experiment, density and algorithm the file
    ``results/{algorithm}_{graph_code}_N8000_d{density}_{experiment}.npy`` is
    read and reduced to a single number ``y``.

    Parameters
    ----------
    graph_code : str
        Graph tag used inside the file names (e.g. "ba" or "ws").
    graph_name : str
        Label written to the "Type" column of the returned frame.

    Returns
    -------
    frame : pandas.DataFrame
        One row per loaded file with the columns "Density", "Algorithm", "y",
        "Experiment" and "Type".
    arrays_by_experiment : dict
        ``{experiment: {"<algorithm>_<density>": ndarray}}`` holding the raw
        arrays.

    Notes
    -----
    ``np.load`` raises if an expected result file is missing or unreadable.
    """
    # Experiments whose per-column means are turned into `1 - mean` before the
    # final average is taken.
    one_minus_experiments = ("aligned_cossim", "linproc_cossim")

    arrays_by_experiment = {}
    records = []
    for experiment in experiments:
        arrays = {}
        for density in densities:
            for algorithm in algorithms:
                values = np.load(f"results/{algorithm}_{graph_code}_N8000_d{density}_{experiment}.npy")
                arrays[f"{algorithm}_{density}"] = values

                column_means = values.mean(axis=0)
                if experiment in one_minus_experiments:
                    y = np.mean(1 - column_means)
                else:
                    y = np.mean(column_means)

                # Rows are built directly from the loop variables instead of
                # re-parsing the dictionary key, so algorithm names containing
                # "_" cannot break the bookkeeping.
                records.append({
                    "Density": float(d_to_float[density]),
                    "Algorithm": algorithm,
                    "y": y,
                    "Experiment": experiment,
                    "Type": graph_name,
                })
        arrays_by_experiment[experiment] = arrays

    frame = pd.DataFrame(records, columns=["Density", "Algorithm", "y", "Experiment", "Type"])
    return frame, arrays_by_experiment


# Barabasi-Albert graphs
ba_df, _ = load_density_results("ba", "Barabasi-Albert")

# Watts-Strogatz graphs; `experiment_data` ends up holding the WS arrays.
ws_df, experiment_data = load_density_results("ws", "Watts-Strogatz")

# DataFrame.append was removed in pandas 2.0. pd.concat stacks the two frames
# and, like append without ignore_index, keeps each frame's own row labels.
df_density = pd.concat([ba_df, ws_df])

In [None]:
# Swap raw identifiers for display names, turn "Algorithm" into a categorical
# with a fixed category order, then sort rows by algorithm and graph type.
df_density = (
    df_density
    .replace(to_replace=replace_dict)
    .assign(
        Algorithm=lambda frame: pd.Categorical(
            frame["Algorithm"], ["HOPE", "LINE", "node2vec", "SDNE", "GraphSAGE"]
        )
    )
    .sort_values(by=["Algorithm", "Type"])
)

In [None]:
# `height` and `aspect` are only referenced by the disabled plots further down;
# the active figure below takes its panel size from `width`.
height = 4
aspect = 1
# Rotation of the x tick labels, in degrees.
angle = 40

# Small fonts and thin ticks for the figure.
density_plot_rc = {
    # colours
    "axes.edgecolor": light_gray,
    "xtick.color": dark_gray,
    "ytick.color": dark_gray,
    "xtick.bottom": True,
    # font sizes
    "font.size": 8,
    "axes.titlesize": 6,
    "axes.labelsize": 6,
    "xtick.labelsize": 6,
    "ytick.labelsize": 6,
    "legend.fontsize": 5.7,
    # line widths
    "axes.linewidth": 1,
    "lines.linewidth": 1.25,
    "xtick.minor.width": 0.5,
    "xtick.major.width": 0.5,
    "ytick.minor.width": 0.5,
    "ytick.major.width": 0.5,
    # tick lengths
    "xtick.major.size": 3,
    "ytick.major.size": 3,
    "xtick.minor.size": 2,
    "ytick.minor.size": 2,
}
sns.set(context="notebook", style="ticks", font_scale=1, rc=density_plot_rc)

# One panel per graph type, one line per algorithm.
cossim_density_rows = df_density[df_density.Experiment == "Aligned Cosine Similarity"]
g = sns.catplot(
    data=cossim_density_rows,
    x="Density",
    y="y",
    hue="Algorithm",
    col="Type",
    kind="point",
    palette=color_dict,
    scale=0.5,
    height=width,
    aspect=1,
    legend=False,
)
g.set_ylabels("Mean of Mean Similarity")
g.set_titles("{col_name}")

# A single legend, attached to the second panel and pushed below the axes.
g.axes[0, 1].legend(
    loc='upper center',
    bbox_to_anchor=(-0.2, -0.45),
    fancybox=False,
    shadow=False,
    ncol=5,
)

for ax in g.axes[0]:
    ax.set_ylim(0)  # y axis starts at zero in every panel
    ax.set_xticklabels(ax.get_xticklabels(), rotation=angle, horizontalalignment='right')

g.savefig("synth_plots/densities_cossim.pdf", bbox_inches="tight")

#g = sns.catplot(x="Density", y="y", data=df_density[df_density.Experiment == "20-NN Overlap"], kind="point", 
#                palette=color_dict,
#                col="Type", hue="Algorithm", height=height, aspect=aspect, legend=False)
#g.set_ylabels("Mean of Average Overlap")
##g.set_xlabels("")
#g.axes[0,1].legend(loc='upper center', bbox_to_anchor=(-0.1, -0.27), fancybox=False, shadow=False, ncol=5)
#g.set_titles("{col_name}")
#for i in range(g.axes.shape[1]):
#    for tick in g.axes[0, i].get_xticklabels():
#        tick.set_rotation(angle)
#g.savefig("synth_plots/densities_overlap.pdf", bbox_inches="tight")#
#
#g = sns.catplot(x="Density", y="y", data=df_density[df_density.Experiment == "linproc_cossim"], kind="point", 
#                palette=color_dict,
#                col="Type", hue="Algorithm", height=height, aspect=aspect, legend=False)
#g.set_ylabels("Mean LinProc")
##g.set_xlabels("")
#g.axes[0,1].legend(loc='upper center', bbox_to_anchor=(-0.1, -0.27), fancybox=False, shadow=False, ncol=5)
#g.set_titles("{col_name}")
#for i in range(g.axes.shape[1]):
#    for tick in g.axes[0, i].get_xticklabels():
#        tick.set_rotation(angle)
#g.savefig("synth_plots/densities_linproc.pdf", bbox_inches="tight")