In [None]:
import ast
import os
import subprocess

import igraph as ig
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
import umap
from node2vec import Node2Vec

from ..common import Hope

def saveEmbedding(X, g, fn='_embed'):
    """Write an embedding matrix to a space-separated text file.

    One line is written per row of ``X``: the ``'name'`` attribute of the
    vertex at the same position in ``g.vs``, followed by every value of the
    row. Each field (including the last one) is followed by a single space,
    so every line ends with a trailing space before the newline.

    Parameters
    ----------
    X : 2-D array-like exposing ``.shape`` and ``X[i][j]`` indexing.
    g : object whose ``g.vs[i]['name']`` is a string (e.g. an igraph Graph).
    fn : path of the file to create or overwrite.
    """
    with open(fn, 'w') as out:
        n_rows, n_cols = X.shape[0], X.shape[1]
        for row in range(n_rows):
            values = ''.join(str(X[row][col]) + ' ' for col in range(n_cols))
            out.write(g.vs[row]['name'] + ' ' + values + '\n')

In [None]:
# Directory that holds the embedding files; exist_ok makes the cell re-runnable.
os.makedirs(name="embeddings", exist_ok=True)

In [None]:
# Generate the embedding files for every graph listed below.
# The edge list path is derived from the graph name, so adding a second graph
# to the list embeds that graph instead of silently re-embedding football.
for graph in ["football"]:
    edgelist_path = f"{graph}.edgelist"

    # --- HOPE embeddings (igraph graph) -> embeddings/hope-<graph>-<sim>-<dim>
    # Read_Edgelist is used with the vertex names set to the vertex indices.
    hope_graph = ig.Graph.Read_Edgelist(edgelist_path, directed=False)
    hope_graph.vs['name'] = [str(i) for i in range(hope_graph.vcount())]
    for dim in range(2, 33, 2):
        # NOTE(review): only the "cn" similarity is generated here, so no
        # hope-<graph>-ppr-* / hope-<graph>-aa-* files are produced.
        for sim in ["cn"]:
            X = Hope(hope_graph, sim=sim, dim=dim)
            saveEmbedding(X, hope_graph, fn=f'embeddings/hope-{graph}-{sim}-{dim}')

    # --- node2vec embeddings (networkx graph) -> embeddings/n2v-<graph>-p<p>-<dim>
    n2v_graph = nx.read_edgelist(edgelist_path)
    for dim in range(2, 33, 2):
        # q is always the reciprocal of p.
        for p_val in [1, 0.11, 9]:
            # NOTE(review): seed is fixed but workers=4; confirm that the
            # resulting embeddings are reproducible across runs.
            node2vec = Node2Vec(n2v_graph, dimensions=dim, p=p_val, q=1/p_val, quiet=True, workers=4, seed=42)
            model = node2vec.fit()
            model.wv.save_word2vec_format(f"embeddings/n2v-{graph}-p{round(p_val,2)}-{dim}")

In [None]:
# Score the embeddings with the CGE command-line tool (Julia) and collect the
# results, one CSV row per embedding, in results_football.csv.
header = "graph,emb,dim,param,best_alpha,best_div,best_div_ext,best_div_int,best_alpha_auc,best_auc,best_auc_errc\n"
# For node2vec, the parameter label is mapped to the p value that appears in
# the embedding file name (n2v-<graph>-p<p>-<dim>).
p_dict = {"cn":1,"ppr":9,"aa":0.11}
with open("results_football.csv","w") as fcsv:
    fcsv.write(header)
    for graph in ['football']:
        for emb in ['hope','n2v']:
            for dim in range(4,31,2):
                for param in ["cn","ppr","aa"]:
                    if emb == "n2v":
                        param = p_dict[param]
                        embedding = f'{emb}-{graph}-p{param}-{dim}'
                    else:
                        embedding = f'{emb}-{graph}-{param}-{dim}'
                    emb_path = f'embeddings/{embedding}'
                    # Not every (emb, param) combination is necessarily on
                    # disk (the generation cell writes HOPE files for "cn"
                    # only); skip the missing ones instead of aborting the
                    # whole run on the first failing subprocess.
                    if not os.path.isfile(emb_path):
                        print(f"skipping {emb_path}: embedding file not found")
                        continue
                    # Argument list instead of a shell string: no shell
                    # parsing of the interpolated path.
                    out = subprocess.check_output([
                        'julia', '../CGE_CLI.jl',
                        '-g', 'football.edgelist',
                        '-c', 'football.ecg',
                        '-e', emb_path,
                        '--seed', '32',
                    ])
                    # The tool's stdout is parsed as a Python literal (a
                    # sequence of at least 7 numbers is expected).
                    # literal_eval replaces eval so that the output of an
                    # external process is never executed as code.
                    out = ast.literal_eval(out.decode('utf-8').strip())
                    if len(out) < 7:
                        raise ValueError(f"unexpected CGE output for {emb_path}: {out!r}")
                    line = ",".join(str(v) for v in (graph, emb, dim, param, *out[:7])) + "\n"
                    fcsv.write(line)
                    print(line)

In [None]:
# Load the scores and express each one relative to the smallest value
# obtained on the same graph (the 0.01 offset is added to both numerator and
# denominator before dividing).
merged = pd.read_csv("results_football.csv")
min_scores = (
    merged.groupby("graph", as_index=False)
    .agg(min_div=("best_div", "min"), min_auc=("best_auc", "min"))
)
merged_min = merged.merge(min_scores, on="graph")
merged_min['normalized_div'] = (merged_min.best_div+0.01)/(merged_min.min_div+0.01)
merged_min['normalized_auc'] = (merged_min.best_auc+0.01)/(merged_min.min_auc+0.01)

for graph in ["football"]:
    graph_scores = merged_min[merged_min.graph == graph]
    fig, ax = plt.subplots()
    # legend="full" gives one size entry per distinct dim. With the default
    # brief legend the size entries are evenly spaced sample values, so the
    # first entry is not guaranteed to be the smallest dim it is labelled with.
    sns.scatterplot(x="normalized_auc", y="normalized_div", hue="emb", size="dim",
                    alpha=0.6, legend="full", data=graph_scores, ax=ax)
    ax.set_title("Football graph")
    ax.set_xlabel("Normalized local score")
    ax.set_ylabel("Normalized global score")
    # Keep a compact legend: hue title + the two embeddings, size title, and
    # only the smallest and largest dimension markers.
    handles, _ = ax.get_legend_handles_labels()
    size_labels = [str(graph_scores["dim"].min()), str(graph_scores["dim"].max())]
    ax.legend(handles=handles[:5]+[handles[-1]],
              labels=["Emb","HOPE","N2V","Dim"] + size_labels,
              loc="upper right")
    fig.savefig(f'{graph}_normalized.pdf', dpi=400)

In [None]:
# Mixed score: sum of the two offset score ratios, each weighted by 0.5.
half_auc = 0.5*(merged_min.best_auc+0.01)/(merged_min.min_auc+0.01)
half_div = 0.5*(merged_min.best_div+0.01)/(merged_min.min_div+0.01)
merged_min['mixed_score'] = half_auc + half_div

In [None]:
# Row(s) whose mixed score equals the smallest mixed score.
lowest_mixed = merged_min.mixed_score.min()
merged_min.loc[merged_min.mixed_score.eq(lowest_mixed)]

In [None]:
# Row(s) whose mixed score equals the largest mixed score.
highest_mixed = merged_min.mixed_score.max()
merged_min.loc[merged_min.mixed_score.eq(highest_mixed)]

In [None]:
# Row(s) whose best_div equals the smallest best_div.
lowest_div = merged_min.best_div.min()
merged_min.loc[merged_min.best_div.eq(lowest_div)]

In [None]:
## Turn an embedding file into a 2-D layout (list of (x, y) tuples).
## Embeddings with more than 2 dimensions are projected with UMAP.
def embed2layout(fn="_embed",n2v=True):
    """Read a space-separated embedding file and return 2-D coordinates.

    Parameters
    ----------
    fn : path of the embedding file; the first field of every row is the key
        the rows are sorted by, the remaining fields are the coordinates.
    n2v : when True the first line of the file is skipped (the node2vec /
        word2vec text format starts with a header line); when False every
        line is read as data.

    Returns
    -------
    list of ``(x, y)`` tuples, one per row, ordered by the first column.
    """
    read_options = {"sep": ' ', "header": None}
    if n2v:
        read_options["skiprows"] = 1
    table = pd.read_csv(fn, **read_options)
    # Columns containing a missing value are dropped (e.g. the empty field a
    # trailing space at the end of each line produces).
    coords = table.dropna(axis=1).sort_values(by=0).iloc[:, 1:].to_numpy()
    if coords.shape[1] > 2:
        # NOTE(review): UMAP is created without a random_state, so the
        # projection is presumably not identical from run to run - confirm.
        coords = umap.UMAP().fit_transform(coords)
    return [(point[0], point[1]) for point in coords]

In [None]:
# Load the graph with vertex names taken from the edge list file.
cfg = ig.Graph.Read_Ncol('football.edgelist', directed=False)
# First column of football.ecg, read as unsigned 16-bit integers.
c = np.loadtxt('football.ecg', dtype='uint16', usecols=0)
# Each vertex gets the entry of `c` at the position given by its (numeric) name.
cfg.vs['community'] = [c[int(vertex_name)] for vertex_name in cfg.vs['name']]

In [None]:
def _plot_embedding_layout(graph, embedding_file, target, n2v=True):
    """Draw ``graph`` with vertices placed at the coordinates of an embedding.

    The layout is read with ``embed2layout(embedding_file, n2v)``, stored in
    the ``'ly'`` vertex attribute (indexed by the integer value of each vertex
    name) and the drawing is written to ``target``.

    Returns whatever ``ig.plot`` returns, so that a call placed last in a
    cell is still shown as the cell output.
    """
    coords = embed2layout(embedding_file, n2v)
    graph.vs['ly'] = [coords[int(v['name'])] for v in graph.vs]
    return ig.plot(graph, target=target, layout=graph.vs['ly'], bbox=(0,0,500,300), vertex_size=8, edge_color='lightgray')


# One colour per community id.
pal = ig.RainbowPalette(n=max(cfg.vs['community'])+1)
cfg.vs['color'] = [pal.get(int(i)) for i in cfg.vs['community']]

# NOTE(review): the embeddings below are hard-coded; presumably they are the
# best / worst rows of the score tables shown earlier - re-check after any
# re-run that changes the scores.
_plot_embedding_layout(cfg, 'embeddings/n2v-football-p0.11-12', 'best_emb_layout.eps')
_plot_embedding_layout(cfg, 'embeddings/n2v-football-p0.11-4', 'worst_emb_layout.eps')
_plot_embedding_layout(cfg, 'embeddings/hope-football-cn-4', 'worst_emb_div_layout.eps', n2v=False)