In [1]:
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import os.path
from statistics import median
import sys
from os import path
sns.set_theme(style="whitegrid")

In [2]:
def get_target_nodes_features(dataset, policy):
    """Summarise every node that appears as an edge target for one policy.

    Files read (all relative to the ``dataset`` directory):

    * ``out_graph.txt`` -- edge list; the first line is skipped and the rest
      is parsed into a directed graph with integer node ids.
    * ``sc_{policy}.csv`` -- needs an ``Edges`` column whose cells evaluate to
      a sequence of edges; the row with index label 0 is dropped.
    * ``nodeQualityFeatures.txt`` -- tab-separated per-node table with at
      least ``nodeId``, ``group`` and ``redNeighborsOutRatio`` columns.

    For every edge ``e`` found in ``Edges``, ``e[1]`` is treated as the target
    node and the shortest directed path from ``e[1]`` to ``e[0]`` in the graph
    is measured (in hops).

    Parameters
    ----------
    dataset : str
        Directory that contains the three input files.
    policy : str
        Policy name used to build the ``sc_{policy}.csv`` file name.

    Returns
    -------
    pandas.DataFrame
        One row per target node, sorted by ``times`` descending, with columns
        ``Nodes``, ``times`` (how often the node was an edge target), every
        column of the node feature file plus ``out_homophily``, and the
        distance columns ``MinDistance``, ``MaxDistance``,
        ``AverageDistance``, ``MedianDistance`` and ``NoExistsDistance``
        (number of edges for which no path could be measured).
        ``MinDistance`` and ``MedianDistance`` are NaN for a node with no
        measurable path at all.
    """
    # The context manager closes the file even if reading fails.
    with open(f"{dataset}/out_graph.txt") as graph_file:
        edge_lines = graph_file.readlines()[1:]
    G = nx.parse_edgelist(edge_lines, nodetype=int, create_using=nx.DiGraph)

    round_stats = pd.read_csv(f"{dataset}/sc_{policy}.csv").drop(axis=0, labels=[0])

    times = dict()          # target node -> number of edges pointing at it
    found_lengths = dict()  # target node -> lengths of the paths that exist
    no_exist = dict()       # target node -> number of edges without a path
    for edges_repr in round_stats["Edges"]:
        # NOTE(review): eval executes whatever is stored in the CSV cell; only
        # run this on trusted files (ast.literal_eval is the safe alternative
        # if the cells are plain literals -- confirm the file format first).
        edges = eval(edges_repr)
        for edge in edges:
            source, target = edge[0], edge[1]
            times[target] = times.get(target, 0) + 1
            lengths = found_lengths.setdefault(target, list())
            no_exist.setdefault(target, 0)
            try:
                lengths.append(nx.shortest_path_length(G, source=target, target=source))
            except (nx.NetworkXNoPath, nx.NodeNotFound):
                # NodeNotFound: an endpoint that never occurs in the edge list
                # is not part of G, so no path can exist either.
                no_exist[target] += 1

    df = pd.DataFrame({
        "Nodes": list(times.keys()),
        "times": list(times.values())
    })

    nodes_info = pd.read_csv(f"{dataset}/nodeQualityFeatures.txt", sep="\t")
    # Use the ratio as-is for group 1 and its complement for any other group.
    nodes_info['out_homophily'] = np.where(
        nodes_info['group'] == 1,
        nodes_info['redNeighborsOutRatio'],
        1 - nodes_info['redNeighborsOutRatio'],
    )
    df = df.join(nodes_info.set_index("nodeId"), on="Nodes")

    data = list()
    for node, count in times.items():
        lengths = found_lengths[node]
        data.append([
            node,
            # NaN instead of a sentinel when not a single path was found.
            min(lengths) if lengths else float("NaN"),
            # NOTE(review): stays 0 when no path was found -- confirm intended.
            max(lengths) if lengths else 0,
            # NOTE(review): the denominator counts edges without a path too,
            # so those edges pull the average towards 0 -- confirm intended.
            round(sum(lengths) / count, 3),
            median(lengths) if lengths else float("NaN"),
            no_exist[node],
        ])
    df_d = pd.DataFrame(data, columns=['Nodes', 'MinDistance', "MaxDistance", "AverageDistance", "MedianDistance", "NoExistsDistance"])
    df = df.join(df_d.set_index("Nodes"), on="Nodes")

    df = df.sort_values(axis=0, by="times", ascending=False)

    return df

In [3]:
# One feature table per (dataset, policy) pair. Each call re-reads the dataset
# files and runs one shortest-path query per edge, so every pair is computed
# exactly once.
# NOTE(review): dblp_n2v_n2v / dblp_fair_fair / dblp_h_h were built from the
# very same ("dblp_course", policy) arguments as their dblp_* counterparts;
# they are kept as independent copies -- confirm no other dataset was meant.
books_n2v = get_target_nodes_features("books", "node2vec")
blogs_n2v = get_target_nodes_features("blogs", "node2vec")
dblp_n2v = get_target_nodes_features("dblp_course", "node2vec")
dblp_n2v_n2v = dblp_n2v.copy()
twitter_n2v = get_target_nodes_features("twitter", "node2vec")

books_fair = get_target_nodes_features("books", "fair")
blogs_fair = get_target_nodes_features("blogs", "fair")
dblp_fair = get_target_nodes_features("dblp_course", "fair")
dblp_fair_fair = dblp_fair.copy()
twitter_fair = get_target_nodes_features("twitter", "fair")

books_h = get_target_nodes_features("books", "hybrid_node2vec")
blogs_h = get_target_nodes_features("blogs", "hybrid_node2vec")
dblp_h = get_target_nodes_features("dblp_course", "hybrid_node2vec")
dblp_h_h = dblp_h.copy()
twitter_h = get_target_nodes_features("twitter", "hybrid_node2vec")

books_b = get_target_nodes_features("books", "dyadic_fair")
blogs_b = get_target_nodes_features("blogs", "dyadic_fair")
dblp_b = get_target_nodes_features("dblp_course", "dyadic_fair")
twitter_b = get_target_nodes_features("twitter", "dyadic_fair")

books_hb = get_target_nodes_features("books", "hybrid_balanced_node2vec")
blogs_hb = get_target_nodes_features("blogs", "hybrid_balanced_node2vec")
dblp_hb = get_target_nodes_features("dblp_course", "hybrid_balanced_node2vec")
twitter_hb = get_target_nodes_features("twitter", "hybrid_balanced_node2vec")

In [9]:
def _print_latex_row(label, top_nodes):
    r"""Print a ``\hline`` line followed by one LaTeX table row for ``top_nodes``.

    The row holds, in order and formatted with six decimals:

    * the mean of ``AverageDistance``, ``MedianDistance``, ``MaxDistance``
      and ``NoExistsDistance``;
    * the mean, median (``50%``) and max of ``pagerank``, ``redPagerank``
      and ``out_homophily``;
    * the ``times``-weighted mean of ``group``.

    Parameters
    ----------
    label : str
        Text of the first cell of the row.
    top_nodes : pandas.DataFrame
        Rows to summarise; must contain the columns listed above plus
        ``times`` and ``group``.

    Returns
    -------
    pandas.DataFrame
        The mean/std/min/50%/max rows of ``top_nodes.describe()``.
    """
    # Select the statistics by label so the result does not depend on the
    # row order of describe().
    stats = top_nodes.describe().loc[["mean", "std", "min", "50%", "max"]]

    values = [
        stats[column]["mean"]
        for column in ("AverageDistance", "MedianDistance", "MaxDistance", "NoExistsDistance")
    ]
    for column in ("pagerank", "redPagerank", "out_homophily"):
        values.extend(stats[column][stat] for stat in ("mean", "50%", "max"))
    values.append(sum((top_nodes.times * top_nodes.group) / sum(top_nodes.times)))

    print("\\hline")
    print(" & ".join([label] + [f"{value:.6f}" for value in values]) + " \\\\")
    return stats


# Books.
# NOTE(review): the cut-off on `times` differs per policy (6 vs 5) -- confirm
# these thresholds are intended.
books_top = books_n2v.loc[books_n2v['times'] >= 6]
temp = _print_latex_row("Node2vec", books_top)

books_top = books_fair.loc[books_fair['times'] >= 5]
temp = _print_latex_row("Fair", books_top)

\hline
Node2vec & 2.989857 & 3.071429 & 3.714286 & 0.000000 & 0.019558 & 0.018066 & 0.028284 & 0.352294 & 0.030159 & 0.966886 & 0.919048 & 1.000000 & 1.000000 & 0.279070 \\
\hline
Fair & 2.568500 & 2.400000 & 3.300000 & 0.000000 & 0.007763 & 0.007787 & 0.010310 & 0.969345 & 0.969804 & 0.973552 & 1.000000 & 1.000000 & 1.000000 & 1.000000 \\
