In [7]:
import ast
import os.path
import sys
from collections import Counter
from os import path

import networkx as nx
import pandas as pd

# --- Configuration -----------------------------------------------------------
# Directory (relative to the notebook) that holds every input file read below.
dataset = "books"
# Score-file stems; the stats loop reads "<dataset>/sc_<stem>.csv" for each one.
scores_files = ["fair", "hybrid_node2vec", "dyadic_fair", "hybrid_balanced_node2vec", "node2vec", "fairwalk", "random"]

# --- Per-node tables ---------------------------------------------------------
red_pr = pd.read_csv(dataset + "/red_absorbing_probabilities.csv")
pr = pd.read_csv(dataset + "/out_pagerank.csv")
# The community file is whitespace separated; its first line is skipped and the
# two columns are named explicitly.
com = pd.read_csv(dataset + "/out_community.txt", names=["Nodes", "Community"], sep='\\s+', skiprows=1)

# --- Graph -------------------------------------------------------------------
# `with` guarantees the handle is closed even if reading fails part-way.
with open(dataset + "/out_graph.txt") as graph_file:
    lines = graph_file.readlines()
# The first line of the graph file is not an edge, so it is dropped before parsing.
G = nx.parse_edgelist(lines[1:], nodetype=int, create_using=nx.DiGraph)

# --- Optional node names -----------------------------------------------------
# `names` stays None when the dataset ships no names file; downstream code
# checks for that.
names = None
if path.exists(dataset + "/names.txt"):
    names = pd.read_csv(dataset + "/names.txt", sep='\t')
    # DataFrame.info() prints the summary itself and returns None, so it is
    # called directly rather than wrapped in print() (which emitted "None").
    names.info()

red_pr.info()
pr.info()
com.info()

# Per-node lookup tables, built once so that every row below is a single index
# lookup instead of a full-frame boolean filter. Keeping the first row per node
# mirrors the original `.iloc[0]` selection.
community_of = com.drop_duplicates("Nodes").set_index("Nodes")["Community"]
red_pagerank_of = red_pr.drop_duplicates("Nodes").set_index("Nodes")["Red Pagerank"]
pagerank_of = pr.drop_duplicates("Nodes").set_index("Nodes")["Pagerank"]
author_of = (
    names.drop_duplicates("Node_id").set_index("Node_id")["Author_Name"]
    if names is not None
    else None
)

stats_columns = ['id', 'name', 'appearances', 'in_degree', 'out_degree', 'community', 'red_pr', 'pr',
                 'min_path_length', 'max_path_length', 'average_path_length', 'no_paths']

# For every score file: count how often each node is the second element of a
# listed edge, measure shortest-path lengths in G for those edges, then write
# the per-node summary to "<dataset>/<stem>_stats.csv" and print it as LaTeX.
for sf in scores_files:
    print(sf)
    scores_df = pd.read_csv(f"{dataset}/sc_{sf}.csv")

    # node -> {"appearances": int, "lengths": [int, ...], "no_paths": int}
    # (a single dict of records; the original used dicts named min/sum/max,
    # which shadowed the builtins for every later cell in the kernel).
    node_stats = {}
    for _, row in scores_df.iterrows():
        # Rows flagged with Rounds == -1 are skipped.
        # NOTE(review): presumably a sentinel/baseline row - confirm.
        if row["Rounds"] == -1:
            continue
        # literal_eval only accepts Python literals, so a malformed or hostile
        # cell cannot execute code the way eval() could.
        for edge in ast.literal_eval(str(row["Edges"])):
            source_node, target_node = edge[0], edge[1]
            entry = node_stats.setdefault(
                target_node, {"appearances": 0, "lengths": [], "no_paths": 0}
            )
            entry["appearances"] += 1
            try:
                # The path is measured from the edge's second node to its first,
                # exactly as in the original code.
                # NOTE(review): confirm this direction is intended for a DiGraph.
                entry["lengths"].append(
                    nx.shortest_path_length(G, source=target_node, target=source_node)
                )
            except (nx.NetworkXNoPath, nx.NodeNotFound):
                # Only "no route" / "unknown node" count as a missing path;
                # any other error now surfaces instead of being swallowed.
                entry["no_paths"] += 1

    rows = []
    for node, entry in node_stats.items():
        lengths = entry["lengths"]
        # Path statistics are taken over the paths that actually exist. With no
        # path at all they are NaN rather than a sys.maxsize / 0 sentinel, and
        # the mean is no longer diluted by unreachable pairs.
        if lengths:
            shortest, longest = min(lengths), max(lengths)
            average = round(sum(lengths) / len(lengths), 3)
        else:
            shortest = longest = average = float("nan")
        try:
            rows.append([
                node,
                author_of.loc[node] if author_of is not None else None,
                entry["appearances"],
                # Index form on both views: a node missing from G raises
                # KeyError and is reported below.
                G.in_degree[node],
                G.out_degree[node],
                'red' if community_of.loc[node] == 1 else 'blue',
                round(red_pagerank_of.loc[node], 5),
                round(pagerank_of.loc[node], 5),
                shortest,
                longest,
                average,
                entry["no_paths"],
            ])
        except KeyError as exc:
            # A node absent from the graph or from one of the lookup tables is
            # reported and left out of the table, as before.
            print(f"Skipping node {node}: no entry for {exc}")

    stats_df = pd.DataFrame(rows, columns=stats_columns).sort_values(by="appearances", ascending=False)
    stats_df.to_csv(f"{dataset}/{sf}_stats.csv", index=False)
    print(stats_df.to_latex(index=False))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92 entries, 0 to 91
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Nodes         92 non-null     int64  
 1   Red Pagerank  92 non-null     float64
dtypes: float64(1), int64(1)
memory usage: 1.6 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92 entries, 0 to 91
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Nodes     92 non-null     int64  
 1   Pagerank  92 non-null     float64
dtypes: float64(1), int64(1)
memory usage: 1.6 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92 entries, 0 to 91
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Nodes      92 non-null     int64
 1   Community  92 non-null     int64
dtypes: int64(2)
memory usage: 1.6 KB
None
fair
\begin{tabular}{rlrrrlrrrrrr}
\toprule
 id &  name &  ap

\begin{tabular}{rlrrrlrrrrrr}
\toprule
 id &  name &  appearances &  in\_degree &  out\_degree & community &   red\_pr &       pr &  min\_path\_length &  max\_path\_length &  average\_path\_length &  no\_paths \\
\midrule
 83 &  None &            3 &         21 &          21 &       red &  0.94863 &  0.02513 &                4 &                4 &                4.000 &         0 \\
 32 &  None &            3 &         22 &          22 &       red &  0.95536 &  0.02627 &                4 &                4 &                4.000 &         0 \\
 21 &  None &            3 &          9 &           9 &       red &  0.85264 &  0.01137 &                4 &                4 &                4.000 &         0 \\
 76 &  None &            3 &          5 &           5 &       red &  0.77275 &  0.00822 &                4 &                4 &                4.000 &         0 \\
 79 &  None &            3 &          4 &           4 &       red &  0.76755 &  0.00659 &                4 &              

\begin{tabular}{rlrrrlrrrrrr}
\toprule
 id &  name &  appearances &  in\_degree &  out\_degree & community &   red\_pr &       pr &  min\_path\_length &  max\_path\_length &  average\_path\_length &  no\_paths \\
\midrule
 19 &  None &            4 &          9 &           9 &      blue &  0.02045 &  0.01140 &                2 &                3 &                2.750 &         0 \\
 36 &  None &            4 &          9 &           9 &      blue &  0.07037 &  0.01181 &                2 &                3 &                2.250 &         0 \\
 75 &  None &            3 &          7 &           7 &       red &  0.75811 &  0.01052 &                3 &                4 &                3.667 &         0 \\
 42 &  None &            3 &         14 &          14 &      blue &  0.07564 &  0.01737 &                2 &                2 &                2.000 &         0 \\
 84 &  None &            3 &         16 &          16 &       red &  0.94028 &  0.01887 &                2 &              

In [44]:
# Scratch check: pair every algorithm label with a placeholder column value and
# display the resulting mapping as the cell output.
algo_names = ["FREC", "E-FREC", "PREC", "E-PREC", "n2v", "FairWalk"]

x = {"A": [1, 2, 3], "B": [2, 3, 4]}
df = pd.DataFrame(x, columns=["A", "B"])

columns_ = list(range(1, 7))
fruit_dictionary = {label: column for label, column in zip(algo_names, columns_)}
fruit_dictionary

{'FREC': 1, 'E-FREC': 2, 'PREC': 3, 'E-PREC': 4, 'n2v': 5, 'FairWalk': 6}

In [50]:
pair_files = ["sc_fair.csv", "sc_hybrid_node2vec.csv", "sc_dyadic_fair.csv", "sc_hybrid_balanced_node2vec.csv",
              "sc_node2vec.csv", "sc_fairwalk.csv"]
algo_names = ["FREC", "E-FREC", "PREC", "E-PREC", "n2v", "FairWalk"]
def table_best_target_nodes(dataset, sep=" "):
    rounds = 10
    nodes = dict()
    columns_ = list()
    
    names = None
    if path.exists(dataset + "/names.txt"):
        names = pd.read_csv(dataset + "/names.txt", sep='\t')
    i = 0
    for sf in pair_files:
        dfs = dict()
        df = pd.read_csv(f"{dataset}/{sf}")
        edges = list()
        for r in range(1, rounds + 1):
            es = eval(df["Edges"][r])
            edges.extend(es)
        for e in edges:
            tgt = names[names['Node_id'] == e[1]].iloc[0]["Author_Name"]
            if tgt in dfs:
                dfs[tgt]+=1
            else:
                dfs[tgt] = 1
        dfs = dict(sorted(dfs.items(), key=lambda item: item[1], reverse=True))
        sorted_keys = sorted(dfs, key=dfs.get, reverse=True)
        col = list()
        c = 0
        for k in sorted_keys:
            col.append(k + f" ({str(dfs[k])})")
            c+=1
            if c==20:
                break
        columns_.append(col)
        i+=1
    y = dict(zip(algo_names, columns_))
    df = pd.DataFrame(y, columns = algo_names)
    print(df.to_latex(index=False))

In [52]:
table_best_target_nodes("dblp_new_pubs")

\begin{tabular}{llllll}
\toprule
                         FREC &                     E-FREC &                     PREC &                      E-PREC &                              n2v &              FairWalk \\
\midrule
         Xiaoqing Huang (937) &           Jincai Lai (935) &     Xiaoqing Huang (667) &            Jincai Lai (704) &         Daniel D. Corkill (1033) &   Qinghua Zheng (620) \\
               Pin Xiao (875) &       Xiaoqing Huang (935) &           Pin Xiao (617) &        Xiaoqing Huang (665) &             Amanda Gentzel (980) &        Lina Yao (421) \\
          Corbin Rosset (747) &            Haoyu Han (872) &      Corbin Rosset (519) &             Haoyu Han (622) &            Fotis Psallidas (970) &   Sen Wang 0001 (388) \\
         Gale Yan Huang (702) &             Pin Xiao (858) &     Gale Yan Huang (489) &              Pin Xiao (604) &             Lisa Friedland (943) &       Yang Chen (365) \\
              Haoyu Han (599) &         Tianqiao Liu (721) &       X

In [51]:
table_best_target_nodes("dblp_new_gender")

\begin{tabular}{llllll}
\toprule
                          FREC &                   E-FREC &                     PREC &                   E-PREC &                              n2v &              FairWalk \\
\midrule
             Limeng Cui (1444) &        Limeng Cui (1121) &         Limeng Cui (956) &         Limeng Cui (710) &           Fotis Psallidas (1041) &   Qinghua Zheng (780) \\
            Yi-Feng Lan (1257) &       Yi-Feng Lan (1058) &        Yi-Feng Lan (856) &        Yi-Feng Lan (704) &             Amanda Gentzel (940) &        Lina Yao (564) \\
           Hsu-Chao Lai (1058) &       Dongwon Lee (1004) &       Hsu-Chao Lai (715) &        Dongwon Lee (671) &             Lisa Friedland (896) &   Sen Wang 0001 (509) \\
            Dongwon Lee (1012) &  Wush Chi-Hsuan Wu (931) &        Dongwon Lee (679) &  Wush Chi-Hsuan Wu (624) &             Subru Krishnan (827) &       Yang Chen (468) \\
            Haeseung Seo (915) &       Haeseung Seo (915) &       Haeseung Seo (613) &  