In [9]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pulp
from tqdm import tqdm

import rankedDelegation as rd
np.random.seed(42)

In [10]:
# Root directory holding the raw network datasets.  The default is the folder
# the notebook was originally run from; set the RANKED_DELEGATION_DATA_DIR
# environment variable to use another location without editing this cell.
# The value must end with a path separator because load_data() builds file
# names by plain string concatenation (path + dataset["path"]).
path = os.environ.get(
    "RANKED_DELEGATION_DATA_DIR",
    "C:\\Users\\Theo Delemazure\\Documents\\ENS\\Stage Berlin\\data\\",
)

# Datasets selectable through load_data(index) / run_election(index):
#   "path" is appended to `path` above to locate the edge list,
#   "name" is the label inserted in the file names written by save_election().
list_datasets = [{"path": "facebook0\\out.facebook-wosn-links", "name": "facebook"}]

# Edge lists that are currently disabled (kept for reference).  They are bare
# relative paths, not {"path": ..., "name": ...} entries, so they cannot be
# pasted into list_datasets as-is.
# NOTE(review): load_data() splits columns on a single space; the .csv files
# presumably need a different delimiter -- confirm before enabling them.
#   "deezer\\HR_edges.csv",
#   "deezer\\HU_edges.csv",
#   "deezer\\RO_edges.csv",
#   "facebook\\artist_edges.csv",
#   "facebook\\athletes_edges.csv",
#   "facebook\\company_edges.csv",
#   "facebook\\government_edges.csv",
#   "facebook\\new_sites_edges.csv",
#   "facebook\\politician_edges.csv",
#   "facebook\\public_figure_edges.csv",
#   "facebook\\tvshow_edges.csv",
#   "facebook_large\\musae_facebook_edges.csv",
#   "github\\musae_git_edges.csv",
#   "twitch\\DE\\musae_DE_edges.csv",
#   "twitch\\ENGB\\musae_ENGB_edges.csv",
#   "twitch\\ES\\musae_ES_edges.csv",
#   "twitch\\FR\\musae_FR_edges.csv",
#   "twitch\\PTBR\\musae_PTBR_edges.csv",
#   "twitch\\RU\\musae_RU_edges.csv",

def load_data(index):
    """Load one edge list and count the out-/in-degree of every node.

    Parameters
    ----------
    index : int
        Position of the dataset in the module-level ``list_datasets``.

    Returns
    -------
    dict
        ``"edges"``: integer array holding the first two columns of the file,
        one row per edge.
        ``"out"`` / ``"in"``: float arrays of length ``nb_nodes``; entry ``k``
        counts how many non-self-loop rows have ``k`` as first / second
        endpoint.
        ``"nb_nodes"``: largest node id found in the file, plus one.
    """
    full_path = path + list_datasets[index]["path"]
    # Only the two endpoint columns are kept; further columns are dropped.
    list_edges = np.genfromtxt(full_path, delimiter=" ", dtype=int)[:, :2]
    # Node ids are used directly as array indices, hence "max id + 1" slots.
    nb_nodes = int(np.max(list_edges)) + 1
    count_out = np.zeros(nb_nodes)
    count_in = np.zeros(nb_nodes)

    for (a, b) in tqdm(list_edges):
        # Self-loops do not contribute to either degree.
        if a != b:
            count_out[a] += 1
            count_in[b] += 1

    return {"edges": list_edges, "out": count_out, "in": count_in, "nb_nodes": nb_nodes}

In [11]:
def get_friends(list_edges):
    """Build undirected adjacency lists from an edge list.

    Parameters
    ----------
    list_edges : iterable of pairs
        Each item unpacks into the two endpoints ``(a, b)`` of an edge.

    Returns
    -------
    dict
        Maps every node that is an endpoint of at least one non-self-loop edge
        to the list of its neighbours, in the order the edges were read.  Each
        edge adds ``b`` to the list of ``a`` and ``a`` to the list of ``b``;
        repeated edges therefore produce repeated neighbours.
    """
    friends = {}
    for a, b in list_edges:
        if a == b:
            # Ignore self-loops, as load_data() does when counting degrees.
            # Keeping them would list a node twice among its own neighbours,
            # so create_election() could rank a voter as its own delegatee.
            continue
        friends.setdefault(a, []).append(b)
        friends.setdefault(b, []).append(a)
    return friends


In [12]:
def distrib_friends(friends):
    """Show a log-log histogram of the node degrees.

    Parameters
    ----------
    friends : dict
        Maps a node to the list of its neighbours (the structure returned by
        ``get_friends``).  The degree of a node is the length of its list.

    Raises
    ------
    ValueError
        If ``friends`` is empty: there is no degree to plot.
    """
    degrees = [len(neighbours) for neighbours in friends.values()]
    if not degrees:
        # Fail before a figure is opened, so no empty figure is left behind.
        raise ValueError("cannot plot the degree distribution of an empty graph")

    plt.figure(figsize=(15, 5))
    # Use as many bins as the largest degree (at least one bin).
    plt.hist(degrees, bins=max(1, int(np.max(degrees))))
    plt.yscale("log")
    plt.xscale("log")
    plt.xlabel("degree")
    plt.ylabel("number of nodes")
    plt.title("Distribution of degrees")
    plt.show()
    

In [13]:
def list_common_friends(a, friends):
    """Score every neighbour of ``a`` by the number of friends shared with ``a``.

    Parameters
    ----------
    a : hashable
        Node whose neighbours are scored; must be a key of ``friends``.
    friends : dict
        Maps a node to the list of its neighbours.

    Returns
    -------
    numpy.ndarray
        Float array aligned with ``friends[a]``.  Entry ``i`` equals one plus
        the number of entries of ``friends[friends[a][i]]`` that also occur in
        ``friends[a]``.  The leading one keeps every score strictly positive.

    Raises
    ------
    KeyError
        If ``a`` (or one of its neighbours) is not a key of ``friends``.
    """
    neighbours = friends[a]
    # Set membership is O(1); scanning the list for every candidate made the
    # whole computation quadratic in the degree of ``a``.
    neighbour_set = set(neighbours)
    scores = [
        1 + sum(1 for b in friends[x] if b in neighbour_set)
        for x in neighbours
    ]
    return np.array(scores, dtype=float)

In [14]:
def create_election(data, p=0.8):
    """Build a random election with ranked delegations on top of a graph.

    Parameters
    ----------
    data : dict
        Needs the keys ``"edges"`` (edge list passed to ``get_friends``) and
        ``"nb_nodes"`` (number of voters to create), as produced by
        ``load_data``.
    p : float, default 0.8
        Probability that a voter is created without a vote (``rd.Voter()``).
        With the remaining probability the voter is created with ``vote=1`` or
        ``vote=2``, each being equally likely.

    Returns
    -------
    rd.Election
        The election after every voter has been added and ``delegate`` has
        been called on each of them.

    Notes
    -----
    All randomness comes from the global ``np.random`` state.
    Assumes ``voter.id`` equals the position of the voter in
    ``e.list_voters`` and the node id in the graph -- TODO confirm against
    ``rd.Election.add_voter``.
    """
    list_edges = data["edges"]
    n = data["nb_nodes"]
    e = rd.Election()

    friends = get_friends(list_edges)

    # We add the voters and pick the casting voters
    for _ in tqdm(range(n)):
        x = np.random.choice([0, 1, 2], p=[p, (1 - p) / 2, (1 - p) / 2])
        if x == 0:
            v = rd.Voter()
        else:
            v = rd.Voter(vote=x)
        e.add_voter(v)

    L = e.list_voters
    delegatees_list = [[] for _ in range(n)]

    for r_1 in tqdm(range(n)):
        # Voters that already hold a vote, and nodes without any edge, keep
        # an empty ranking.
        if L[r_1].vote is not None or r_1 not in friends:
            continue

        delegatees = friends[r_1]
        # Sampling weights: one plus the number of friends shared with r_1,
        # normalised into a probability vector.
        proba = list_common_friends(r_1, friends)
        proba /= np.sum(proba)
        # Weighted draw without replacement of all neighbours: the draw order
        # is the preference order of the voter.
        ordering = np.random.choice(delegatees, p=proba, replace=False, size=len(proba))
        delegatees_list[r_1] = [L[ind] for ind in ordering]

    for voter in L:
        voter.delegate(delegatees_list[voter.id])

    return e


In [15]:
def save_election(e, results, title=""):
    """Write the delegation graph, the voter table and the results to ``Save/``.

    Three files sharing the same integer Unix timestamp are written:

    * ``Save/edges_<title>_<timestamp>.csv`` -- one row per delegation:
      voter id, delegatee id, rank of the delegatee (starting at 1).
    * ``Save/nodes_<title>_<timestamp>.csv`` -- one row per voter: voter id
      and 1 if the voter's ``vote`` is not ``None``, else 0.
    * ``Save/results_<title>_<timestamp>.csv`` -- ``results.to_csv(...)``.

    The first two files are written by ``np.savetxt`` with its default
    format and delimiter.

    Parameters
    ----------
    e : object
        Must expose ``list_voters``; each voter must expose ``id``, ``vote``
        and ``delegatees`` (an iterable of voters).
    results : object
        Must provide ``to_csv(path)``; ``run_election`` passes a DataFrame.
    title : str, default ""
        Label inserted in the three file names.
    """
    edge_rows = []
    node_rows = []
    timestamp = int(time.time())
    for v in tqdm(e.list_voters):
        for i, d in enumerate(v.delegatees):
            edge_rows.append((v.id, d.id, i + 1))
        node_rows.append((v.id, 1 if v.vote is not None else 0))

    # Create the output folder on demand: a missing directory must not make
    # the results of a long computation unsavable.
    os.makedirs("Save", exist_ok=True)
    np.savetxt("Save/edges_%s_%i.csv" % (title, timestamp), edge_rows)
    np.savetxt("Save/nodes_%s_%i.csv" % (title, timestamp), node_rows)
    results.to_csv("Save/results_%s_%i.csv" % (title, timestamp))

In [16]:
def run_election(index):
    """Run every delegation rule on one dataset and save the metrics.

    The dataset is loaded with ``load_data(index)``, a random election is
    built with ``create_election``, each rule is applied through
    ``e.attribute_gurus(rule)`` and ten attributes of the election are read
    after each rule.  The resulting table (one row per rule, one column per
    metric) is handed to ``save_election`` together with the election.

    Parameters
    ----------
    index : int
        Position of the dataset in the module-level ``list_datasets``.

    Returns
    -------
    None
    """
    data = load_data(index)

    rules = [rd.rules.naive_BFD, rd.rules.minsum, rd.rules.naive_DFD,
             rd.rules.diffusion, rd.rules.lexrank, rd.rules.minSumRank]
    # Row labels, aligned with ``rules``.
    rules_str = ["BFD", "MinSumPath", "DFD", "Diffusion", "LexRank", "MinArb"]

    # (column label, attribute read on the election), in column order.
    metrics = [
        ("Max Rank", "max_rank"),
        ("Max Length", "max_length"),
        ("Avg Length", "mean_length"),
        ("Max Power", "max_power"),
        ("Max representation", "max_representation"),
        ("Power entropy", "power_entropy"),
        ("Max Sum", "max_sum"),
        ("Sum of Rank", "sum_rank"),
        ("Avg Rank", "avg_rank"),
        ("Unpopularity", "unpopularity"),
    ]
    metrics_str = [label for label, _ in metrics]

    # Draw a seed from the current global RNG state, print it and re-seed, so
    # the election built below can be regenerated from the printed value.
    seed = np.random.randint(100000)
    print(seed)
    np.random.seed(seed)

    # Sized from the tables above instead of a hard-coded row count.
    tab_results = np.zeros((len(metrics), len(rules)))
    e = create_election(data)

    # tqdm wraps the list (not the enumerate object) so it knows the total.
    for i, rule in enumerate(tqdm(rules)):
        e.attribute_gurus(rule)
        for j, (_, attribute) in enumerate(metrics):
            tab_results[j, i] = getattr(e, attribute)

    df = pd.DataFrame(tab_results.T, index=rules_str, columns=metrics_str)
    save_election(e, df, list_datasets[index]["name"])

# Run the whole pipeline on the first entry of list_datasets.
# NOTE: slow -- the recorded output reports 19:41:22 for the six rules.
run_election(0)

100%|██████████| 817035/817035 [00:02<00:00, 363436.37it/s]


15795


100%|██████████| 63732/63732 [00:01<00:00, 37095.55it/s]
100%|██████████| 63732/63732 [03:44<00:00, 284.16it/s]  
6it [19:41:22, 11813.73s/it]
100%|██████████| 63732/63732 [00:00<00:00, 105616.26it/s]
