In [101]:
import pandas as pd
import re
import urllib

In [210]:
def from_preflib(path):
    """
    Download a PrefLib-style text file and split it into two tables.

    The parser assumes the following layout (it does not validate it):

    * line 1 holds the number of alternatives ``n``;
    * the next ``n`` lines are ``<id>,<name>`` pairs;
    * one further line follows and is skipped;
    * every remaining line is ``<count>,<payload>``.

    Parameters
    ----------
    path : str
        Location of the file. Anything ``urllib.request.urlopen`` accepts
        works (``https://``, ``file://``, ...). The text after the last
        ``.`` in ``path`` selects how the payload is parsed:

        * ``toc`` -> ``winners`` / ``losers`` (first item and first
          ``{...}`` group that follows it, braces removed);
        * ``soc`` -> ``rank`` (payload kept as one comma-separated string);
        * anything else -> ``source`` / ``destination`` (payload split on
          commas; it must contain exactly two fields).

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``df_nodes`` with columns ``node_id`` and ``node_name``, and
        ``df_edges`` whose first column is ``voters`` followed by the
        format-specific columns listed above. Every value is left as a
        string and the original line numbers are kept as the index.

    Raises
    ------
    ValueError
        If the file is empty or its first line is not an integer.
    """
    # Imported locally so the submodule is guaranteed to be loaded: a bare
    # ``import urllib`` does not make ``urllib.request`` available by itself.
    from urllib.request import urlopen

    # Close the response deterministically instead of leaking the connection.
    with urlopen(path) as response:
        text = response.read().decode("utf-8")

    # splitlines() also removes the "\r" of CRLF files, which split("\n")
    # would leave attached to the last field of every row.
    lines = text.splitlines()
    if not lines:
        raise ValueError("PrefLib file is empty: {!r}".format(path))

    extension = path.split(".")[-1]
    nodes = int(lines[0])

    rows = [line.split(",", 1) for line in lines]

    # Rows without a comma (the leading count, blank lines) end up with a
    # missing second field and are discarded here.
    df = pd.DataFrame(rows).dropna()

    df_nodes = df.head(nodes).copy()
    df_nodes = df_nodes.rename(columns={0: "node_id", 1: "node_name"})

    # Position `nodes` is the extra line after the alternatives; the ballot
    # rows start right after it.
    df_edges = df.iloc[nodes + 1:].copy()

    if extension == "toc":
        cols = ["winners", "losers"]
        # Split "<first>,{<group>}" into the leading item and the content of
        # the first braced group that follows it.
        df_edges[cols] = df_edges.apply(
            lambda row: re.split(r",\{(.*?)\}", row[1])[:2],
            axis=1,
            result_type="expand",
        )
        for col in cols:
            # The leading item may itself be a braced group; strip the braces.
            df_edges[col] = (
                df_edges[col]
                .str.replace("{", "", regex=False)
                .str.replace("}", "", regex=False)
            )

    elif extension == "soc":
        df_edges = df_edges.rename(columns={1: "rank"})

    else:
        cols = ["source", "destination"]
        df_edges[cols] = df_edges[1].str.split(",", expand=True)

    df_edges = df_edges.rename(columns={0: "voters"})
    # The raw payload column is gone already in the "soc" case, hence "ignore".
    df_edges = df_edges.drop(columns=[1], errors="ignore")

    return df_nodes, df_edges

In [178]:
pd.__version__

'1.3.5'

In [211]:
# Fetch one strict-order (.soc) file through the parser and display its
# ballot table.
preflib_url = "https://www.preflib.org/static/data/ED/cleanweb/ED-00015-00000001.soc"
nodes, edges = from_preflib(preflib_url)
edges

Unnamed: 0,voters,rank
242,1,"1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,1..."
243,1,"11,14,16,2,10,1,4,8,26,6,5,33,41,12,53,3,7,77,..."
244,1,"114,8,94,26,33,17,14,7,64,173,4,10,11,60,41,9,..."
245,1,"1,2,33,6,4,5,7,14,12,16,8,64,11,9,17,18,20,28,..."


Unnamed: 0,weight,1
18,13,"{},{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}"
19,13,"6,{1,2,3,4,5,7,8,9,10,11,12,13,14,15,16}"
20,10,"{9,10},{1,2,3,4,5,6,7,8,11,12,13,14,15,16}"
21,10,"{1,6},{2,3,4,5,7,8,9,10,11,12,13,14,15,16}"
22,9,"5,{1,2,3,4,6,7,8,9,10,11,12,13,14,15,16}"
...,...,...
229,1,"{1,14},{2,3,4,5,6,7,8,9,10,11,12,13,15,16}"
230,1,"{8,12,15},{1,2,3,4,5,6,7,9,10,11,13,14,16}"
231,1,"{6,13,14},{1,2,3,4,5,7,8,9,10,11,12,15,16}"
232,1,"{1,4,5,14},{2,3,6,7,8,9,10,11,12,13,15,16}"


In [67]:
# Read a local .soi file and keep only its "<count>,<ranking>" rows.
# NOTE(review): absolute, user-specific path; the notebook only runs on the
# author's machine until this is made configurable.
with open("/Users/cnavarreteliz/Downloads/soi/ED-00001-00000001.soi", "r") as file:
    # The context manager closes the handle as soon as the text is in memory.
    arr = file.read().split("\n")

# The first line gives the number of header rows that follow it.
lines = int(arr[0])

# Indexes 0..lines+1 are header material; ballot rows start at lines + 2.
# Each row is split once so the ranking stays a single string.
output = [line.split(",", 1) for line in arr[lines + 2:]]

In [68]:
# Turn the raw rows into a typed table: incomplete rows are dropped, voter
# counts become integers and each ranking becomes a list of candidate ids.
df = (
    pd.DataFrame(output, columns=["voters", "rank"])
    .dropna()
    .astype({"voters": int})
)
df["rank"] = df["rank"].str.split(",")

In [69]:
# Preview the first parsed ballots: voter count plus the ranking as a list.
df.head()

Unnamed: 0,voters,rank
0,800,"[12, 6, 4]"
1,680,"[4, 6, 12]"
2,506,"[6, 12, 4]"
3,486,"[12, 4, 6]"
4,429,"[6, 4, 12]"


In [3]:
from comchoice.voting import Voting
from comchoice.pairwise import Pairwise

In [4]:
# Wrap the ballot table in a Voting object for the ranking calls below.
# NOTE: this needs `df` from the parsing cells above; the recorded NameError
# shows the cell was last run on a kernel where they had not been executed.
choice = Voting(df)

NameError: name 'df' is not defined

In [72]:
# Ranking returned by `Voting.borda()`; the table lists candidate, value and rank.
choice.borda()

Unnamed: 0,candidate,value,rank
1,10,263296,1
11,9,229007,2
6,4,204631,3
8,6,200336,4
3,12,194830,5
4,2,185176,6
9,7,159550,7
0,1,113340,8
7,5,85342,9
5,3,69427,10


In [73]:
# Ranking returned by `Voting.plurality()`; same candidate/value/rank layout.
choice.plurality()

Unnamed: 0,candidate,value,rank
1,10,7294,1
11,9,6359,2
6,4,5892,3
3,12,5658,4
4,2,5501,5
8,6,5253,6
9,7,4012,7
5,3,1350,8
0,1,1177,9
7,5,914,10


In [74]:
# Ranking returned by `Voting.copeland()`; here `value` is a fraction, not a count.
choice.copeland()

Unnamed: 0,candidate,value,rank
1,6,1.0,1
2,4,0.818182,2
3,10,0.818182,3
5,9,0.818182,4
0,12,0.636364,5
4,2,0.545455,6
6,7,0.454545,7
7,1,0.363636,8
9,5,0.272727,9
8,3,0.181818,10


In [42]:
from itertools import combinations

In [47]:
# Expand every ballot into one record per ordered pair of candidates: the
# candidate listed earlier in the ranking is the winner of the pair, and the
# record carries the number of voters who cast that ballot.
output = []
for _, row in df.iterrows():
    ranking = row["rank"]
    # The `df` built earlier in this notebook stores rankings as lists, while
    # older inputs used "a>b>c" strings; accept both so the cell also works
    # when the notebook is run top to bottom.
    if isinstance(ranking, str):
        ranking = ranking.split(">")
    # combinations() keeps the input order, so `winner` always precedes `loser`.
    for winner, loser in combinations(ranking, 2):
        output.append({
            "voters": row["voters"],
            "winner": winner,
            "loser": loser
        })
    

In [49]:
# Total the voters behind each (winner, loser) pair across all ballots.
df1 = (
    pd.DataFrame(output)
    .groupby(["winner", "loser"])
    .agg({"voters": "sum"})
    .reset_index()
)
df1

Unnamed: 0,winner,loser,voters
0,1,10,5139
1,1,11,3228
2,1,12,6328
3,1,13,11691
4,1,14,5462
...,...,...,...
177,9,4,1813
178,9,5,2028
179,9,6,2183
180,9,7,2512
