In [8]:
import numpy as np
import pandas as pd
import networkx as nx

In [46]:
df = pd.DataFrame([
    (1, 1, 2, 1),
    (1, 1, 3, 1),
    (1, 2, 3, 2)
], columns=["voter", "alternative_a", "alternative_b", "selected"])
df

Unnamed: 0,voter,alternative_a,alternative_b,selected
0,1,1,2,1
1,1,1,3,1
2,1,2,3,2


In [None]:
def __transform(
    data,
    ballot="rank",
    delimiter=">",
    rmv=[],
    score_delimiter="=",
    to_pairwise=False,
    unique_id=False,
    **kws
) -> pd.DataFrame:
    """Transforms a DataFrame into a machine-friendly DataFrame.

    Parameters
    ----------
    data : pd.DataFrame
        A pandas DataFrame.
    delimiter : str, optional
        Whether alternatives are separated in the column, by default ">"
    ballot : {"rank", "score"}, optional
        DataFrame format. Values accepted are "rank" and "score", by default "rank"
    rmv : list, optional
        Remove alternatives from list, before calculating ranking, by default []
    score_delimiter : str, optional
        In case of ballot = "score", defines how alternative and score are separated, by default "="
    unique_id : bool, optional
        Returns internal unique_id generated in the function to convert the DataFrame, by default False

    Returns
    -------
    pd.DataFrame
        A transformed DataFrame.
    """
    df = data.copy()
    df["_id"] = range(df.shape[0])

    if ballot == "rank":
        df["rank"] = df["rank"].str.split(delimiter)
        df = df.explode("rank")
        df = df.rename(columns={"rank": "alternative"})

        if len(rmv) > 0:
            df = df[~df["alternative"].isin(rmv)].copy()

        # TODO: Allow ties in ballots.
        df["rank"] = df.groupby("_id").cumcount() + 1

    elif ballot == "score":
        df["alternative"] = df["ballot"].str.split(delimiter)
        df = df.explode("alternative")
        df[["alternative", "score"]] = df["alternative"].str.split(
            score_delimiter, n=1, expand=True)
        df["score"] = df["score"].astype(float)
        df = df.drop(columns=["ballot"])

    if to_pairwise:
        df = to_pw(
            voter="_id"
        )

    if not unique_id:
        df = df.drop(columns=["_id"])

    return df


In [49]:
def to_ballot(
    df,
    alternative_a="alternative_a",
    alternative_b="alternative_b",
    ballot="ballot",
    delimiter = ">",
    dtype="pairwise",
    score="score",
    score_delimiter="=",
    selected="selected",
    voter="voter"
):
    df["_id"] = range(df.shape[0])

    if dtype == "pairwise":
        output = []
        for v, tmp in df.groupby(voter):
            l = tmp.apply(lambda x: \
                    (x[alternative_a], x[alternative_b]) if x[alternative_a] == x[selected] else  (x[alternative_b], x[alternative_a]),
                    axis=1
                )

            DG = nx.DiGraph(list(l))
            TR = nx.transitive_reduction(DG)
            edges = list(TR.edges)

            chain = edges[0]
            for i in range(0, len(edges) - 1):
                b = edges[i + 1]
                if chain[len(chain) - 1] == b[0]:
                    chain = ((*chain, b[1]))

            output.append((v, (delimiter).join(map(str, chain))))

        return pd.DataFrame(output, columns=[voter, "ballot"])
    

    elif dtype == "score":
        df["alternative"] = df[ballot].str.split(delimiter)
        df = df.explode("alternative")

        df[["alternative", score]] = df["alternative"].str.split(
            score_delimiter, n=1, expand=True)
        df[score] = df[score].astype(float)
        df = df.drop(columns=[ballot])
        
        return 

to_ballot(df)

Unnamed: 0,voter,ballot
0,1,1>2>3


In [44]:
DG = nx.DiGraph([(1, 2), (2, 3), (3, 4), (1, 3), (1, 4), (2, 4)])
TR = nx.transitive_reduction(DG)
edges = list(TR.edges)
delimiter = ">"

chain = edges[0]
for i in range(0, len(edges) - 1):
    b = edges[i + 1]
    if chain[len(chain) - 1] == b[0]:
        chain = ((*chain, b[1]))

(delimiter).join(map(str, chain))

'1>2>3>4'

In [104]:
df1 = pd.DataFrame([
    (1, "A=1;B=1;C=3"),
    (2, "A=3;B=2;C=4"),
    (3, "A=2;B=1;C=5")
], columns=["voter", "ballot"])
df1

def score_extend(
    df,
    delimiter=";",
    ballot="ballot",
    delimiter_score="=",
    unique_id=False,
    ascending=False
):
    df["_id"] = range(df.shape[0])
    
    df["alternative"] = df[ballot].str.split(delimiter)
    df = df.explode("alternative")

    df[["alternative", ballot]] = df["alternative"].str.split(
        delimiter_score, n=1, expand=True)
    
    df[ballot] = df[ballot].astype(float)
    if (df[ballot] % 1  == 0).all():
        df[ballot] = df[ballot].astype(int)
        
    df["rank"] = df.groupby("_id")[ballot].rank(method="min", ascending=ascending).astype(int)
    
    if not unique_id:
        df = df.drop(columns=["_id"])

    return df

score_extend(df1)

Unnamed: 0,voter,ballot,alternative,rank
0,1,1,A,2
0,1,1,B,2
0,1,3,C,1
1,2,3,A,2
1,2,2,B,3
1,2,4,C,1
2,3,2,A,2
2,3,1,B,3
2,3,5,C,1


In [108]:
def transform(
    df,
    dtype_from="ballot",
    dtype_to="ballot_extended",
    alternative_a="alternative_a",
    alternative_b="alternative_b",
    selected="selected",
    voter="voter",
    ballot="ballot",
    delimiter=">",
    delimiter_ties="=",
    delimiter_score="=",
    rmv=[],
    unique_id=False,
    ascending=False
):
    if dtype_from == "ballot" and dtype_to == "ballot_extended":
        return ballot_extend(
            df,
            ballot=ballot,
            delimiter=delimiter,
            delimiter_ties=delimiter_ties,
            rmv=rmv,
            unique_id=unique_id
        )
    
    elif dtype_from == "score" and dtype_to == "score_extended":
        return score_extend(
            df,
            delimiter=delimiter,
            ballot=ballot,
            delimiter_score=delimiter_score,
            unique_id=unique_id,
            ascending=ascending
        )
    
    elif dtype_from == "pairwise":
        df = to_ballot(
            df,
            ballot=ballot,
            delimiter=delimiter,
            dtype=dtype_from,
#             score="score",
            delimiter_score=delimiter_score,
            selected=selected,
            voter=voter
        )
        if dtype_to == "ballot_extended":
            return ballot_extend(
                df,
                ballot=ballot,
                delimiter=delimiter,
                delimiter_ties=delimiter_ties,
                rmv=rmv,
                unique_id=unique_id
            )
        return df
    
    elif dtype_from in ["ballot", "score"] and dtype_to == "pairwise":
        dtype_a, dtype_b = dtype_from.split("_")
        return to_pairwise(
            df,
            alternative=alternative,
            ascending=ascending,
            delimiter=delimiter,
            alternative_a=alternative_a,
            alternative_b=alternative_b,
            selected=selected,
            ballot=ballot,
            value=value,
            voter=voter,
            voters=voters,
            dtype=dtype_a,
            verbose=True
        )

In [89]:
df1 = pd.DataFrame([
    (1, "A>B=C>D=E"),
    (2, "A>C>B>D>E"),
    (3, "C=E>A>B>D")
], columns=["voters", "ballot"])

def ballot_extend(
    df,
    ballot="ballot",
    delimiter=">",
    delimiter_ties="=",
    rmv=[],
    unique_id=False
):
    df["_id"] = range(df.shape[0])
    

    df[ballot] = df[ballot].str.split(delimiter)
    df = df.explode(ballot)
    df = df.rename(columns={ballot: "alternative"})

    if len(rmv) > 0:
        df = df[~df["alternative"].isin(rmv)].copy()

    df["rank_a"] = df.groupby("_id").cumcount() + 1
    df["alternative"] = df["alternative"].str.split(delimiter_ties)
    df["rank_b"] = df["alternative"].map(len)
    df["rank_b"] = df.groupby("_id")["rank_b"].cumsum()
    
    df["rank"] = df.apply(lambda x: x["rank_a"] if len(x["alternative"]) > 1 else x["rank_b"], axis=1)
    
#     display(df)
    
    df = df.explode("alternative")
    
    df = df.drop(columns=["rank_a", "rank_b"])

    if not unique_id:
        df = df.drop(columns=["_id"])

    return df

ballot_extend(df1)

Unnamed: 0,voters,alternative,rank
0,1,A,1
0,1,B,2
0,1,C,2
0,1,D,3
0,1,E,3
1,2,A,1
1,2,C,2
1,2,B,3
1,2,D,4
1,2,E,5
