In [4]:
import pandas as pd

# Import Data

Test data for questions 1 to 4

In [5]:
# Load the two CSV inputs; the relative paths are resolved against the
# notebook's current working directory.
table = pd.read_csv('preference.csv')
test = pd.read_csv('test.csv')

Data for question 5, generated in the `data_generation.ipynb` notebook

In [6]:
# Load the question-5 data set (relative path, resolved against the
# notebook's current working directory).
ex5_data = pd.read_csv('data_question5.csv')

# Utils

In [7]:
def count(df, only1st=False):
    """Compute one score per candidate column and rank the candidates by it.

    Every column of ``df`` other than ``'Noms'`` is treated as a candidate.

    Args:
        df: DataFrame with one column per candidate; a ``'Noms'`` column,
            if present, is ignored.
        only1st: If True, a candidate's score is the number of cells equal
            to 1 in its column. Otherwise the score is the column sum.

    Returns:
        DataFrame with columns ``"Candidat"`` and ``"Count"``, sorted by
        ``"Count"`` from largest to smallest, with a fresh 0..n-1 index.
    """
    candidates = [name for name in df.columns if name != 'Noms']

    if only1st:
        scores = [(df[name] == 1).sum() for name in candidates]
    else:
        scores = [df[name].sum() for name in candidates]

    tally = pd.DataFrame({"Candidat": candidates, "Count": scores})
    return tally.sort_values("Count", ascending=False).reset_index(drop=True)

# Try `count` on the test data: per-candidate column sums, largest first.
c = count(test, only1st=False)
c.head()


Unnamed: 0,Candidat,Count
0,d,78
1,a,67
2,c,65
3,b,60


# Voting Methods

### Plurality voting

In [8]:
def plurality(df):
    """Run a plurality vote and announce the winner.

    Candidates are ranked by how many cells equal to 1 they have in their
    column (as computed by ``count(df, only1st=True)``); the top-ranked
    candidate is printed as the winner.

    Args:
        df: Ballot table accepted by ``count``.

    Returns:
        DataFrame with columns ``"Candidat"`` and ``"Count"``, sorted by
        ``"Count"`` in descending order.
    """
    ranking = (
        count(df, only1st=True)
        .sort_values("Count", ascending=False)
        .reset_index(drop=True)
    )

    winner = ranking.iloc[0, 0]
    votes = ranking.iloc[0, 1]
    print(winner + " wins with " + str(votes) + " votes.")

    return ranking

In [9]:
# Run plurality voting on the test data and display the resulting ranking.
test_plurality = plurality(test)
test_plurality.head()

d wins with 10 votes.


Unnamed: 0,Candidat,Count
0,d,10
1,a,9
2,c,8
3,b,0


### Plurality with Runoff

In [10]:
# Work on copies so the frames loaded from CSV are never modified.
# NOTE(review): `tb` is not referenced by any cell visible in this notebook.
ts = test.copy()
tb = table.copy()

In [11]:
def plurality_runoff(df):
    """Run a two-round plurality vote (plurality with runoff).

    Round one keeps the two candidates with the most cells equal to 1 in
    their column (as ranked by ``count(df, only1st=True)``). In round two
    every ballot goes to whichever finalist it gives the smaller rank value.
    A ballot that gives both finalists the same value, or whose values
    cannot be compared (e.g. NaN), goes to the first-round leader. The
    winner of round two is printed.

    The input frame is not modified, and the result does not depend on the
    frame's index labels.

    Args:
        df: Ballot table with one rank column per candidate; a ``'Noms'``
            column, if present, is ignored.

    Returns:
        DataFrame with columns ``"Candidat"`` and ``"Count"`` holding the
        two finalists and their round-two votes, most votes first.

    Raises:
        ValueError: If ``df`` has fewer than two candidate columns.
    """
    first_round = count(df, only1st=True)
    top2 = first_round.head(2)['Candidat'].tolist()
    if len(top2) < 2:
        raise ValueError("plurality_runoff needs at least two candidates.")
    leader, challenger = top2

    # True where the ballot ranks the challenger strictly better (smaller
    # value) than the leader. Plain arrays are used so the round-two frame
    # gets its own 0..n-1 index regardless of the index of `df`.
    prefers_challenger = (df[leader] > df[challenger]).to_numpy()

    # Re-encode each ballot as a head-to-head vote: 1 for the preferred
    # finalist, 0 for the other, so `count(..., only1st=True)` tallies it.
    runoff_ballots = pd.DataFrame({
        leader: (~prefers_challenger).astype(int),
        challenger: prefers_challenger.astype(int),
    })

    runoff_counts = count(runoff_ballots, only1st=True)
    print(f"{runoff_counts.iloc[0, 0]} wins with {runoff_counts.iloc[0, 1]} votes.")

    return runoff_counts

# Run plurality-with-runoff on the copy of the test data and display the
# second-round tally.
runoff_test = plurality_runoff(ts)
runoff_test.head()

a wins with 17 votes.


Unnamed: 0,Candidat,Count
0,a,17
1,d,10


### Condorcet voting

In [12]:
# Condorcet voting
def condorcet(df):
    """Count pairwise (head-to-head) victories and look for a Condorcet winner.

    For every pair of candidates, each ballot supports the candidate it
    gives the strictly smaller rank value; the candidate supported by more
    ballots wins the duel (a tied duel gives no win to either side). A
    candidate who wins all ``n - 1`` duels is announced as the Condorcet
    winner; otherwise "No Condorcet winner." is printed.

    Args:
        df: Ballot table with one rank column per candidate; a ``'Noms'``
            column, if present, is ignored.

    Returns:
        DataFrame with columns ``"Candidat"`` and ``"Wins"`` (number of
        duels won), sorted by ``"Wins"`` in descending order. It is empty
        when ``df`` has no candidate columns.
    """
    candidates = [c for c in df.columns if c != 'Noms']
    n = len(candidates)

    wins = {c: 0 for c in candidates}

    for i, c1 in enumerate(candidates):
        for c2 in candidates[i + 1:]:
            # Compare the two rank columns in one vectorised step instead of
            # visiting every ballot; ballots with equal ranks count for nobody.
            c1_wins = int((df[c1] < df[c2]).sum())
            c2_wins = int((df[c2] < df[c1]).sum())

            if c1_wins > c2_wins:
                wins[c1] += 1
            elif c2_wins > c1_wins:
                wins[c2] += 1

    out = pd.DataFrame({
        "Candidat": list(wins.keys()),
        "Wins": list(wins.values())
    })

    out = out.sort_values("Wins", ascending=False).reset_index(drop=True)

    # `n > 0` guards the lookup: with no candidates there is no first row.
    if n > 0 and out.iloc[0, 1] == n - 1:
        print(f"{out.iloc[0, 0]} is the Condorcet winner with {out.iloc[0, 1]} wins.")
    else:
        print("No Condorcet winner.")

    return out

# Run the Condorcet method on the copy of the test data and display the
# number of pairwise wins per candidate.
condorcet_test = condorcet(ts)
condorcet_test.head()

b is the Condorcet winner with 3 wins.


Unnamed: 0,Candidat,Wins
0,b,3
1,c,2
2,a,1
3,d,0


### Borda Voting

In [13]:
def bordaVoting(df):
    """Run a Borda-style vote where the lowest total wins.

    Each candidate's score is the sum of its column (as computed by
    ``count(df, only1st=False)``). Candidates are then ordered from the
    smallest to the largest score, and the first one is printed as the
    winner.

    Args:
        df: Ballot table accepted by ``count``.

    Returns:
        DataFrame with columns ``"Candidat"`` and ``"Count"``, sorted by
        ``"Count"`` in ascending order.
    """
    ranking = (
        count(df, only1st=False)
        .sort_values("Count", ascending=True)
        .reset_index(drop=True)
    )

    winner = ranking.iloc[0, 0]
    score = ranking.iloc[0, 1]
    print(winner + " wins with a Borda score of " + str(score) + ".")

    return ranking

# Run Borda voting on the copy of the test data and display the scores,
# smallest (winning) score first.
borda_test = bordaVoting(ts)
borda_test.head()

b wins with a Borda score of 60.


Unnamed: 0,Candidat,Count
0,b,60
1,c,65
2,a,67
3,d,78


# Ex5

### Voting test

In [14]:
# Apply all four voting methods to the question-5 data; each call prints
# its own winner line and returns its ranking table.
plurality_ex5 = plurality(ex5_data)
plurality_runoff_ex5 = plurality_runoff(ex5_data)
condorcet_ex5 = condorcet(ex5_data)
borda_ex5 = bordaVoting(ex5_data)

A wins with 5 votes.
A wins with 6 votes.
A is the Condorcet winner with 7 wins.
A wins with a Borda score of 15.


In [16]:
def test_best_worst_limits(df, max_best=0.5, max_worst=0.4):
    candidates = [c for c in df.columns if c != 'Noms']
    n = len(df)
    if n == 0:
        return {
            "best_ok": True,
            "worst_ok": True,
            "best_fracs": {},
            "worst_fracs": {},
            "best_violations": {},
            "worst_violations": {}
        }

    bests = df[candidates].idxmin(axis=1)   # column with smallest rank (best)
    worsts = df[candidates].idxmax(axis=1)  # column with largest rank (worst)

    best_counts = bests.value_counts().reindex(candidates, fill_value=0)
    worst_counts = worsts.value_counts().reindex(candidates, fill_value=0)

    best_fracs = (best_counts / n).to_dict()
    worst_fracs = (worst_counts / n).to_dict()

    best_violations = {c: f for c, f in best_fracs.items() if f > max_best}
    worst_violations = {c: f for c, f in worst_fracs.items() if f > max_worst}

    result = {
        "best_ok": len(best_violations) == 0,
        "worst_ok": len(worst_violations) == 0,
        "best_fracs": best_fracs,
        "worst_fracs": worst_fracs,
        "best_violations": best_violations,
        "worst_violations": worst_violations
    }

    if result["best_ok"] and result["worst_ok"]:
        print("OK: best and worst distributions within limits.")
    else:
        if not result["best_ok"]:
            print("Best-limit violations:", best_violations)
        if not result["worst_ok"]:
            print("Worst-limit violations:", worst_violations)

    return result

# Check the question-5 data: no candidate may be the best choice on more
# than 50% of the rows, nor the worst choice on more than 40% of them.
test_limits = test_best_worst_limits(ex5_data, max_best=0.5, max_worst=0.4)

OK: best and worst distributions within limits.
