In [None]:
# 2024 Republican Primary Drop Out Predictions Scoring Formulas
# Author: Oliver Gladfelter
# Date: Aug 22, 2023

In [199]:
import pandas as pd
from collections import Counter
import statistics
pd.options.mode.chained_assignment = None  # default='warn'

# Load every leaderboard submission, keeping only the participant alias and
# the comma-separated prediction string.
df = pd.read_csv("../2024/leaderboard/data/submissions2024.csv")
df = df[['leaderboardAlias', 'prediction']]

# create a new dataframe with reformatted data, which will help answer some questions
# `long` has one row per submission and one column per drop-out position (1-12);
# each cell holds the candidate predicted for that position.
# The split must yield exactly 13 fields. The 13th ('x') is presumably the empty
# remainder after a trailing comma in each prediction string, and is discarded.
long = df.copy()
long[[1,2,3,4,5,6,7,8,9,10,11,12,'x']] = long['prediction'].str.split(',', expand=True)
long = long.drop(['leaderboardAlias', 'prediction', 'x'], axis=1)

# another reformatted dataset
# this reformatted data will be necessary for answering certain questions
# `candidatePositions` is the inverse of `long`: one column per candidate, one
# row per submission, each cell the position (1-12) that submission gave the
# candidate. Columns appear in the order candidates are first encountered.
positionsByCandidate = {}
for ranking in long.values.tolist():
    for position, candidate in zip(long.columns, ranking):
        positionsByCandidate.setdefault(candidate, []).append(int(position))
# pd.DataFrame raises ValueError if any candidate is missing from (or repeated
# in) a submission, because the per-candidate lists would differ in length.
candidatePositions = pd.DataFrame(positionsByCandidate)

# numParticipants
n = len(df)

## Calculate 'average' drop out position of each candidate

In [2]:
# Sum each candidate's zero-based predicted drop-out position over every
# submission; a missing key in a Counter starts at 0, so no if/else is needed.
candidateScores = Counter()
for predictionString in df['prediction']:
    # [:-1] drops the last field produced by the split (presumably the empty
    # string left by a trailing comma).
    prediction = predictionString.split(",")[:-1]
    # A dedicated loop variable is used here so the row iteration is not shadowed.
    for position, candidate in enumerate(prediction):
        candidateScores[candidate] += position

# Mean position per candidate, lowest (earliest predicted drop) first.
averagePredictions = [
    {'candidate': candidate, 'value': total / len(df)}
    for candidate, total in candidateScores.items()
]
averagePredictions.sort(key=lambda x: x['value'])

averageOrder = [ap['candidate'] for ap in averagePredictions]

display(averagePredictions)
print("The 'Wisdom Of The Crowd' predicted drop out order is:")
display(averageOrder)

[{'candidate': 'hurd', 'value': 2.0816326530612246},
 {'candidate': 'elder', 'value': 2.142857142857143},
 {'candidate': 'suarez', 'value': 2.7755102040816326},
 {'candidate': 'hutchinson', 'value': 3.326530612244898},
 {'candidate': 'burgum', 'value': 3.5918367346938775},
 {'candidate': 'christie', 'value': 5.8979591836734695},
 {'candidate': 'scott', 'value': 6.122448979591836},
 {'candidate': 'haley', 'value': 6.63265306122449},
 {'candidate': 'ramaswamy', 'value': 6.673469387755102},
 {'candidate': 'pence', 'value': 6.857142857142857},
 {'candidate': 'desantis', 'value': 9.224489795918368},
 {'candidate': 'trump', 'value': 10.673469387755102}]

The 'Wisdom Of The Crowd' predicted drop out order is:


['hurd',
 'elder',
 'suarez',
 'hutchinson',
 'burgum',
 'christie',
 'scott',
 'haley',
 'ramaswamy',
 'pence',
 'desantis',
 'trump']

In [20]:
# Add average answer to submissions
# Serialize the crowd ordering as "name,name,...," (each name followed by a
# comma), the same trailing-comma form this cell has always produced.
predictionStr = ''.join(candidate + ',' for candidate in averageOrder)

aggregateRow = pd.DataFrame({
    'leaderboardAlias': ['Wisdom of the crowd'],
    'prediction': [predictionStr],
})

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and yields the same frame with a fresh 0..n index.
# NOTE: re-running this cell adds another copy of the aggregate row to `df`.
df = pd.concat([df, aggregateRow], ignore_index=True)

#df.to_csv("../2024/leaderboard/data/submissions2024.csv", index=False)

## Who are common picks for predicted first drop? Predicted winner? Etc

In [101]:
def perForPosition(dropPosition, top=3):
    """Display the most common picks for a single drop-out position.

    Counts how often each candidate appears in column `dropPosition` of the
    module-level `long` table, keeps the `top` most frequent, and displays
    each count as a percentage of the module-level participant count `n`.

    Parameters
    ----------
    dropPosition : column label in `long` (e.g. 1 for the first drop).
    top : int, optional
        Number of most-picked candidates to show. Defaults to 3.

    Returns
    -------
    None. The percentages are rendered with `display`.
    """
    display(long[dropPosition].value_counts().head(top) / n * 100)

# Positions worth a look: the three earliest drops, the final drop, and the winner.
positionsOfInterest = [
    1,   # first drop
    2,   # second drop
    3,   # third drop
    11,  # last drop
    12,  # winner
]
for position in positionsOfInterest:
    perForPosition(position)

hurd      26.0
elder     22.0
suarez    18.0
Name: 1, dtype: float64

suarez    22.0
elder     22.0
hurd      16.0
Name: 2, dtype: float64

hurd      20.0
elder     18.0
suarez    16.0
Name: 3, dtype: float64

desantis     48.0
ramaswamy    16.0
trump        12.0
Name: 11, dtype: float64

trump        82.0
desantis     10.0
ramaswamy     4.0
Name: 12, dtype: float64

## What percent of predictions put Hurd / Elder / Suarez in the first three drops?

In [221]:
# The three candidates this section is about, in the order they are reported.
earlyFavorites = ['hurd', 'elder', 'suarez']

# predicts hurd or elder or suarez as first drop
firstDropIsFavorite = long[long[1].isin(earlyFavorites)]
print(len(firstDropIsFavorite) / n * 100)

# predicts various candidates as first three drops
# One boolean column per candidate: True where the submission placed that
# candidate in position 3 or earlier.
inFirstThree = candidatePositions[earlyFavorites] <= 3
for candidate in earlyFavorites:
    print(len(candidatePositions[inFirstThree[candidate]]) / n * 100)

# how about all 3?
print(len(candidatePositions[inFirstThree.all(axis=1)]) / n * 100)

66.0
62.0
62.0
56.00000000000001
18.0


In [197]:
print("% of respondents who think candidate will be one of the first three to drop out")
print(" ")

# `candidatePositions` (one column per candidate, one row per submission) is the
# table built at the top of the notebook; the previous name used here,
# `reformattedData`, is never defined and fails on a fresh kernel.
for candidate in candidatePositions.columns:
    # Percentage of all n participants who placed this candidate at position 3 or earlier.
    print(candidate + ": " + str(len(candidatePositions[candidatePositions[candidate] <= 3]) / n * 100))

% of respondents who think candidate will be one of the first three to drop out
 
ramaswamy: 16.0
hurd: 62.0
hutchinson: 38.0
elder: 62.0
pence: 2.0
christie: 14.000000000000002
burgum: 36.0
scott: 10.0
haley: 4.0
suarez: 56.00000000000001
trump: 0.0
desantis: 0.0


## Where do we see the most/least disagreement over candidates?

In [219]:
# Sample standard deviation of each candidate's predicted position across
# submissions: a low value means participants agree on where the candidate
# lands, a high value means predictions are spread out.
# Uses `candidatePositions`, the per-candidate table built at the top of the
# notebook; the previous name used here, `reformattedData`, is never defined.
stdvs = pd.DataFrame({
    'candidate': candidatePositions.columns,
    'stdv': [
        statistics.stdev(candidatePositions[candidate])
        for candidate in candidatePositions.columns
    ],
})

# most agreement over Trump (going far), most disagreement on Ramaswamy (he's been positioned all over the place)
# also worth mentioning DeSantis, Hurd, Christie
display(stdvs.sort_values('stdv'))

Unnamed: 0,candidate,stdv
10,trump,0.957036
11,desantis,1.519398
1,hurd,1.725676
4,pence,1.897904
3,elder,1.944537
8,haley,2.057936
2,hutchinson,2.161254
9,suarez,2.420659
7,scott,2.504201
6,burgum,2.595129
