In [None]:
# 2020 Democratic Primary Drop Out Predictions Scoring Formulas
# Author: Oliver Gladfelter
# Date: Aug 22, 2023

In [1]:
import pandas as pd
from collections import Counter
import statistics
pd.options.mode.chained_assignment = None  # default='warn'

df = pd.read_csv("../2024/leaderboard/data/submissions2024.csv")
df = df[['leaderboardAlias', 'prediction']]

# create a new dataframe with reformatted data, which will help answer some questions
long = df.copy()
long[[1,2,3,4,5,6,7,8,9,10,11,12,'x']] = long['prediction'].str.split(',',expand=True)
long = long.drop(['leaderboardAlias', 'prediction', 'x'], axis=1)

# another reformatted dataset
# this reformatted data will be necessary for answering certain questions
candidatePositions = {}
for row in long.index:
    for column in long.columns:
        if long[column][row] not in candidatePositions:
            candidatePositions[long[column][row]] = [int(column)]
        elif long[column][row] in candidatePositions:
            candidatePositions[long[column][row]].append(int(column))
candidatePositions = pd.DataFrame(candidatePositions)

# numParticipants
n = len(df)

## Calculate 'average' drop out position of each candidate

In [2]:
candidateScores = {}
for i in df.index:
    prediction = df['prediction'][i].split(",")[:-1]
    for i, candidate in enumerate(prediction):
        if candidate in candidateScores:
            candidateScores[candidate] += i
        else:
            candidateScores[candidate] = i

averagePredictions = [{'candidate': key, 'value': candidateScores[key] / len(df)} for key in candidateScores.keys()]
averagePredictions.sort(key=lambda x: x['value'])

averageOrder = [ap['candidate'] for ap in averagePredictions]

display(averagePredictions)
print("The 'Wisdom Of The Crowd' predicted drop out order is:")
display(averageOrder)

[{'candidate': 'hurd', 'value': 2.08974358974359},
 {'candidate': 'elder', 'value': 2.2564102564102564},
 {'candidate': 'suarez', 'value': 2.730769230769231},
 {'candidate': 'burgum', 'value': 3.3461538461538463},
 {'candidate': 'hutchinson', 'value': 3.3846153846153846},
 {'candidate': 'scott', 'value': 5.987179487179487},
 {'candidate': 'christie', 'value': 6.256410256410256},
 {'candidate': 'ramaswamy', 'value': 6.551282051282051},
 {'candidate': 'haley', 'value': 6.576923076923077},
 {'candidate': 'pence', 'value': 7.141025641025641},
 {'candidate': 'desantis', 'value': 9.076923076923077},
 {'candidate': 'trump', 'value': 10.602564102564102}]

The 'Wisdom Of The Crowd' predicted drop out order is:


['hurd',
 'elder',
 'suarez',
 'burgum',
 'hutchinson',
 'scott',
 'christie',
 'ramaswamy',
 'haley',
 'pence',
 'desantis',
 'trump']

In [3]:
# Add average answer to submissions
predictionStr = ''
for p in averageOrder:
    predictionStr += p + ','

aggregateRow = {'leaderboardAlias':['Wisdom of the crowd'], 'prediction':[predictionStr]}
aggregateRow = pd.DataFrame(aggregateRow)


df = df.append(aggregateRow, ignore_index=True)

df.to_csv("../2024/leaderboard/data/submissions2024.csv", index=False)

## Who are common picks for predicted first drop? Predicted winner? Etc

In [4]:
def perForPosition(dropPosition):
    display(long[dropPosition].value_counts().head(3) / n * 100)

perForPosition(1) # first drop
perForPosition(2) # second drop
perForPosition(3) # third drop
perForPosition(11) # last drop
perForPosition(12) # winner

suarez    24.358974
elder     21.794872
hurd      20.512821
Name: 1, dtype: float64

elder     23.076923
hurd      19.230769
suarez    17.948718
Name: 2, dtype: float64

hurd      24.358974
elder     15.384615
burgum    14.102564
Name: 3, dtype: float64

desantis     41.025641
ramaswamy    16.666667
trump        12.820513
Name: 11, dtype: float64

trump        82.051282
desantis     10.256410
ramaswamy     2.564103
Name: 12, dtype: float64

## What percent of predictions put Hurd / Elder / Suarez in the first three drops?

In [5]:
# predicts hurd or elder or suarez as first drop
print(len(long[(long[1] == 'hurd') | (long[1] == 'elder') | (long[1] == 'suarez')]) / n * 100)

# predicts various candidates as first three drops
print(len(candidatePositions[candidatePositions['hurd'] <= 3]) / n * 100)
print(len(candidatePositions[candidatePositions['elder'] <= 3]) / n * 100)
print(len(candidatePositions[candidatePositions['suarez'] <= 3]) / n * 100)

# how about all 3?
print(len(candidatePositions[((candidatePositions['hurd'] <= 3) & (candidatePositions['elder'] <= 3) & (candidatePositions['suarez'] <= 3))]) / n * 100)

66.66666666666666
64.1025641025641
60.256410256410255
56.41025641025641
19.230769230769234


In [8]:
print("% of respondents who think candidate will be one of the first three to drop out")
print(" ")

for candidate in candidatePositions.columns:
    print(candidate + ": " + str(len(candidatePositions[candidatePositions[candidate] <= 3]) / n * 100))

% of respondents who think candidate will be one of the first three to drop out
 
ramaswamy: 15.384615384615385
hurd: 64.1025641025641
hutchinson: 34.61538461538461
elder: 60.256410256410255
pence: 1.282051282051282
christie: 11.538461538461538
burgum: 39.743589743589745
scott: 11.538461538461538
haley: 3.8461538461538463
suarez: 56.41025641025641
trump: 1.282051282051282
desantis: 0.0


## Where do we see the most/least disagreement over candidates?

In [9]:
stdvs = []
for candidate in candidatePositions.columns:
    stdvs.append(statistics.stdev(candidatePositions[candidate]))
    
stdvs = pd.DataFrame({'candidate':candidatePositions.columns, 'stdv':stdvs})

# most agreement over Trump (going far), most disagreement on Ramaswamy (he's been positioned all over the place)
display(stdvs.sort_values('stdv'))

Unnamed: 0,candidate,stdv
10,trump,1.45352
11,desantis,1.64963
1,hurd,1.668547
4,pence,1.87758
2,hutchinson,2.007976
3,elder,2.079185
8,haley,2.147298
6,burgum,2.416539
7,scott,2.601167
5,christie,2.615816


# How did predictions fare?
### Hypothetical scenarios for now

In [21]:
# Will Hurd was the first candidate to drop out. Just 20% of contestants accurately predicted that. 
print(len(candidatePositions[candidatePositions['hurd'] == 1]) / len(df) * 100)

# Although many others thought he would drop out 2nd or 3rd
display(candidatePositions['hurd'].value_counts().head(3))

20.253164556962027


3    19
1    16
2    15
Name: hurd, dtype: int64

In [23]:
# With Tim Scott departing the race, just six candidates remain: Christie, Ramaswamy, Haley, Pence, DeSantis, and Trump
# just 12% of participants accurately predicted these individuals would be the final six candidates still running
print(len(candidatePositions[((candidatePositions['christie'] >= 7) & (candidatePositions['ramaswamy'] >= 7) & (candidatePositions['haley'] >= 7) & (candidatePositions['pence'] >= 7) & (candidatePositions['desantis'] >= 7) & (candidatePositions['trump'] >= 7))]) / n * 100)

11.538461538461538


In [29]:
# 33% predicted Haley would be in the final 4
print(len(candidatePositions[((candidatePositions['haley'] >= 9))]) / n * 100)

print(len(candidatePositions[((candidatePositions['pence'] >= 9))]) / n * 100)
print(len(candidatePositions[((candidatePositions['desantis'] >= 9))]) / n * 100)
print(len(candidatePositions[((candidatePositions['trump'] >= 9))]) / n * 100)

33.33333333333333
46.15384615384615
87.17948717948718
97.43589743589743
