In [1]:
# Author: Oliver Gladfelter
# Date: Aug 22, 2023

In [1]:
import pandas as pd
from collections import Counter
import statistics
pd.options.mode.chained_assignment = None  # default='warn'

# Load the raw submissions and keep only the two columns this analysis reads.
df = pd.read_csv("../2024/leaderboard/data/submissions2024.csv")[['leaderboardAlias', 'prediction']]

# Wide view of the predictions: one row per submission, one column per
# drop-out position, labelled 1 through 12.
# The 13th piece produced by the split is labelled 'x' and discarded
# (presumably an empty string left by a trailing comma -- TODO confirm against the CSV).
positionLabels = list(range(1, 13)) + ['x']
long = df['prediction'].str.split(',', expand=True)
long.columns = positionLabels
long = long.drop(columns='x')

# Second reshaped view: one column per candidate, one row per submission,
# each cell holding the position (1-12) that submission assigned to the candidate.
# Columns appear in the order candidates are first encountered while scanning
# `long` row by row, left to right.
positionsByCandidate = {}
for _, ranking in long.iterrows():
    for position, candidate in ranking.items():
        positionsByCandidate.setdefault(candidate, []).append(int(position))
candidatePositions = pd.DataFrame(positionsByCandidate)

# number of participants (one row of `df` per submission)
n = len(df)

## Calculate 'average' drop out position of each candidate

In [2]:
# Sum, per candidate, the slot each submission assigned to them.
# Slots come from enumerate() and are therefore 0-based (first drop = 0),
# i.e. one less than the 1-based positions used in `long` / `candidatePositions`.
candidateScores = {}
for ballot in df['prediction']:
    ranking = ballot.split(",")[:-1]  # the piece after the final comma is ignored
    for slot, name in enumerate(ranking):
        candidateScores[name] = candidateScores.get(name, 0) + slot

# Average slot per candidate, ordered from earliest predicted drop to latest.
averagePredictions = sorted(
    [{'candidate': name, 'value': total / len(df)} for name, total in candidateScores.items()],
    key=lambda entry: entry['value'],
)

averageOrder = [entry['candidate'] for entry in averagePredictions]

display(averagePredictions)
print("The 'Wisdom Of The Crowd' predicted drop out order is:")
display(averageOrder)

[{'candidate': 'hurd', 'value': 2.0632911392405062},
 {'candidate': 'elder', 'value': 2.240506329113924},
 {'candidate': 'suarez', 'value': 2.721518987341772},
 {'candidate': 'burgum', 'value': 3.3417721518987342},
 {'candidate': 'hutchinson', 'value': 3.392405063291139},
 {'candidate': 'scott', 'value': 5.974683544303797},
 {'candidate': 'christie', 'value': 6.253164556962025},
 {'candidate': 'ramaswamy', 'value': 6.556962025316456},
 {'candidate': 'haley', 'value': 6.594936708860759},
 {'candidate': 'pence', 'value': 7.1645569620253164},
 {'candidate': 'desantis', 'value': 9.08860759493671},
 {'candidate': 'trump', 'value': 10.60759493670886}]

The 'Wisdom Of The Crowd' predicted drop out order is:


['hurd',
 'elder',
 'suarez',
 'burgum',
 'hutchinson',
 'scott',
 'christie',
 'ramaswamy',
 'haley',
 'pence',
 'desantis',
 'trump']

## Who are common picks for predicted first drop? Predicted winner? Etc

In [3]:
def perForPosition(dropPosition):
    """Display the three most common picks for a drop-out position.

    dropPosition is a column label of `long` (1 through 12). The value shown
    for each candidate is the percentage of all `n` participants who placed
    that candidate at this position.
    """
    topPicks = long[dropPosition].value_counts().head(3)
    display(topPicks / n * 100)

# Positions inspected: the first three drops (1, 2, 3), the last candidate
# to drop out (11), and the predicted winner (12).
for dropPosition in (1, 2, 3, 11, 12):
    perForPosition(dropPosition)

suarez    24.050633
hurd      21.518987
elder     21.518987
Name: 1, dtype: float64

elder     24.050633
hurd      18.987342
suarez    17.721519
Name: 2, dtype: float64

hurd      24.050633
suarez    15.189873
elder     15.189873
Name: 3, dtype: float64

desantis     41.772152
ramaswamy    16.455696
trump        12.658228
Name: 11, dtype: float64

trump        82.278481
desantis     10.126582
ramaswamy     2.531646
Name: 12, dtype: float64

## What percent of predictions put Hurd / Elder / Suarez in the first three drops?

In [4]:
earlyTrio = ['hurd', 'elder', 'suarez']

# % of predictions naming any one of the trio as the very first drop
firstDropIsTrio = long[1].isin(earlyTrio)
print(len(long[firstDropIsTrio]) / n * 100)

# % of predictions placing each of them, individually, within the first three drops
for name in earlyTrio:
    print(len(candidatePositions[candidatePositions[name] <= 3]) / n * 100)

# % of predictions placing all three of them within the first three drops
allThreeEarly = (candidatePositions[earlyTrio] <= 3).all(axis=1)
print(len(candidatePositions[allThreeEarly]) / n * 100)

67.08860759493672
64.55696202531645
60.75949367088608
56.9620253164557
20.253164556962027


In [5]:
print("% of respondents who think candidate will be one of the first three to drop out")
print(" ")

# One line per candidate: share of participants who ranked them at position 3 or earlier.
for name, positions in candidatePositions.items():
    earlyShare = len(positions[positions <= 3]) / n * 100
    print(name + ": " + str(earlyShare))

% of respondents who think candidate will be one of the first three to drop out
 
ramaswamy: 15.18987341772152
hurd: 64.55696202531645
hutchinson: 34.177215189873415
elder: 60.75949367088608
pence: 1.2658227848101267
christie: 11.39240506329114
burgum: 39.24050632911392
scott: 11.39240506329114
haley: 3.79746835443038
suarez: 56.9620253164557
trump: 1.2658227848101267
desantis: 0.0


## Where do we see the most/least disagreement over candidates?

In [6]:
# Sample standard deviation of each candidate's predicted positions:
# a small value means participants largely agreed on where the candidate would finish.
positionSpread = [
    statistics.stdev(candidatePositions[name])
    for name in candidatePositions.columns
]

stdvs = pd.DataFrame({'candidate': candidatePositions.columns, 'stdv': positionSpread})

# Author's reading of the table: most agreement over Trump (going far),
# most disagreement on Ramaswamy (he's been positioned all over the place).
display(stdvs.sort_values('stdv'))

Unnamed: 0,candidate,stdv
10,trump,1.444865
11,desantis,1.642308
1,hurd,1.674406
4,pence,1.877194
2,hutchinson,1.996264
3,elder,2.070645
8,haley,2.139488
6,burgum,2.401314
7,scott,2.586824
5,christie,2.599153


## Who do we expect to drop next?

In [4]:
from collections import Counter


def predictedNextDrops(predictions, alreadyDropped):
    """Return, for each prediction, the first listed candidate who has not dropped out yet.

    Parameters
    ----------
    predictions : iterable of str
        Comma-separated drop-out orders; the piece after the final comma is ignored.
    alreadyDropped : iterable of str
        Candidates who have already left the race.

    Returns
    -------
    list of str
        One entry per prediction that still lists a remaining candidate,
        in the same order as `predictions`. A prediction whose candidates
        have all dropped contributes nothing.
    """
    dropped = set(alreadyDropped)
    nextDrops = []
    for prediction in predictions:
        for candidate in prediction.split(",")[:-1]:
            if candidate not in dropped:
                nextDrops.append(candidate)
                break  # only the earliest remaining candidate counts
    return nextDrops


# before Asa dropped
dropOutOrder = ['suarez', 'hurd', 'elder','pence','scott', 'burgum', 'christie', 'ramaswamy']
whoNext = predictedNextDrops(df['prediction'], dropOutOrder)

counts = Counter(whoNext)
display(counts)

# fraction of participants (0-1, not a percentage) expecting each candidate to drop next
for candidate in ('hutchinson', 'desantis', 'haley', 'trump'):
    print(counts[candidate] / len(df))

print(" ")

# after Asa dropped
dropOutOrder = ['suarez', 'hurd', 'elder','pence','scott', 'burgum', 'christie', 'ramaswamy', 'hutchinson']
whoNext = predictedNextDrops(df['prediction'], dropOutOrder)

counts = Counter(whoNext)
display(counts)

for candidate in ('desantis', 'haley', 'trump'):
    print(counts[candidate] / len(df))

Counter({'hutchinson': 66, 'haley': 9, 'desantis': 2, 'trump': 2})

0.8354430379746836
0.02531645569620253
0.11392405063291139
0.02531645569620253
 


Counter({'haley': 69, 'desantis': 8, 'trump': 2})

0.10126582278481013
0.8734177215189873
0.02531645569620253


# How did predictions fare?

In [10]:
suarezPositions = candidatePositions['suarez']

# Suarez was the first candidate to drop out; 24% of contestants predicted exactly that.
print(len(suarezPositions[suarezPositions == 1]) / len(df) * 100)

# The three positions most often predicted for him (many picked 2nd or 3rd instead)
display(suarezPositions.value_counts().head(3))

# 57% had him somewhere in the first three drops
print(len(suarezPositions[suarezPositions <= 3]) / len(df) * 100)

24.050632911392405


1    19
2    14
3    12
Name: suarez, dtype: int64

56.9620253164557


In [5]:
hurdPositions = candidatePositions['hurd']

# Hurd was the second candidate to drop out; 19% of contestants predicted exactly that.
print(len(hurdPositions[hurdPositions == 2]) / len(df) * 100)

# The three positions most often predicted for him (all within the first three drops)
display(hurdPositions.value_counts().head(3))

# About 65% had him somewhere in the first three drops
print(len(hurdPositions[hurdPositions <= 3]) / len(df) * 100)

18.9873417721519


3    19
1    17
2    15
Name: hurd, dtype: int64

64.55696202531645


In [15]:
# 48% predicted Scott would drop out 5th, 6th, 7th, or 8th
# (the range checked is positions 5 through 8, both ends included)
scottMidPack = candidatePositions['scott'].between(5, 8)
print(len(candidatePositions[scottMidPack]) / len(df) * 100)

48.10126582278481


In [23]:
# Share of participants whose predictions put Christie, Ramaswamy, Haley, Pence,
# DeSantis, and Trump all in positions 7-12, i.e. as the final six candidates
# still running (the original note reported this as "just 12%").
# NOTE(review): the original comment tied this list to Tim Scott's departure, but the
# dropOutOrder list used elsewhere in this notebook has Pence dropping before Scott --
# confirm the intended set of six candidates.
finalSix = ['christie', 'ramaswamy', 'haley', 'pence', 'desantis', 'trump']
allInFinalSix = (candidatePositions[finalSix] >= 7).all(axis=1)
print(len(candidatePositions[allInFinalSix]) / n * 100)

11.538461538461538


In [3]:
# % of participants who placed each candidate in the final 4 (position 9 or later);
# e.g. 34% predicted Haley would be in the final 4
for name in ('haley', 'pence', 'desantis', 'trump'):
    inFinalFour = candidatePositions[name] >= 9
    print(len(candidatePositions[inFinalFour]) / n * 100)

34.177215189873415
46.835443037974684
87.34177215189874
97.46835443037975


In [6]:
# % of participants who ranked Haley 10th or later, i.e. among the final three candidates
haleyInFinalThree = candidatePositions['haley'] >= 10
print(len(candidatePositions[haleyInFinalThree]) / n * 100)

20.253164556962027
