In [11]:
import json
import os
import numpy as np
from collections import Counter
from datetime import datetime as dt
from datetime import timedelta
import copy
import pickle
import bz2
import pandas as pd
from scipy.stats import rankdata  
import pickle
import bz2


In [12]:
#Load raw rankings (in JSON format)
aau_path = os.fsencode("/Users/cameron/Documents/SMU_DS/Capstone/SMU_Capstone_Project/Historical_Rankings.txt/Ranking_AAU.txt")
bvne_path = os.fsencode("/Users/cameron/Documents/SMU_DS/Capstone/SMU_Capstone_Project/Historical_Rankings.txt/Ranking_BVNE.txt")
p1440_path = os.fsencode("/Users/cameron/Documents/SMU_DS/Capstone/SMU_Capstone_Project/Historical_Rankings.txt/Ranking_p1440.txt")

file = open(aau_path)
aau_raw_rankings=json.load(file)
file.close()

file = open(bvne_path)
bvne_raw_rankings=json.load(file)
file.close()

file = open(p1440_path)
p1440_raw_rankings=json.load(file)
file.close()

In [13]:
#Example of single ranking set on specific date
aau_raw_rankings[0]

{'AsOf': '2019-01-07T00:00:00',
 'Players': [{'PlayerProfileId': 123, 'Points': 1119.0},
  {'PlayerProfileId': 124, 'Points': 618.0},
  {'PlayerProfileId': 125, 'Points': 1494.7},
  {'PlayerProfileId': 127, 'Points': 490.0},
  {'PlayerProfileId': 128, 'Points': 120.0},
  {'PlayerProfileId': 129, 'Points': 765.0},
  {'PlayerProfileId': 130, 'Points': 255.0},
  {'PlayerProfileId': 131, 'Points': 1570.0},
  {'PlayerProfileId': 132, 'Points': 1785.0},
  {'PlayerProfileId': 133, 'Points': 340.5},
  {'PlayerProfileId': 134, 'Points': 150.0},
  {'PlayerProfileId': 135, 'Points': 30.0},
  {'PlayerProfileId': 136, 'Points': 30.0},
  {'PlayerProfileId': 137, 'Points': 3558.1},
  {'PlayerProfileId': 139, 'Points': 5.0},
  {'PlayerProfileId': 140, 'Points': 5.0},
  {'PlayerProfileId': 141, 'Points': 1564.0},
  {'PlayerProfileId': 142, 'Points': 1755.0},
  {'PlayerProfileId': 143, 'Points': 1368.0},
  {'PlayerProfileId': 144, 'Points': 1320.0},
  {'PlayerProfileId': 146, 'Points': 475.0},
  {'Playe

In [14]:
#Verify dates are the same for all rankings,check the amount of rankings available, and the min/max dates in rankings
print(
    [x['AsOf'] for x in  aau_raw_rankings]==[x['AsOf'] for x in  bvne_raw_rankings], #aau vs bvne date list comparison
    [x['AsOf'] for x in  aau_raw_rankings]==[x['AsOf'] for x in  p1440_raw_rankings], #aau vs p1440 date list comparison
    len(aau_raw_rankings),
    aau_raw_rankings[0]['AsOf'],
    aau_raw_rankings[-1]['AsOf'],sep='\n'
)

True
True
182
2019-01-07T00:00:00
2022-06-27T00:00:00


In [15]:
#Convert date strings to datetime format and store in date list
aau_dt_holder=sorted([dt.strptime(x.get('AsOf'),'%Y-%m-%dT%H:%M:%S') for x in aau_raw_rankings])
bvne_dt_holder=sorted([dt.strptime(x.get('AsOf'),'%Y-%m-%dT%H:%M:%S') for x in bvne_raw_rankings])
p1440_dt_holder=sorted([dt.strptime(x.get('AsOf'),'%Y-%m-%dT%H:%M:%S') for x in p1440_raw_rankings])

aau_dt_holder[0:5]

[datetime.datetime(2019, 1, 7, 0, 0),
 datetime.datetime(2019, 1, 14, 0, 0),
 datetime.datetime(2019, 1, 21, 0, 0),
 datetime.datetime(2019, 1, 28, 0, 0),
 datetime.datetime(2019, 2, 4, 0, 0)]

In [16]:
#Create new ranking dictionaries based on points earned and another by position relative to competitors
#Only creating Keys based on dates provided, values will be added below
#IMPORTANT: Higher points is better while lower position is better
aau_point_rankings={k:[] for k in aau_dt_holder}
bvne_point_rankings={k:[] for k in bvne_dt_holder}
p1440_point_rankings={k:[] for k in p1440_dt_holder}

aau_positional_rankings={k:[] for k in aau_dt_holder}
bvne_positional_rankings={k:[] for k in bvne_dt_holder}
p1440_positional_rankings={k:[] for k in p1440_dt_holder}

In [17]:
#Function to fill values in dictionary for point based dictionary
#Values are a dictionary of every player:point total
#Function will update the third parameter (new_dict) in place and requires no return
#This format is used over original format from JSON file that had similar information for optimized look up speed
def fill_point_rankings(dt_holder,raw_rankings,new_dict):
    for i in dt_holder:
        dict_holder={}
        for j in raw_rankings:
            if i==dt.strptime(j.get('AsOf'),'%Y-%m-%dT%H:%M:%S'):
                for k in j.get('Players'):
                    dict_holder[k.get('PlayerProfileId')]=k.get('Points')
                new_dict[i]=dict_holder
                
#Update point rankings
fill_point_rankings(aau_dt_holder,aau_raw_rankings,aau_point_rankings)
fill_point_rankings(bvne_dt_holder,bvne_raw_rankings,bvne_point_rankings)
fill_point_rankings(p1440_dt_holder,p1440_raw_rankings,p1440_point_rankings)

In [18]:
#Function to fill values in dictionary for positional based dictionary
#Values are a dictionary of every player:relative position (lower is better)
#Function will update the third parameter (new_dict) in place and requires no return
#This format is used over original format from JSON file that had similar information for optimized look up speed
def fill_positional_rankings(dt_holder,raw_rankings,new_dict):
    for i in dt_holder:
        dict_holder={}
        for j in raw_rankings:
            if i==dt.strptime(j.get('AsOf'),'%Y-%m-%dT%H:%M:%S'):
                for k in j.get('Players'):
                    dict_holder[k.get('PlayerProfileId')]=k.get('Points')
                ratings={key: val for key, val in sorted(dict_holder.items(), key=lambda item: item[1],reverse=True)}
                positional_dict_holder=dict(zip(ratings.keys(), rankdata([-i for i in ratings.values()], method='average')))
                new_dict[i]=positional_dict_holder
                
#Update positional rankings       
fill_positional_rankings(aau_dt_holder,aau_raw_rankings,aau_positional_rankings)
fill_positional_rankings(bvne_dt_holder,bvne_raw_rankings,bvne_positional_rankings)
fill_positional_rankings(p1440_dt_holder,p1440_raw_rankings,p1440_positional_rankings)

In [19]:
#Bring in all match data provided
with open('/Users/cameron/Documents/SMU_DS/Capstone/SMU_Capstone_Project/Pickle_Files/vball_game_data.pbz2', 'rb') as file:
    uncompressed = bz2.BZ2File(file)
    vball = pickle.load(uncompressed)

with open('/Users/cameron/Documents/SMU_DS/Capstone/SMU_Capstone_Project/Pickle_Files/new_vball_game_data_alt.pbz2', 'rb') as file:
    uncompressed = bz2.BZ2File(file)
    vball_new = pickle.load(uncompressed)
    
vball_new_clean=vball_new[(vball_new['datetime'] >= max(vball['datetime']))]
vball_all=np.concatenate((vball, vball_new_clean))

In [20]:
#Column names reminder
vball_all.dtype

dtype([('tournamentId', '<i4'), ('tournament_name', '<U200'), ('type', '<U200'), ('division', '<U200'), ('divisionId', '<U200'), ('gender', '<U200'), ('age_type', '<U200'), ('match_id', '<i4'), ('roundNumber', '<i4'), ('matchNumber', '<i4'), ('matchWinner', '<U200'), ('series_number', '<i4'), ('match_type', '<U200'), ('isMatch', '?'), ('player_id', '<i4'), ('teammate_id', '<i4'), ('team_id', '<U200'), ('opponent1_id', '<i4'), ('opponent2_id', '<i4'), ('opponent_team_id', '<U200'), ('win', '?'), ('ignored_score', '?'), ('team_score', '<i4'), ('opponent_score', '<i4'), ('incomplete_score', '?'), ('required_score', '<i4'), ('score_differential', '<i4'), ('pct_points_won', '<f4'), ('winning_score', '<i4'), ('latitude', '<f4'), ('longitude', '<f4'), ('time_zone', '<U200'), ('datetime', '<M8[us]')])

In [32]:
#Function to predict games for every available match based on ranking system inputted
#Function returns dictionary of outcomes
#Match will refer to latest rankings provided relative to match date
#Imputation process is to use partner's ranking if available (will not evaluate if full team missing)
#If imputation is false, all players must be present to evaluate
#max_is_higher used to adjust if lower or higher is better in ranking
#better_player_weight allows you to tune the weight of the blended ranking of the players
#start_date allows you to specify a manual start date

def predict_games(df,new_dict,dt_holder,max_is_higher=True,impute=True,better_player_weight=.5,start_date=None):
    
    #If match before the first rankings or before inputted start date, match will not be evaluated
    #If match after 7 days of last ranking, match will not be evaluated
    if start_date==None or start_date<=min(list(new_dict.keys())):
        adj_df=df[df['datetime']>=min(list(new_dict.keys()))]
        adj_df=adj_df[adj_df['datetime']<(max(list(new_dict.keys()))+ timedelta(days=7))]
    else:
        new_start_date=max([x for x in dt_holder if x<=start_date])
        adj_df=df[df['datetime']>=new_start_date]
        adj_df=adj_df[adj_df['datetime']<(max(list(new_dict.keys()))+ timedelta(days=7))]
        
    #Iterate through every match and store results by appending one at a time    
    result_holder=[]
    for i in adj_df[::4]:
        
        #Find date of last ranking provided
        date_check_holder=max([x for x in dt_holder if x<i['datetime']])
        rankings=new_dict.get(date_check_holder)
        
        #Update teams and match result
        team_ids=[i['player_id'],i['teammate_id']]
        opponent_ids=[i['opponent1_id'],i['opponent2_id']]
        match_result=i['win']
        
        #Check if there is sufficient data for match evaluation depending on if imputation is allowed (available_data_check)
        #Example in cell below how any and all work with dictionaries
        if impute==True:
            available_data_check=(any(x in rankings for x in team_ids) and any(x in rankings for x in opponent_ids))
        else:
            available_data_check=(all(x in rankings for x in team_ids) and all(x in rankings for x in opponent_ids))

        #Only evaluate if data is available, else mark as skipped
        if available_data_check==True: 
            
            #Sort team ratings so that you can control weight of better player
            #Sort order changes based on whether ranking is ascending or descending
            if max_is_higher==True:
                team_ranking_holder=sorted([rankings.get(team_ids[0],rankings.get(team_ids[1])),rankings.get(team_ids[1],rankings.get(team_ids[0]))])
                opponent_ranking_holder=sorted([rankings.get(opponent_ids[0],rankings.get(opponent_ids[1])),rankings.get(opponent_ids[1],rankings.get(opponent_ids[0]))])
            else:
                team_ranking_holder=sorted([rankings.get(team_ids[0],rankings.get(team_ids[1])),rankings.get(team_ids[1],rankings.get(team_ids[0]))],reverse=True)
                opponent_ranking_holder=sorted([rankings.get(opponent_ids[0],rankings.get(opponent_ids[1])),rankings.get(opponent_ids[1],rankings.get(opponent_ids[0]))],reverse=True)
                                
            team_ranking=team_ranking_holder[0]*(1-better_player_weight)+team_ranking_holder[1]*better_player_weight
            opponent_ranking=opponent_ranking_holder[0]*(1-better_player_weight)+opponent_ranking_holder[1]*better_player_weight
            
            #Update result based on blended ranking of teams
            if team_ranking==opponent_ranking:
                result_holder.append('tie') #predicted tie
            elif max(team_ranking,opponent_ranking)==team_ranking and match_result==True and max_is_higher==True: #Adjust min/max depending on points vs positional ranking
                result_holder.append('correct') #correct prediction
            elif min(team_ranking,opponent_ranking)==team_ranking and match_result==True and max_is_higher==False: #Adjust min/max depending on points vs positional ranking
                result_holder.append('correct') #correct prediction
            else:
                result_holder.append('incorrect') #incorrect prediction
        
        else:
            result_holder.append('skipped-unranked team')
        
        # print(date_check_holder,team_ranking,opponent_ranking,match_result)

    results=Counter(result_holder)
    return results


In [9]:
#Example of how any and all work with dictionaries
#Searches for keys
#'Any' means at least 1 while 'all' means every element must match
test={0:'hi',1:'low',2:'yo'}
case1=[0,1]
case2=[0,3]
case3=[3,4]
print(
    any(x in test for x in case1),
    any(x in test for x in case2),
    any(x in test for x in case3),
    all(x in test for x in case1),
    all(x in test for x in case2),
    all(x in test for x in case3),sep='\n'
)



True
True
False
True
False
False


### Points Ranking Evaluation With Imputation (50/50 weight balance)

In [143]:
#AAU points rankings with imputation
results=predict_games(vball_all,aau_point_rankings,aau_dt_holder,max_is_higher=True,impute=True,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))


Counter({'skipped-unranked team': 26882, 'incorrect': 17746, 'correct': 15314, 'tie': 224})
accuracy: 0.4632183908045977


In [144]:
#BVNE points rankings with imputation
results=predict_games(vball_all,bvne_point_rankings,bvne_dt_holder,max_is_higher=True,impute=True,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

Counter({'skipped-unranked team': 40200, 'incorrect': 11215, 'correct': 8576, 'tie': 175})
accuracy: 0.43332828053155475


In [145]:
#p1440 points rankings with imputation
results=predict_games(vball_all,p1440_point_rankings,p1440_dt_holder,max_is_higher=True,impute=True,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

Counter({'skipped-unranked team': 43118, 'incorrect': 9956, 'correct': 6838, 'tie': 254})
accuracy: 0.40716922710491843


### Positional Ranking Evaluation With Imputation (50/50 weight balance)

In [146]:
#AAU positional rankings with imputation
results=predict_games(vball_all,aau_positional_rankings,aau_dt_holder,max_is_higher=False,impute=True,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 26882, 'incorrect': 17746, 'correct': 15314, 'tie': 224})
# accuracy: 0.4632183908045977

Counter({'skipped-unranked team': 26882, 'incorrect': 18128, 'correct': 14933, 'tie': 223})
accuracy: 0.4516802274583346


In [147]:
#BVNE positional rankings with imputation
results=predict_games(vball_all,bvne_positional_rankings,bvne_dt_holder,max_is_higher=False,impute=True,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 40200, 'incorrect': 11215, 'correct': 8576, 'tie': 175})
# accuracy: 0.43332828053155475

Counter({'skipped-unranked team': 40200, 'incorrect': 11379, 'correct': 8425, 'tie': 162})
accuracy: 0.4254191072510604


In [148]:
#p1440 positional rankings with imputation
results=predict_games(vball_all,p1440_positional_rankings,p1440_dt_holder,max_is_higher=False,impute=True,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 43118, 'incorrect': 9956, 'correct': 6838, 'tie': 254})
# accuracy: 0.40716922710491843

Counter({'skipped-unranked team': 43118, 'incorrect': 10026, 'correct': 6763, 'tie': 259})
accuracy: 0.40282327714575017


### Points Ranking Evaluation Without Imputation (50/50 weight balance)
Because points rankings were higher

In [22]:
#AAU points rankings without imputation
results=predict_games(vball_all,aau_point_rankings,aau_dt_holder,max_is_higher=True,impute=False,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 26882, 'incorrect': 17746, 'correct': 15314, 'tie': 224})
# accuracy: 0.4632183908045977


Counter({'skipped-unranked team': 40739, 'incorrect': 10367, 'correct': 9027, 'tie': 33})
accuracy: 0.465453232958647


In [23]:
#BVNE points rankings without imputation
results=predict_games(vball_all,bvne_point_rankings,bvne_dt_holder,max_is_higher=True,impute=False,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 40200, 'incorrect': 11215, 'correct': 8576, 'tie': 175})
# accuracy: 0.43332828053155475

Counter({'skipped-unranked team': 51529, 'incorrect': 4816, 'correct': 3804, 'tie': 17})
accuracy: 0.44129930394431555


In [24]:
#p1440 points rankings without imputation
results=predict_games(vball_all,p1440_point_rankings,p1440_dt_holder,max_is_higher=True,impute=False,better_player_weight=.5,start_date=None)
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 43118, 'incorrect': 9956, 'correct': 6838, 'tie': 254})
# accuracy: 0.40716922710491843

Counter({'skipped-unranked team': 52573, 'incorrect': 4570, 'correct': 2989, 'tie': 34})
accuracy: 0.3954226749570049


### Points Ranking Evaluation With Imputation (Adjusting the weight balance in favor of better player)
Because points rankings were higher and imputation had a minor drop off with significantly more predicitons

In [28]:
#AAU points rankings with imputation adjusting weight balance
aau_acc=[]
for i in [1-x/10 for x in range(5)]:
    results=predict_games(vball_all,aau_point_rankings,aau_dt_holder,max_is_higher=True,impute=True,better_player_weight=i,start_date=None)
    print('Better player weight:',i)
    print(results)
    print('accuracy:',results['correct']/(results['correct']+results['incorrect']))
    print('')
    aau_acc.append(results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 26882, 'incorrect': 17746, 'correct': 15314, 'tie': 224})
# accuracy: 0.4632183908045977


Better player weight: 1.0
Counter({'skipped-unranked team': 26882, 'incorrect': 17637, 'correct': 15360, 'tie': 287})
accuracy: 0.46549686335121376


Better player weight: 0.9
Counter({'skipped-unranked team': 26882, 'incorrect': 17674, 'correct': 15387, 'tie': 223})
accuracy: 0.46541241946704576


Better player weight: 0.8
Counter({'skipped-unranked team': 26882, 'incorrect': 17678, 'correct': 15382, 'tie': 224})
accuracy: 0.465275257108288


Better player weight: 0.7
Counter({'skipped-unranked team': 26882, 'incorrect': 17685, 'correct': 15379, 'tie': 220})
accuracy: 0.46512823614807647


Better player weight: 0.6
Counter({'skipped-unranked team': 26882, 'incorrect': 17708, 'correct': 15350, 'tie': 226})
accuracy: 0.4643354104906528




In [30]:
#BVNE points rankings with imputation adjusting weight balance
bvne_acc=[]
for i in [1-x/10 for x in range(5)]:
    results=predict_games(vball_all,bvne_point_rankings,bvne_dt_holder,max_is_higher=True,impute=True,better_player_weight=i,start_date=None)
    print('Better player weight:',i)
    print(results)
    print('accuracy:',results['correct']/(results['correct']+results['incorrect']))
    print('')
    bvne_acc.append(results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 40200, 'incorrect': 11215, 'correct': 8576, 'tie': 175})
# accuracy: 0.43332828053155475

Better player weight: 1.0
Counter({'skipped-unranked team': 40200, 'incorrect': 11169, 'correct': 8565, 'tie': 232})
accuracy: 0.43402249923989056

Better player weight: 0.9
Counter({'skipped-unranked team': 40200, 'incorrect': 11238, 'correct': 8571, 'tie': 157})
accuracy: 0.43268211419051944

Better player weight: 0.8
Counter({'skipped-unranked team': 40200, 'incorrect': 11219, 'correct': 8585, 'tie': 162})
accuracy: 0.43349828317511613

Better player weight: 0.7
Counter({'skipped-unranked team': 40200, 'incorrect': 11198, 'correct': 8607, 'tie': 161})
accuracy: 0.4345872254481192

Better player weight: 0.6
Counter({'skipped-unranked team': 40200, 'incorrect': 11211, 'correct': 8593, 'tie': 162})
accuracy: 0.43390224197131894



In [33]:
#p1440 points rankings with imputation adjusting weight balance
p1440_acc=[]
for i in [1-x/10 for x in range(5)]:
    results=predict_games(vball_all,p1440_point_rankings,p1440_dt_holder,max_is_higher=True,impute=True,better_player_weight=i,start_date=None)
    print('Better player weight:',i)
    print(results)
    print('accuracy:',results['correct']/(results['correct']+results['incorrect']))
    print('')
    p1440_acc.append(results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 43118, 'incorrect': 9956, 'correct': 6838, 'tie': 254})
# accuracy: 0.40716922710491843

Better player weight: 1.0
Counter({'skipped-unranked team': 43118, 'incorrect': 9862, 'correct': 6842, 'tie': 344})
accuracy: 0.4096024904214559

Better player weight: 0.9
Counter({'skipped-unranked team': 43118, 'incorrect': 9929, 'correct': 6871, 'tie': 248})
accuracy: 0.40898809523809526

Better player weight: 0.8
Counter({'skipped-unranked team': 43118, 'incorrect': 9931, 'correct': 6866, 'tie': 251})
accuracy: 0.40876346966720245

Better player weight: 0.7
Counter({'skipped-unranked team': 43118, 'incorrect': 9945, 'correct': 6854, 'tie': 249})
accuracy: 0.40800047621882257

Better player weight: 0.6
Counter({'skipped-unranked team': 43118, 'incorrect': 9930, 'correct': 6868, 'tie': 250})
accuracy: 0.4088581974044529



### Points Ranking Evaluation With Imputation (50/50 weight balance) (Using test data only)

In [34]:
#AAU points rankings with imputation
results=predict_games(vball_all,aau_point_rankings,aau_dt_holder,max_is_higher=True,impute=True,better_player_weight=.5,start_date=dt(2022,2,28))
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 26882, 'incorrect': 17746, 'correct': 15314, 'tie': 224})
# accuracy: 0.4632183908045977


Counter({'skipped-unranked team': 5655, 'incorrect': 2009, 'correct': 1629, 'tie': 25})
accuracy: 0.4477735019241341


In [37]:
#BVNE points rankings with imputation
results=predict_games(vball_all,bvne_point_rankings,bvne_dt_holder,max_is_higher=True,impute=True,better_player_weight=.5,start_date=dt(2022,2,28))
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 40200, 'incorrect': 11215, 'correct': 8576, 'tie': 175})
# accuracy: 0.43332828053155475

Counter({'skipped-unranked team': 5878, 'incorrect': 2049, 'correct': 1362, 'tie': 29})
accuracy: 0.3992963940193492


In [38]:
#p1440 points rankings with imputation
results=predict_games(vball_all,p1440_point_rankings,p1440_dt_holder,max_is_higher=True,impute=True,better_player_weight=.5,start_date=dt(2022,2,28))
print(results)
print('accuracy:',results['correct']/(results['correct']+results['incorrect']))

# Previous points with imputation
# Counter({'skipped-unranked team': 43118, 'incorrect': 9956, 'correct': 6838, 'tie': 254})
# accuracy: 0.40716922710491843

Counter({'skipped-unranked team': 6780, 'incorrect': 1544, 'correct': 960, 'tie': 34})
accuracy: 0.38338658146964855
