In [1]:
import json
import os
import numpy as np
from collections import Counter
from datetime import datetime as dt
from datetime import timedelta
import copy
import pickle
import bz2
import pandas as pd
from scipy.stats import rankdata  
import pickle
import bz2


In [2]:
# Load the raw ranking snapshots (JSON format) for each of the three point systems.
aau_path = os.fsencode("Ranking_AAU.txt")
bvne_path = os.fsencode("Ranking_BVNE.txt")
p1440_path = os.fsencode("Ranking_p1440.txt")

# `with` closes each handle even if json.load raises on a malformed file.
with open(aau_path) as file:
    aau_raw_rankings = json.load(file)

with open(bvne_path) as file:
    bvne_raw_rankings = json.load(file)

with open(p1440_path) as file:
    p1440_raw_rankings = json.load(file)

## Prepare Vball Dataset with Match Outcomes

In [3]:
# NOTE(review): hard-coded absolute path -- this cell only runs on the author's machine;
# consider moving the location into a configurable data directory.
# NOTE(review): pickle.load can execute arbitrary code; only load archives you trust.
with open('/Users/mmaze/Desktop/vball_game_data.pbz2', 'rb') as file:
    # Nested `with` so the bz2 wrapper is closed as well as the underlying file.
    with bz2.BZ2File(file) as uncompressed:
        vball = pickle.load(uncompressed)

#Sort by date (Final Array)
vball.sort(order='datetime')

#Equivalent output in a structured array
# (field-then-row and row-then-field indexing address the same value; only the
#  last expression of the cell is displayed by the notebook)
vball['tournament_name'][1]
vball[1]['tournament_name']

#Calling multiple fields
vball[['tournament_name','type']][1]

("25th Annual AAU Girls' Junior National Beach Volleyball Championships", 'National')

In [4]:
# NOTE(review): hard-coded absolute path -- consider a configurable data directory.
# NOTE(review): pickle.load can execute arbitrary code; only load archives you trust.
with open('C:/Users/mmaze/Desktop/Capstone/Pickle_Files/new_vball_game_data.pbz2', 'rb') as file:
    # Nested `with` so the bz2 wrapper is closed as well as the underlying file.
    with bz2.BZ2File(file) as uncompressed:
        vball_new = pickle.load(uncompressed)

#Sort by date (Final Array)
vball_new.sort(order='datetime')
print(len(vball_new))

# Keep only the records whose last field (the match date) is strictly after 2022-03-01.
# `dt` is the datetime class imported in the first cell; it is used directly here so the
# notebook-wide alias is never rebound to the `datetime` module in the middle of a cell.
from_date = dt(2022, 3, 1)
vball_new = [i for i in vball_new if i[-1] > from_date]
print(len(vball_new))

print("new data dates range from", vball_new[0][-1], "to", vball_new[-1][-1])

38668
37272
new data dates range from 2022-03-05T09:00:00.000000 to 2022-06-20T00:30:00.000000


In [5]:
# Stack the original records and the newly loaded ones into a single array;
# `vball_new` is a plain list at this point, so it is converted to an array first.
vball_all = np.concatenate([vball, np.asarray(vball_new)], axis=0)

### Quick check that the 3 point systems have the same week indices

In [6]:
# Spot-check that the same list index carries the same "AsOf" date in all three
# point systems, at index 33 and at index 181.
for week_idx in (33, 181):
    for rankings in (aau_raw_rankings, bvne_raw_rankings, p1440_raw_rankings):
        print(rankings[week_idx].get("AsOf"))

2019-08-26T00:00:00
2019-08-26T00:00:00
2019-08-26T00:00:00
2022-06-27T00:00:00
2022-06-27T00:00:00
2022-06-27T00:00:00


## point_system_eval_elo
Calculates the prediction accuracy of a given cumulative ranking system.  
As in the Elo system, the points of the two opponents are averaged.  
However, only the player's own points are used on her side; her teammate's points are not considered.

In [7]:
def point_system_eval_elo(player_points, matches=None, first_week=164, last_week=181):
    """Count how often a points ranking predicts match outcomes (player vs. opponents).

    For each snapshot index ``idx`` in ``range(first_week, last_week)`` the points
    published in that snapshot are used to predict every match whose date lies in
    ``[AsOf(idx), AsOf(idx + 1))``. The player's own points are compared with the
    mean of the two opponents' points; the teammate's points are not used, but the
    teammate must still be present in the snapshot for the match to be evaluated.

    Args:
        player_points: sequence of ranking snapshots; each one is a mapping with an
            "AsOf" string ('%Y-%m-%dT%H:%M:%S') and a "Players" list of mappings
            holding "PlayerProfileId" and "Points".
        matches: iterable of match records indexable by the field names
            "player_id", "teammate_id", "opponent1_id", "opponent2_id", "win",
            and whose last field (``match[-1]``) is the match date. Defaults to the
            notebook-level ``vball_all``.
        first_week: index of the first snapshot to evaluate. With the default 164
            the notebook's original comment dates this at 2022-02-28.
        last_week: index of the snapshot that closes the last window (default 181,
            printed earlier in the notebook as 2022-06-27).

    Returns:
        ``[correct_counter, match_counter]``: the number of correct predictions and
        the number of matches that were evaluated.
    """
    if matches is None:
        # Backward-compatible default: the concatenated match array built earlier.
        matches = vball_all

    as_of_format = '%Y-%m-%dT%H:%M:%S'
    # Parse every snapshot date once instead of re-parsing the whole list per week.
    week_dates = [dt.strptime(week.get('AsOf'), as_of_format) for week in player_points]

    roles = ("player_id", "teammate_id", "opponent1_id", "opponent2_id")
    match_counter = 0
    correct_counter = 0

    for idx in range(first_week, last_week):
        initial_dt = week_dates[idx]
        end_dt = week_dates[idx + 1]

        # Ratings database, rebuilt from scratch for each week: every snapshot dated
        # exactly `initial_dt` contributes its players' points.
        db = {}
        for week, week_dt in zip(player_points, week_dates):
            if week_dt == initial_dt:
                for player in week.get("Players"):
                    db[player.get('PlayerProfileId')] = player.get('Points')

        for match in matches:
            played_at = match[-1]
            # Window is inclusive at the start and exclusive at the end.
            if not (played_at >= initial_dt and played_at < end_dt):
                continue

            # Only consider matches where all four players appear in the snapshot.
            if not all(match[role] in db for role in roles):
                continue

            player_rtg = db[match["player_id"]]
            avg_oppon = .5 * (db[match["opponent1_id"]] + db[match["opponent2_id"]])

            # No prediction can be made when both sides carry the same points.
            if player_rtg == avg_oppon:
                continue

            pred_win = player_rtg > avg_oppon

            match_counter += 1
            if pred_win == match["win"]:
                correct_counter += 1

    return [correct_counter, match_counter]

In [8]:
# AAU points, player-vs-opponents comparison: [correct, evaluated], then the hit rate.
aau_eval = point_system_eval_elo(aau_raw_rankings)
print(aau_eval)
n_correct, n_matches = aau_eval
print(n_correct / n_matches)

[4444, 7098]
0.6260918568610876


In [9]:
# BVNE points, player-vs-opponents comparison: [correct, evaluated], then the hit rate.
bvne_eval = point_system_eval_elo(bvne_raw_rankings)
print(bvne_eval)
n_correct, n_matches = bvne_eval
print(n_correct / n_matches)

[2908, 4883]
0.5955355314355929


In [10]:
# p1440 points, player-vs-opponents comparison: [correct, evaluated], then the hit rate.
p1440_eval = point_system_eval_elo(p1440_raw_rankings)
print(p1440_eval)
n_correct, n_matches = p1440_eval
print(n_correct / n_matches)

[2859, 4903]
0.5831123801754028


## point_system_eval_avg
Same as `point_system_eval_elo` above, except that it also averages the points of the player's own team (player and teammate)  
instead of averaging only the opponents' points.

In [11]:
def point_system_eval_avg(player_points, matches=None, first_week=164, last_week=181):
    """Count how often a points ranking predicts match outcomes (team vs. team).

    For each snapshot index ``idx`` in ``range(first_week, last_week)`` the points
    published in that snapshot are used to predict every match whose date lies in
    ``[AsOf(idx), AsOf(idx + 1))``. The mean points of the player and her teammate
    are compared with the mean points of the two opponents. Matches in which any of
    the four players is absent from the snapshot are ignored.

    Args:
        player_points: sequence of ranking snapshots; each one is a mapping with an
            "AsOf" string ('%Y-%m-%dT%H:%M:%S') and a "Players" list of mappings
            holding "PlayerProfileId" and "Points".
        matches: iterable of match records indexable by the field names
            "player_id", "teammate_id", "opponent1_id", "opponent2_id", "win",
            and whose last field (``match[-1]``) is the match date. Defaults to the
            notebook-level ``vball_all``.
        first_week: index of the first snapshot to evaluate. With the default 164
            the notebook's original comment dates this at 2022-02-28.
        last_week: index of the snapshot that closes the last window (default 181,
            printed earlier in the notebook as 2022-06-27).

    Returns:
        ``[correct_counter, match_counter]``: the number of correct predictions and
        the number of matches that were evaluated.
    """
    if matches is None:
        # Backward-compatible default: the concatenated match array built earlier.
        matches = vball_all

    as_of_format = '%Y-%m-%dT%H:%M:%S'
    # Parse every snapshot date once instead of re-parsing the whole list per week.
    week_dates = [dt.strptime(week.get('AsOf'), as_of_format) for week in player_points]

    roles = ("player_id", "teammate_id", "opponent1_id", "opponent2_id")
    match_counter = 0
    correct_counter = 0

    for idx in range(first_week, last_week):
        initial_dt = week_dates[idx]
        end_dt = week_dates[idx + 1]

        # Ratings database, rebuilt from scratch for each week: every snapshot dated
        # exactly `initial_dt` contributes its players' points.
        db = {}
        for week, week_dt in zip(player_points, week_dates):
            if week_dt == initial_dt:
                for player in week.get("Players"):
                    db[player.get('PlayerProfileId')] = player.get('Points')

        for match in matches:
            played_at = match[-1]
            # Window is inclusive at the start and exclusive at the end.
            if not (played_at >= initial_dt and played_at < end_dt):
                continue

            # Only consider matches where all four players appear in the snapshot.
            if not all(match[role] in db for role in roles):
                continue

            avg_player = .5 * (db[match["player_id"]] + db[match["teammate_id"]])
            avg_oppon = .5 * (db[match["opponent1_id"]] + db[match["opponent2_id"]])

            # No prediction can be made when both teams carry the same average.
            if avg_player == avg_oppon:
                continue

            pred_win = avg_player > avg_oppon

            match_counter += 1
            if pred_win == match["win"]:
                correct_counter += 1

    return [correct_counter, match_counter]

In [12]:
# AAU points, team-average comparison: [correct, evaluated], then the hit rate.
aau_eval = point_system_eval_avg(aau_raw_rankings)
print(aau_eval)
n_correct, n_matches = aau_eval
print(n_correct / n_matches)

[4492, 7108]
0.6319639842431064


In [13]:
# BVNE points, team-average comparison: [correct, evaluated], then the hit rate.
bvne_eval = point_system_eval_avg(bvne_raw_rankings)
print(bvne_eval)
n_correct, n_matches = bvne_eval
print(n_correct / n_matches)

[2932, 4880]
0.6008196721311475


In [14]:
# p1440 points, team-average comparison: [correct, evaluated], then the hit rate.
p1440_eval = point_system_eval_avg(p1440_raw_rankings)
print(p1440_eval)
n_correct, n_matches = p1440_eval
print(n_correct / n_matches)

[2968, 4916]
0.6037428803905615


## point_system_eval_impute
Same as the `point_system_eval_avg` function, except that players missing from the week's ranking are imputed with 0 points instead of the match being skipped.

In [22]:
def point_system_eval_impute(player_points, matches=None, first_week=164, last_week=181):
    """Count how often a points ranking predicts match outcomes, imputing 0 points.

    For each snapshot index ``idx`` in ``range(first_week, last_week)`` the points
    published in that snapshot are used to predict every match whose date lies in
    ``[AsOf(idx), AsOf(idx + 1))``. The mean points of the player and her teammate
    are compared with the mean points of the two opponents. A player who does not
    appear in the snapshot is treated as having 0 points, so no match is dropped
    for missing players.

    Args:
        player_points: sequence of ranking snapshots; each one is a mapping with an
            "AsOf" string ('%Y-%m-%dT%H:%M:%S') and a "Players" list of mappings
            holding "PlayerProfileId" and "Points".
        matches: iterable of match records indexable by the field names
            "player_id", "teammate_id", "opponent1_id", "opponent2_id", "win",
            and whose last field (``match[-1]``) is the match date. Defaults to the
            notebook-level ``vball_all``.
        first_week: index of the first snapshot to evaluate. With the default 164
            the notebook's original comment dates this at 2022-02-28.
        last_week: index of the snapshot that closes the last window (default 181,
            printed earlier in the notebook as 2022-06-27).

    Returns:
        ``[correct_counter, match_counter]``: the number of correct predictions and
        the number of matches that were evaluated.
    """
    if matches is None:
        # Backward-compatible default: the concatenated match array built earlier.
        matches = vball_all

    as_of_format = '%Y-%m-%dT%H:%M:%S'
    # Parse every snapshot date once instead of re-parsing the whole list per week.
    week_dates = [dt.strptime(week.get('AsOf'), as_of_format) for week in player_points]

    match_counter = 0
    correct_counter = 0

    for idx in range(first_week, last_week):
        initial_dt = week_dates[idx]
        end_dt = week_dates[idx + 1]

        # Ratings database, rebuilt from scratch for each week: every snapshot dated
        # exactly `initial_dt` contributes its players' points.
        db = {}
        for week, week_dt in zip(player_points, week_dates):
            if week_dt == initial_dt:
                for player in week.get("Players"):
                    db[player.get('PlayerProfileId')] = player.get('Points')

        for match in matches:
            played_at = match[-1]
            # Window is inclusive at the start and exclusive at the end.
            if not (played_at >= initial_dt and played_at < end_dt):
                continue

            # Players absent from the snapshot are imputed with 0 points; a lookup
            # default gives the same values as inserting zero entries into `db`.
            avg_player = .5 * (db.get(match["player_id"], 0) + db.get(match["teammate_id"], 0))
            avg_oppon = .5 * (db.get(match["opponent1_id"], 0) + db.get(match["opponent2_id"], 0))

            # No prediction can be made when both teams carry the same average
            # (this includes matches where all four players are unranked).
            if avg_player == avg_oppon:
                continue

            pred_win = avg_player > avg_oppon

            match_counter += 1
            if pred_win == match["win"]:
                correct_counter += 1

    return [correct_counter, match_counter]

In [23]:
# AAU points with zero-imputation for unranked players: [correct, evaluated], then the hit rate.
aau_eval = point_system_eval_impute(aau_raw_rankings)
print(aau_eval)
n_correct, n_matches = aau_eval
print(n_correct / n_matches)

[17404, 25668]
0.6780426990805672


In [24]:
# BVNE points with zero-imputation for unranked players: [correct, evaluated], then the hit rate.
bvne_eval = point_system_eval_impute(bvne_raw_rankings)
print(bvne_eval)
n_correct, n_matches = bvne_eval
print(n_correct / n_matches)

[18028, 27968]
0.6445938215102975


In [25]:
# p1440 points with zero-imputation for unranked players: [correct, evaluated], then the hit rate.
p1440_eval = point_system_eval_impute(p1440_raw_rankings)
print(p1440_eval)
n_correct, n_matches = p1440_eval
print(n_correct / n_matches)

[13436, 20740]
0.6478302796528448
