# Rankability
## March Madness Dataset

In [79]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [381]:
import copy
import os
import pandas as pd
import numpy as np
import networkx as nx
from scipy.stats import pearsonr
from scipy.stats import skew
from tqdm import tqdm
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
from IPython.display import display, Markdown, Latex

import itertools
import joblib

In [81]:
from pathlib import Path
home = str(Path.home())
home

'/disk/home/amy'

In [82]:
import sys
sys.path.insert(0,"%s/rankability_toolbox_dev"%home)
import pyrankability

In [83]:
sys.path.insert(0,"%s/sensitivity_study/src"%home)
from sensitivity_tests import *
from utilities import *
from base import *

In [84]:
games={}
remaining_games={}
madness_teams={}
all_teams={}
years = ["2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018"]
for year in years:
    games[year],remaining_games[year] = read_data('../data/%steams.txt'%year,'../data/%sgames.txt'%year,'../data/%sMadnessTeams.txt'%year)
    madness_teams[year] = list(np.unique(list(games[year].team1_name.loc[games[year].team1_madness == 1]) + list(games[year].team2_name.loc[games[year].team2_madness == 1])))
    all_teams[year] = list(np.unique(list(games[year].team1_name) + list(games[year].team2_name)))
print(year)
games[year]

2018


Unnamed: 0,team2,team1,notsure1,date,H_A_N1,points1,H_A_N2,points2,team1_name,team2_name,team1_madness,team2_madness
0,1,16,737011,2017-11-13,1,83,-1,69,Arkansas_St,Abilene_Chr,0,0
1,1,41,737114,2018-02-24,-1,74,1,72,Cent_Arkansas,Abilene_Chr,0,0
3,1,143,737018,2017-11-20,-1,75,1,67,Lipscomb,Abilene_Chr,1,0
4,1,143,737045,2017-12-17,1,67,-1,65,Lipscomb,Abilene_Chr,1,0
5,1,199,737056,2017-12-28,1,77,-1,74,New_Orleans,Abilene_Chr,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
5535,351,318,737048,2017-12-20,1,91,-1,74,Utah_St,Youngstown_St,0,0
5536,351,339,737086,2018-01-27,1,85,-1,67,WI_Green_Bay,Youngstown_St,0,0
5537,351,340,737084,2018-01-25,1,66,-1,55,WI_Milwaukee,Youngstown_St,0,0
5538,351,347,737074,2018-01-15,1,77,-1,67,Wright_St,Youngstown_St,1,0


In [85]:
remaining_games[year]

Unnamed: 0,team2,team1,notsure1,date,H_A_N1,points1,H_A_N2,points2,team1_name,team2_name,team1_madness,team2_madness
2,1,73,737130,2018-03-12,1,80,-1,73,Drake,Abilene_Chr,0,0
32,2,313,737125,2018-03-07,1,97,-1,90,UNLV,Air_Force,0,0
42,3,79,737126,2018-03-08,0,67,0,58,E_Michigan,Akron,0,0
59,4,137,737128,2018-03-10,0,86,0,63,Kentucky,Alabama,1,1
68,4,326,737135,2018-03-17,0,81,0,58,Villanova,Alabama,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
5478,347,287,737133,2018-03-15,0,73,0,47,Tennessee,Wright_St,1,1
5491,348,197,737126,2018-03-08,0,85,0,75,New_Mexico,Wyoming,0,0
5496,349,92,737136,2018-03-18,0,75,0,70,Florida_St,Xavier,1,1
5498,349,234,737127,2018-03-09,0,75,0,72,Providence,Xavier,1,1


In [197]:
# Parameters to search
direct_thress = [0,1,2]
spread_thress = [0,3,6]
weight_indirects = [0.25,0.5]
domains_ranges = [('all','madness'),('madness','madness')]

years_train = ['2002','2003','2004','2005','2006']
years_test = copy.copy(years)
for year in years_train:
    years_test.remove(year)
# fracs represent how much of the data to include
frac = 1.

In [184]:
def compute(game_df,team_range,frac,direct_thres,spread_thres,weight_indirect):
    columns = ["frac","direct_thres","spread_thres","weight_indirect"]+team_range
    hillside_columns = ["frac","direct_thres","spread_thres","weight_indirect","details"]

    upper = int(len(game_df)*frac)
    game_df_sample = game_df.iloc[:upper,:]

    map_func = lambda linked: pyrankability.construct.colley_matrices(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
    colley_matrix,colley_b = pyrankability.construct.map_vectorized(game_df_sample,map_func)
    colley_matrix = colley_matrix.reindex(index=team_range,columns=team_range)
    colley_b = colley_b.reindex(team_range)
    mask = colley_b.isna()
    colley_b = colley_b.loc[~mask]
    colley_matrix = colley_matrix.loc[~mask,~mask]
    inxs = list(np.where(mask)[0])
    ranking1,r1 = pyrankability.construct.ranking_from_matrices(colley_matrix.fillna(0),colley_b,inxs)

    map_func = lambda linked: pyrankability.construct.massey_matrices(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
    massey_matrix,massey_b = pyrankability.construct.map_vectorized(game_df_sample,map_func)
    massey_matrix = massey_matrix.reindex(index=team_range,columns=team_range)
    massey_b = massey_b.reindex(team_range)
    mask = massey_b.isna()
    massey_b = massey_b.loc[~mask]
    massey_matrix = massey_matrix.loc[~mask,~mask]    
    inxs = list(np.where(mask)[0])    
    ranking2,r2 = pyrankability.construct.ranking_from_matrices(massey_matrix.fillna(0),massey_b,inxs)
    ranking_values1 = [frac,direct_thres,spread_thres,weight_indirect]+list(ranking1)
    ranking_values2 = [frac,direct_thres,spread_thres,weight_indirect]+list(ranking2)
    r_values1 = [frac,direct_thres,spread_thres,weight_indirect]+list(r1)
    r_values2 = [frac,direct_thres,spread_thres,weight_indirect]+list(r2)

    map_func = lambda linked: pyrankability.construct.support_map_vectorized_direct_indirect_weighted(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
    D = pyrankability.construct.V_count_vectorized(game_df_sample,map_func).reindex(index=team_range,columns=team_range)
    k,details = pyrankability.rank.solve(D,method='hillside',lazy=False,cont=True)
    x = pd.DataFrame(details['x'],columns=D.columns,index=D.index)
    c = pd.DataFrame(pyrankability.construct.C_count(D),columns=D.columns,index=D.index)
    P = details['P']
    simple_details = {'k':k,'x':x,'c':c,'P':P,'D':D}
    hillside_ret = pd.Series([frac,direct_thres,spread_thres,weight_indirect,simple_details],index=hillside_columns)
    return pd.Series(ranking_values1,index=columns),pd.Series(ranking_values2,index=columns),pd.Series(r_values1,index=columns),pd.Series(r_values2,index=columns),hillside_ret


In [185]:
def get_game_df(domain_range,year_games):
    # set the team_domain
    team_domain = None
    if domain_range[0] == 'madness':
        team_domain = madness_teams[year]
    elif domain_range[0] == 'all':
        team_domain = all_teams[year]

    game_df = pd.DataFrame({"team1_name":year_games['team1_name'],
                            "team1_score":year_games['points1'],
                            "team1_H_A_N": year_games['H_A_N1'],
                            "team2_name":year_games['team2_name'],
                            "team2_score":year_games['points2'],
                            "team2_H_A_N": year_games['H_A_N1'],
                            "date": year_games['date']
                           }).sort_values(by='date').drop('date',axis=1)
    mask = game_df.team1_name.isin(team_domain) & game_df.team2_name.isin(team_domain)
    game_df = game_df.loc[mask]
    
    return game_df

In [195]:
def calc_predictability(games,r):
    numberCorrectPredictions = 0
    numGames = 0
    for i in games.index:
        team1ID = games.loc[i, "team1_name"]
        team1Score = games.loc[i, "points1"]
        team2ID = games.loc[i, "team2_name"]
        team2Score = games.loc[i, "points2"]
        
        if team1ID in r.index and team2ID in r.index:
            if team1Score > team2Score and r.loc[team1ID] > r.loc[team2ID]:
                numberCorrectPredictions += 1
            elif team2Score > team1Score and r.loc[team2ID] > r.loc[team1ID]:
                numberCorrectPredictions += 1
            elif team1Score == team2Score and r.loc[team1ID] == r.loc[team2ID]:
                numberCorrectPredictions += 1

            numGames += 1
           
    predictability = numberCorrectPredictions/numGames*100
    return predictability,numGames

In [204]:
results = pd.DataFrame(columns=["Year","Score","Method","Parameters"])
columns = results.columns
results.set_index('Parameters',inplace=True)
outer_keys = list(itertools.product(direct_thress,spread_thress,weight_indirects,domains_ranges))

def calc_score(direct_thres,spread_thres,weight_indirect,domain_range):
    parameter_string = f"{domain_range},frac={frac},dt={direct_thres},st={spread_thres},iw={weight_indirect}"
    predictability = pd.DataFrame(columns=columns)
    c=0
    for year in years_train:
        # set the team_range
        team_range = None
        if domain_range[1] == 'madness':
            team_range = madness_teams[year]
        elif domain_range[1] == 'all':
            team_range = all_teams[year]
        elif "top" in domain_range:
            team_range = all_teams[year]
        
        colley_rankings, colley_rs, massey_rankings, massey_rs, hillside_details = compute(get_game_df(domain_range,games[year]),team_range,frac,direct_thres,spread_thres,weight_indirect)
        
        score,numGames = calc_predictability(remaining_games[year],massey_rs)
        predictability=predictability.append(pd.Series([year,score,'Massey',parameter_string],index=predictability.columns,name=c)); c+=1
        score,numGames = calc_predictability(remaining_games[year],colley_rs)    
        predictability=predictability.append(pd.Series([year,score,'Colley',parameter_string],index=predictability.columns,name=c)); c+=1
        # hillside
        #perm = pd.Series(hillside_details['details']['P'][0],index=hillside_details['details']['D'].columns)
        #r = -perm.argsort()
        #score,numGames = calc_predictability(remaining_games[year],r)    
        #predictability=predictability.append(pd.Series([year,score,'Hillside',parameter_string],index=predictability.columns,name=c)); c+=1
    return predictability

job_results = Parallel(n_jobs=-1)(delayed(calc_score)(direct_thres,spread_thres,weight_indirect,domain_range) for direct_thres,spread_thres,weight_indirect,domain_range in outer_keys)

In [205]:
for predictability in job_results:
    predictability = predictability.set_index('Parameters')
    results = results.append(predictability)

In [232]:
# Average over all years
avg_results = results.reset_index().groupby(['Method','Parameters']).apply(lambda df: df['Score'].mean()).to_frame()
avg_results.columns=['Avg Score']
best_parameters = avg_results.reset_index().set_index('Parameters').groupby('Method').idxmax()
best_parameters.columns=['Parameters']
best_parameters

Unnamed: 0_level_0,Parameters
Method,Unnamed: 1_level_1
Colley,"('all', 'madness'),frac=1.0,dt=0,st=0,iw=0.25"
Massey,"('all', 'madness'),frac=1.0,dt=2,st=0,iw=0.25"


## Here are the scores

In [233]:
avg_results.reset_index().set_index('Parameters').groupby('Method').max()

Unnamed: 0_level_0,Avg Score
Method,Unnamed: 1_level_1
Colley,52.318668
Massey,64.565562


## Now run through the training data

In [253]:
def parameter_string_to_parameters(parameter_string):
    exec("domain_range="+parameter_string.split(')')[0]+")")
    exec(parameter_string.split("),")[1].replace(",",";"))
    return domain_range,frac,dt,st,iw

In [254]:
parameter_string_to_parameters(best_parameters.loc['Colley']['Parameters'])

(('all', 'madness'), 1.0, 0, 0, 0.25)

In [271]:
def compute_method(game_df,year,method,best_parameters):
    parameter_string = best_parameters.loc[method]['Parameters']
    domain_range,frac,direct_thres,spread_thres,weight_indirect = parameter_string_to_parameters(parameter_string)
    # set the team_range
    team_range = None
    if domain_range[1] == 'madness':
        team_range = madness_teams[year]
    elif domain_range[1] == 'all':
        team_range = all_teams[year]
    elif "top" in domain_range:
        team_range = all_teams[year]    
    
    columns = ["frac","direct_thres","spread_thres","weight_indirect"]+team_range
    hillside_columns = ["frac","direct_thres","spread_thres","weight_indirect","details"]

    upper = int(len(game_df)*frac)
    game_df_sample = game_df.iloc[:upper,:]

    if method == 'Colley':
        map_func = lambda linked: pyrankability.construct.colley_matrices(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
        colley_matrix,colley_b = pyrankability.construct.map_vectorized(game_df_sample,map_func)
        colley_matrix = colley_matrix.reindex(index=team_range,columns=team_range)
        colley_b = colley_b.reindex(team_range)
        mask = colley_b.isna()
        colley_b = colley_b.loc[~mask]
        colley_matrix = colley_matrix.loc[~mask,~mask]
        inxs = list(np.where(mask)[0])
        ranking,r = pyrankability.construct.ranking_from_matrices(colley_matrix.fillna(0),colley_b,inxs)

    if method == 'Massey':
        map_func = lambda linked: pyrankability.construct.massey_matrices(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
        massey_matrix,massey_b = pyrankability.construct.map_vectorized(game_df_sample,map_func)
        massey_matrix = massey_matrix.reindex(index=team_range,columns=team_range)
        massey_b = massey_b.reindex(team_range)
        mask = massey_b.isna()
        massey_b = massey_b.loc[~mask]
        massey_matrix = massey_matrix.loc[~mask,~mask]    
        inxs = list(np.where(mask)[0])    
        ranking,r = pyrankability.construct.ranking_from_matrices(massey_matrix.fillna(0),massey_b,inxs)
    
    ranking_values = [frac,direct_thres,spread_thres,weight_indirect]+list(ranking)
    r_values = [frac,direct_thres,spread_thres,weight_indirect]+list(r)

    map_func = lambda linked: pyrankability.construct.support_map_vectorized_direct_indirect_weighted(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
    D = pyrankability.construct.V_count_vectorized(game_df_sample,map_func).reindex(index=team_range,columns=team_range)
    k,details = pyrankability.rank.solve(D,method='hillside',lazy=False,cont=True)
    x = pd.DataFrame(details['x'],columns=D.columns,index=D.index)
    c = pd.DataFrame(pyrankability.construct.C_count(D),columns=D.columns,index=D.index)
    P = details['P']
    simple_details = {'k':k,'x':x,'c':c,'P':P,'D':D}
    hillside_ret = pd.Series([frac,direct_thres,spread_thres,weight_indirect,simple_details],index=hillside_columns)
    return pd.Series(ranking_values,index=columns),pd.Series(r_values,index=columns),hillside_ret


In [272]:
colley_rankings = {}
colley_rs = {}
massey_rankings = {}
massey_rs = {}
massey_hillside_details = {}
colley_hillside_details = {}
for year in tqdm(years):
    colley_rankings[year], colley_rs[year], colley_hillside_details[year] = compute_method(get_game_df(domain_range,games[year]),year,'Colley',best_parameters)
    massey_rankings[year], massey_rs[year], massey_hillside_details[year] = compute_method(get_game_df(domain_range,games[year]),year,'Massey',best_parameters)

















A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  game_df["game"] = list(game_df.index)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  game_df["game"] = list(game_df.index)
















  6%|▌         | 1/17 [00:14<03:48, 14.25s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 12%|█▏        | 2/17 [00:27<03:30, 14.03s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 18%|█▊        | 3/17 [00:41<03:16, 14.03s/it][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 24%|██▎       | 4/17 [00:56<03:

## Calculate the predictability

In [273]:
predictability = pd.DataFrame(columns=["Year","Score","Method","Parameters"])
c = 0
for year in tqdm(years):
    score,numGames = calc_predictability(remaining_games[year],massey_rs[year])
    predictability=predictability.append(pd.Series([year,score,'Massey',parameter_string],index=predictability.columns,name=c)); c+=1
    score,numGames = calc_predictability(remaining_games[year],colley_rs[year])    
    predictability=predictability.append(pd.Series([year,score,'Colley',parameter_string],index=predictability.columns,name=c)); c+=1
    # massey hillside
    perm = pd.Series(massey_hillside_details[year]['details']['P'][0],index=massey_hillside_details[year]['details']['D'].columns)
    r = -perm.argsort()
    score,numGames = calc_predictability(remaining_games[year],r)    
    predictability=predictability.append(pd.Series([year,score,'Hillside(Massey Params)',parameter_string],index=predictability.columns,name=c)); c+=1
    # massey hillside
    perm = pd.Series(colley_hillside_details[year]['details']['P'][0],index=colley_hillside_details[year]['details']['D'].columns)
    r = -perm.argsort()
    score,numGames = calc_predictability(remaining_games[year],r)    
    predictability=predictability.append(pd.Series([year,score,'Hillside(Colley Params)',parameter_string],index=predictability.columns,name=c)); c+=1
    

















  0%|          | 0/17 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 12%|█▏        | 2/17 [00:00<00:01, 10.92it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 24%|██▎       | 4/17 [00:00<00:01, 10.71it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 35%|███▌      | 6/17 [00:00<00:01, 10.77it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 47%|████▋     | 8/17 [00:00<00:00, 10.61it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 53%|█████▎    | 9/17 [00:00<00:00, 10.42it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 59%|█████▉    | 10/17 [00:00<00:00, 10.11it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 65%|██████▍   | 11/17 [00:01<00:00,  9.88it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















 71%|███████   | 12/17 [00:01<00:00,  9.70it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A
















In [274]:
predictability

Unnamed: 0,Year,Score,Method,Parameters
0,2002,60.674157,Massey,"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"
1,2002,58.426966,Colley,"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"
2,2002,57.303371,Hillside(Massey Params),"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"
3,2002,57.303371,Hillside(Colley Params),"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"
4,2003,63.095238,Massey,"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"
...,...,...,...,...
63,2017,61.290323,Hillside(Colley Params),"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"
64,2018,64.044944,Massey,"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"
65,2018,60.674157,Colley,"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"
66,2018,67.415730,Hillside(Massey Params),"('all', 'madness'),frac=1.0,dt=2,st=3,iw=0.5"


In [276]:
import altair as alt
alt.Chart(predictability).mark_line().encode(
    x='Year',
    y='Score',
    color='Method'
)

In [278]:
# TODO: you would have to pick a hillside here
hillside_details = colley_hillside_details
D_fig = None
for year in years:
    perm = np.array(hillside_details[year]['details']['P'][0])
    c_reordered = hillside_details[year]['details']['c'].iloc[perm,:].iloc[:,perm]
    x_reordered = hillside_details[year]['details']['x'].iloc[perm,:].iloc[:,perm]
    D_reordered = hillside_details[year]['details']['D'].iloc[perm,:].iloc[:,perm]
    
    fig_year = pyrankability.plot.show_hillside(hillside_details[year]['details']['D'],perm)
    if D_fig is None:
        D_fig = fig_year
    else:
        D_fig &= fig_year
hillside_D_fig = D_fig

In [280]:
hillside_details = colley_hillside_details
C_fig = None
for year in years:
    perm = np.array(hillside_details[year]['details']['P'][0])
    
    fig_year = pyrankability.plot.show_hillside(hillside_details[year]['details']['c'],perm)
    if C_fig is None:
        C_fig = fig_year
    else:
        C_fig &= fig_year
hillside_C_fig = C_fig

In [394]:
top_ns = list(range(2,30,1))
graph_df = pd.DataFrame(columns=["Method","Year","k","k_orig","sum_c","k_worst","top_n"])
name=0
for year in years:
    hillside_details = colley_hillside_details
    method = 'Colley'
    def add_results(graph_df,method,hillside_details,name):
        perm = np.array(hillside_details[year]['details']['P'][0])
        c = hillside_details[year]['details']['c']
        c_reordered = c.iloc[perm,:].iloc[:,perm]
        x_reordered = hillside_details[year]['details']['x'].iloc[perm,:].iloc[:,perm]
        D_reordered = hillside_details[year]['details']['D'].iloc[perm,:].iloc[:,perm]
        k_worst = np.sum(np.triu(c.iloc[perm[::-1],:].iloc[:,perm[::-1]]))
        k_orig = np.sum(np.triu(c.iloc[perm,:].iloc[:,perm]))
        for top_n in top_ns:
            k = np.sum(np.triu(c.iloc[perm[:top_n],:].iloc[:,perm[:top_n]]))
            sum_c = np.sum(np.sum(c.iloc[perm[:top_n],:].iloc[:,perm[:top_n]]))
            #c = c_reordered.iloc[:top_n,:].iloc[:,:top_n]
            #x = np.round(pyrankability.common.threshold_x(x_reordered.iloc[:top_n,:].iloc[:,:top_n].values))
            #k2 = np.sum(np.sum(x*c))
            graph_df = graph_df.append(pd.Series([method,year,k,k_orig,sum_c,k_worst,top_n],name=name,index=graph_df.columns)); name+=1
        return graph_df,name
    
    graph_df,name = add_results(graph_df,'Hillside(Colley Params)',colley_hillside_details,name)
    graph_df,name = add_results(graph_df,'Hillside(Massey Params)',massey_hillside_details,name)

In [395]:
graph_df

Unnamed: 0,Method,Year,k,k_orig,sum_c,k_worst,top_n
0,Hillside(Colley Params),2002,14.0,26752.0,44.0,47643.0,2
1,Hillside(Colley Params),2002,58.0,26752.0,137.0,47643.0,3
2,Hillside(Colley Params),2002,106.0,26752.0,248.0,47643.0,4
3,Hillside(Colley Params),2002,176.0,26752.0,415.0,47643.0,5
4,Hillside(Colley Params),2002,271.0,26752.0,623.0,47643.0,6
...,...,...,...,...,...,...,...
947,Hillside(Massey Params),2018,6368.0,30196.0,14454.0,52517.0,25
948,Hillside(Massey Params),2018,6996.0,30196.0,15864.0,52517.0,26
949,Hillside(Massey Params),2018,7556.0,30196.0,17122.0,52517.0,27
950,Hillside(Massey Params),2018,8165.0,30196.0,18646.0,52517.0,28


In [423]:
graph_df['k/k_orig'] = graph_df['k']/graph_df['k_orig']
graph_df['k/(k_worst-k_orig)'] = graph_df['k']/(graph_df['k_worst']-graph_df['k_orig'])
graph_df['k/k_worst'] = graph_df['k']/graph_df['k_worst']
graph_df['k/sum_c'] = graph_df['k']/graph_df['sum_c']

In [424]:
graph_df

Unnamed: 0,Method,Year,k,k_orig,sum_c,k_worst,top_n,k/k_orig,k/k_worst,k/sum_c,k*k_orig/k_worst,k/(k_worst-k_orig)
0,Hillside(Colley Params),2002,14.0,26752.0,44.0,47643.0,2,0.000523,0.000294,0.318182,7.861134,0.000670
1,Hillside(Colley Params),2002,58.0,26752.0,137.0,47643.0,3,0.002168,0.001217,0.423358,32.567555,0.002776
2,Hillside(Colley Params),2002,106.0,26752.0,248.0,47643.0,4,0.003962,0.002225,0.427419,59.520013,0.005074
3,Hillside(Colley Params),2002,176.0,26752.0,415.0,47643.0,5,0.006579,0.003694,0.424096,98.825683,0.008425
4,Hillside(Colley Params),2002,271.0,26752.0,623.0,47643.0,6,0.010130,0.005688,0.434992,152.169091,0.012972
...,...,...,...,...,...,...,...,...,...,...,...,...
947,Hillside(Massey Params),2018,6368.0,30196.0,14454.0,52517.0,25,0.210889,0.121256,0.440570,3661.445399,0.285292
948,Hillside(Massey Params),2018,6996.0,30196.0,15864.0,52517.0,26,0.231686,0.133214,0.440998,4022.530152,0.313427
949,Hillside(Massey Params),2018,7556.0,30196.0,17122.0,52517.0,27,0.250232,0.143877,0.441304,4344.516557,0.338515
950,Hillside(Massey Params),2018,8165.0,30196.0,18646.0,52517.0,28,0.270400,0.155473,0.437896,4694.676771,0.365799


In [456]:
top_n = graph_df.set_index(['Method','Year'])['top_n']
pct_change = graph_df[['Method','Year']+['k']].groupby(['Method','Year']).pct_change()*100
pct_change.columns = ['pct_change k']
pct_change_labels = graph_df[['Method','Year']+['k']+['k/k_worst','top_n']].groupby(['Method','Year']).apply(lambda df: df)
#pct_change["top_n"]=top_n
pct_change=pct_change_labels.join(pct_change)
#.join(graph_df.set_index(['Method','Year'])[['top_n']])

In [457]:
pct_change.reset_index().set_index('Year').loc[['2009','2014']].reset_index()

Unnamed: 0,Year,index,Method,k,k/k_worst,top_n,pct_change k
0,2009,392,Hillside(Colley Params),22.0,0.000440,2,
1,2009,393,Hillside(Colley Params),73.0,0.001459,3,231.818182
2,2009,394,Hillside(Colley Params),132.0,0.002638,4,80.821918
3,2009,395,Hillside(Colley Params),218.0,0.004357,5,65.151515
4,2009,396,Hillside(Colley Params),327.0,0.006535,6,50.000000
...,...,...,...,...,...,...,...
107,2014,723,Hillside(Massey Params),6592.0,0.107416,25,9.793471
108,2014,724,Hillside(Massey Params),7156.0,0.116606,26,8.555825
109,2014,725,Hillside(Massey Params),7647.0,0.124607,27,6.861375
110,2014,726,Hillside(Massey Params),8161.0,0.132982,28,6.721590


In [458]:
import altair as alt
alt.Chart(pct_change.reset_index().set_index('Year').loc[['2014','2017']].reset_index()).mark_line().encode(
    x='top_n',
    y='pct_change k',
    color='Year'
).facet(row='Method')

In [427]:
top_n = graph_df.set_index(['Method','Year'])['top_n']
cumsum_pct_change = graph_df[['Method','Year']+['k','k/k_worst']].set_index(['Method','Year']).pct_change()
#cumsum_pct_change["top_n"]=top_n
cumsum_pct_change=cumsum_pct_change.reset_index().groupby(['Method','Year']).cumsum()
cumsum_pct_change.index=top_n.index
cumsum_pct_change["top_n"]=top_n
cumsum_pct_change
#.join(graph_df.set_index(['Method','Year'])[['top_n']])

Unnamed: 0_level_0,Unnamed: 1_level_0,k,k/k_worst,top_n
Method,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Hillside(Colley Params),2002,,,2
Hillside(Colley Params),2002,3.142857,3.142857,3
Hillside(Colley Params),2002,3.970443,3.970443,4
Hillside(Colley Params),2002,4.630821,4.630821,5
Hillside(Colley Params),2002,5.170593,5.170593,6
...,...,...,...,...
Hillside(Massey Params),2018,6.592044,6.592044,25
Hillside(Massey Params),2018,6.690662,6.690662,26
Hillside(Massey Params),2018,6.770708,6.770708,27
Hillside(Massey Params),2018,6.851306,6.851306,28


In [428]:
import altair as alt
alt.Chart(cumsum_pct_change.reset_index()).mark_line().encode(
    x='top_n',
    y='k',
    color='Year'
).facet(row='Method')

In [429]:
import altair as alt
alt.Chart(graph_df).mark_line().encode(
    x='top_n',
    y='k',
    color='Year'
).facet(row='Method')

In [439]:
max_top_n = 16
min_top_n = 2
mask = (graph_df.top_n <= max_top_n) & (graph_df.top_n >= min_top_n)
alt.Chart(graph_df.loc[mask]).mark_line().encode(
    x='top_n',
    y='k',
    color='Year'
).facet(row='Method')

In [440]:
slopes = pd.DataFrame(graph_df.loc[mask].groupby(['Method','Year']).apply(lambda df: np.polyfit(df['top_n'].astype(int), df['k/sum_c'].astype(float),deg=1)[1]))
slopes.columns = [f"Slope over [{min_top_n},{max_top_n}]"]
slopes = slopes.reset_index()
slopes

Unnamed: 0,Method,Year,"Slope over [2,16]"
0,Hillside(Colley Params),2002,0.395388
1,Hillside(Colley Params),2003,0.426499
2,Hillside(Colley Params),2004,0.428957
3,Hillside(Colley Params),2005,0.406536
4,Hillside(Colley Params),2006,0.437414
5,Hillside(Colley Params),2007,0.463334
6,Hillside(Colley Params),2008,0.386713
7,Hillside(Colley Params),2009,0.462125
8,Hillside(Colley Params),2010,0.471859
9,Hillside(Colley Params),2011,0.400527


In [441]:
import scipy
slopes_new = slopes.set_index('Year').drop(years_train).copy().reset_index()
slopes_new['Method']=slopes_new['Method'].map({"Hillside(Massey Params)":"Massey","Hillside(Colley Params)":"Colley"})
slopes_new.set_index(['Year','Method'],inplace=True)
slopes_new
for_corr = slopes_new.join(predictability.set_index(['Year','Method'])).reset_index()
for_corr.groupby('Method').apply(lambda df: scipy.stats.pearsonr(df[f"Slope over [{min_top_n},{max_top_n}]"],df['Score']))

Method
Colley    (0.19885859786135188, 0.5355233719707768)
Massey    (0.03270017044354959, 0.9196415472088503)
dtype: object

In [433]:
check_vars = ['k','k/k_orig','k/k_worst','k/sum_c','k/(k_worst-k_orig)']
top_ns = [4,8,16,25]

In [434]:
import scipy
display_keys = list(itertools.product(check_vars,top_ns))
for var in check_vars:
    display(Markdown(f"## var = {var}"))
    for top_n in top_ns:
        display(Markdown(f"### top_n = {top_n}"))
        data_new = graph_df[['Year','Method',var]].loc[graph_df.top_n == top_n].set_index('Year').drop(years_train).reset_index().copy()
        data_new['Method']=data_new['Method'].map({"Hillside(Massey Params)":"Massey","Hillside(Colley Params)":"Colley"})
        data_new.set_index(['Year','Method'],inplace=True)
        data_new
        for_corr = data_new.join(predictability.set_index(['Year','Method'])).reset_index()
        display(Markdown('#### pearsonr'))
        display(for_corr.groupby('Method').apply(lambda df: scipy.stats.pearsonr(df[var],df['Score'])))
        display(Markdown('#### kendalltau'))
        display(for_corr.groupby('Method').apply(lambda df: scipy.stats.kendalltau(df[var],df['Score'])))

## var = k

### top_n = 4

#### pearsonr

Method
Colley    (0.3191777881918366, 0.311899515922242)
Massey    (0.2867218365088855, 0.366236531424159)
dtype: object

#### kendalltau

Method
Colley    (0.2727272727272727, 0.24957952123750735)
Massey    (0.39393939393939387, 0.0863171145983646)
dtype: object

### top_n = 8

#### pearsonr

Method
Colley     (0.23020459216259487, 0.4716489696270623)
Massey    (-0.09005753223727708, 0.7807532369805809)
dtype: object

#### kendalltau

Method
Colley    (0.015267620413811482, 0.9452010837327357)
Massey     (0.04580286124143444, 0.8366323297132776)
dtype: object

### top_n = 16

#### pearsonr

Method
Colley    (0.011937266971383392, 0.9706287128868298)
Massey    (-0.5151117333758413, 0.08655705636387187)
dtype: object

#### kendalltau

Method
Colley     (-0.0909090909090909, 0.7373055246579552)
Massey    (-0.39393939393939387, 0.0863171145983646)
dtype: object

### top_n = 25

#### pearsonr

Method
Colley     (0.06587467634752311, 0.8388208655718932)
Massey    (-0.5486009094343924, 0.06474467896927412)
dtype: object

#### kendalltau

Method
Colley     (0.07633810206905742, 0.7310931976144447)
Massey    (-0.3511552695176641, 0.11390323413538203)
dtype: object

## var = k/k_orig

### top_n = 4

#### pearsonr

Method
Colley    (0.34202064041181235, 0.27651913798675437)
Massey      (0.4836783304546249, 0.1111225836257224)
dtype: object

#### kendalltau

Method
Colley    (0.3333333333333333, 0.15259045898802842)
Massey    (0.39393939393939387, 0.0863171145983646)
dtype: object

### top_n = 8

#### pearsonr

Method
Colley    (0.27436154694633524, 0.38815019285709723)
Massey       (0.1507085777630092, 0.640120382019459)
dtype: object

#### kendalltau

Method
Colley    (0.1212121212121212, 0.6383612539081289)
Massey    (0.0606060606060606, 0.8405883696421891)
dtype: object

### top_n = 16

#### pearsonr

Method
Colley    (0.016636022259846575, 0.959074892540342)
Massey    (-0.25828316156175785, 0.417629646489386)
dtype: object

#### kendalltau

Method
Colley    (0.0606060606060606, 0.8405883696421891)
Massey    (-0.1818181818181818, 0.459023957331249)
dtype: object

### top_n = 25

#### pearsonr

Method
Colley     (0.09482250998817125, 0.7694227110058466)
Massey    (-0.16247452880130597, 0.6139034405303923)
dtype: object

#### kendalltau

Method
Colley      (0.1818181818181818, 0.459023957331249)
Massey    (-0.0606060606060606, 0.8405883696421891)
dtype: object

## var = k/k_worst

### top_n = 4

#### pearsonr

Method
Colley     (0.3770979987277154, 0.22690408995915465)
Massey    (0.44217748150171493, 0.15006127963507665)
dtype: object

#### kendalltau

Method
Colley    (0.2727272727272727, 0.24957952123750735)
Massey    (0.39393939393939387, 0.0863171145983646)
dtype: object

### top_n = 8

#### pearsonr

Method
Colley    (0.31151022776196885, 0.3243103696970954)
Massey    (0.12069092714586467, 0.7086805752674555)
dtype: object

#### kendalltau

Method
Colley     (0.1818181818181818, 0.459023957331249)
Massey    (0.1212121212121212, 0.6383612539081289)
dtype: object

### top_n = 16

#### pearsonr

Method
Colley     (0.12758704036193896, 0.6927320895491825)
Massey    (-0.21907008462897531, 0.4939234839313335)
dtype: object

#### kendalltau

Method
Colley      (0.1818181818181818, 0.459023957331249)
Massey    (-0.1212121212121212, 0.6383612539081289)
dtype: object

### top_n = 25

#### pearsonr

Method
Colley     (0.20446986179478888, 0.5238274994136501)
Massey    (-0.15220977226128976, 0.6367535425657391)
dtype: object

#### kendalltau

Method
Colley     (0.0909090909090909, 0.7373055246579552)
Massey    (-0.0909090909090909, 0.7373055246579552)
dtype: object

## var = k/sum_c

### top_n = 4

#### pearsonr

Method
Colley    (0.11912838513135712, 0.7123094580640662)
Massey    (0.08001084741090371, 0.8047693347994472)
dtype: object

#### kendalltau

Method
Colley    (0.2424242424242424, 0.3108100139957779)
Massey    (0.1212121212121212, 0.6383612539081289)
dtype: object

### top_n = 8

#### pearsonr

Method
Colley     (0.3789993972599416, 0.22437911238954186)
Massey    (-0.00818693055869242, 0.9798542760090568)
dtype: object

#### kendalltau

Method
Colley     (0.2424242424242424, 0.3108100139957779)
Massey    (-0.0606060606060606, 0.8405883696421891)
dtype: object

### top_n = 16

#### pearsonr

Method
Colley      (0.15034697609231873, 0.6409323074762807)
Massey    (-0.37457971365206016, 0.23027434416566944)
dtype: object

#### kendalltau

Method
Colley      (0.0303030303030303, 0.9465592891547753)
Massey    (-0.4545454545454545, 0.04473689858238469)
dtype: object

### top_n = 25

#### pearsonr

Method
Colley    (0.31503243878737786, 0.31857615657398497)
Massey      (-0.2495900026353346, 0.434015784875194)
dtype: object

#### kendalltau

Method
Colley     (0.2121212121212121, 0.38070480349126185)
Massey    (-0.2121212121212121, 0.38070480349126185)
dtype: object

## var = k/(k_worst-k_orig)

### top_n = 4

#### pearsonr

Method
Colley    (0.38384623538342416, 0.21801934440924317)
Massey     (0.3785062785262101, 0.22503232631566708)
dtype: object

#### kendalltau

Method
Colley    (0.2727272727272727, 0.24957952123750735)
Massey    (0.3333333333333333, 0.15259045898802842)
dtype: object

### top_n = 8

#### pearsonr

Method
Colley    (0.31347616693103286, 0.3211028695444383)
Massey      (0.08642703466987324, 0.78941258267018)
dtype: object

#### kendalltau

Method
Colley    (0.2424242424242424, 0.3108100139957779)
Massey    (0.1212121212121212, 0.6383612539081289)
dtype: object

### top_n = 16

#### pearsonr

Method
Colley    (0.1832629213021031, 0.5685965436897606)
Massey    (-0.1768766555685284, 0.582371207818975)
dtype: object

#### kendalltau

Method
Colley     (0.1212121212121212, 0.6383612539081289)
Massey    (-0.1212121212121212, 0.6383612539081289)
dtype: object

### top_n = 25

#### pearsonr

Method
Colley    (0.2515046250592857, 0.4303803130822559)
Massey     (-0.13131703839741, 0.6841528393427341)
dtype: object

#### kendalltau

Method
Colley    (0.2727272727272727, 0.24957952123750735)
Massey    (-0.0909090909090909, 0.7373055246579552)
dtype: object

In [374]:
import scipy
data_new = graph_df[['Year','Method',var]].loc[graph_df.top_n == 4].copy()
data_new['Method']=data_new['Method'].map({"Hillside(Massey Params)":"Massey","Hillside(Colley Params)":"Colley"})
data_new.set_index(['Year','Method'],inplace=True)
data_new
for_corr = data_new.join(predictability.set_index(['Year','Method'])).reset_index()
for_corr.groupby('Method').apply(lambda df: scipy.stats.pearsonr(df[var],df['Score']))

Method
Colley    (0.39283136542554026, 0.11880879476404413)
Massey      (0.444729450471552, 0.07366579611903842)
dtype: object

In [375]:
import scipy
data_new = graph_df[['Year','Method',var]].loc[graph_df.top_n == 8].copy()
data_new['Method']=data_new['Method'].map({"Hillside(Massey Params)":"Massey","Hillside(Colley Params)":"Colley"})
data_new.set_index(['Year','Method'],inplace=True)
data_new
for_corr = data_new.join(predictability.set_index(['Year','Method'])).reset_index()
for_corr.groupby('Method').apply(lambda df: scipy.stats.pearsonr(df[var],df['Score']))

Method
Colley    (0.2983891279352271, 0.24469909211815857)
Massey     (0.23592178410165124, 0.361979098202793)
dtype: object

In [376]:
import scipy
data_new = graph_df[['Year','Method',var]].loc[graph_df.top_n == 29].copy()
data_new['Method']=data_new['Method'].map({"Hillside(Massey Params)":"Massey","Hillside(Colley Params)":"Colley"})
data_new.set_index(['Year','Method'],inplace=True)
data_new
for_corr = data_new.join(predictability.set_index(['Year','Method'])).reset_index()
for_corr.groupby('Method').apply(lambda df: scipy.stats.pearsonr(df[var],df['Score']))

Method
Colley     (-0.0972196396596853, 0.7104917966217039)
Massey    (-0.26154539274017646, 0.3105673534545875)
dtype: object

In [22]:
massey_rankings = {}
colley_rankings = {}
hillside_details = {}
massey_rs = {}
colley_rs = {}

outer_keys = list(itertools.product(domains_ranges,years))
for domain_range,year in tqdm(outer_keys):
    # set the team_domain
    team_domain = None
    if domain_range[0] == 'madness':
        team_domain = madness_teams[year]
    elif domain_range[0] == 'all':
        team_domain = all_teams[year]

    # set the team_range
    team_range = None
    if domain_range[1] == 'madness':
        team_range = madness_teams[year]
    elif domain_range[1] == 'all':
        team_range = all_teams[year]
    elif "top" in domain_range:
        team_range = all_teams[year]

    columns = ["frac","direct_thres","spread_thres","weight_indirect"]+team_range
    massey_rankings[(domain_range,year)] = pd.DataFrame(columns=columns)
    colley_rankings[(domain_range,year)] = pd.DataFrame(columns=columns)
    massey_rs[(domain_range,year)] = pd.DataFrame(columns=columns)
    colley_rs[(domain_range,year)] = pd.DataFrame(columns=columns)
    hillside_columns = ["frac","direct_thres","spread_thres","weight_indirect","details"]
    hillside_details[(domain_range,year)] = pd.DataFrame(columns=hillside_columns)

    game_df = pd.DataFrame({"team1_name":games[year]['team1_name'],
                            "team1_score":games[year]['points1'],
                            "team1_H_A_N": games[year]['H_A_N1'],
                            "team2_name":games[year]['team2_name'],
                            "team2_score":games[year]['points2'],
                            "team2_H_A_N": games[year]['H_A_N1'],
                            "date": games[year]['date']
                           }).sort_values(by='date').drop('date',axis=1)
    mask = game_df.team1_name.isin(team_domain) & game_df.team2_name.isin(team_domain)
    game_df = game_df.loc[mask]

    keys = list(itertools.product(fracs,direct_thress,spread_thress,weight_indirects))

    def compute(frac,direct_thres,spread_thres,weight_indirect):
        upper = int(len(game_df)*frac)
        game_df_sample = game_df.iloc[:upper,:]

        map_func = lambda linked: pyrankability.construct.colley_matrices(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
        colley_matrix,colley_b = pyrankability.construct.map_vectorized(game_df_sample,map_func)
        colley_matrix = colley_matrix.reindex(index=team_range,columns=team_range)
        colley_b = colley_b.reindex(team_range)
        mask = colley_b.isna()
        colley_b = colley_b.loc[~mask]
        colley_matrix = colley_matrix.loc[~mask,~mask]
        inxs = list(np.where(mask)[0])
        ranking1,r1 = pyrankability.construct.ranking_from_matrices(colley_matrix.fillna(0),colley_b,inxs)

        map_func = lambda linked: pyrankability.construct.massey_matrices(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
        massey_matrix,massey_b = pyrankability.construct.map_vectorized(game_df_sample,map_func)
        massey_matrix = massey_matrix.reindex(index=team_range,columns=team_range)
        massey_b = massey_b.reindex(team_range)
        mask = massey_b.isna()
        massey_b = massey_b.loc[~mask]
        massey_matrix = massey_matrix.loc[~mask,~mask]    
        inxs = list(np.where(mask)[0])    
        ranking2,r2 = pyrankability.construct.ranking_from_matrices(massey_matrix.fillna(0),massey_b,inxs)
        ranking_values1 = [frac,direct_thres,spread_thres,weight_indirect]+list(ranking1)
        ranking_values2 = [frac,direct_thres,spread_thres,weight_indirect]+list(ranking2)
        r_values1 = [frac,direct_thres,spread_thres,weight_indirect]+list(r1)
        r_values2 = [frac,direct_thres,spread_thres,weight_indirect]+list(r2)
        
        map_func = lambda linked: pyrankability.construct.support_map_vectorized_direct_indirect_weighted(linked,direct_thres=direct_thres,spread_thres=spread_thres,weight_indirect=weight_indirect)
        D = pyrankability.construct.V_count_vectorized(game_df_sample,map_func).reindex(index=team_range,columns=team_range)
        k,details = pyrankability.rank.solve(D,method='hillside',lazy=False,cont=True)
        x = pd.DataFrame(details['x'],columns=D.columns,index=D.index)
        c = pd.DataFrame(pyrankability.construct.C_count(D),columns=D.columns,index=D.index)
        P = details['P']
        simple_details = {'k':k,'x':x,'c':c,'P':P}
        hillside_ret = pd.Series([frac,direct_thres,spread_thres,weight_indirect,simple_details],index=hillside_columns)
        return pd.Series(ranking_values1,index=columns),pd.Series(ranking_values2,index=columns),pd.Series(r_values1,index=columns),pd.Series(r_values2,index=columns),hillside_ret

    #frac,direct_thres,spread_thres,weight_indirect = keys[0]
    #compute(frac,direct_thres,spread_thres,weight_indirect)
    results = Parallel(n_jobs=-1)(delayed(compute)(frac,direct_thres,spread_thres,weight_indirect) for frac,direct_thres,spread_thres,weight_indirect in keys)

    c = 0
    for i,key in enumerate(keys):
        frac,direct_thres,spread_thres,weight_indirect = key
        massey,colley,massey_r,colley_r,hillside_ret = results[i]
        massey.name = c
        colley.name = c
        colley_r.name=c
        massey_r.name=c
        hillside_ret.name=c
        massey_rankings[(domain_range,year)] = massey_rankings[(domain_range,year)].append(massey)
        colley_rankings[(domain_range,year)] = colley_rankings[(domain_range,year)].append(colley)
        hillside_details[(domain_range,year)] = hillside_details[(domain_range,year)].append(hillside_ret)
        massey_rs[(domain_range,year)] = massey_rs[(domain_range,year)].append(massey_r)
        colley_rs[(domain_range,year)] = colley_rs[(domain_range,year)].append(colley_r)
        c+=1









  0%|          | 0/102 [00:00<?, ?it/s][A[A[A[A[A[A[A[A







  1%|          | 1/102 [01:00<1:41:40, 60.40s/it][A[A[A[A[A[A[A[A







  2%|▏         | 2/102 [01:53<1:37:01, 58.21s/it][A[A[A[A[A[A[A[A







  3%|▎         | 3/102 [02:50<1:35:23, 57.82s/it][A[A[A[A[A[A[A[A







  4%|▍         | 4/102 [03:51<1:36:00, 58.78s/it][A[A[A[A[A[A[A[A







  5%|▍         | 5/102 [04:49<1:34:54, 58.70s/it][A[A[A[A[A[A[A[A







  6%|▌         | 6/102 [05:53<1:36:09, 60.10s/it][A[A[A[A[A[A[A[A







  7%|▋         | 7/102 [06:51<1:34:14, 59.52s/it][A[A[A[A[A[A[A[A







  8%|▊         | 8/102 [07:51<1:33:32, 59.71s/it][A[A[A[A[A[A[A[A







  9%|▉         | 9/102 [08:57<1:35:34, 61.66s/it][A[A[A[A[A[A[A[A







 10%|▉         | 10/102 [10:07<1:38:07, 63.99s/it][A[A[A[A[A[A[A[A







 11%|█         | 11/102 [11:13<1:37:55, 64.57s/it][A[A[A[A[A[A[A[A







 12%|█▏        | 12/102 [12:

TypeError: can only concatenate list (not "NoneType") to list

In [None]:
joblib.dump({"colley_rankings":colley_rankings,"massey_rankings":massey_rankings,"massey_rs":massey_rs,"colley_rs":colley_rs},"checkpoint1.joblib.z");