In [1]:
# takes  1m30s
# download tables from https://www.bridgepowerratings.com/prhome.htm
# use the acbl api to convert Name into player_id.
# create dicts for converting Name->player_id and player_id->Name.
# deal with duplicate names and missing player_ids.

# previous steps:
# none

# next steps:
# merge bpr data in acbl_pair_summaries.ipynb and useful places.

# todo:
# implement teams (4+ players) similar to list used in pair_ids.
# should keys in acbl_player_name_to_id_dict use casefold() or not?

In [2]:
import pandas as pd
import pathlib
from collections import defaultdict
import mlBridgeLib

In [3]:
# override pandas display options
# NOTE(review): mlBridgeLib is a project-local module; which pandas options
# pd_options_display() changes is not visible from this notebook -- confirm there.
mlBridgeLib.pd_options_display()

In [4]:
# data directories: everything lives under rootPath; acbl/ holds the name<->player_id dicts,
# bpr/ receives the scraped bridgepowerratings tables.
rootPath = pathlib.Path('e:/bridge/data')
acblPath = rootPath / 'acbl'
bprPath = rootPath / 'bpr'
# create both directories (and any missing parents) if they are not already there.
for data_dir in (acblPath, bprPath):
    data_dir.mkdir(parents=True, exist_ok=True)

In [5]:
# takes 70s
# create dataframes from bridgepowerratings.com webpage tables.
# experimenting with flavor='bs4'. seems to be faster for this webpage.
pages = [
    'WEBPAGES/allmps.htm',
    'WEBPAGES/allpr.htm',
    'WEBPAGES/DISTRICT/d9rank.htm',
    'WEBPAGES/improv6.htm',
    'WEBPAGES/improv12.htm',
    'WEBPAGES/improv24.htm',
    'WEBPAGES/underate.htm',
    'WEBPAGES/urpairs.htm',
]
dfsd = {} # page stem (e.g. 'allmps') -> dataframe of that page's rows
for p in pages:
    url = f'https://www.bridgepowerratings.com/{p}'
    print(f'Reading: {url}')
    # read_html() returns a list of dataframes. The dataframes are populated from tables embedded in the html.
    dfs = pd.read_html(url, flavor='bs4', encoding='UTF8')
    print(f'Table count: {len(dfs)}')
    # kludgy way to detect and bypass any tables which are composed of shortcuts to names:
    # only keep tables whose first row contains repeated values (the column headers repeat side by side).
    dfs = [table for table in dfs if len(set(table.iloc[0])) != len(table.iloc[0])]
    row1_cols = dfs[0].iloc[0]
    ncols = len(row1_cols) # total columns in the html table
    nnames = len(set(row1_cols)) # number of distinct header names, i.e. width of one logical column group
    col_names = row1_cols[:nnames]
    #print(row1_cols,ncols,nnames,col_names)
    # each html table holds several column groups side by side; slice out each group
    # (skipping the header row) and stack all of them into one long dataframe.
    groups = []
    for table in dfs:
        for ncol in range(0, ncols, nnames):
            group_values = table.iloc[1:, ncol:ncol+nnames].values
            groups.append(pd.DataFrame(group_values, columns=col_names))
    df = pd.concat(groups, ignore_index=True).dropna()
    # use url to create a filename
    n = pathlib.Path(url).stem
    dfsd[n] = df
    display(df)

Reading: https://www.bridgepowerratings.com/WEBPAGES/allmps.htm
Table count: 53


Unnamed: 0,Rank,MP's,Unit,Name
0,1,92862.58,128,"Meckstroth, Jeff"
1,2,87910.58,176,"Passell, Mike"
2,3,78168.46,128,"Rodwell, Eric"
3,4,76525.13,174,"Wold, Eddie"
4,5,71817.34,210,"Lair, Mark"
5,6,62350.16,538,"Itabashi, Mark"
6,7,51506.72,373,"Hampson, Geoff"
7,8,50419.44,176,"Compton, Chris"
8,9,47550.39,373,"Levin, Robert"
9,10,46419.44,373,"Cheek, Curtis"


Reading: https://www.bridgepowerratings.com/WEBPAGES/allpr.htm
Table count: 51


Unnamed: 0,Rank,PR,Games,Unit,Name
0,1,72.09,137,108,"Grossack, Zachary"
1,2,71.27,60,174,"Wold, Eddie"
2,3,71.14,86,373,"Shi, Sylvia"
3,4,71.02,201,538,"Itabashi, Mark"
4,5,71.01,164,128,"Cappelletti, Mike"
5,6,70.76,47,128,"Meckstroth, Jeff"
6,7,70.28,131,151,"Pollack, Frederic"
7,8,69.67,77,155,"Nathan, Marc"
8,9,69.59,69,128,"Stamatov, Jerry"
9,10,69.37,198,155,"Rotaru, Iulian"


Reading: https://www.bridgepowerratings.com/WEBPAGES/DISTRICT/d9rank.htm
Table count: 34


Unnamed: 0,Rank,PR,Games,Unit,Name
0,1,71.01,164,128,"Cappelletti, Mike"
1,2,70.76,47,128,"Meckstroth, Jeff"
2,3,69.59,69,128,"Stamatov, Jerry"
3,4,69.13,50,128,"Rodwell, Eric"
4,5,68.38,66,243,"Pavlicek, Richard"
5,6,67.42,97,243,"Castillo, Juan"
6,7,67.35,65,128,"Kristinsson, Jakob"
7,8,67.12,48,128,"Wolpert, Gavin"
8,9,66.77,181,128,"Ganness, Sean"
9,10,66.69,47,128,"Stauber, Allan"


Reading: https://www.bridgepowerratings.com/WEBPAGES/improv6.htm
Table count: 52


Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name
0,1,55.48,49.81,5.67,114,"Cone, Cynthia"
1,2,50.50,44.90,5.60,243,"Chance, Doris"
2,3,61.08,55.65,5.43,128,"Land, Jeanne"
3,4,40.47,35.14,5.34,159,"Warach, Jane"
4,5,56.37,51.06,5.31,128,"Baniewicz, Jeanne"
5,6,49.62,44.70,4.92,246,"McGrath, Kimberley"
6,7,51.58,46.69,4.88,547,"Acutt, Iris"
7,8,60.78,55.93,4.86,189,"Zamierowski, Barbara"
8,9,47.83,42.99,4.84,114,"Bedgood, Cheryl"
9,10,50.31,45.54,4.78,128,"Wallace, Constance"


Reading: https://www.bridgepowerratings.com/WEBPAGES/improv12.htm
Table count: 52


Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name
0,1,50.77,43.24,7.52,189,"Fitzpatrick, Bonnie"
1,2,47.76,40.89,6.87,373,"Koziol, Malgorzata"
2,3,56.37,49.53,6.84,128,"Baniewicz, Jeanne"
3,4,49.91,43.30,6.61,452,"Krasovic, Valarie"
4,5,53.21,46.70,6.51,140,"Armus, Nadine"
5,6,58.14,51.74,6.39,128,"Lieser, John"
6,7,58.65,52.27,6.38,201,"Fisher, Ernestine"
7,8,51.58,45.21,6.37,547,"Acutt, Iris"
8,9,61.69,55.53,6.16,470,"Williams, Busaba"
9,10,46.73,40.67,6.06,219,"Cox, Ronald"


Reading: https://www.bridgepowerratings.com/WEBPAGES/improv24.htm
Table count: 3


Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name
0,1,63.74,57.30,6.45,503,"Kemper, Carol"
1,2,68.27,61.88,6.39,119,"Ayyagari, Murthy"
2,3,55.95,49.73,6.21,196,"Drabek, Yan"
3,4,60.36,54.20,6.17,425,"Schmahl, Michael"
4,5,66.46,60.92,5.54,132,"Kopper, David"
5,6,60.73,55.23,5.50,531,"Denny, Christine"
6,7,58.49,53.04,5.45,373,"Williams, Daniel"
7,8,58.11,52.66,5.44,136,"Harner, Lori"
8,9,62.27,56.91,5.36,506,"Chen, Chris"
9,10,62.79,57.66,5.12,374,"Harris, Cindy"


Reading: https://www.bridgepowerratings.com/WEBPAGES/underate.htm
Table count: 52


Unnamed: 0,Rank,UR,PR/MP's,MP's,PR,Unit,Name
0,1,19.01,7231,1,59.22,128,"Olster, Evan"
1,2,18.80,6908,2,59.03,246,"Rosin, Leah"
2,3,17.70,8321,42,59.86,128,"Lieser, John"
3,4,17.57,9528,69,60.57,473,"Grijalva, Rolando"
4,5,17.39,5095,9,57.97,431,"Kelly, Ken"
5,6,15.94,51547,818,66.87,123,"Lishkov, Aleksandar"
6,7,15.56,9573,158,60.60,139,"Popkin, Arnold"
7,8,14.01,5827,128,58.40,222,"Renz, Greg"
8,9,13.95,9284,244,60.43,559,"Zappaterreno, R"
9,10,13.90,5228,116,58.05,188,"Zoulis, Michael"


Reading: https://www.bridgepowerratings.com/WEBPAGES/urpairs.htm
Table count: 78


Unnamed: 0,Rank,UR,PR/MP's,MP's,PairR,Unit,Name
0,1,18.80,37191,146,61.68,999/999,"Robinson, Claire - Bakhshi, Heather"
1,2,16.20,37598,404,62.06,999/999,"Sherman, David - Capal, Tracy"
2,3,15.69,8152,41,56.80,533/533,"Hu, Xiaodan - Wang, Jun"
3,4,15.52,7353,28,56.32,446/446,"Walsh, John - Rasmussen, Pete"
4,5,15.46,77953,1854,64.47,155/155,"Questembert, Patrick - Tadmor, Ilan"
5,6,14.83,12999,218,58.77,999/999,"Shapochnik, Gersh - Yadadov, Rami"
6,7,14.35,7608,84,56.48,150/150,"Woodard, Wendell - Cummings, William"
7,8,13.79,80306,1956,65.14,123/123,"Poska, Gediminas - Vitukynaite, Daiva"
8,9,13.42,53307,624,59.75,999/506,"Guven, Metin - Bichara, Rita"
9,10,13.41,4235,12,53.85,151/151,"Lavoie, Pierre-Andre - Dame, Jacques"


In [6]:
# Reading acbl_player_name_dict which has been created from sql db which was created by club data download.
# This file is the starting point for creating the larger player_id_name_matches_dict which is supplemented with api calls.
# Disabled (if False): the next cell loads the more complete player_id_name_matches_dict.pkl instead.
if False:
    acbl_player_id_to_name_dict_file = acblPath / 'acbl_player_name_dict.pkl'
    acbl_player_id_to_name_dict = pd.read_pickle(acbl_player_id_to_name_dict_file)
    # Invert player_id->name into name->[player_id, ...].
    # Many players share a common name but each has a unique player_id.
    acbl_player_name_to_id_dict = defaultdict(list)
    for player_id in acbl_player_id_to_name_dict:
        player_name = acbl_player_id_to_name_dict[player_id]
        acbl_player_name_to_id_dict[player_name].append(player_id)

In [7]:
# ignoring acbl_player_id_to_name_dict and using the more complete player_id_name_matches_dict.pkl instead.
if True:
    player_id_name_matches_dict_file = acblPath / 'player_id_name_matches_dict.pkl'
    acbl_player_name_to_id_dict = pd.read_pickle(player_id_name_matches_dict_file) # note name change!
    # sanity-check the loaded dict: keys are always str; a value is either a list of ids or a single str.
    # only values of existing keys are replaced, so the dict never changes size while being iterated.
    for k in acbl_player_name_to_id_dict:
        assert type(k) is str, k
        v = acbl_player_name_to_id_dict[k]
        if type(v) is not list:
            # a str value must not look like a stringified list
            assert type(v) is str and v[0] != '[', v
            continue
        # todo: temp: force ints in list to be strs.
        v = acbl_player_name_to_id_dict[k] = list(map(str, v))
        assert all(type(vv) is str for vv in v), v

In [8]:
# for player dataframes, create new column containing player_id
# for pair dataframes, create new columns of lists containing pair_ids, Names
def _split_pair_name(pair_name):
    # 'Last1, First1 - Last2, First2' -> ['Last1, First1', 'Last2, First2']
    return pair_name.split(' - ')

def _ids_for_names(names):
    # one dict lookup per partner; each lookup yields that partner's list of candidate ids
    return [acbl_player_name_to_id_dict[n] for n in names]

for k,df in dfsd.items():
    print(df.columns)
    if 'PairR' not in df.columns:
        df['player_id'] = df['Name'].map(acbl_player_name_to_id_dict) # scaler id and name
    else:
        # pop() removes 'Name' from df in place and hands back the column for splitting
        pair_names = df.pop('Name').map(_split_pair_name)
        df['Names'] = pair_names
        df['pair_ids'] = pair_names.map(_ids_for_names) # pair of ids and names
    display(k,df)

Index(['Rank', 'MP's', 'Unit', 'Name'], dtype='object', name=0)


'allmps'

Unnamed: 0,Rank,MP's,Unit,Name,player_id
0,1,92862.58,128,"Meckstroth, Jeff",[4580699]
1,2,87910.58,176,"Passell, Mike",[1622382]
2,3,78168.46,128,"Rodwell, Eric",[5482658]
3,4,76525.13,174,"Wold, Eddie",[3443949]
4,5,71817.34,210,"Lair, Mark",[5830400]
5,6,62350.16,538,"Itabashi, Mark",[6811434]
6,7,51506.72,373,"Hampson, Geoff",[9984488]
7,8,50419.44,176,"Compton, Chris",[8007713]
8,9,47550.39,373,"Levin, Robert",[7279515]
9,10,46419.44,373,"Cheek, Curtis",[2931060]


Index(['Rank', 'PR', 'Games', 'Unit', 'Name'], dtype='object', name=0)


'allpr'

Unnamed: 0,Rank,PR,Games,Unit,Name,player_id
0,1,72.09,137,108,"Grossack, Zachary",[2250896]
1,2,71.27,60,174,"Wold, Eddie",[3443949]
2,3,71.14,86,373,"Shi, Sylvia",[5420105]
3,4,71.02,201,538,"Itabashi, Mark",[6811434]
4,5,71.01,164,128,"Cappelletti, Mike",[2601087]
5,6,70.76,47,128,"Meckstroth, Jeff",[4580699]
6,7,70.28,131,151,"Pollack, Frederic",[6529224]
7,8,69.67,77,155,"Nathan, Marc",[1621807]
8,9,69.59,69,128,"Stamatov, Jerry",[9936327]
9,10,69.37,198,155,"Rotaru, Iulian",[7066325]


Index(['Rank', 'PR', 'Games', 'Unit', 'Name'], dtype='object', name=0)


'd9rank'

Unnamed: 0,Rank,PR,Games,Unit,Name,player_id
0,1,71.01,164,128,"Cappelletti, Mike",[2601087]
1,2,70.76,47,128,"Meckstroth, Jeff",[4580699]
2,3,69.59,69,128,"Stamatov, Jerry",[9936327]
3,4,69.13,50,128,"Rodwell, Eric",[5482658]
4,5,68.38,66,243,"Pavlicek, Richard",[3565408]
5,6,67.42,97,243,"Castillo, Juan",[8744475]
6,7,67.35,65,128,"Kristinsson, Jakob",[4606868]
7,8,67.12,48,128,"Wolpert, Gavin",[8149887]
8,9,66.77,181,128,"Ganness, Sean",[8071683]
9,10,66.69,47,128,"Stauber, Allan",[1481835]


Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name'], dtype='object', name=0)


'improv6'

Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id
0,1,55.48,49.81,5.67,114,"Cone, Cynthia",[4928725]
1,2,50.50,44.90,5.60,243,"Chance, Doris",[5571790]
2,3,61.08,55.65,5.43,128,"Land, Jeanne",[1249495]
3,4,40.47,35.14,5.34,159,"Warach, Jane",[8043620]
4,5,56.37,51.06,5.31,128,"Baniewicz, Jeanne",[9442677]
5,6,49.62,44.70,4.92,246,"McGrath, Kimberley",[9038558]
6,7,51.58,46.69,4.88,547,"Acutt, Iris",[8671338]
7,8,60.78,55.93,4.86,189,"Zamierowski, Barbara",[3002217]
8,9,47.83,42.99,4.84,114,"Bedgood, Cheryl",[6751318]
9,10,50.31,45.54,4.78,128,"Wallace, Constance",[9074430]


Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name'], dtype='object', name=0)


'improv12'

Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id
0,1,50.77,43.24,7.52,189,"Fitzpatrick, Bonnie",[3194043]
1,2,47.76,40.89,6.87,373,"Koziol, Malgorzata",[1441426]
2,3,56.37,49.53,6.84,128,"Baniewicz, Jeanne",[9442677]
3,4,49.91,43.30,6.61,452,"Krasovic, Valarie",[5511623]
4,5,53.21,46.70,6.51,140,"Armus, Nadine",[1974718]
5,6,58.14,51.74,6.39,128,"Lieser, John",[4240464]
6,7,58.65,52.27,6.38,201,"Fisher, Ernestine",[2295121]
7,8,51.58,45.21,6.37,547,"Acutt, Iris",[8671338]
8,9,61.69,55.53,6.16,470,"Williams, Busaba",[2143070]
9,10,46.73,40.67,6.06,219,"Cox, Ronald",[1794124]


Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name'], dtype='object', name=0)


'improv24'

Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id
0,1,63.74,57.30,6.45,503,"Kemper, Carol",[4331699]
1,2,68.27,61.88,6.39,119,"Ayyagari, Murthy",[9146962]
2,3,55.95,49.73,6.21,196,"Drabek, Yan",[5172713]
3,4,60.36,54.20,6.17,425,"Schmahl, Michael",[4934148]
4,5,66.46,60.92,5.54,132,"Kopper, David",[2212501]
5,6,60.73,55.23,5.50,531,"Denny, Christine",[3225135]
6,7,58.49,53.04,5.45,373,"Williams, Daniel",[6527051]
7,8,58.11,52.66,5.44,136,"Harner, Lori",[5399963]
8,9,62.27,56.91,5.36,506,"Chen, Chris",[3973182]
9,10,62.79,57.66,5.12,374,"Harris, Cindy",[4997352]


Index(['Rank', 'UR', 'PR/MP's', 'MP's', 'PR', 'Unit', 'Name'], dtype='object', name=0)


'underate'

Unnamed: 0,Rank,UR,PR/MP's,MP's,PR,Unit,Name,player_id
0,1,19.01,7231,1,59.22,128,"Olster, Evan","[tmp:1bd69b97-671c-498b-9b13-554a2926a285, tmp:6637f30e-5942-4a56-87fb-7b8fc7f60e71, tmp:2ddfbbf..."
1,2,18.80,6908,2,59.03,246,"Rosin, Leah","[tmp:1c0dd554-02b5-4c03-931c-287c14ed2c79, tmp:617f2476-72a2-4dd1-85b1-20a9347f7beb, tmp:aaca79b..."
2,3,17.70,8321,42,59.86,128,"Lieser, John",[4240464]
3,4,17.57,9528,69,60.57,473,"Grijalva, Rolando","[7928378, tmp:dd9921da-821c-40bb-9e35-1cc85b8db6c3, tmp:80eda9d1-a123-467b-ada3-8931c969e76b]"
4,5,17.39,5095,9,57.97,431,"Kelly, Ken","[tmp:90dfb096-2e5f-495c-8804-eb1c6406843f, tmp:b3e7843c-01f9-4f87-97c1-91358940371c, tmp:940f616..."
5,6,15.94,51547,818,66.87,123,"Lishkov, Aleksandar",[1320998]
6,7,15.56,9573,158,60.60,139,"Popkin, Arnold",[8230021]
7,8,14.01,5827,128,58.40,222,"Renz, Greg",[3793281]
8,9,13.95,9284,244,60.43,559,"Zappaterreno, R",[3616320]
9,10,13.90,5228,116,58.05,188,"Zoulis, Michael",[6576222]


Index(['Rank', 'UR', 'PR/MP's', 'MP's', 'PairR', 'Unit', 'Name'], dtype='object', name=0)


'urpairs'

Unnamed: 0,Rank,UR,PR/MP's,MP's,PairR,Unit,Names,pair_ids
0,1,18.80,37191,146,61.68,999/999,"[Robinson, Claire, Bakhshi, Heather]","[[6025366], []]"
1,2,16.20,37598,404,62.06,999/999,"[Sherman, David, Capal, Tracy]","[[], []]"
2,3,15.69,8152,41,56.80,533/533,"[Hu, Xiaodan, Wang, Jun]","[[8237328, tmp:5b2e409e-03b4-44dd-afcf-def8afc94803, tmp:75660f98-02a8-42a8-a5be-2d5a8d4b1a03, t..."
3,4,15.52,7353,28,56.32,446/446,"[Walsh, John, Rasmussen, Pete]","[[2235897], [2235978]]"
4,5,15.46,77953,1854,64.47,155/155,"[Questembert, Patrick, Tadmor, Ilan]","[[4016424], [8337500]]"
5,6,14.83,12999,218,58.77,999/999,"[Shapochnik, Gersh, Yadadov, Rami]","[[], []]"
6,7,14.35,7608,84,56.48,150/150,"[Woodard, Wendell, Cummings, William]","[[], [9934987]]"
7,8,13.79,80306,1956,65.14,123/123,"[Poska, Gediminas, Vitukynaite, Daiva]","[[5128501], [5177944]]"
8,9,13.42,53307,624,59.75,999/506,"[Guven, Metin, Bichara, Rita]","[[], [7703252]]"
9,10,13.41,4235,12,53.85,151/151,"[Lavoie, Pierre-Andre, Dame, Jacques]","[[tmp:149ca897-2246-41d8-bc66-6dec85f3b11d, tmp:2d20daba-788c-438d-9f8c-a2527f3c1e7c, tmp:09372d..."


In [9]:
# create a list of names containing missing player_id or pair_ids.
l = [] # todo: change to list which only allows uniques. setlist?
for k,df in dfsd.items():
    if 'player_id' in df:
        # player tables: rows whose id list is empty
        has_no_id = df['player_id'].map(len).eq(0)
        for player_name in df.loc[has_no_id,'Name']:
            l.append(player_name.casefold())
    elif 'pair_ids' in df:
        # pair tables: check each partner's id list separately
        for pair_ids,pair_names in zip(df['pair_ids'],df['Names']):
            for lpid,name in zip(pair_ids,pair_names):
                if len(lpid) == 0:
                    l.append(name.casefold())
    else:
        assert False
# only need to call apis for names not in dict
missing_player_ids = set(l).difference(acbl_player_name_to_id_dict.keys())
# todo: should dict contain casefold() or not? Need consistency.
already_known = [n for n in missing_player_ids if n in acbl_player_name_to_id_dict]
print(already_known)
assert all([n not in acbl_player_name_to_id_dict for n in missing_player_ids])
len(l),len(missing_player_ids),sorted(missing_player_ids)

[]


(1636,
 76,
 ["a'hearn, beverly",
  'bacon-rothchild, debora',
  'bennett-perry, patricia',
  'benoit, marie-france',
  'berton, jr, jack',
  'bickel-scribner, susan',
  'bigel-casher, rita',
  'brown, jr, nelson',
  'burgess-strauss, marie',
  'cannamela, jr, r. arthu',
  'cooper-baker, rebecca',
  "d'amato, ellen",
  "d'amico, james",
  "d'litzenberger, richard",
  "d'souza, lino",
  'day, jo-ann',
  "del'monte, ishmael",
  "dell'osso, john",
  'dickson, jo-beth',
  'emmett, mary-lou',
  'fill in',
  'fu, tsun-sen',
  'goggin, m. gregory',
  'goodrich-baird, sarah',
  'hamilton-diesel, stepha',
  'harmon-hanna, e',
  'ho, sun-o',
  'housman jr, w.glover',
  'jean-jacques, palau',
  'kay-wolff, judy',
  'ketchum, jr, william',
  "l'ecuyer, nicolas",
  'lacombe-ingham, sherry',
  'lamothe, jean-guy',
  'lee, kang-won',
  'levine, md, paul',
  'liao, jin-ming',
  'litman-cohen, katherine',
  'lo, ai-tai',
  'luo, luen-jyh',
  'madison-jammal, sam',
  'maja, -',
  'maloney, k.',
  'mares

In [10]:
# read file containing acbl api bearer secret.
# The value is kept as bytes; the api-calling cell slices off a leading 'Authorization: ' prefix
# and sends the remainder as the Authorization header.
bearer_file = pathlib.Path('acbl_api_authorization_bearer_secret.txt')
# strip() drops surrounding whitespace, in particular the trailing newline most editors append;
# a newline inside an HTTP header value is illegal and would make every request fail.
bearer = bearer_file.read_bytes().strip()

In [11]:
# takes 60s per 40 names
# call acbl api to retrieve acbl_number given first_name, last_name.
#
# For every name in missing_player_ids (casefolded 'last, first' strings):
#   - names that cannot be sent to the api (wrong comma count, non-alphanumeric parts) go to unmatched_names.
#   - exactly one exact first/last match -> that single acbl_number is stored in the dict.
#   - otherwise -> every candidate returned by the api is stored and the name is also added to unmatched_names.
import time
import urllib.parse # plain 'import urllib' does not guarantee that the urllib.parse submodule is loaded

import requests

# bearer is the raw content of the secret file; drop the 'Authorization: ' prefix and any stray whitespace.
headers = {'accept':'application/json', 'Authorization':bearer[len('Authorization: '):].strip()}
url = 'https://api.acbl.org/v1/player_query'
unmatched_names = []
for nn,n in enumerate(missing_player_ids):
    if n in acbl_player_name_to_id_dict and len(acbl_player_name_to_id_dict[n]):
        #print('found:',n,acbl_player_name_to_id_dict[n])
        continue
    split = n.split(', ')
    if len(split) != 2:
        print('wrong number of commas:',n)
        unmatched_names.append(n)
        continue
    # only the first word of each part is sent to the api.
    # isalnum() already rejects '-', '+', '.', ',' and the empty string, so no further character checks are needed.
    lastname = split[0].split(' ')[0]
    if not lastname.isalnum():
        print('invalid characters in lastname:',lastname,n)
        unmatched_names.append(n)
        continue
    firstname = split[1].split(' ')[0]
    if not firstname.isalnum():
        print('invalid characters in firstname:',firstname,n)
        unmatched_names.append(n)
        continue
    query = {'first_name':firstname,'last_name':lastname}
    params = urllib.parse.urlencode(query)
    time.sleep(0.5) # throttle api calling
    # timeout so that a stalled connection raises instead of hanging the notebook forever
    response = requests.get(url, params=params, headers=headers, timeout=30)
    assert response.status_code == 200, (n, url, params, response.status_code)
    candidates = response.json()['data']
    # keep only the records whose first and last name match exactly (ignoring case)
    founds = [
        (n,candidate['acbl_number'],candidate['city'])
        for candidate in candidates
        if firstname.casefold() == candidate['first_name'].casefold() and lastname.casefold() == candidate['last_name'].casefold()
    ]
    if len(founds) == 1:
        print('found one exact match:',n)
    else:
        # zero or several exact matches: keep every candidate for later manual resolution.
        # check every record (an empty result list passes trivially) rather than whichever
        # record a previous loop happened to leave behind.
        assert all(type(candidate['acbl_number']) is str for candidate in candidates), candidates
        founds = [(candidate['last_name']+', '+candidate['first_name'],candidate['acbl_number'],candidate['city']) for candidate in candidates]
        unmatched_names.append(n)
    print(nn,len(missing_player_ids),n,founds)
    acbl_player_name_to_id_dict[n] = [found[1] for found in founds]

invalid characters in lastname: o'rourke o'rourke, lou ann
invalid characters in firstname: marie-france benoit, marie-france
invalid characters in lastname: bennett-perry bennett-perry, patricia
invalid characters in firstname: j.p. snyder, j.p.
invalid characters in firstname: m. goggin, m. gregory
invalid characters in lastname: harmon-hanna harmon-hanna, e
invalid characters in firstname: c. nettles, c. leland
invalid characters in lastname: o'connell o'connell, james
invalid characters in lastname: d'souza d'souza, lino
invalid characters in lastname: litman-cohen litman-cohen, katherine
invalid characters in firstname: e.r. mckinney, e.r. 'bud'
wrong number of commas: rudikoff, m.d., jeffrey
invalid characters in firstname: jean-guy lamothe, jean-guy
invalid characters in firstname: d. welton, d. scott
invalid characters in firstname: sun-o ho, sun-o
invalid characters in lastname: dell'osso dell'osso, john
invalid characters in firstname: jo-ann day, jo-ann
invalid characters in

In [12]:
# use same dict for both player_id->name lookup and name->[player_id] lookup
# key can be either a name or a player_id. value can be either a name or a list of player_ids.
d = {} # new player_id->name entries to merge in once the scan is finished
name_differences = [] # [player_id, [name being scanned, name already stored for that player_id]]
for k,v in acbl_player_name_to_id_dict.items():
    assert type(k) is str, k
    if type(v) is not list:
        # k is a player_id and v must be its name (a plain str).
        assert type(v) is str, v
        continue
    # v is a list of player_ids. make sure every player_id can retrieve its name.
    for pid in v:
        if type(pid) is int:
            pid = str(pid)
        assert type(pid) is str, pid
        if pid in acbl_player_name_to_id_dict:
            if acbl_player_name_to_id_dict[pid] != k:
                # different names are using the same player_id. what to do?
                name_differences.append([pid,[k,acbl_player_name_to_id_dict[pid]]])
        elif pid in d:
            # a brand-new player_id reached from two names must agree on the name
            assert d[pid] == k, [d[pid],k]
        else:
            d[pid] = k
print('new player_ids to add to dict:',len(d))
# merged only after the loop: adding keys while iterating the dict would raise
acbl_player_name_to_id_dict.update(d)

new player_ids to add to dict: 0


In [13]:
# todo: resolve name differences by making acbl api call to convert player_id to name? Better to keep as is?
# each entry is [player_id, [name_a, name_b]]: one player_id that is reachable from two different names.
print('Name differences:')
for difference in name_differences:
    pid,names = difference
    print(pid, names)

Name differences:
1083023 ['Moore, Tom', 'Moore, Loren']
7402872 ['Brantley, Theresa', 'Brantley, Theresa Ann']
6497594 ['Langland, Richard', 'lang, richard']
7749635 ['Walker, Jim', 'Walker, James']
1889443 ['Depner, Garnet', 'De Merchant, Garnet']
7006837 ['Panayotatos, Daniele-Favre', 'Panayotatos, Daniele']
6697151 ['Hendricks, Bob', 'Hendricks, Robert']
1448854 ['Black, Tom', 'Black, Thomas']
3105741 ['Jones, Hal', 'Jones Jr, Hal']
3629538 ['Rosenstock, Norm', 'Rosenstock, Norman']
2376318 ['Scott, Candy', 'Scott, Candace']
1718967 ['Scott, Patsy', 'scott, patricia']
3174603 ['Elson, Jane', 'Elson, Marjorie J']
6094481 ['Kirk, Pat', 'Kirk, Patricia']
5244781 ['Hirshon, Ken', 'Hirshon, Kenneth']
8114978 ['Laslie, MaryPat', 'Laslie, Mary Pat']
3429598 ['Dennis, Leona', 'De Grandpre, Leona']
2050153 ['Andrews, Mike', 'Andrews, Michael']
9999310 ['Sorenson, Eric', 'Sorenson, Steven']
9746129 ['Harker, Bill', 'Harker, William']
4989988 ['Templeton, Max', 'templeton, maxie']
4187458 ['Y

9824170 ['Deyerle, Richard', 'Deyerle, Rick']
5162246 ['Atteberry, Bill', 'Atteberry, William']
7344295 ['Loehnen, Elizabeth', 'loehnen, beth']
5552052 ['Kirsheman, Bill', 'kirsheman, william']
9977120 ['Smith, Sonia', 'Smith, Sonja']
3104338 ['Siegel, Ed', 'siegel, edward']
7489641 ['Griffin, James G', 'Griffin, James']
9163255 ['Blackewll, Patricia', 'blackwell, patricia']
5011639 ['Bourgoin, Bern', 'bourgoin, bernadette']
3900363 ['Slater, Jeff', 'slater, j']
7937431 ['Sanders, Bill', 'Sanders, Billy']
4397533 ['Mikulas, Bill', 'mikulas, william']
7765886 ['Clutterbuck, Tim', 'Clutterbuck, Timothy']
4322487 ['Skawronski, Mark', 'skowronski, mark']
9764372 ['Roby, Katherine', 'roby, kathy']
6498159 ['Hinkley, Bud', 'Hinckley, Bud']
8534365 ['Treiber, Steve', 'Treiber, Steven']
8891621 ['Robie, Jerry', 'Robie, Gerald']
6987478 ['Roman, Ed', 'roman, edmund']
8051321 ['Dubrau, Ken', 'Dubrau, Kenneth']
6853447 ['Dumbacher, Pat', 'dumbacher, patricia']
6645682 ['VanLierop, Gerry', 'Vanlie

In [14]:
# takes 20s
# write out dicts of matched and unmatched name/player_ids in pkl and excel formats.

# matches: the full dict as a pickle, plus a two-column excel sheet of its keys and values.
player_id_name_matches_file = acblPath / 'player_id_name_matches_dict.pkl'
pd.to_pickle(acbl_player_name_to_id_dict, player_id_name_matches_file) # create dict file
matches_columns = {
    'Name': acbl_player_name_to_id_dict.keys(),
    'player_id': acbl_player_name_to_id_dict.values(),
}
player_id_name_matches_df = pd.DataFrame(matches_columns)
player_id_name_matches_df.to_excel(player_id_name_matches_file.with_suffix('.xlsx')) # create excel file of matches

# unmatches: names the api step could not resolve, with an empty player_id column.
player_id_name_unmatches_file = acblPath / 'player_id_name_unmatches_dict.pkl'
unmatches_columns = {
    'Name': unmatched_names,
    'player_id': [None for _ in unmatched_names],
}
player_id_name_unmatches_df = pd.DataFrame(unmatches_columns)
pd.to_pickle(player_id_name_unmatches_df, player_id_name_unmatches_file) # don't bother creating an unmatched df?
player_id_name_unmatches_df.to_excel(player_id_name_unmatches_file.with_suffix('.xlsx')) # create excel file of unmatches

# summary counts
len(missing_player_ids),len(player_id_name_matches_df),len(player_id_name_unmatches_df),len(acbl_player_name_to_id_dict)

(76, 285309, 76, 285309)

In [15]:
# takes 6s
# for each df, assign any missing player_ids using newly updated dict, then save df as pkl and excel.
# Some player_ids will be missing because acbl API retrival is flawed e.g. api rejects names containing ' .,+-' etc.
def _resolve_player_ids(name, known_ids):
    """Return the list of player_ids (as strs) for name.

    known_ids is the id list already attached to the row; it is kept when non-empty.
    Otherwise the dict is consulted, first with the name exactly as it appears in the table and
    then with its casefolded form: the api step stores its results under casefolded names, so an
    exact-case lookup alone never sees them. Returns [] when nothing is known.
    """
    if isinstance(known_ids, list) and known_ids:
        return [str(pid) for pid in known_ids]
    for key in (name, name.casefold()):
        # .get() so that a miss neither raises nor inserts an empty entry into the dict
        ids = acbl_player_name_to_id_dict.get(key)
        if isinstance(ids, list) and ids:
            return [str(pid) for pid in ids]
    return []

for k,df in dfsd.items():
    print(k,df.columns,k in df.columns)
    if 'player_id' in df:
        df['player_id'] = df.apply(lambda r: _resolve_player_ids(r['Name'],r['player_id']),axis='columns')
        display(k,len(df),'Players missing player_id:',df[df['player_id'].map(lambda pid: len(pid) == 0)])
    elif 'pair_ids' in df:
        # resolve each partner of the pair independently
        df['pair_ids'] = df.apply(lambda r: [_resolve_player_ids(name,lpid) for lpid,name in zip(r['pair_ids'],r['Names'])],axis='columns')
        display(k,len(df),'Pairs missing a player_id:',df[df['pair_ids'].map(lambda llpid: any([len(lpid)==0 for lpid in llpid]))])
    else:
        assert False
    df_file = bprPath.joinpath(k+'.pkl')
    df.to_pickle(df_file) # create df file
    df.to_excel(df_file.with_suffix('.xlsx')) # create excel file
    display(df.head())

allmps Index(['Rank', 'MP's', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False


'allmps'

5000

'Players missing player_id:'

Unnamed: 0,Rank,MP's,Unit,Name,player_id
198,199,17664.6,354,"O'Rourke, Lou Ann",[]
255,256,15961.75,446,"Ho, Sun-O",[]
470,471,12557.4,151,"L'Ecuyer, Nicolas",[]
526,527,12166.49,373,"Del'Monte, Ishmael",[]
846,847,10054.72,999,"Jean-Jacques, Palau",[]
1331,1332,8231.9,218,"Lo, Ai-Tai",[]
1531,1532,7653.56,166,"D'Souza, Lino",[]
1725,1726,7160.18,373,"Kay-Wolff, Judy",[]
2613,2614,5708.77,253,"Cannamela, Jr, R. Arthu",[]
2838,2839,5453.64,207,"O'Donnell, Kevin",[]


Unnamed: 0,Rank,MP's,Unit,Name,player_id
0,1,92862.58,128,"Meckstroth, Jeff",[4580699]
1,2,87910.58,176,"Passell, Mike",[1622382]
2,3,78168.46,128,"Rodwell, Eric",[5482658]
3,4,76525.13,174,"Wold, Eddie",[3443949]
4,5,71817.34,210,"Lair, Mark",[5830400]


allpr Index(['Rank', 'PR', 'Games', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False


'allpr'

4853

'Players missing player_id:'

Unnamed: 0,Rank,PR,Games,Unit,Name,player_id
160,165,65.02,124,539,"Madison-Jammal, Sam",[]
683,694,61.89,34,253,"Cannamela, Jr, R. Arthu",[]
929,946,61.07,27,140,Fill In,[]
1054,1071,60.71,22,153,"Sundaram, K.V.K.",[]
1055,1072,60.71,18,999,"Lee, Kang-Won",[]
1252,1270,60.27,100,246,"Weisz-Margules, Adam",[]
1303,1321,60.15,69,128,"Goggin, M. Gregory",[]
1533,1555,59.65,89,128,"CALDEN, Gene",[]
1720,1743,59.31,41,140,Fill In,[]
1751,1774,59.26,39,102,"Litman-Cohen, Katherine",[]


Unnamed: 0,Rank,PR,Games,Unit,Name,player_id
0,1,72.09,137,108,"Grossack, Zachary",[2250896]
1,2,71.27,60,174,"Wold, Eddie",[3443949]
2,3,71.14,86,373,"Shi, Sylvia",[5420105]
3,4,71.02,201,538,"Itabashi, Mark",[6811434]
4,5,71.01,164,128,"Cappelletti, Mike",[2601087]


d9rank Index(['Rank', 'PR', 'Games', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False


'd9rank'

3099

'Players missing player_id:'

Unnamed: 0,Rank,PR,Games,Unit,Name,player_id
207,209,60.15,69,128,"Goggin, M. Gregory",[]
250,252,59.65,89,128,"CALDEN, Gene",[]
285,287,59.26,39,102,"Litman-Cohen, Katherine",[]
468,473,58.0,75,240,"CLEVELAND, Bernice",[]
597,604,57.12,64,128,"Sellers, Jr, Daniel",[]
847,856,55.9,30,102,"O'Connor, Timothy",[]
905,915,55.63,117,128,"Bruckman, David",[]
911,921,55.6,63,243,"Maloney, K.",[]
1056,1066,54.98,62,128,"Ketchum, Jr, William",[]
1247,1258,54.25,135,128,"LEVINE, Jerry",[]


Unnamed: 0,Rank,PR,Games,Unit,Name,player_id
0,1,71.01,164,128,"Cappelletti, Mike",[2601087]
1,2,70.76,47,128,"Meckstroth, Jeff",[4580699]
2,3,69.59,69,128,"Stamatov, Jerry",[9936327]
3,4,69.13,50,128,"Rodwell, Eric",[5482658]
4,5,68.38,66,243,"Pavlicek, Richard",[3565408]


improv6 Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False


'improv6'

4944

'Players missing player_id:'

Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id
85,87,60.23,56.88,3.35,153,"Sundaram, K.V.K.",[]
102,104,53.34,50.07,3.27,351,"Maresso-Paco, Frank",[]
385,390,58.20,55.71,2.49,108,"Swan, Jr., James",[]
463,470,50.77,48.40,2.37,525,"EMAM, Amin",[]
546,557,44.37,42.11,2.25,128,"KENZER, Ricky",[]
572,584,50.33,48.11,2.22,243,"FERTIK, Lil",[]
622,635,50.99,48.83,2.16,454,"HAAS, George",[]
629,642,51.22,49.07,2.15,114,"Housman Jr, W.Glover",[]
684,698,46.99,44.89,2.10,452,"Lacombe-Ingham, Sherry",[]
723,738,45.43,43.36,2.07,151,"SHEFFREN, Hannah",[]


Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id
0,1,55.48,49.81,5.67,114,"Cone, Cynthia",[4928725]
1,2,50.5,44.9,5.6,243,"Chance, Doris",[5571790]
2,3,61.08,55.65,5.43,128,"Land, Jeanne",[1249495]
3,4,40.47,35.14,5.34,159,"Warach, Jane",[8043620]
4,5,56.37,51.06,5.31,128,"Baniewicz, Jeanne",[9442677]


improv12 Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False


'improv12'

4950

'Players missing player_id:'

Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id
20,21,56.62,51.63,4.99,160,"PRITCHARD, Carolyn",[]
64,65,46.99,42.80,4.19,452,"Lacombe-Ingham, Sherry",[]
226,228,50.33,46.92,3.41,243,"FERTIK, Lil",[]
358,361,47.37,44.27,3.10,128,"REDDY, Bhaskar",[]
472,476,58.20,55.31,2.89,108,"Swan, Jr., James",[]
536,541,53.34,50.55,2.79,351,"Maresso-Paco, Frank",[]
677,687,57.68,55.08,2.60,530,"Maja, -",[]
718,728,52.78,50.22,2.56,243,"BENNETT, Sophia",[]
734,744,51.22,48.68,2.54,114,"Housman Jr, W.Glover",[]
774,785,60.23,57.75,2.48,153,"Sundaram, K.V.K.",[]


Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id
0,1,50.77,43.24,7.52,189,"Fitzpatrick, Bonnie",[3194043]
1,2,47.76,40.89,6.87,373,"Koziol, Malgorzata",[1441426]
2,3,56.37,49.53,6.84,128,"Baniewicz, Jeanne",[9442677]
3,4,49.91,43.3,6.61,452,"Krasovic, Valarie",[5511623]
4,5,53.21,46.7,6.51,140,"Armus, Nadine",[1974718]


improv24 Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False


'improv24'

245

'Players missing player_id:'

Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id


Unnamed: 0,Rank,last/PR,old/PR,PR/inc,Unit,Name,player_id
0,1,63.74,57.3,6.45,503,"Kemper, Carol",[4331699]
1,2,68.27,61.88,6.39,119,"Ayyagari, Murthy",[9146962]
2,3,55.95,49.73,6.21,196,"Drabek, Yan",[5172713]
3,4,60.36,54.2,6.17,425,"Schmahl, Michael",[4934148]
4,5,66.46,60.92,5.54,132,"Kopper, David",[2212501]


underate Index(['Rank', 'UR', 'PR/MP's', 'MP's', 'PR', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False


'underate'

4953

'Players missing player_id:'

Unnamed: 0,Rank,UR,PR/MP's,MP's,PR,Unit,Name,player_id
67,68,11.34,9769,551,60.71,153,"Sundaram, K.V.K.",[]
113,114,10.25,2801,151,55.13,126,"Rudikoff, M.D., Jeffrey",[]
114,115,10.24,39378,2613,65.02,539,"Madison-Jammal, Sam",[]
231,232,9.13,9769,1067,60.71,999,"Lee, Kang-Won",[]
366,370,8.38,1838,154,53.33,533,"Layne Carr, Suzie",[]
392,396,8.24,7285,850,59.26,102,"Litman-Cohen, Katherine",[]
704,710,7.0,1176,149,51.86,452,"O'Hara, Susan",[]
991,998,6.32,3330,581,55.9,102,"O'Connor, Timothy",[]
1471,1481,5.45,8804,2566,60.15,128,"Goggin, M. Gregory",[]
1505,1516,5.39,7131,2074,59.17,106,"Dell'osso, John",[]


Unnamed: 0,Rank,UR,PR/MP's,MP's,PR,Unit,Name,player_id
0,1,19.01,7231,1,59.22,128,"Olster, Evan","[tmp:1bd69b97-671c-498b-9b13-554a2926a285, tmp:6637f30e-5942-4a56-87fb-7b8fc7f60e71, tmp:2ddfbbf..."
1,2,18.8,6908,2,59.03,246,"Rosin, Leah","[tmp:1c0dd554-02b5-4c03-931c-287c14ed2c79, tmp:617f2476-72a2-4dd1-85b1-20a9347f7beb, tmp:aaca79b..."
2,3,17.7,8321,42,59.86,128,"Lieser, John",[4240464]
3,4,17.57,9528,69,60.57,473,"Grijalva, Rolando","[7928378, tmp:dd9921da-821c-40bb-9e35-1cc85b8db6c3, tmp:80eda9d1-a123-467b-ada3-8931c969e76b]"
4,5,17.39,5095,9,57.97,431,"Kelly, Ken","[tmp:90dfb096-2e5f-495c-8804-eb1c6406843f, tmp:b3e7843c-01f9-4f87-97c1-91358940371c, tmp:940f616..."


urpairs Index(['Rank', 'UR', 'PR/MP's', 'MP's', 'PairR', 'Unit', 'Names', 'pair_ids'], dtype='object', name=0) False


'urpairs'

4930

'Pairs missing a player_id:'

Unnamed: 0,Rank,UR,PR/MP's,MP's,PairR,Unit,Names,pair_ids
0,1,18.80,37191,146,61.68,999/999,"[Robinson, Claire, Bakhshi, Heather]","[[6025366], []]"
1,2,16.20,37598,404,62.06,999/999,"[Sherman, David, Capal, Tracy]","[[], []]"
5,6,14.83,12999,218,58.77,999/999,"[Shapochnik, Gersh, Yadadov, Rami]","[[], []]"
6,7,14.35,7608,84,56.48,150/150,"[Woodard, Wendell, Cummings, William]","[[], [9934987]]"
8,9,13.42,53307,624,59.75,999/506,"[Guven, Metin, Bichara, Rita]","[[], [7703252]]"
10,11,13.15,83234,3281,65.44,999/446,"[King, Phillip, Castner, Kevin]","[[], []]"
12,13,12.45,43802,1336,61.22,999/999,"[Bird, Nigel, Senior, Brian]","[[], []]"
17,18,11.73,70333,2973,64.38,141/190,"[Foerster, Thomas, Marlow, Christopher]","[[4938828], []]"
18,19,11.60,5831,197,55.35,354/354,"[Gyde, Rita, Gyde, Steven]","[[7772866], []]"
21,22,11.36,58265,1432,58.35,115/115,"[Harding, Charles, Rizzi, Tom]","[[6690149], []]"


Unnamed: 0,Rank,UR,PR/MP's,MP's,PairR,Unit,Names,pair_ids
0,1,18.8,37191,146,61.68,999/999,"[Robinson, Claire, Bakhshi, Heather]","[[6025366], []]"
1,2,16.2,37598,404,62.06,999/999,"[Sherman, David, Capal, Tracy]","[[], []]"
2,3,15.69,8152,41,56.8,533/533,"[Hu, Xiaodan, Wang, Jun]","[[8237328, tmp:5b2e409e-03b4-44dd-afcf-def8afc94803, tmp:75660f98-02a8-42a8-a5be-2d5a8d4b1a03, t..."
3,4,15.52,7353,28,56.32,446/446,"[Walsh, John, Rasmussen, Pete]","[[2235897], [2235978]]"
4,5,15.46,77953,1854,64.47,155/155,"[Questembert, Patrick, Tadmor, Ilan]","[[4016424], [8337500]]"


In [16]:
# do some validation
# every 'pair_ids' cell must be a list. Look the pair dataframes up in dfsd explicitly instead of
# relying on whichever df a previous cell's loop happened to leave behind.
for k,df in dfsd.items():
    if 'pair_ids' in df:
        assert all(type(v) is list for v in df['pair_ids'].values), k

In [17]:
# do some validation
# player tables: every 'player_id' cell is a list of strs.
# pair tables: every 'pair_ids' cell is a list (one entry per partner) of lists of strs.
def _check_id_list(ids):
    # must be a real list whose members are all strs
    assert type(ids) is list, [type(ids),ids]
    for pid in ids:
        assert type(pid) is str, pid

for k,df in dfsd.items():
    print(k,df.columns,k in df.columns)
    if 'player_id' in df:
        for ids in df['player_id'].values:
            _check_id_list(ids)
    elif 'pair_ids' in df:
        for pair in df['pair_ids'].values:
            assert type(pair) is list, [type(pair),pair]
            for ids in pair:
                _check_id_list(ids)
    else:
        assert False

allmps Index(['Rank', 'MP's', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False
allpr Index(['Rank', 'PR', 'Games', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False
d9rank Index(['Rank', 'PR', 'Games', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False
improv6 Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False
improv12 Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False
improv24 Index(['Rank', 'last/PR', 'old/PR', 'PR/inc', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False
underate Index(['Rank', 'UR', 'PR/MP's', 'MP's', 'PR', 'Unit', 'Name', 'player_id'], dtype='object', name=0) False
urpairs Index(['Rank', 'UR', 'PR/MP's', 'MP's', 'PairR', 'Unit', 'Names', 'pair_ids'], dtype='object', name=0) False
