In [2]:
# takes 30m to 10h for 10m files. Highly variable depending on caching of lin files.
# not working in jupyterlab but ok in vscode?
# be sure to pip install -U -r requirements.txt # in particular xlsxwriter must be installed otherwise there may be a silent failure.
# requires 150gb of memory.

# requires outputs to be cleared before posting to github due to 250MB size.
# if git fails due to large size, rollback one git commit using: git reset --soft HEAD~1

# reads and parses BBO .lin files. Lin files are a record of bids and card plays for a single board and single table of a BBO online match.
# use BBO-DownLoader to download *.lin files
# parses all *.lin files in a directory.
# outputs a .py file of one tuple per bidding sequence (id, previous bids, candidate bid, announcement, pandas eval expr).

# next steps:
# acbl_club_results_hand_records_bidding_BBO.ipynb augments hand records with BBO bidding sequences.

# previous steps:
# BBO-Downloader.py downloads BBO's .lin files based on date range.

# todo:
# make Parse_BBO_Lin_File() return values for dealer, hands, vul
# validate bidding sequences for consistency in criteria e.g. HCP, overlap with other bids. Produce coverage heat maps.
# chart player vs field
# process error_files to understand disconnect/withdrawal rates of players.
# double check each bids criteria by comparing to actual use. create mean and stdev (BBO's HCP, SL, QT, ...) for each bid.
#    ... See which bids fall outside criteria or are missing crucial criteria. e.g. 4N opener.
# Compare bidding sequences to actual PAR/SD looking for bad result outliers. Use info to correct or optimize bids.
# get additional info from tourney*.html (.3%), traveler*.html (3%) (*.lin 97%)

In [3]:
import pandas as pd
import pathlib
import pickle
import re
from collections import defaultdict
from IPython.display import display # needed to define display() method in vscode
import time
import datetime
import sys

#!pip install xlsxwriter

In [4]:
# Directory layout: <root>/bbo/data is where the .lin files are globbed from and where the parse cache is kept.
rootPath = pathlib.Path('e:/bridge/data')
bboPath = rootPath / 'bbo'
dataPath = bboPath / 'data'
# Make sure the data directory (and any missing ancestors) exists before it is used.
dataPath.mkdir(exist_ok=True, parents=True)

In [5]:
# takes 1m
# Collect every *.lin file located exactly one directory level below dataPath.
lin_wildcard = '*/*.lin'
lin_files = [*dataPath.glob(lin_wildcard)]
len(lin_files)

9898008

In [6]:
# parse bbo lin files.
# todo: return values for dealer, hands, vul

# using walrus operator (:=) so must use python >= 3.8

# todo: implement vulnerability. bids may differ according to vulnerability.

# Lin files are often malformed because of disconnects or withdrawals

def Parse_BBO_Lin_File(lin):
    """Parse the text of one BBO .lin file (one board played at one table).

    The text is split on '|' and the resulting fields are consumed strictly in
    order: pn, st, md, rh, ah, sv, then zero or more mb fields (each optionally
    followed by an 'an' announcement) and finally the pg/pc card-play fields.
    Parsing stops, without raising, at the first point where the file runs out
    of fields.

    Args:
        lin (str): complete content of the .lin file.

    Returns:
        tuple: (status, username, board, dealer, vul, hands, bids, cards)
            status   -1 when all 52 cards were played, -2 when an 'mc' (claim) field
                     was met during play, otherwise the index of the field at which
                     parsing stopped (0 when the text does not start with 'pn').
            username first of the four comma separated names in the pn field.
            board    text following 'Board ' in the ah field.
            dealer   '1'..'4' (1=south, 2=west, 3=north, 4=east).
            vul      value of the sv field (o=none, n=north-south, e=east-west, b=both).
            hands    list of 4 strings from the md field (the 4th hand is normally '').
            bids     list of (bid, announcement) tuples.
            cards    dict: card -> (card, trick 1..13, position within trick 1..4).
            Items whose field was never reached are None.

    Raises:
        AssertionError: a field that was reached does not have the expected tag or shape.
    """
    bids = None
    board = None
    cards = None
    dealer = None
    hands = None
    username = None
    vul = None
    i = 0 # index of the field currently being examined
    #print(f"{lin=}")
    if not lin.startswith('pn'):
        print('Player name marker missing')
        return i, username, board, dealer, vul, hands, bids, cards
    parts = lin.replace('\n','').split('|') # split() always yields at least one field
    #print(f"{parts=}")
    assert parts[i]=='pn', parts[i] # player marker
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    players = parts[i].split(',')
    assert len(players) == 4, parts[i]
    username = players[0] # should robotnames (players[1:]) be discarded?
    #print(f"{username=} {players}")
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == 'st', parts[i] # start?
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == '', parts[i] # expecting empty
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == 'md', parts[i] # hands
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    # Slice instead of index so an empty md value fails the assertion (with context)
    # rather than raising a bare IndexError. '' must be rejected explicitly because
    # '' in '1234' is True.
    dealer = parts[i][:1] # 1=south, 2=west, 3=north, 4=east
    assert dealer != '' and dealer in '1234', f"{dealer=} {parts[i]=}"
    hands = parts[i][1:].split(',') # 4th hand is '' to minimize BBO's disk space
    assert len(hands) == 4, parts[i]
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == 'rh', parts[i] # unknown
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == '', parts[i] # expecting empty
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == 'ah', parts[i]
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    board_parts = parts[i].split(' ')
    # report the offending field itself (parts[i]), not the player names in parts[1]
    assert len(board_parts) == 2 and board_parts[0] == 'Board', parts[i]
    board = board_parts[1]
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == 'sv', parts[i] # unknown
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    vul = parts[i] # o=none, n=north-south, e=east-west, b=both
    # NOTE(review): this is a substring test, so '' and multi-letter values such as 'on'
    # also pass. Left unchanged because tightening it could reject files accepted today.
    assert vul in 'oneb', vul
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    if len(parts) < i+2:
        return i, username, board, dealer, vul, hands, bids, cards
    bids = []
    passes = 0 # number of consecutive passes seen so far
    while parts[i] == 'mb': # make bid
        if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
        bid = parts[i]
        announcement = ''
        if bid == 'p' or bid == 'p!':
            passes += 1
        else:
            passes = 0
        if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
        # rarely there's an unannounced bid e.g 1C in 3268628079-1676148841-linkay01.lin
        if parts[i] == 'an': # announcement
            if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
            announcement = parts[i].strip() # could have leading/trailing spaces
            if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
        if parts[i] != 'mb' and parts[i] != 'pg':
            # unexpected tag after a bid: stop here; the bid just read is not recorded
            return i, username, board, dealer, vul, hands, bids, cards
        bids.append((bid, announcement))
    if passes == 4:
        assert len(bids) == 4
        # passed out so it's ok that no cards are played
        # NOTE(review): this returns the field index, not -1/-2, so a caller that treats
        # only -1/-2 as success classifies passed-out boards as invalid. Confirm intent.
        return i, username, board, dealer, vul, hands, bids, cards
    assert passes == 3 and len(bids) > 3, f"{passes=} {len(bids)=} {bids=}"
    cards = {}
    for trick in range(1, 14): # tricks are numbered 1..13
        assert parts[i] == 'pg', parts[i] # play card
        if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
        if parts[i] == 'mc': # member claimed
            return -2, username, board, dealer, vul, hands, bids, cards
        assert parts[i] == '', parts[i] # expecting empty
        if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
        for pcn in range(1, 5): # position of the card within the trick, 1..4
            if parts[i] == 'mc': # member claimed
                return -2, username, board, dealer, vul, hands, bids, cards
            if parts[i] != 'pc': # malformed
                return i, username, board, dealer, vul, hands, bids, cards
            if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
            card = parts[i] # todo: parse card to validate
            # a card that is the very last field of the file is not recorded
            if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
            assert card not in cards, card
            cards[card] = (card, trick, pcn)
    assert len(cards) == 52, len(cards) # todo: show which cards are missing
    # after the last card exactly three fields must remain: one unchecked tag and two empties
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == '', parts[i]
    if (i := i+1) >= len(parts): return i, username, board, dealer, vul, hands, bids, cards
    assert parts[i] == '', parts[i]
    assert i+1 == len(parts), [i,len(parts)]
    return -1, username, board, dealer, vul, hands, bids, cards

In [7]:
# takes 20m to 35m
# previously parsed lin files are cached in bbo_parsed_lin_files. use it if exists.
bbo_parsed_lin_files_filename = 'bbo_parsed_lin_files.pkl'
bbo_parsed_lin_files_file = dataPath / bbo_parsed_lin_files_filename
if not bbo_parsed_lin_files_file.exists():
    # no cache yet: start every collection empty (initialized here so the cell can be re-run)
    files_processed = {} # lin file -> parsed result tuple
    bidding_table = {} # (previous bids, current bid, announcement)
    error_files = {} # invalid files
    bid_table = {} # individual bids
    final_contracts = {} # final contract
    announcements = defaultdict(list) # announcements (alerts)
else:
    # NOTE(review): pickle.load runs whatever the file contains; only load a cache written by this notebook.
    with bbo_parsed_lin_files_file.open('rb') as f:
        (files_processed, bidding_table, error_files,
         bid_table, final_contracts, announcements) = pickle.load(f)

In [None]:
# takes 30m or 4h or 9h depending on bbo_parsed_lin_files and file caching. 5m files (150000 files per minute).
# Read a glob of lin files, parse, create various dicts (bids, errors, final contracts, announcements).

# todo: switch to defaultdict

# Make4thHand adds the missing 4th hand which isn't present to minimize BBO's disk space. adds < 1m
def Make4thHand(hands):
    """Fill in the empty 4th hand from the cards not held by the first three hands.

    Args:
        hands: None, or a list of 4 strings of which the first three are
            'S...H...D...C...' hands of 13 cards (17 characters) and the 4th is ''.

    Returns:
        The same list object with hands[3] filled in (ranks listed low to high
        within each suit), or None when hands is None.

    Raises:
        AssertionError: the input or the computed hand does not have the expected shape.
    """
    if hands is None:
        return hands
    assert len(hands) == 4 and all(len(h)==13+4 for h in hands[0:3]) and hands[3] == '', hands
    # held[n] accumulates every rank already dealt in suit n (order S, H, D, C)
    held = ['', '', '', '']
    for known_hand in hands[:3]:
        found = re.match(r'^S(.*)H(.*)D(.*)C(.*)$', known_hand)
        assert found is not None, found
        held = [so_far + more for so_far, more in zip(held, found.groups())]
    # whatever rank is absent from a suit must belong to the 4th hand
    hands[3] = ''.join(
        letter + ''.join(rank for rank in '23456789TJQKA' if rank not in held[n])
        for n, letter in enumerate('SHDC')
    )
    assert len(hands[3]) == 13+4,hands[3]
    return hands

# Parse every lin file (or reuse its cached result) and fold its auction into the
# bidding_table, announcements, bid_table and final_contracts collections.
for i,lin_file in enumerate(lin_files):
    #print(f"\n{i}/{len(lin_files)}: Reading:{lin_file.name}",end='')
    if lin_file in files_processed:
        error, username, board, dealer, vul, hands, bids, cards = files_processed[lin_file]
    else:
        with open(lin_file, 'r', encoding='utf8') as f:
            lin = f.read()
        # parse exactly once per file; a second, discarded parse only doubles the run time
        error, username, board, dealer, vul, hands, bids, cards = Parse_BBO_Lin_File(lin)
        # Make4thHand fills hands[3] in place, so `hands` and the cached tuple share the completed list
        files_processed[lin_file] = (error, username, board, dealer, vul, Make4thHand(hands), bids, cards)
    # -1 = played out, -2 = successful claim; anything else is the index where parsing stopped
    if error not in (-1, -2):
        print(f"{i}/{len(lin_files)}: Invalid file (disconnect or withdrawn). {error=}")
        if username is not None:
            error_files.setdefault(username, []).append(lin_file)
        continue

    # iterate through hand's bidding history
    prev_bids = [] # bids made so far in this auction
    for bid, announcement in bids:
        #print(f"\n{prev_bids=} {bid=} {announcement=}")
        # Immutable snapshot of the auction BEFORE this bid. Storing the live list instead
        # would make every entry of this hand alias one list that ends up holding the whole auction.
        earlier_bids = tuple(prev_bids)
        prev_bid = (earlier_bids,(bid,))
        bidding_table.setdefault(prev_bid, []).append((announcement,lin_file))
        announcements[announcement].append((earlier_bids,bid,lin_file))
        prev_bids.append(bid)
    # validate some more. make some handy collections.
    passes = 0 # number of consecutive passes
    d = False # double
    r = False # redouble
    last_bid = None # last bid that was not a pass, double or redouble
    for bid in prev_bids:
        if bid == 'p' or bid == 'p!':
            passes += 1
            assert passes < 4 or (passes == 4 and len(prev_bids) == 4)
        else:
            assert passes < 3 or (passes == 3 and len(prev_bids) >= 4)
            passes = 0
            if bid[0] == 'd': # could be d or d!
                assert last_bid is not None
                assert not d and not r
                d = True
                r = False
            elif bid[0] == 'r': # could be r or r!
                assert last_bid is not None
                assert d and not r
                d = False
                r = True
            else: # all others
                last_bid = bid
                d = False
                r = False
                # d and r were just reset, so bid_table entries always carry (False, False, lin_file)
                bid_table.setdefault(last_bid, []).append((d,r,lin_file))
    assert passes == 3 or (passes == 4 and len(prev_bids) == 4)
    assert passes == 4 or last_bid is not None
    # key is the final bid (None for a passed-out auction); d/r tell whether it ended doubled/redoubled
    final_contracts.setdefault(last_bid, []).append((d,r,lin_file))

In [None]:
# takes 25s
# Show the number of distinct (previous bids, candidate bid) keys, followed by the whole table.
# NOTE(review): displaying the full dict produces a very large cell output; clear outputs before committing.
len(bidding_table), bidding_table

In [None]:
# takes 10m
# Persist every collection built so far; the cache-loading cell unpacks them in this same order.
bbo_parsed_lin_files_filename = 'bbo_parsed_lin_files.pkl'
bbo_parsed_lin_files_file = dataPath / bbo_parsed_lin_files_filename
with bbo_parsed_lin_files_file.open('wb') as f:
    pickle.dump(
        [files_processed, bidding_table, error_files, bid_table, final_contracts, announcements],
        f,
    )

In [8]:
# number of distinct announcement texts
print(len(announcements))
if False: # disabled because takes 10+m
    # list every announcement text with its number of occurrences, in sorted order
    for an_text in sorted(announcements):
        print(f'"{an_text}" {len(announcements[an_text])}')
        time.sleep(.0001) # slow down output to avoid "too much, too fast" warning
        assert isinstance(an_text,str), an_text
        assert '"' not in an_text

314739


In [9]:
# takes 24m for 10m sequences
# creates corrected bidding and announcements tables. Remove wrong/obsolete announcements keeping one and only one.
# todo: fix legacy filename

corrected_bidding_table = {} # (previous bids, candidate bid) -> (kept announcement, lin file)
corrected_announcements = defaultdict(list) # kept announcement -> lin files it was kept from
obsolete_announcements = {} # bids which have been obsoleted by corrected bids
with open('announcement_conflicts.txt','w',encoding='utf8') as f:
    date_time_str = datetime.datetime.now().strftime("%d-%b-%Y %H:%M:%S")
    f.write('\n')
    f.write(f"List of conflicting annnouncements. Automatically created on {date_time_str} by bbo_parse_lin_files.ipynb.\n")
    f.write('\n')
    for i,(k,v) in enumerate(bidding_table.items()):
        # Rank by (length, text, lin file): the last entry is the longest announcement and is the one kept.
        ranked = sorted((len(an),an,lin_file) for an,lin_file in v)
        _, keeper_an, keeper_lin_file = ranked[-1]
        corrected_announcements[keeper_an].append(keeper_lin_file)
        corrected_bidding_table[k] = (keeper_an,keeper_lin_file)
        for _,an,lin_file in ranked[:-1]:
            if an == keeper_an or an in obsolete_announcements:
                continue # same text as the keeper, or this obsolete text was already reported once
            report = (
                f"\n{i}/{len(bidding_table)}: Conflicting announcements: bidding sequence: {' '.join(k[0])} {k[1][0]}?",
                f"   Keeping:'{keeper_an}' file:{keeper_lin_file.name}",
                f"  Obsolete:'{an}' file:{lin_file.name}",
            )
            # every report line goes both to the notebook output and to the conflicts file
            for line in report:
                print(line)
                f.write(line + '\n')
            obsolete_announcements[an] = (an,lin_file,keeper_an,keeper_lin_file)

In [None]:
# Spot checks. .get() is used for every lookup: indexing the plain dicts with [] raises
# KeyError when the key is absent, and indexing the corrected_announcements defaultdict
# with [] silently inserts an empty entry that later cells would then iterate over.
# dict of bidding table
print(corrected_bidding_table.get(((),('1N',))))
# dict of corrected announcements, key is corrected announcement, value is lin file containing correction
print('\n',corrected_announcements.get('notrump opener. Could have 5M. -- 2-5 !C; 2-5 !D; 2-5 !H; 2-5 !S; 15-17 HCP; 18- total points'))
# dict of obsolete_announcements, key is obsolete announcement, value is (obs an,obs lin file, corrected an, corrected lin file).
print('\n',obsolete_announcements.get('notrump opener. Could have 5M. -- 2-5 !C; 2-5 !D; 2-5 !H; 2-5 !S; 15-'))

In [None]:
# display the whole table: (previous bids, candidate bid) -> (kept announcement, lin file)
corrected_bidding_table

In [None]:
# display the whole dict: kept announcement -> lin files it was kept from
corrected_announcements

In [None]:
# tabulate obsolete_announcements: one row per obsolete announcement text (the dict key becomes the index)
pd.DataFrame.from_dict(obsolete_announcements,orient='index',columns=['Obsolete Announcement','Obsolete Lin File','Corrected Announcement','Corrected Lin File'])

In [None]:
# takes 3s
# attempt to find truncated announcements and replace with full description
# A key and its twin differ only in the candidate bid: 'X' versus 'X!'. When one twin's
# announcement is a prefix of the other's, the shorter one is treated as truncated.
truncated_bids = {} # key with the truncated announcement -> (full announcement, lin file) of its twin
for k in corrected_bidding_table.keys():
    #print(k)
    k1 = k
    k2 = (k[0],(k[1][0]+'!',)) # same previous bids, candidate bid with '!' appended
    if k2 not in corrected_bidding_table:
        continue
    print()
    print('dup!')
    b1 = corrected_bidding_table[k1][0]
    b2 = corrected_bidding_table[k2][0]
    if b1 == b2:
        # k1 and k2 share their previous bids by construction, so only the candidate bid differs
        print('both announcements are same except for candidate bid:',b1)
        continue
    if len(b1) <= len(b2):
        if b2.startswith(b1):
            print('b1 is truncated:')
            truncated_bids[k1] = corrected_bidding_table[k2]
        else:
            print('NOT truncated') 
        print(f"b1: {b1} # {k1}")
        print(f"b2: {b2} # {k2}")
    else:
        if b1.startswith(b2):
            print('b2 is truncated:')
            truncated_bids[k2] = corrected_bidding_table[k1]
        else:
            print('NOT truncated') 
        print(f"b2: {b2} # {k2}")
        print(f"b1: {b1} # {k1}")

# attempt to fix truncated announcement so pandas eval expr will be correct
# (applied after the scan so corrected_bidding_table is not modified while it is being iterated)
for k,v in truncated_bids.items():
    corrected_bidding_table[k] = v
    full_an, full_lin_file = v
    # corrected_announcements maps announcement -> LIST of lin files; extend that list
    # rather than replacing it with a single lin file.
    if full_lin_file not in corrected_announcements[full_an]:
        corrected_announcements[full_an].append(full_lin_file)

In [None]:
# interesting algorithm. given a list of possibly truncated words, return the list of non-truncated words.
# unable to describe to chatgpt or bard. maybe it just can't do it.

# function comments automatically generated by chatgpt from no-comment form. It can rename variables too.

def create_list_of_non_trucations(expr_keywords, kw):
    """
    Keep expr_keywords free of truncations while adding a new word.

    A word is a truncation of another when the other starts with it. Only the first
    related entry found in list order is acted on.

    Args:
        expr_keywords (list): Words collected so far. Modified in place.
        kw (str): Word to merge into the list.

    Returns:
        None. One of three things happens to expr_keywords:
          - kw is a truncation of (or equal to) an existing word: list unchanged;
          - an existing word is a truncation of kw: that word is removed and kw appended;
          - neither: kw is appended.
    """
    for position, known in enumerate(expr_keywords):
        if known.startswith(kw):
            # kw adds nothing new: a word at least as long is already present
            return
        if kw.startswith(known):
            # kw is the longer form: drop the truncation and keep kw instead
            del expr_keywords[position]
            expr_keywords.append(kw)
            return
    # unrelated to everything seen so far
    expr_keywords.append(kw)


In [None]:
# takes 30s for 300,000 corrected announcements
# must be case-insensitive
# Splits every corrected announcement 'comment -- expr; expr; ...' into its sub-expressions
# and collects the alphabetic words used in them.

expr_keywords = [] # longest form seen of every alphabetic word (no truncations)
truncations =  [] # every distinct alphabetic word seen, truncated or not
sub_expr_parts_list = [] # (list of words of one sub-expression, announcement it came from)
corrected_announcement_parts = {} # announcement -> (comment, expression text, list of sub-expressions)

print('corrected_announcements:',len(corrected_announcements))
for i,an in enumerate(corrected_announcements):
    print(f"\n{i}: {an=}")
    corrected_announcement_parts[an] = (None, None, None)
    if an == '':
        continue
    an_parts = an.split('--')
    if len(an_parts) == 1:
        comment = None
        expr_list = an_parts[0].strip()
    elif len(an_parts) == 2:
        comment = an_parts[0].strip()
        expr_list = an_parts[1].strip()
    else:
        print('Invalid announcement:',an)
        continue
    print(f"{comment=}")
    print(f"{expr_list=}")
    # ';' is the expression delimiter. all expressions are logicals AND meaning they must all be True.
    # some expressions contain a comma. That's probably a BBO bug. We're treating a ',' as a ';'
    expr_parts = [piece for e in expr_list.split(';') for piece in e.split(',')]
    corrected_announcement_parts[an] = (comment,expr_list,expr_parts)
    for sub_expr in expr_parts:
        print(f"  {sub_expr=}")
        # split() without an argument drops ALL empty tokens: the blank that follows each ';'
        # and any run of repeated blanks. Removing only the first '' would let a later ''
        # survive and put a double space into the re-joined sub-expression.
        sub_expr_parts = sub_expr.split()
        sub_expr_parts_list.append((sub_expr_parts,an))
        for sep in sub_expr_parts:
            print(f"   {sep=}")
            if str.isalpha(sep):
                create_list_of_non_trucations(expr_keywords,sep)
                if sep not in truncations:
                    truncations.append(sep)

In [None]:
# display both word lists in sorted order: full keywords first, then every word seen
sorted(expr_keywords), sorted(truncations)

In [None]:
 # initialize with special cases to prevent undesired substitution
# maps a word as it appears in an announcement -> the keyword it is replaced with
# before the sub-expression is matched against the regexes
truncation_to_keywords = {
    #'points':'', # prevent total without points
    'st':'stop', # prevent st->strong as it seems to be mainly stop
    'stop':'stop', # prevent stop->stops
    'to':'to', # prevent to->total
    #'total':'total points', # prevent total without points
}

def create_dict_of_trunctions(expr_keywords,truncations,truncation_to_keywords):
    """Map every truncation to the first keyword that starts with it.

    Args:
        expr_keywords (list): full keywords, searched in list order.
        truncations (list): words to map (possibly truncated).
        truncation_to_keywords (dict): mapping to fill in place. Entries already
            present (the special cases) are never overwritten.

    Returns:
        None. A truncation that no keyword starts with is left unmapped instead of
        being mapped to an unrelated keyword.
    """
    for t in truncations:
        if t in truncation_to_keywords:
            continue # keep the caller-supplied special case
        full_word = next((k for k in expr_keywords if k.startswith(t)), None)
        if full_word is not None:
            truncation_to_keywords[t] = full_word
        
# fill truncation_to_keywords in place, then display its keys in sorted order
create_dict_of_trunctions(expr_keywords,truncations,truncation_to_keywords)
sorted(truncation_to_keywords)

In [None]:
# takes 20m or 300,000 bids/s
# Convert bidding announcements to Pandas compatible eval() expressions.
# GIB jargon is well-suited for direct use by eval(). e.g. 12 <= HCP <= 21 & SL_H == 5 & Rebiddable==True

# GIB bidding descriptions are defined per https://www.bridgebase.com/doc/gib_descriptions.php
# GIB's HCP uses 4321 point count system
# GIB's Total Points = HCP + (short-suit points (123)) + (-1 for each short suit with HCP)

# RegEx notes:
# all bids must match one and only one of these regex. Otherwise a new regex is needed.
# GIB regex for suits: !([CDHS]) e.g. !C meaning club suit
# GIB regex for bids: \d[CDHSN] e.g. 1N meaning 1 no-trump

# Target column naming:
# SL_[CDHS] is suit length. e.g. SL_C is suit length of clubs
# C_[CDHS][AKQJT98765432]+ designates specific cards in a suit e.g C_HAK means Heart suit must include both Ace and King.

# todo: double check that all announcements without the words HCP or Total are implied HCP and not Total Points
# HCP formatters. Each receives the regex that matched (unused) and the match object,
# and returns the eval expression text built from the captured numbers.
def HCP_Exact(regex,match): # 10
    """HCP equal to capture group 1."""
    return "HCP == {}".format(match.group(1))

def HCP_At_Most(regex,match): # 10-
    """HCP no greater than capture group 1."""
    return "HCP <= {}".format(match.group(1))

def HCP_At_Least(regex,match): # 10+
    """HCP no less than capture group 1."""
    return "HCP >= {}".format(match.group(1))

def HCP_Between(regex,match): # 10-12
    """HCP within the inclusive range group 1 .. group 2."""
    low, high = match.group(1), match.group(2)
    return "{} <= HCP <= {}".format(low, high)

# Suit-length formatters. The suit letter is the LAST capture group; the numbers come first.
def Suit_Length_Exact(regex,match): # 4 !C
    """Suit length equal to group 1 for the suit in group 2."""
    count, suit = match.group(1), match.group(2)
    return "SL_{} == {}".format(suit, count)

def Suit_Length_At_Most(regex,match): # 4- !C
    """Suit length no greater than group 1 for the suit in group 2."""
    count, suit = match.group(1), match.group(2)
    return "SL_{} <= {}".format(suit, count)

def Suit_Length_At_Least(regex,match): # 4+ !C
    """Suit length no less than group 1 for the suit in group 2."""
    count, suit = match.group(1), match.group(2)
    return "SL_{} >= {}".format(suit, count)

def Suit_Length_Between(regex,match): # 2-3 !C
    """Suit length within the inclusive range group 1 .. group 2 for the suit in group 3."""
    low, high, suit = match.group(1), match.group(2), match.group(3)
    return "{} <= SL_{} <= {}".format(low, suit, high)

# todo: Eliminate Total_Points replacing with HCP and DP?
def Total_Points_Exact(regex,match): # 10 total points
    """Total points equal to capture group 1."""
    return "Total_Points == {}".format(match.group(1))

def Total_Points_At_Most(regex,match): # 10- total points
    """Total points no greater than capture group 1."""
    return "Total_Points <= {}".format(match.group(1))

def Total_Points_At_Least(regex,match): # 10+ total points
    """Total points no less than capture group 1."""
    return "Total_Points >= {}".format(match.group(1))

def Total_Points_Between(regex,match): # 10-12 total points
    """Total points within the inclusive range group 1 .. group 2."""
    low, high = match.group(1), match.group(2)
    return "{} <= Total_Points <= {}".format(low, high)

def _suit_flag(flag_name, match):
    """Return '<flag_name>_<group 1>==True'; group 1 is the suit letter captured by the regex."""
    return flag_name + "_" + match.group(1) + "==True"

def Cards_In_Suit(regex,match): # Q+ in !C
    """One 'C_<suit><card>==True' term per card in group 1, for the suit in group 2."""
    suit = match.group(2)
    return ' & '.join("C_{}{}==True".format(suit, card) for card in match.group(1))

def Suit_Has_Cards(regex,match): # !CAK
    """One 'C_<suit><card>==True' term per card in group 2, for the suit in group 1."""
    suit = match.group(1)
    return ' & '.join("C_{}{}==True".format(suit, card) for card in match.group(2))

def At_Best_Stopper_In_Suit(regex,match): # at best stop in !C
    """Flag: at best a stopper in the suit."""
    return _suit_flag("At_Best_Stopper", match)

def At_Best_Partial_Stopper_In_Suit(regex,match): # at best partial stop in !C
    """Flag: at best a partial stopper in the suit."""
    return _suit_flag("At_Best_Partial_Stopper", match)

def Biddable_Suit(regex,match): # biddable !C
    """Flag: the suit is biddable."""
    return _suit_flag("Biddable", match)

def Forcing_One_Round(regex,match): # forcing (one round)
    """Not translated: returns None (would be "Forcing_One_Round==True")."""
    return None

def Forcing_To(regex,match): # forcing to 3N
    """Not translated: returns None (would be "Forcing_To_<bid>==True")."""
    return None

def Likely_Stopper_In_Suit(regex,match): # likely stop in !C
    """Flag: likely stopper in the suit."""
    return _suit_flag("Likely_Stopper", match)

def No_Cards_In_Suit(regex,match): # no !C
    """Void: suit length of the suit in group 1 is zero."""
    return "SL_{} == 0".format(match.group(1))

def Cards_Not_In_Suit(regex,match): # no !CAKQ
    """One negated 'C_<suit><card>==False' term per card in group 2, for the suit in group 1."""
    suit = match.group(1)
    return ' & '.join("C_{}{}==False".format(suit, card) for card in match.group(2))

def Opponents_Cannot_Play_Undoubled_Below_Bid(regex,match): # opponents cannot play undoubled below 2N
    """Flag named after the bid captured in group 1."""
    return "Opponents_Cannot_Play_Undoubled_Below_{}==True".format(match.group(1))

def Partial_Stopper_In_Suit(regex,match): # partial stop in !C
    """Flag: partial stopper in the suit."""
    return _suit_flag("Partial_Stopper", match)

def Rebiddable_Suit(regex,match): # rebiddable !C
    """Flag: the suit is rebiddable."""
    return _suit_flag("Rebiddable", match)

def Solid_SL_Suit(regex,match): # solid suit of n cards
    """Solid flag for the suit in group 2 plus a minimum length of group 1."""
    count, suit = match.group(1), match.group(2)
    return "Solid_{0}==True & SL_{0} >= {1}".format(suit, count) # todo: == or >=?

def Stopper_In_Suit(regex,match): # stop in !C
    """Flag: stopper in the suit."""
    return _suit_flag("Stopper", match)

def Strong_Rebiddable_Suit(regex,match): # strong rebiddable !C
    """Flag: the suit is strong rebiddable."""
    return _suit_flag("Strong_Rebiddable", match)

def Twice_Rebiddable_Suit(regex,match): # twice rebiddable !C
    """Flag: the suit is twice rebiddable."""
    return _suit_flag("Twice_Rebiddable", match)

def Two_Stoppers_In_Suit(regex,match): # two stops in !C
    """Flag: two stoppers in the suit."""
    return _suit_flag("Two_Stoppers", match)

# Table of (regex, formatter) pairs. Every regex is tried against each sub-expression;
# the formatter of a matching regex is called as formatter(regex, match) and returns the
# eval expression text, or None when the sub-expression is recognized but not translated.
# careful: regex must use longest form of sub_expr. e.g. HCP -> HCPs
expr_regex = [
    (r'^(\d+)$',HCP_Exact), # 10 todo: HCP or total points?
    (r'^(\d+)\-$',HCP_At_Most), # 10- todo: HCP or total points?
    (r'^(\d+)\+$',HCP_At_Least), # 10+ todo: HCP or total points?
    (r'^(\d+)\-(\d+)$',HCP_Between), # 10-12 todo: HCP or total points?
    (r'^(\d+) \!([CDHS])$',Suit_Length_Exact), # 4 !C
    (r'^(\d+)\- \!([CDHS])$',Suit_Length_At_Most), # 4- !C
    (r'^(\d+)\-card \!([CDHS])$',Suit_Length_Exact), # 4-card !C should be 4 !C
    (r'^(\d+)\+ \!([CDHS])$',Suit_Length_At_Least), # 4+ !C
    (r'^(\d+)\-(\d+) \!([CDHS])$',Suit_Length_Between), # 2-3 !C
    (r'^(\d+) HCPs$',HCP_Exact), # 10 HCPs
    (r'^(\d+)\- HCPs$',HCP_At_Most), # 10- HCPs
    (r'^(\d+)\+ HCPs$',HCP_At_Least), # 10+ HCPs
    (r'^(\d+)\-(\d+) HCPs$',HCP_Between), # 10-12 HCPs
    (r'^(\d+) (?:to|total)(?: points)?$',Total_Points_Exact), # 10 total points
    (r'^(\d+)\- (?:to|total)(?: points)?$',Total_Points_At_Most), # 10- total points
    (r'^(\d+)\+ (?:to|total)(?: points)?$',Total_Points_At_Least), # 10+ total points
    (r'^(\d+)-(\d+) (?:to|total)(?: points)?$',Total_Points_Between), # 10-12 total points
    (r'^([AKQ]+)\+ in \!([CDHS])$',Cards_In_Suit), # Q+ in !C
    (r'^\!([CDHS])([AKQ]+)$',Suit_Has_Cards), # !CAK
    (r'^at best stop in \!([CDHS])$',At_Best_Stopper_In_Suit), # at best stop in !C
    (r'^at best partial stop in \!([CDHS])$',At_Best_Partial_Stopper_In_Suit), # at best partial stop in !C
    (r'^biddable \!([CDHS])$',Biddable_Suit), # biddable !C
    (r'^forcing$',Forcing_One_Round), # forcing (one round)
    (r'^forcing to (\d[CDHSN])$',Forcing_To), # forcing to 3N
    (r'^likely stop in \!([CDHS])$',Likely_Stopper_In_Suit), # likely stop in !C
    (r'^no \!([CDHS])$',No_Cards_In_Suit), # no !C
    (r'^no \!([CDHS])([AKQ]+)$',Cards_Not_In_Suit), # no !CAKQ
    (r'^opponents cannot play undoubled below (\d[CDHSN])$',Opponents_Cannot_Play_Undoubled_Below_Bid), # opponents cannot play undoubled below 2N
    (r'^partial stop(?: in)? \!([CDHS])$',Partial_Stopper_In_Suit), # partial stop in !C
    (r'^rebiddable \!([CDHS])$',Rebiddable_Suit), # rebiddable !C
    # 'No stoppers to bid' is superfluous. It's always accompanied by explicit 'at best partial stop in' and 'forcing'.
    (r'^solid (\d)\-card \!([CDHS])$',Solid_SL_Suit), # solid suit of n cards
    (r'^stop in \!([CDHS])$',Stopper_In_Suit), # stop in !C
    (r'^strong rebiddable \!([CDHS])$',Strong_Rebiddable_Suit), # strong rebiddable !C
    (r'^twice rebiddable \!([CDHS])$',Twice_Rebiddable_Suit), # twice rebiddable !C
    (r'^two stops in !([CDHS])$',Two_Stoppers_In_Suit), # two stops in !C
]

# Translate every sub-expression: expand truncated words, then try each regex in expr_regex.
expr_regex_matches = defaultdict(list) # announcement -> one [(regex, groups, eval expr)] list per translated sub-expression
unknown_sub_expr = defaultdict(list) # sub-expression that produced no eval expr -> announcements containing it
for i,(sub_expr_parts,an) in enumerate(sub_expr_parts_list):
    if i % 1000 == 0:
        print(f"{i}/{len(sub_expr_parts_list)} {sub_expr_parts=} {an=}")
    if sub_expr_parts == []:
        print('  Skipping empty list')
        continue
    # replace each word by its full keyword when one is known, otherwise keep it as is
    sub_expr = ' '.join(truncation_to_keywords.get(se, se) for se in sub_expr_parts)
    #print(f"  {sub_expr=}")
    matches = []
    for regex,to_eval_expr in expr_regex:
        mg = re.match(regex,sub_expr)
        if mg is None:
            continue
        eval_expr = to_eval_expr(regex,mg)
        if eval_expr is None: # todo: forcing ... not implemented.
            print('Unimplemented an:',an,' sub_expr:',sub_expr)
            continue
        matches.append((regex,mg.groups(1),eval_expr))
    if len(matches) == 0:
        # Report the sub-expression that failed; the matches list is necessarily empty here.
        # NOTE(review): recognized-but-unimplemented sub-expressions (e.g. 'forcing') also
        # end up here and are recorded as unknown -- confirm that is intended.
        print('Unmatched sub_expr:',sub_expr,' an:',an)
        if an in obsolete_announcements:
            obs_an, obs_lin_file, obs_keeper_an, obs_keeper_lin_file = obsolete_announcements[an]
            if an in corrected_announcements:
                print('Announcement has been corrected:',obs_keeper_an,' file:',obs_keeper_lin_file)
        unknown_sub_expr[sub_expr].append(an)
        continue
    # every sub-expression must be matched by one and only one regex
    assert len(matches) == 1, ['Ambiguous sub_expr:',an,matches]
    expr_regex_matches[an].append(matches)

expr_regex_matches

In [None]:
# takes 3m
# show a list of unknown announcement sub-expressions errors
print('Unknown sub-expressions:',len(unknown_sub_expr))
rows = [] # one (previous bids, bid, announcement, unknown sub-expression, lin file) per occurrence
for k in sorted(unknown_sub_expr):
    v = unknown_sub_expr[k] # announcements in which sub-expression k was found
    print(f"\n'{k}' occurences={len(v)}")
    for an in v:
        print(f"{an=}")
        # every recorded use of this announcement
        for prev_bids,bid,lin_file in announcements[an]:
            print(f"    {prev_bids=} {bid=} '{an=} {lin_file}'")
            rows += [(prev_bids,bid,an,k,lin_file)]

In [None]:
# takes 10s
# create python file containing a table of all bidding sequences, 2,500,000.
#bbo_bidding_sequences_table = [
#    (0, (), 'p', '', 'not implemented'),
#    (1, 'p', 'p', '', 'not implemented'),
#    (2, ('p', 'p'), '1D', 'Minor suit opening -- 3+ !D; 11-21 HCP; 12-22 total points', 'not implemented'),
# ...
# ]

# Write bbo_bidding_sequences_table.py: a module docstring holding a glossary of the
# expression language, followed by one tuple per entry of corrected_bidding_table.
with open('bbo_bidding_sequences_table.py', 'w', encoding='utf8') as f:
    date_time_str = datetime.datetime.now().strftime("%d-%b-%Y %H:%M:%S")
    f.write('\n')
    f.write("'''\n")
    f.write(f"# This python file was automatically created on {date_time_str}.\n")
    f.write('\n')
    f.write('Glossary. Unless explicitly stated, criteria applies to bidders hand.\n')
    f.write('HCP = High Card Points\n')
    f.write('QT = Quick Tricks\n')
    f.write('SL_[NESW] = Suit Length e.g SL_S is suit length in spades\n')
    f.write('Balanced = Balanced distribution\n')
    f.write('Vul = Vulnerability e.g. None, Us, Them, Both\n')
    f.write('\n')
    f.write('Shortcut Notations\n')
    f.write('{suit} is a shortcut notation for the bid suit. e.g For a bid of 1S: HCP_{suit} will become HCP_S\n')
    f.write('HCP_{suit} = HCP in bid suit\n')
    f.write('QT_{suit} = QT in bid suit\n')
    f.write('SL_{suit} = SL in bid suit\n')
    f.write('\n')
    f.write('Arithmetic Operators. Same as using a calculator.\n')
    f.write('+ is addition\n')
    f.write('- is subtraction\n')
    f.write('* is multiplication\n')
    f.write('/ is division\n')
    f.write('\n')
    # The heading needs its own newline, otherwise it runs into the next sentence.
    f.write('Boolean Operators\n')
    f.write('Due to Pandas restrictions, boolean values must be written using a comparison operator. e.g. Balanced==True\n')
    f.write('\n')
    f.write('Logical Operators\n')
    f.write('== is compares equal\n')
    f.write('!= is compares not equal\n')
    f.write('other variations are <=, <, >, >=\n')
    f.write('& = logical and e.g. result is true if both left and right are true\n')
    f.write('| = logical or e.g. result is true if either left or right is true\n')
    # Exclusive or is true when the operands differ, not when they are the same.
    f.write('^ = logical exclusive or (rare) e.g. result is true if exactly one of left or right is true\n')
    f.write('() groups expressions together e.g for 1NT-2C: HCP >= 8 & (SL_H >= 4 | SL_S >= 4)\n')
    f.write("'''\n")
    f.write('\n')
    f.write('# (Id, previous bids, candidate bid, announcement, pandas eval expr)\n')
    f.write('bbo_bidding_sequences_table = [\n')
    for i,(k,v) in enumerate(corrected_bidding_table.items()):
        # previous bids, candidate bid, announcement, pandas eval expr
        eval_expr = ' & '.join(e[2] for m in expr_regex_matches[v[0]] for e in m if e[2] is not None)
        # repr() emits a valid Python string literal even when the text contains quotes,
        # backslashes or newlines; escaping only "'" by hand did not.
        # NOTE(review): k[0] and k[1] are written with str(); if k[1] is a plain bid string
        # it is emitted unquoted -- confirm the key types of corrected_bidding_table.
        f.write(f"    ({i},{k[0]},{k[1]},{v[0]!r},{eval_expr!r}),\n")
    f.write(']\n')
 

In [None]:
# Display error_files for manual inspection (bare expression; nothing is modified).
error_files

In [None]:
# Display bid_table for manual inspection (bare expression; nothing is modified).
bid_table

In [None]:
# Display final_contracts for manual inspection (bare expression; nothing is modified).
final_contracts

In [None]:
# takes 4m for bidding sequences/s
# if attribute error: pip install xlsxwriter

# error: 'Worksheet' object has no attribute 'set_column' due to missing: pip install xlsxwriter

# todo: expr_regex sheet

def Autosize_Column_Widths(writer, sheet_name, df, max_width=50):
    """Set each column width of an Excel sheet to fit its contents.

    The width of a column is the longest string form of its cells or of its
    header, whichever is larger, capped at max_width.

    Args:
        writer: object whose `sheets[sheet_name]` exposes `set_column(first, last, width)`
            (e.g. a pd.ExcelWriter backed by xlsxwriter).
        sheet_name: name of the sheet that df was written to.
        df: the DataFrame that was written to the sheet.
        max_width: upper bound applied to every column width.
    """
    worksheet = writer.sheets[sheet_name]
    # Walk the columns by position: this stays correct with duplicate labels,
    # where columns.get_loc() returns a slice or mask instead of an int.
    for col_idx, column in enumerate(df.columns):
        cell_lengths = df.iloc[:, col_idx].astype(str).map(len)
        # An empty column has no max() (it would be NaN); fall back to the header width.
        widest_cell = 0 if cell_lengths.empty else int(cell_lengths.max())
        # str() so that non-string labels (e.g. integers) can be measured too.
        column_width = min(max(widest_cell, len(str(column))), max_width)
        worksheet.set_column(col_idx, col_idx, column_width)

def _sample_rows(frame, max_rows):
    """Return a random sample of at most max_rows rows of frame.

    DataFrame.sample raises ValueError when asked for more rows than exist, so the
    request is capped at len(frame).
    """
    return frame.sample(min(max_rows, len(frame)))

def _write_sheet(writer, sheet_name, frame):
    """Display frame, write it to sheet_name of writer and auto-size the sheet's columns."""
    display(frame)
    # sheet_name is passed by keyword: positional use is deprecated in pandas 2.x.
    frame.to_excel(writer, sheet_name=sheet_name, index=False, na_rep='NaN', freeze_panes=(1,0))
    Autosize_Column_Widths(writer, sheet_name, frame)

max_rows = 100000 # cap on rows written to each sampled sheet

# output Excel file. Excel is limited to 1M rows total of all sheets.
with pd.ExcelWriter('BBO_Bidding_Sequences.xlsx') as writer:

    # lin files contain multiple bids so there may be 25% more bids than lin files.
    sheet_name = 'Bidding Sequences (Sampled)'
    cols = defaultdict(list)
    for i,(k,v) in enumerate(corrected_bidding_table.items()):
        criterias = ([None]*20)[:len(expr_regex_matches[v[0]])] if corrected_announcement_parts[v[0]][2] is None else corrected_announcement_parts[v[0]][2]
        # First row of a bidding sequence: identification plus the combined eval expression.
        cols['Id'].append(i)
        cols['Previous_Bids'].append(k[0])
        cols['Candidate_Bid'].append(k[1])
        cols['Announcement'].append(v[0])
        cols['Comment'].append(corrected_announcement_parts[v[0]][0])
        cols['Criteria'].append('')
        cols['RegEx'].append('')
        cols['Match_Values'].append('')
        eval_expr = ' & '.join('' if e[2] is None else e[2] for m in expr_regex_matches[v[0]] for e in m)
        cols['Pandas_Eval_Expr'].append(eval_expr)
        # One further row per criteria; the loop no longer reuses `i`, which shadowed the sequence id.
        for criteria,matches in zip(criterias,expr_regex_matches[v[0]]):
            # Clear out cells when columns are redundant, 2nd+ rows, for better readability.
            cols['Id'].append('')
            cols['Previous_Bids'].append('')
            cols['Candidate_Bid'].append('')
            cols['Announcement'].append('')
            cols['Comment'].append('')
            cols['Criteria'].append(criteria)
            cols['RegEx'].append(matches[0][0])
            cols['Match_Values'].append(matches[0][1])
            cols['Pandas_Eval_Expr'].append(matches[0][2])

    df = _sample_rows(pd.DataFrame(cols), max_rows).sort_index()
    _write_sheet(writer, sheet_name, df)

    sheet_name = 'Lin Files (Sampled)'
    df = _sample_rows(
        pd.DataFrame(
            [[k,*v] for k,v in files_processed.items()],
            columns=['lin_file', 'error', 'username', 'board', 'dealer', 'vul', 'hands', 'bids', 'cards']),
        max_rows)
    _write_sheet(writer, sheet_name, df)

    sheet_name = 'Announcement Errors (Sampled)'
    df = _sample_rows(
        pd.DataFrame(
            [[', '.join(prev_bids),bid,an,k,files_processed[lin_file][1],lin_file.name] for prev_bids,bid,an,k,lin_file in rows],
            columns=['Previous Bids','Candidate Bid','Announcement','Invalid Part','Username','Source file']),
        max_rows).sort_index()
    _write_sheet(writer, sheet_name, df)

    sheet_name = 'Obsolete Announcements List'
    df = pd.DataFrame.from_dict(obsolete_announcements,orient='index',columns=['Obsolete Announcement','Obsolete Lin File','Corrected Announcement','Corrected Lin File'])
    _write_sheet(writer, sheet_name, df)

    sheet_name = 'Expression Vocabulary'
    df = pd.DataFrame(expr_keywords,columns=[sheet_name])
    _write_sheet(writer, sheet_name, df)

    # Keep this sheet last: `df` must still be the Usernames frame after the block,
    # because the following cells read df['Username'].
    sheet_name = 'Usernames'
    df = pd.DataFrame({'Username':[v[1] for k,v in files_processed.items()]}).value_counts().reset_index() # reset allows column renaming
    df.columns = ['Username','Count']
    _write_sheet(writer, sheet_name, df)

    #writer.close() # saves and closes # not needed within with statement


In [None]:
# Print the usernames as a pasteable list literal, leaving out names that start with "~~".
l = df['Username'].tolist()
kept_usernames = [name for name in l if not name.startswith("~~")]
print("['" + "','".join(kept_usernames) + "']")

In [None]:
# output usernames to file. copy to bbo-downloader to update with latest collection of usernames.
# One username per line, in the order of df (count order as opposed to alphabetical);
# names starting with '~~' are left out.
with open('bbo_usernames.txt','w',encoding='utf8') as f:
    f.writelines(
        un+'\n'
        for un in df['Username'].tolist()
        if not un.startswith('~~')
    )