# Bulk Learn New Rules Per Step

Takes 2h30m/3h. 1.2TB memory/pagefile.

Discover high-correlation criteria ("New_Rules") for BT rows using actual auctions as ground truth.

**Goal**: For each (auction_prefix, next_bid) combination, find bitmap criteria that best separate:
- **Positives**: Deals where actual auction has this exact (prefix, next_bid)
- **Negatives**: Deals where auction has same prefix but DIFFERENT next_bid

**Output**: `E:/bridge/data/bbo/bidding/bbo_bt_new_rules.parquet` with `New_Rules: List(Utf8)` column


In [1]:
# Configuration
import gc
import time
import re
from pathlib import Path
from typing import Any, Dict, List, Tuple, Optional
from dataclasses import dataclass
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

import polars as pl
import numpy as np
from tqdm.auto import tqdm

program_start_time = time.time()
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

# Paths
DEALS_FILE = Path("E:/bridge/data/bbo/data/bbo_mldf_augmented.parquet")
BT_FILE = Path("E:/bridge/data/bbo/bidding/bbo_bt_seat1.parquet")
BITMAP_FILE = Path("E:/bridge/data/bbo/data/bbo_mldf_augmented_criteria_bitmaps.parquet")
OUTPUT_FILE = Path("E:/bridge/data/bbo/bidding/bbo_bt_new_rules.parquet")

# Column constants
AGG_EXPR_COLS = [f"Agg_Expr_Seat_{i}" for i in range(1, 5)]

# Processing parameters
MAX_DEALS = None  # Set to int for testing, None for all deals
MIN_POS_SAMPLES = 100  # Minimum positive samples to learn rules
MIN_SUPPORT = 0.10  # Minimum criterion support within positives
MIN_LIFT = 1.3  # Minimum lift to consider criterion valuable
TOP_K_CRITERIA = 6  # Max criteria to discover per step

DIRECTIONS = ["N", "E", "S", "W"]

print(f"Configuration:")
print(f"  MAX_DEALS: {MAX_DEALS or 'all'}")
print(f"  MIN_POS_SAMPLES: {MIN_POS_SAMPLES}")
print(f"  MIN_SUPPORT: {MIN_SUPPORT}")
print(f"  MIN_LIFT: {MIN_LIFT}")
print(f"  TOP_K_CRITERIA: {TOP_K_CRITERIA}")


2025-12-27 12:06:50
Configuration:
  MAX_DEALS: all
  MIN_POS_SAMPLES: 100
  MIN_SUPPORT: 0.1
  MIN_LIFT: 1.3
  TOP_K_CRITERIA: 6


## Phase 1: Load Data


In [2]:
# Helper: format elapsed time
def fmt_elapsed(seconds: float) -> str:
    if seconds < 60:
        return f"{seconds:.1f}s"
    elif seconds < 3600:
        return f"{seconds/60:.1f}m"
    else:
        return f"{seconds/3600:.1f}h"

def fmt_eta(done: int, total: int, elapsed: float) -> str:
    if done <= 0:
        return "?"
    rate = done / elapsed if elapsed > 0 else 0
    remaining = total - done
    eta_s = remaining / rate if rate > 0 else 0
    return fmt_elapsed(eta_s)


In [3]:
# takes 6m for 461M rows
# Load BT (for existing criteria and bt_index lookup)
print("Loading BT...")
t0 = time.perf_counter()

bt_cols = ["bt_index", "Auction", "seat", "Expr"] + AGG_EXPR_COLS
bt_schema = pl.scan_parquet(BT_FILE).collect_schema()
bt_cols_available = [c for c in bt_cols if c in bt_schema.names()]

bt_df = pl.scan_parquet(BT_FILE).select(bt_cols_available).collect()
bt_df = bt_df.with_columns(pl.col("Auction").cast(pl.Utf8).str.to_lowercase().alias("_auction_lower"))

print(f"  Loaded {bt_df.height:,} BT rows in {fmt_elapsed(time.perf_counter() - t0)}")
print(f"  Columns: {bt_df.columns}")


Loading BT...
  Loaded 461,681,310 BT rows in 6.1m
  Columns: ['bt_index', 'Auction', 'seat', 'Expr', 'Agg_Expr_Seat_1', 'Agg_Expr_Seat_2', 'Agg_Expr_Seat_3', 'Agg_Expr_Seat_4', '_auction_lower']


In [4]:
# takes 5s
#  Load deals
print("Loading deals...")
t0 = time.perf_counter()

deal_cols = ["index", "Dealer", "bid"]
deals_scan = pl.scan_parquet(DEALS_FILE).select(deal_cols)
if MAX_DEALS:
    deals_scan = deals_scan.head(MAX_DEALS)
deals_df = deals_scan.collect()

print(f"  Loaded {deals_df.height:,} deals in {fmt_elapsed(time.perf_counter() - t0)}")


Loading deals...
  Loaded 15,994,827 deals in 2.7s


In [5]:
# takes 18m
# Build candidate criteria pool from BT Agg_Expr columns
print("Building candidate criteria pool...")

# Conservative pattern: HCP, Total_Points, SL_* with comparison operators
CAND_RE = re.compile(r"^(HCP|Total_Points|SL_[SHDC])\s*(>=|<=|==|!=|>|<)\s*(\d+)\s*$")

@dataclass(frozen=True)
class CandidateCriterion:
    text: str  # e.g. "HCP >= 30", "SL_S >= 5"
    field: str  # "HCP" | "Total_Points" | "SL_S" etc
    op: str
    value: int

def parse_candidate(c: str) -> Optional[CandidateCriterion]:
    s = (c or "").strip()
    m = CAND_RE.match(s)
    if not m:
        return None
    field, op, val_s = m.group(1), m.group(2), m.group(3)
    try:
        v = int(val_s)
    except Exception:
        return None
    return CandidateCriterion(text=s, field=field, op=op, value=v)

def candidate_to_bitmap_col(crit: CandidateCriterion, bidder_dir: str) -> str:
    """Map directionless criterion to a directional bitmap parquet column name."""
    d = bidder_dir.upper()
    if crit.field in ("HCP", "Total_Points"):
        return f"DIR_{d}_{crit.field}_{d} {crit.op} {crit.value}"
    if crit.field.startswith("SL_"):
        suit = crit.field.split("_", 1)[1]  # S/H/D/C
        return f"DIR_{d}_SL_{d}_{suit} {crit.op} {crit.value}"
    return ""

# Extract all criteria strings from BT
cand_map: Dict[str, CandidateCriterion] = {}
for col in AGG_EXPR_COLS:
    if col not in bt_df.columns:
        continue
    try:
        vals = bt_df.select(pl.col(col).explode()).get_column(col).drop_nulls().unique().to_list()
    except Exception:
        continue
    for v in vals:
        if v is None:
            continue
        cc = parse_candidate(str(v))
        if cc is not None:
            cand_map[cc.text] = cc

candidates = list(cand_map.values())
print(f"  Found {len(candidates)} candidate criteria from BT Agg_Expr columns")


Building candidate criteria pool...
  Found 223 candidate criteria from BT Agg_Expr columns


In [6]:
# Load bitmap schema and filter candidates to those with bitmap columns
print("Loading bitmap schema...")
t0 = time.perf_counter()

bitmap_schema = pl.scan_parquet(BITMAP_FILE).collect_schema()
bitmap_cols_set = set(bitmap_schema.names())

# Filter candidates: keep only those with at least one direction having a bitmap column
valid_candidates: List[CandidateCriterion] = []
for c in candidates:
    for d in DIRECTIONS:
        col = candidate_to_bitmap_col(c, d)
        if col and col in bitmap_cols_set:
            valid_candidates.append(c)
            break

candidates = valid_candidates
print(f"  Filtered to {len(candidates)} candidates with bitmap columns")
print(f"  Sample candidates: {[c.text for c in candidates[:10]]}")


Loading bitmap schema...
  Filtered to 201 candidates with bitmap columns
  Sample candidates: ['Total_Points <= 3', 'SL_S >= 6', 'HCP >= 32', 'HCP >= 8', 'Total_Points >= 20', 'SL_H >= 6', 'Total_Points <= 29', 'HCP <= 23', 'Total_Points >= 15', 'HCP >= 12']


## Phase 2: Extract Auction Steps from Deals


In [7]:
def canon_token(t: Any) -> str:
    s = "" if t is None else str(t).strip()
    return s.lower()

def parse_bid_list(bids: Any) -> Tuple[int, List[str]]:
    """Parse bid column into (leading_passes, seat1_tokens)."""
    if bids is None:
        return 0, []
    if isinstance(bids, pl.Series):
        bids = bids.to_list()
    if not isinstance(bids, list):
        s = canon_token(bids)
        if not s:
            return 0, []
        tokens = [t for t in s.split("-") if t != ""]
    else:
        tokens = [canon_token(x) for x in bids if canon_token(x) != ""]
    
    lp = 0
    while lp < len(tokens) and tokens[lp] == "p":
        lp += 1
    seat1_tokens = tokens[lp:]
    return lp, seat1_tokens

def bidder_direction_for_token(dealer: str, leading_passes: int, token_idx_in_seat1: int) -> str:
    """Get direction of bidder making the token at position token_idx_in_seat1."""
    d = (dealer or "N").upper()
    dealer_i = DIRECTIONS.index(d) if d in DIRECTIONS else 0
    original_token_idx = leading_passes + token_idx_in_seat1
    return DIRECTIONS[(dealer_i + original_token_idx) % 4]

def seat_for_token(token_idx_in_seat1: int) -> int:
    """Get seat number (1-4) for token at position token_idx_in_seat1."""
    return (token_idx_in_seat1 % 4) + 1


In [8]:
# takes 2m15s
# Extract all (prefix, next_bid, seat, bidder_dir, deal_idx) tuples from deals
print("Extracting auction steps from deals...")
t0 = time.perf_counter()

# Pre-extract columns
deal_indices = deals_df.get_column("index").to_list()
dealers = deals_df.get_column("Dealer").to_list()
bids_col = deals_df.get_column("bid").to_list()

# Accumulate step data
step_data: List[Tuple[str, str, int, str, int]] = []  # (prefix, next_bid, seat, bidder_dir, deal_idx)

n_deals = len(deal_indices)
last_print = time.perf_counter()

for i in tqdm(range(n_deals), desc="Parsing deals", unit="deal"):
    dealer = dealers[i] or "N"
    lp, seat1_tokens = parse_bid_list(bids_col[i])
    
    if len(seat1_tokens) < 1:
        continue
    
    # Generate all step pairs for this deal
    for tok_idx in range(len(seat1_tokens)):
        prefix = "-".join(seat1_tokens[:tok_idx]) if tok_idx > 0 else ""
        next_bid = seat1_tokens[tok_idx]
        seat = seat_for_token(tok_idx)
        bidder_dir = bidder_direction_for_token(dealer, lp, tok_idx)
        step_data.append((prefix, next_bid, seat, bidder_dir, i))  # use row index i, not deal_idx

elapsed = time.perf_counter() - t0
print(f"  Extracted {len(step_data):,} step tuples from {n_deals:,} deals in {fmt_elapsed(elapsed)}")
print(f"  Rate: {len(step_data)/elapsed:,.0f} tuples/sec")

# Free deals_df memory - no longer needed
del deals_df, deal_indices, dealers, bids_col
gc.collect()


Extracting auction steps from deals...


Parsing deals:   0%|          | 0/15994827 [00:00<?, ?deal/s]

  Extracted 153,739,362 step tuples from 15,994,827 deals in 2.6m
  Rate: 1,002,305 tuples/sec


17

In [9]:
# takes 4m (builds BOTH steps_df AND prefix_index in single pass)
# This avoids double iteration over 153M items
print("Building step DataFrame and prefix index (single pass)...")
t0 = time.perf_counter()

# Single pass: build both lists for DataFrame AND prefix_index simultaneously
prefixes: List[str] = []
next_bids_list: List[str] = []
seats_list: List[int] = []
bidder_dirs_list: List[str] = []
row_idxs: List[int] = []

# Also build prefix_index during the same pass
prefix_index: Dict[str, Dict[str, Dict[str, List]]] = defaultdict(lambda: defaultdict(lambda: {"row_indices": [], "bidder_dirs": []}))

for s in tqdm(step_data, desc="Processing steps", unit="row"):
    p, nb, seat, bd, ridx = s
    # For DataFrame
    prefixes.append(p)
    next_bids_list.append(nb)
    seats_list.append(seat)
    bidder_dirs_list.append(bd)
    row_idxs.append(ridx)
    # For prefix_index
    prefix_index[p][nb]["row_indices"].append(ridx)
    prefix_index[p][nb]["bidder_dirs"].append(bd)

# Build DataFrame
steps_df = pl.DataFrame({
    "prefix": prefixes,
    "next_bid": next_bids_list,
    "seat": seats_list,
    "bidder_dir": bidder_dirs_list,
    "_row_idx": row_idxs,
})

# Convert prefix_index to regular dict for faster access
prefix_index = {k: dict(v) for k, v in prefix_index.items()}

# Free memory
del step_data, prefixes, next_bids_list, seats_list, bidder_dirs_list, row_idxs
gc.collect()

elapsed = time.perf_counter() - t0
print(f"  Built steps_df: {steps_df.height:,} rows")
print(f"  Built prefix_index: {len(prefix_index):,} prefixes, {sum(len(v) for v in prefix_index.values()):,} (prefix, next_bid) pairs")
print(f"  Time: {fmt_elapsed(elapsed)}")


Building step DataFrame and prefix index (single pass)...


Processing steps:   0%|          | 0/153739362 [00:00<?, ?row/s]

  Built steps_df: 153,739,362 rows
  Built prefix_index: 3,371,488 prefixes, 4,172,691 (prefix, next_bid) pairs
  Time: 4.1m


In [10]:
# SKIPPED: prefix_index now built in Cell 11 (single pass optimization)
# Verify it exists
print(f"prefix_index already built: {len(prefix_index):,} prefixes, {sum(len(v) for v in prefix_index.values()):,} (prefix, next_bid) pairs")


prefix_index already built: 3,371,488 prefixes, 4,172,691 (prefix, next_bid) pairs


## Phase 3: Identify Groups with Sufficient Data


In [11]:
# takes 5s
# Group by (prefix, next_bid) and count
print("Grouping by (prefix, next_bid)...")
t0 = time.perf_counter()

group_counts = (
    steps_df
    .group_by(["prefix", "next_bid"])
    .agg([
        pl.len().alias("pos_count"),
        pl.col("seat").first().alias("seat"),  # seat is deterministic from prefix length
    ])
    .sort("pos_count", descending=True)
)

print(f"  Found {group_counts.height:,} unique (prefix, next_bid) groups in {fmt_elapsed(time.perf_counter() - t0)}")
print(f"  Top 20 groups by count:")
print(group_counts.head(20))


Grouping by (prefix, next_bid)...
  Found 4,172,691 unique (prefix, next_bid) groups in 4.6s
  Top 20 groups by count:
shape: (20, 4)
┌─────────┬──────────┬───────────┬──────┐
│ prefix  ┆ next_bid ┆ pos_count ┆ seat │
│ ---     ┆ ---      ┆ ---       ┆ ---  │
│ str     ┆ str      ┆ u64       ┆ i64  │
╞═════════╪══════════╪═══════════╪══════╡
│         ┆ 1d       ┆ 3507593   ┆ 1    │
│         ┆ 1c       ┆ 3446023   ┆ 1    │
│         ┆ 1n       ┆ 2518162   ┆ 1    │
│         ┆ 1s       ┆ 2497723   ┆ 1    │
│         ┆ 1h       ┆ 2363106   ┆ 1    │
│ …       ┆ …        ┆ …         ┆ …    │
│ 1d-p-1h ┆ p        ┆ 607804    ┆ 4    │
│ 1c-p    ┆ 1s       ┆ 590588    ┆ 3    │
│ 1n-p    ┆ 2c       ┆ 590415    ┆ 3    │
│ 1n-p-2c ┆ p        ┆ 567114    ┆ 4    │
│ 1n-p    ┆ p        ┆ 560160    ┆ 3    │
└─────────┴──────────┴───────────┴──────┘


In [12]:
# takes 10s
# For each prefix, compute total deals (for negative set calculation)
print("Computing prefix totals for negative sets...")
t0 = time.perf_counter()

prefix_totals = (
    steps_df
    .group_by("prefix")
    .agg(pl.len().alias("prefix_total"))
)

# Join to get neg_count = prefix_total - pos_count
group_counts = (
    group_counts
    .join(prefix_totals, on="prefix", how="left")
    .with_columns(
        (pl.col("prefix_total") - pl.col("pos_count")).alias("neg_count")
    )
)

print(f"  Computed in {fmt_elapsed(time.perf_counter() - t0)}")
print(group_counts.head(10))

# Free steps_df memory - no longer needed (prefix_index has all we need for scoring)
del steps_df
gc.collect()
print("  Freed steps_df memory")


Computing prefix totals for negative sets...
  Computed in 3.8s
shape: (10, 6)
┌────────┬──────────┬───────────┬──────┬──────────────┬───────────┐
│ prefix ┆ next_bid ┆ pos_count ┆ seat ┆ prefix_total ┆ neg_count │
│ ---    ┆ ---      ┆ ---       ┆ ---  ┆ ---          ┆ ---       │
│ str    ┆ str      ┆ u64       ┆ i64  ┆ u64          ┆ u64       │
╞════════╪══════════╪═══════════╪══════╪══════════════╪═══════════╡
│        ┆ 1d       ┆ 3507593   ┆ 1    ┆ 15994827     ┆ 12487234  │
│        ┆ 1c       ┆ 3446023   ┆ 1    ┆ 15994827     ┆ 12548804  │
│        ┆ 1n       ┆ 2518162   ┆ 1    ┆ 15994827     ┆ 13476665  │
│        ┆ 1s       ┆ 2497723   ┆ 1    ┆ 15994827     ┆ 13497104  │
│        ┆ 1h       ┆ 2363106   ┆ 1    ┆ 15994827     ┆ 13631721  │
│ 1n     ┆ p        ┆ 2152534   ┆ 2    ┆ 2517958      ┆ 365424    │
│ 1d     ┆ p        ┆ 2096452   ┆ 2    ┆ 3505293      ┆ 1408841   │
│ 1c     ┆ p        ┆ 1905720   ┆ 2    ┆ 3443818      ┆ 1538098   │
│ 1s     ┆ p        ┆ 1808043   ┆ 2  

In [13]:
# Filter to groups with sufficient positive and negative samples
learnable_groups = group_counts.filter(
    (pl.col("pos_count") >= MIN_POS_SAMPLES) &
    (pl.col("neg_count") > 0)
).sort("pos_count", descending=True)

print(f"Groups with >= {MIN_POS_SAMPLES} positives and > 0 negatives: {learnable_groups.height:,}")
print(f"Top 10 learnable groups:")
print(learnable_groups.head(10))


Groups with >= 100 positives and > 0 negatives: 66,927
Top 10 learnable groups:
shape: (10, 6)
┌────────┬──────────┬───────────┬──────┬──────────────┬───────────┐
│ prefix ┆ next_bid ┆ pos_count ┆ seat ┆ prefix_total ┆ neg_count │
│ ---    ┆ ---      ┆ ---       ┆ ---  ┆ ---          ┆ ---       │
│ str    ┆ str      ┆ u64       ┆ i64  ┆ u64          ┆ u64       │
╞════════╪══════════╪═══════════╪══════╪══════════════╪═══════════╡
│        ┆ 1d       ┆ 3507593   ┆ 1    ┆ 15994827     ┆ 12487234  │
│        ┆ 1c       ┆ 3446023   ┆ 1    ┆ 15994827     ┆ 12548804  │
│        ┆ 1n       ┆ 2518162   ┆ 1    ┆ 15994827     ┆ 13476665  │
│        ┆ 1s       ┆ 2497723   ┆ 1    ┆ 15994827     ┆ 13497104  │
│        ┆ 1h       ┆ 2363106   ┆ 1    ┆ 15994827     ┆ 13631721  │
│ 1n     ┆ p        ┆ 2152534   ┆ 2    ┆ 2517958      ┆ 365424    │
│ 1d     ┆ p        ┆ 2096452   ┆ 2    ┆ 3505293      ┆ 1408841   │
│ 1c     ┆ p        ┆ 1905720   ┆ 2    ┆ 3443818      ┆ 1538098   │
│ 1s     ┆ p        ┆

## Phase 4: Load Bitmap Data


In [14]:
# Determine which bitmap columns we need
print("Determining required bitmap columns...")

required_bitmap_cols = set()
for c in candidates:
    for d in DIRECTIONS:
        col = candidate_to_bitmap_col(c, d)
        if col and col in bitmap_cols_set:
            required_bitmap_cols.add(col)

required_bitmap_cols = sorted(required_bitmap_cols)
print(f"  Need {len(required_bitmap_cols)} bitmap columns")


Determining required bitmap columns...
  Need 804 bitmap columns


In [15]:
# takes 15s
#  Load bitmap data (only required columns)
print(f"Loading bitmap data ({len(required_bitmap_cols)} columns)...")
t0 = time.perf_counter()

bitmap_scan = pl.scan_parquet(BITMAP_FILE).select(required_bitmap_cols)
if MAX_DEALS:
    bitmap_scan = bitmap_scan.head(MAX_DEALS)
bitmap_df = bitmap_scan.collect()

print(f"  Loaded bitmap_df: {bitmap_df.shape} in {fmt_elapsed(time.perf_counter() - t0)}")

# CRITICAL OPTIMIZATION: Convert to numpy arrays for fast indexing
# Polars .gather() is extremely slow when called millions of times
print("Converting bitmap columns to numpy arrays...")
t0 = time.perf_counter()

bitmap_arrays: Dict[str, np.ndarray] = {}
for col in tqdm(bitmap_df.columns, desc="Converting to numpy", unit="col"):
    bitmap_arrays[col] = bitmap_df[col].to_numpy()

print(f"  Converted {len(bitmap_arrays)} columns to numpy in {fmt_elapsed(time.perf_counter() - t0)}")

# Free Polars DataFrame memory - we only need numpy arrays now
del bitmap_df
gc.collect()

# Build column lookup: (criterion_text, direction) -> column name
col_lookup: Dict[Tuple[str, str], str] = {}
for c in candidates:
    for d in DIRECTIONS:
        col = candidate_to_bitmap_col(c, d)
        if col and col in bitmap_arrays:
            col_lookup[(c.text, d)] = col

print(f"  Built lookup with {len(col_lookup)} (criterion, direction) -> column mappings")


Loading bitmap data (804 columns)...
  Loaded bitmap_df: (15994827, 804) in 0.8s
Converting bitmap columns to numpy arrays...


Converting to numpy:   0%|          | 0/804 [00:00<?, ?col/s]

  Converted 804 columns to numpy in 9.1s
  Built lookup with 804 (criterion, direction) -> column mappings


## Phase 5: Compute Lift Scores


In [16]:
def compute_lift(pos_rate: float, neg_rate: float) -> float:
    """Compute lift = P(criterion | positive) / P(criterion | negative)."""
    if neg_rate <= 0:
        return float("inf") if pos_rate > 0 else 1.0
    return pos_rate / neg_rate

def score_criteria_for_group_fast(
    prefix: str,
    next_bid: str,
    prefix_index: Dict[str, Dict[str, Dict[str, List]]],
    bitmap_arrays: Dict[str, np.ndarray],
    candidates: List[CandidateCriterion],
    col_lookup: Dict[Tuple[str, str], str],
) -> List[Tuple[str, float, float, float]]:
    """
    Score all candidate criteria for a (prefix, next_bid) group.
    Uses prefix_index for O(1) lookups and numpy arrays for fast bitmap access.
    
    Returns list of (criterion_text, lift, pos_rate, neg_rate) sorted by lift descending.
    """
    # O(1) lookup from pre-built index
    prefix_data = prefix_index.get(prefix)
    if not prefix_data:
        return []
    
    pos_data = prefix_data.get(next_bid)
    if not pos_data:
        return []
    
    pos_row_indices = pos_data["row_indices"]
    pos_bidder_dirs = pos_data["bidder_dirs"]
    
    if not pos_row_indices:
        return []
    
    # Collect negative data (all other next_bids for this prefix)
    neg_row_indices: List[int] = []
    neg_bidder_dirs: List[str] = []
    for nb, data in prefix_data.items():
        if nb != next_bid:
            neg_row_indices.extend(data["row_indices"])
            neg_bidder_dirs.extend(data["bidder_dirs"])
    
    if not neg_row_indices:
        return []
    
    # Group by bidder direction for efficient bitmap lookup
    # Convert to numpy arrays for faster indexing
    pos_by_dir: Dict[str, np.ndarray] = {}
    neg_by_dir: Dict[str, np.ndarray] = {}
    
    for d in DIRECTIONS:
        pos_by_dir[d] = np.array([idx for idx, dir in zip(pos_row_indices, pos_bidder_dirs) if dir == d], dtype=np.int64)
        neg_by_dir[d] = np.array([idx for idx, dir in zip(neg_row_indices, neg_bidder_dirs) if dir == d], dtype=np.int64)
    
    results: List[Tuple[str, float, float, float]] = []
    
    for c in candidates:
        pos_true = 0
        pos_tot = 0
        neg_true = 0
        neg_tot = 0
        
        for d in DIRECTIONS:
            col = col_lookup.get((c.text, d))
            if not col:
                continue
            
            arr = bitmap_arrays[col]
            pos_idx = pos_by_dir[d]
            neg_idx = neg_by_dir[d]
            
            if len(pos_idx) > 0:
                pos_true += arr[pos_idx].sum()
                pos_tot += len(pos_idx)
            
            if len(neg_idx) > 0:
                neg_true += arr[neg_idx].sum()
                neg_tot += len(neg_idx)
        
        if pos_tot <= 0 or neg_tot <= 0:
            continue
        
        pos_rate = pos_true / pos_tot
        neg_rate = neg_true / neg_tot
        
        # Filter by minimum support
        if pos_rate < MIN_SUPPORT:
            continue
        
        lift = compute_lift(pos_rate, neg_rate)
        
        # Filter by minimum lift
        if lift < MIN_LIFT:
            continue
        
        results.append((c.text, lift, pos_rate, neg_rate))
    
    # Sort by lift descending, then by pos_rate descending
    results.sort(key=lambda x: (x[1], x[2]), reverse=True)
    return results[:TOP_K_CRITERIA]


In [None]:
# takes 32m
# Process all learnable groups (FAST version using prefix_index)
print(f"Processing {learnable_groups.height:,} learnable groups...")

# learnable_groups = learnable_groups.head(100)  # Uncomment for testing

t0 = time.perf_counter()

results: List[Dict[str, Any]] = []

group_rows = learnable_groups.to_dicts()

for i, g in enumerate(tqdm(group_rows, desc="Scoring groups", unit="group")):
    prefix = g["prefix"]
    next_bid = g["next_bid"]
    seat = g["seat"]
    pos_count = g["pos_count"]
    neg_count = g["neg_count"]
    
    # Score criteria using FAST O(1) lookup + numpy version
    scored = score_criteria_for_group_fast(
        prefix, next_bid,
        prefix_index, bitmap_arrays, candidates, col_lookup
    )
    
    if not scored:
        continue
    
    # Build step auction string
    step_auction = f"{prefix}-{next_bid}" if prefix else next_bid
    
    # Extract discovered criteria (names only)
    discovered = [s[0] for s in scored]
    
    # Build criteria_with_metrics: list of dicts with full metrics for each criterion
    # scored contains: (criterion_text, lift, pos_rate, neg_rate)
    criteria_with_metrics = []
    for s in scored:
        lift_val = s[1]
        # Handle infinity - store as None for JSON compatibility
        if lift_val == float("inf"):
            lift_val = None
        criteria_with_metrics.append({
            "criterion": s[0],
            "lift": lift_val,
            "pos_rate": round(s[2], 4),
            "neg_rate": round(s[3], 4),
        })
    
    # Handle infinity in top_lift (use None for inf, Polars handles None properly)
    raw_lift = scored[0][1] if scored else None
    top_lift = None if raw_lift is None or raw_lift == float("inf") else float(raw_lift)
    
    results.append({
        "prefix": prefix,
        "next_bid": next_bid,
        "step_auction": step_auction,
        "seat": seat,
        "pos_count": pos_count,
        "neg_count": neg_count,
        "discovered_criteria": discovered,
        "criteria_with_metrics": criteria_with_metrics,
        "top_lift": top_lift,
    })

elapsed = time.perf_counter() - t0
print(f"  Processed {len(group_rows):,} groups in {fmt_elapsed(elapsed)}")
print(f"  Found {len(results):,} groups with discovered criteria")
print(f"  Rate: {len(group_rows)/elapsed:.1f} groups/sec")


Processing 66,927 learnable groups...


Scoring groups:   0%|          | 0/66927 [00:00<?, ?group/s]

  Processed 66,927 groups in 32.6m
  Found 66,814 groups with discovered criteria
  Rate: 34.2 groups/sec


In [18]:
# Preview results
print("Preview of discovered criteria:")
results_df = pl.DataFrame(results)
print(results_df.select(["step_auction", "seat", "pos_count", "neg_count", "discovered_criteria", "top_lift"]).head(20))

# Show criteria_with_metrics for first few rows
print("\nCriteria with metrics (first 5 rows):")
for i, row in enumerate(results[:5]):
    print(f"\n{row['step_auction']} (seat {row['seat']}):")
    for m in row["criteria_with_metrics"]:
        lift_str = f"{m['lift']:.2f}" if m['lift'] is not None else "inf"
        print(f"  {m['criterion']:20s}  lift={lift_str:>6s}  pos_rate={m['pos_rate']:.2%}  neg_rate={m['neg_rate']:.2%}")


Preview of discovered criteria:
shape: (20, 6)
┌──────────────┬──────┬───────────┬───────────┬─────────────────────────────────┬───────────┐
│ step_auction ┆ seat ┆ pos_count ┆ neg_count ┆ discovered_criteria             ┆ top_lift  │
│ ---          ┆ ---  ┆ ---       ┆ ---       ┆ ---                             ┆ ---       │
│ str          ┆ i64  ┆ i64       ┆ i64       ┆ list[str]                       ┆ f64       │
╞══════════════╪══════╪═══════════╪═══════════╪═════════════════════════════════╪═══════════╡
│ 1d           ┆ 1    ┆ 3507593   ┆ 12487234  ┆ ["SL_D >= 6", "SL_D >= 5", … "… ┆ 8.140321  │
│ 1c           ┆ 1    ┆ 3446023   ┆ 12548804  ┆ ["SL_C >= 6", "SL_C >= 5", … "… ┆ 15.379386 │
│ 1n           ┆ 1    ┆ 2518162   ┆ 13476665  ┆ ["HCP >= 15", "HCP >= 16", … "… ┆ 2.641696  │
│ 1s           ┆ 1    ┆ 2497723   ┆ 13497104  ┆ ["SL_S >= 5", "SL_S >= 6", … "… ┆ 22.25641  │
│ 1h           ┆ 1    ┆ 2363106   ┆ 13631721  ┆ ["SL_H >= 5", "SL_H >= 6", … "… ┆ 18.684015 │
│ …          

## Phase 6: Merge with BT and Build New_Rules


In [None]:
# takes 100m
# OPTIMIZED: Use Polars join instead of Python dict (461M iter_rows would take days!)
# Just need: _auction_lower -> bt_index + Agg_Expr columns
# BT has multiple rows per auction (different seats), so we must dedupe to avoid Cartesian explosion
print("Preparing BT for join...")
t0 = time.perf_counter()

# Select only columns we need, then dedupe by auction to get ONE row per unique auction
# Use group_by().first() to get deterministic results (keeps first occurrence)
bt_for_join = (
    bt_df
    .select(["_auction_lower", "bt_index"] + AGG_EXPR_COLS)
#    .unique(subset=["_auction_lower"]) # todo: benchmark this alternative.
    .group_by("_auction_lower")
    .first()
)

# Free bt_df memory - no longer needed
del bt_df
gc.collect()

print(f"  BT for join: {bt_for_join.height:,} rows (deduped by auction) in {fmt_elapsed(time.perf_counter() - t0)}")
print("  Freed bt_df memory")


Preparing BT for join...
  BT for join: 461,681,310 rows (deduped by auction) in 1.6h
  Freed bt_df memory


In [None]:
# takes 2m
# FULLY VECTORIZED: Use Polars expressions instead of Python loops
print("Building New_Rules using vectorized operations...")
t0 = time.perf_counter()

# Create DataFrame from results
results_df = pl.DataFrame(results)
print(f"  Results: {results_df.height:,} rows")

# Add lowercase step_auction for join
results_df = results_df.with_columns(
    pl.col("step_auction").str.to_lowercase().alias("_step_lower")
)

# Join with BT to get bt_index and Agg_Expr columns
joined = results_df.join(
    bt_for_join,
    left_on="_step_lower",
    right_on="_auction_lower",
    how="left"
)

print(f"  Joined: {joined.height:,} rows, BT matches: {joined['bt_index'].drop_nulls().len():,}")

# VECTORIZED: Select correct Agg_Expr column based on seat using pl.when()
# Create base_rules column by selecting the right Agg_Expr_Seat_{seat} column
final_df = joined.with_columns([
    # Select base_rules based on seat (when bt_index is not null)
    pl.when(pl.col("bt_index").is_null())
    .then(pl.lit(None).cast(pl.List(pl.Utf8)))
    .when(pl.col("seat") == 1).then(pl.col("Agg_Expr_Seat_1"))
    .when(pl.col("seat") == 2).then(pl.col("Agg_Expr_Seat_2"))
    .when(pl.col("seat") == 3).then(pl.col("Agg_Expr_Seat_3"))
    .when(pl.col("seat") == 4).then(pl.col("Agg_Expr_Seat_4"))
    .otherwise(pl.lit(None).cast(pl.List(pl.Utf8)))
    .alias("base_rules"),
    
    # bt_row_found flag
    pl.col("bt_index").is_not_null().alias("bt_row_found"),
]).with_columns([
    # Ensure base_rules is never null (use empty list instead)
    pl.col("base_rules").fill_null([]).alias("base_rules"),
]).with_columns([
    # New_Rules = base_rules + discovered_criteria, then unique (preserves order in recent Polars)
    pl.col("base_rules").list.concat(pl.col("discovered_criteria")).list.unique(maintain_order=True).alias("New_Rules"),
]).select([
    # Final column selection
    "step_auction",
    "bt_index",
    "seat",
    "prefix",
    "next_bid",
    "pos_count",
    "neg_count",
    "base_rules",
    pl.col("discovered_criteria").alias("discovered_rules"),
    "criteria_with_metrics",
    "New_Rules",
    "top_lift",
    "bt_row_found",
])

print(f"  Built {final_df.height:,} New_Rules entries in {fmt_elapsed(time.perf_counter() - t0)}")


Building New_Rules using vectorized operations...
  Results: 66,814 rows
  Joined: 66,814 rows, BT matches: 66,798
  Built 66,814 New_Rules entries in 1.8m


In [21]:
# Sort and inspect final DataFrame
print("Sorting final DataFrame...")

# Sort by pos_count descending for easier inspection
final_df = final_df.sort("pos_count", descending=True)

print(f"Final DataFrame: {final_df.shape}")
print(f"Columns: {final_df.columns}")
print(f"\nBT row found: {final_df['bt_row_found'].sum():,} / {final_df.height:,}")


Sorting final DataFrame...
Final DataFrame: (66814, 13)
Columns: ['step_auction', 'bt_index', 'seat', 'prefix', 'next_bid', 'pos_count', 'neg_count', 'base_rules', 'discovered_rules', 'criteria_with_metrics', 'New_Rules', 'top_lift', 'bt_row_found']

BT row found: 66,798 / 66,814


In [22]:
# Preview
print("Preview of final output:")
display_cols = ["step_auction", "bt_index", "seat", "pos_count", "base_rules", "discovered_rules", "New_Rules", "top_lift"]
print(final_df.select([c for c in display_cols if c in final_df.columns]).head(30))


Preview of final output:
shape: (30, 8)
┌─────────────┬───────────┬──────┬───────────┬─────────────┬─────────────┬─────────────┬───────────┐
│ step_auctio ┆ bt_index  ┆ seat ┆ pos_count ┆ base_rules  ┆ discovered_ ┆ New_Rules   ┆ top_lift  │
│ n           ┆ ---       ┆ ---  ┆ ---       ┆ ---         ┆ rules       ┆ ---         ┆ ---       │
│ ---         ┆ u32       ┆ i64  ┆ i64       ┆ list[str]   ┆ ---         ┆ list[str]   ┆ f64       │
│ str         ┆           ┆      ┆           ┆             ┆ list[str]   ┆             ┆           │
╞═════════════╪═══════════╪══════╪═══════════╪═════════════╪═════════════╪═════════════╪═══════════╡
│ 1d          ┆ 151867501 ┆ 1    ┆ 3507593   ┆ ["HCP <=    ┆ ["SL_D >=   ┆ ["HCP <=    ┆ 8.140321  │
│             ┆           ┆      ┆           ┆ 21", "HCP   ┆ 6", "SL_D   ┆ 21", "HCP   ┆           │
│             ┆           ┆      ┆           ┆ >= 11", …   ┆ >= 5", … "… ┆ >= 11", …   ┆           │
│             ┆           ┆      ┆           ┆ "…  

## Phase 7: Save Output


In [23]:
# takes 0s
# Save to parquet
print(f"Saving to {OUTPUT_FILE}...")
t0 = time.perf_counter()

final_df.write_parquet(OUTPUT_FILE)

file_size_mb = OUTPUT_FILE.stat().st_size / (1024 * 1024)
print(f"  Saved {final_df.height:,} rows to {OUTPUT_FILE}")
print(f"  File size: {file_size_mb:.2f} MB")
print(f"  Time: {fmt_elapsed(time.perf_counter() - t0)}")


Saving to E:\bridge\data\bbo\bidding\bbo_bt_new_rules.parquet...
  Saved 66,814 rows to E:\bridge\data\bbo\bidding\bbo_bt_new_rules.parquet
  File size: 8.80 MB
  Time: 0.3s


## Summary Statistics


In [25]:
print("=" * 60)
print("SUMMARY")
print("=" * 60)
#print(f"Deals processed: {deals_df.height:,}")
print(f"Unique (prefix, next_bid) groups: {group_counts.height:,}")
print(f"Learnable groups (>= {MIN_POS_SAMPLES} pos): {learnable_groups.height:,}")
print(f"Groups with discovered criteria: {len(results):,}")
print(f"Final New_Rules entries: {final_df.height:,}")
print(f"  - With BT row: {final_df['bt_row_found'].sum():,}")
print(f"  - Without BT row: {(~final_df['bt_row_found']).sum():,}")
print(f"\nOutput file: {OUTPUT_FILE}")
print(f"File size: {file_size_mb:.2f} MB")


SUMMARY
Unique (prefix, next_bid) groups: 4,172,691
Learnable groups (>= 100 pos): 66,927
Groups with discovered criteria: 66,814
Final New_Rules entries: 66,814
  - With BT row: 66,798
  - Without BT row: 16

Output file: E:\bridge\data\bbo\bidding\bbo_bt_new_rules.parquet
File size: 8.80 MB


In [26]:
# Distribution of discovered criteria count
print("\nDistribution of discovered criteria count per step:")
discovered_counts = final_df.select(
    pl.col("discovered_rules").list.len().alias("n_discovered")
).group_by("n_discovered").agg(pl.len().alias("count")).sort("n_discovered")
print(discovered_counts)



Distribution of discovered criteria count per step:
shape: (6, 2)
┌──────────────┬───────┐
│ n_discovered ┆ count │
│ ---          ┆ ---   │
│ u64          ┆ u64   │
╞══════════════╪═══════╡
│ 1            ┆ 227   │
│ 2            ┆ 340   │
│ 3            ┆ 576   │
│ 4            ┆ 879   │
│ 5            ┆ 1212  │
│ 6            ┆ 63580 │
└──────────────┴───────┘


In [27]:
# Top discovered criteria overall
print("\nTop 20 most frequently discovered criteria:")
all_discovered = final_df.select(pl.col("discovered_rules").explode().alias("criterion")).drop_nulls()
crit_counts = all_discovered.group_by("criterion").agg(pl.len().alias("count")).sort("count", descending=True)
print(crit_counts.head(20))



Top 20 most frequently discovered criteria:
shape: (20, 2)
┌────────────────────┬───────┐
│ criterion          ┆ count │
│ ---                ┆ ---   │
│ str                ┆ u64   │
╞════════════════════╪═══════╡
│ Total_Points <= 13 ┆ 12841 │
│ Total_Points <= 14 ┆ 11367 │
│ HCP <= 11          ┆ 10627 │
│ HCP <= 12          ┆ 10042 │
│ Total_Points <= 12 ┆ 9312  │
│ …                  ┆ …     │
│ HCP <= 8           ┆ 5430  │
│ SL_D >= 4          ┆ 5376  │
│ SL_H >= 4          ┆ 5348  │
│ SL_S <= 2          ┆ 5328  │
│ SL_C <= 1          ┆ 5293  │
└────────────────────┴───────┘


In [28]:
# Lift distribution
print("\nLift distribution (top_lift):")
lift_stats = final_df.select(pl.col("top_lift")).describe()
print(lift_stats)



Lift distribution (top_lift):
shape: (9, 2)
┌────────────┬─────────────┐
│ statistic  ┆ top_lift    │
│ ---        ┆ ---         │
│ str        ┆ f64         │
╞════════════╪═════════════╡
│ count      ┆ 37214.0     │
│ null_count ┆ 29600.0     │
│ mean       ┆ 46.45303    │
│ std        ┆ 780.979983  │
│ min        ┆ 1.300221    │
│ 25%        ┆ 3.665746    │
│ 50%        ┆ 7.025837    │
│ 75%        ┆ 16.557446   │
│ max        ┆ 89302.37548 │
└────────────┴─────────────┘


## Sample Inspection

Inspect some specific cases to validate the discovered rules.


In [29]:
# Inspect opening bids (most common)
print("Opening bids (prefix=''):")
openings = final_df.filter(pl.col("prefix") == "").sort("pos_count", descending=True)
print(openings.select(["step_auction", "pos_count", "base_rules", "discovered_rules", "top_lift"]).head(15))


Opening bids (prefix=''):
shape: (15, 5)
┌──────────────┬───────────┬─────────────────────────────┬─────────────────────────────┬───────────┐
│ step_auction ┆ pos_count ┆ base_rules                  ┆ discovered_rules            ┆ top_lift  │
│ ---          ┆ ---       ┆ ---                         ┆ ---                         ┆ ---       │
│ str          ┆ i64       ┆ list[str]                   ┆ list[str]                   ┆ f64       │
╞══════════════╪═══════════╪═════════════════════════════╪═════════════════════════════╪═══════════╡
│ 1d           ┆ 3507593   ┆ ["HCP <= 21", "HCP >= 11",  ┆ ["SL_D >= 6", "SL_D >= 5",  ┆ 8.140321  │
│              ┆           ┆ … "…                        ┆ … "…                        ┆           │
│ 1c           ┆ 3446023   ┆ ["HCP <= 21", "HCP >= 11",  ┆ ["SL_C >= 6", "SL_C >= 5",  ┆ 15.379386 │
│              ┆           ┆ … "…                        ┆ … "…                        ┆           │
│ 1n           ┆ 2518162   ┆ ["HCP <= 17", "HCP >=

In [30]:
# Inspect responses to 1NT
print("Responses after 1N:")
resp_1n = final_df.filter(pl.col("prefix") == "1n").sort("pos_count", descending=True)
print(resp_1n.select(["step_auction", "pos_count", "base_rules", "discovered_rules", "top_lift"]).head(15))


Responses after 1N:
shape: (15, 5)
┌──────────────┬───────────┬────────────────────────────┬────────────────────────────┬─────────────┐
│ step_auction ┆ pos_count ┆ base_rules                 ┆ discovered_rules           ┆ top_lift    │
│ ---          ┆ ---       ┆ ---                        ┆ ---                        ┆ ---         │
│ str          ┆ i64       ┆ list[str]                  ┆ list[str]                  ┆ f64         │
╞══════════════╪═══════════╪════════════════════════════╪════════════════════════════╪═════════════╡
│ 1n-p         ┆ 2152534   ┆ ["Total_Points <= 20"]     ┆ ["Total_Points <= 5",      ┆ 3779.71546  │
│              ┆           ┆                            ┆ "Total_P…                  ┆             │
│ 1n-2c        ┆ 143828    ┆ ["HCP <= 14",              ┆ ["SL_C >= 6", "SL_D >= 6", ┆ 9.41197     │
│              ┆           ┆ "Total_Points >=…          ┆ … "…                       ┆             │
│ 1n-2s        ┆ 60051     ┆ ["SL_H <= 3", "SL_S >= 4", 

In [31]:
# Inspect responses to 1S
print("Responses after 1S:")
resp_1s = final_df.filter(pl.col("prefix") == "1s").sort("pos_count", descending=True)
print(resp_1s.select(["step_auction", "pos_count", "base_rules", "discovered_rules", "top_lift"]).head(15))


Responses after 1S:
shape: (15, 5)
┌──────────────┬───────────┬────────────────────────────┬────────────────────────────┬─────────────┐
│ step_auction ┆ pos_count ┆ base_rules                 ┆ discovered_rules           ┆ top_lift    │
│ ---          ┆ ---       ┆ ---                        ┆ ---                        ┆ ---         │
│ str          ┆ i64       ┆ list[str]                  ┆ list[str]                  ┆ f64         │
╞══════════════╪═══════════╪════════════════════════════╪════════════════════════════╪═════════════╡
│ 1s-p         ┆ 1808043   ┆ ["Total_Points <= 16"]     ┆ ["Total_Points <= 6",      ┆ 92.60118    │
│              ┆           ┆                            ┆ "Total_P…                  ┆             │
│ 1s-2h        ┆ 142001    ┆ ["HCP >= 10", "SL_H >= 5", ┆ ["SL_H >= 6", "SL_H >= 5", ┆ 8.5488      │
│              ┆           ┆ … "…                       ┆ … "…                       ┆             │
│ 1s-d         ┆ 138312    ┆ ["SL_C <= 5", "SL_C >= 3", 

In [32]:
# High lift cases (most discriminative criteria)
# Note: top_lift is None for infinite lift (neg_rate was 0)
print("Top 20 highest lift cases:")
high_lift = final_df.filter(pl.col("top_lift").is_not_null()).sort("top_lift", descending=True)
print(high_lift.select(["step_auction", "pos_count", "discovered_rules", "top_lift"]).head(20))


Top 20 highest lift cases:
shape: (20, 4)
┌───────────────────┬───────────┬─────────────────────────────────┬──────────────┐
│ step_auction      ┆ pos_count ┆ discovered_rules                ┆ top_lift     │
│ ---               ┆ ---       ┆ ---                             ┆ ---          │
│ str               ┆ i64       ┆ list[str]                       ┆ f64          │
╞═══════════════════╪═══════════╪═════════════════════════════════╪══════════════╡
│ 1d-p-p            ┆ 151180    ┆ ["Total_Points <= 3", "Total_P… ┆ 89302.37548  │
│ 1c-1d-p           ┆ 53032     ┆ ["Total_Points <= 3", "HCP <= … ┆ 63221.987668 │
│ 1c-p-p            ┆ 123297    ┆ ["Total_Points <= 4", "Total_P… ┆ 40161.908819 │
│ 2n-p-3h-p-3s-p-4h ┆ 1599      ┆ ["SL_H >= 5", "SL_H >= 4", … "… ┆ 38422.477799 │
│ 1d-1s-p           ┆ 103900    ┆ ["Total_Points <= 3", "Total_P… ┆ 37493.814822 │
│ …                 ┆ …         ┆ …                               ┆ …            │
│ 1d-3d             ┆ 2106      ┆ ["SL_D >= 8

In [33]:
# Inspect criteria_with_metrics for detailed analysis
# This column allows you to examine each criterion's lift, pos_rate, and neg_rate
# to decide whether to include/exclude it from New_Rules

print("Inspecting criteria_with_metrics for selected auctions:")
print("=" * 80)

# Example: Look at a common opening bid
sample_auctions = ["1n", "1s", "1h", "1d", "1c"]
for auc in sample_auctions:
    row = final_df.filter(pl.col("step_auction") == auc).to_dicts()
    if row:
        row = row[0]
        print(f"\n{auc} (seat {row['seat']}, {row['pos_count']:,} deals):")
        print(f"  Base rules: {row['base_rules']}")
        print(f"  Discovered criteria with metrics:")
        for m in row["criteria_with_metrics"]:
            lift_str = f"{m['lift']:.2f}" if m['lift'] is not None else "inf"
            print(f"    {m['criterion']:25s}  lift={lift_str:>8s}  pos={m['pos_rate']:>6.2%}  neg={m['neg_rate']:>6.2%}")


Inspecting criteria_with_metrics for selected auctions:

1n (seat 1, 2,518,162 deals):
  Base rules: ['HCP <= 17', 'HCP >= 15', 'SL_C <= 5', 'SL_C >= 2', 'SL_D <= 5', 'SL_D >= 2', 'SL_H <= 5', 'SL_H >= 2', 'SL_S <= 5', 'SL_S >= 2', 'Total_Points <= 18']
  Discovered criteria with metrics:
    HCP >= 15                  lift=    2.64  pos=87.78%  neg=33.23%
    HCP >= 16                  lift=    2.17  pos=54.79%  neg=25.29%
    HCP >= 14                  lift=    2.00  pos=96.92%  neg=48.43%
    Total_Points >= 16         lift=    1.85  pos=83.28%  neg=45.13%
    Total_Points >= 15         lift=    1.59  pos=96.38%  neg=60.44%
    Total_Points >= 17         lift=    1.54  pos=53.85%  neg=34.93%

1s (seat 1, 2,497,723 deals):
  Base rules: ['HCP <= 21', 'HCP >= 11', 'SL_S >= 5', 'Total_Points <= 22', 'Total_Points >= 12']
  Discovered criteria with metrics:
    SL_S >= 5                  lift=   22.26  pos=99.32%  neg= 4.46%
    SL_S >= 6                  lift=   13.55  pos=32.57%  neg=

In [34]:
print('Program elapsed time in seconds:', time.time()-program_start_time)
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

Program elapsed time in seconds: 10604.69614315033
2025-12-27 15:03:34
