# Count how often the highest offer wins (sale price equals the highest offer)

In [None]:
import pandas as pd
import numpy as np

# --- Config ---
# Location of the input CSV; edit this to analyse a different file.
CSV_PATH = "Panel_for_Model2.csv"

# Load the whole table into memory (low_memory=False is forwarded to pandas).
df = pd.read_csv(CSV_PATH, low_memory=False)

# Lookup table: lowercased header -> header exactly as spelled in the file,
# so later code can find columns without caring about capitalisation.
cols_lc = {header.lower(): header for header in df.columns}
def need(colname):
    """Return the DataFrame's own spelling of the column named *colname*.

    The match ignores case: the requested name is lowercased before it is
    looked up in ``cols_lc``, whose keys are the lowercased headers of ``df``.

    Args:
        colname: Column name to resolve, in any capitalisation.

    Returns:
        The header as it appears in ``df.columns``.

    Raises:
        KeyError: If no header matches; the message lists the available
            columns.
    """
    # cols_lc keys are all lowercase, so the request must be lowercased too,
    # otherwise a name such as "Price" could never match.
    key = colname.lower() if isinstance(colname, str) else colname
    if key not in cols_lc:
        raise KeyError(f"Column '{colname}' not found. Available: {list(df.columns)}")
    return cols_lc[key]

# Look up the real header spelling of each column the analysis relies on.
price_col = need("price")
offer_col = need("highest_offer")
time_of_sale_col = need("time_of_sale")

# Parse both value columns as numbers; errors="coerce" turns anything that
# cannot be parsed into NaN instead of raising.
price, offer = (
    pd.to_numeric(df[name], errors="coerce") for name in (price_col, offer_col)
)

# A row is usable only when neither value is missing after coercion.
valid = ~(price.isna() | offer.isna())

def summarize_for(time_flag_value: str):
    """Summarise how often price differs from highest_offer in one time slice.

    A row is in scope when its ``time_of_sale`` value equals
    *time_flag_value* (ignoring case and surrounding whitespace on both
    sides) and both ``price`` and ``highest_offer`` parsed as numbers
    (the module-level ``valid`` mask).

    Args:
        time_flag_value: The ``time_of_sale`` label to select,
            e.g. ``"time_n_sale"``.

    Returns:
        A dict with these keys, suitable as one row of a DataFrame:
        ``time_of_sale_value`` (the label as passed in),
        ``rows_with_both_values`` (rows in scope),
        ``price_neq_highest_offer`` (rows where price != highest_offer),
        ``price_eq_highest_offer`` (the remaining rows in scope), and
        ``pct_mismatch_of_valid`` (mismatches as a percentage of rows in
        scope, or NaN when no rows are in scope).
    """
    # Normalise the requested label exactly like the column values below;
    # otherwise stray whitespace in the argument would silently match nothing.
    target = time_flag_value.strip().lower()

    # 1) Boolean mask for the requested time slice.
    mask_time = (
        df[time_of_sale_col]
        .astype(str)      # compare as strings regardless of the stored dtype
        .str.strip()      # ignore surrounding whitespace
        .str.lower()      # ignore case
        .eq(target)
    )

    # 2) Restrict to rows where both price and highest_offer are numeric.
    pool = valid & mask_time

    # 3) Number of rows in scope (summing booleans counts the True values).
    total = int(pool.sum())

    # 4) Exact mismatches (no tolerance), counted only inside the pool.
    # NOTE: if the values are floats, tiny rounding noise can make `!=` report
    # a mismatch; np.isclose(price, offer, rtol=..., atol=...) is the
    # tolerance-based alternative should that become a problem.
    mismatches_exact = int(((price != offer) & pool).sum())

    # 5) Everything else in the pool is an exact match.
    equals_exact = total - mismatches_exact

    # 6) Share of mismatches among rows in scope; NaN avoids dividing by zero.
    pct_mismatch = (mismatches_exact / total * 100.0) if total else np.nan

    # 7) Return a small dict that is easy to turn into a DataFrame row later.
    return {
        "time_of_sale_value": time_flag_value,
        "rows_with_both_values": total,
        "price_neq_highest_offer": mismatches_exact,
        "price_eq_highest_offer": equals_exact,
        "pct_mismatch_of_valid": pct_mismatch,
    }


# One summary row per time slice, in this order: the sale at time n, then
# the sale at time n-1.
out = pd.DataFrame(
    list(map(summarize_for, ("time_n_sale", "time_n-1_sale")))
)

# Print the table as plain text without the DataFrame's row index.
print(out.to_string(index=False))


time_of_sale_value  rows_with_both_values  price_neq_highest_offer  price_eq_highest_offer  pct_mismatch_of_valid
       time_n_sale                   9254                     9190                      64              99.308407
     time_n-1_sale                   9254                     7818                    1436              84.482386
