In [1]:
import pandas as pd

final = pd.read_csv("dominicks_with_maxwell.csv")
final["market_size_month"] = 6.685 * final["custcount_monthly"]*0.199
final["prod_mkt_share"] = final["total_packs"] / final["market_size_month"]

In [14]:
interaction_vars = ["tar_mean", "nic_mean", "co_mean", "value", "premium", "carton"]

In [68]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from linearmodels.iv import IV2SLS

# -----------------------
# helpers
# -----------------------
def series_flag(df, col, dtype="int8"):
    if col in df.columns:
        return pd.to_numeric(df[col], errors="coerce").fillna(0).astype(dtype)
    return pd.Series(0, index=df.index, dtype=dtype)

def demean_within(frame, cols, keys):
    means = frame.groupby(keys, observed=True)[cols].transform("mean")
    return frame[cols] - means

def prune_instruments_for_full_rank(exog_df, Z_df, tol=1e-10):
    W, keep = exog_df.copy(), []
    for c in Z_df.columns:
        r_old = np.linalg.matrix_rank(W.to_numpy(), tol)
        W_try = pd.concat([W, Z_df[[c]]], axis=1)
        r_new = np.linalg.matrix_rank(W_try.to_numpy(), tol)
        if r_new > r_old:
            keep.append(c); W = W_try
    return Z_df[keep]

def standardize_cols(df):
    out = df.copy()
    for c in out.columns:
        s = float(out[c].std(skipna=True))
        if np.isfinite(s) and s > 0:
            out[c] = out[c] / s
    return out

# ---------------------------------------------------
# helper: run nested-logit estimation once
# ---------------------------------------------------
def run_nested_logit_block(df2, Xnames, Z_raw, mkt, label):
    """
    df2 : cleaned product-level df (has ln_s, ln_s_within, price, etc.)
    Xnames : list of regressors (including price, ln_s_within)
    Z_raw : DataFrame of raw instruments, index aligned with df2
    mkt : list of FE keys ["store","month_idx"]
    label : string label for printing
    """

    # 3) ONE FE removal for y, X, Z
    X_tilde = demean_within(df2, Xnames, mkt)
    y_tilde = (df2["ln_s"] -
               df2.groupby(mkt, observed=True)["ln_s"].transform("mean")).rename("ln_s")
    Z_tilde = demean_within(
        pd.concat([df2[mkt], Z_raw], axis=1),
        list(Z_raw.columns),
        mkt
    )

    all_parts = (
        pd.concat([y_tilde, X_tilde, Z_tilde], axis=1)
          .replace([np.inf, -np.inf], np.nan)
          .dropna()
    )

    y_iv  = all_parts["ln_s"]
    X_iv  = all_parts[Xnames]
    Z_iv  = all_parts[Z_tilde.columns]

    # drop zero-variance cols; standardize IVs; prune for rank
    X_iv = X_iv.loc[:, X_iv.apply(lambda s: np.nanstd(s.to_numpy()) > 0)]
    Z_iv = standardize_cols(Z_iv.loc[:, Z_iv.apply(lambda s: np.nanstd(s.to_numpy()) > 0)])

    # endogenous: price & ln_s_within
    exog  = X_iv.drop(columns=["price", "ln_s_within"])
    Z_iv  = prune_instruments_for_full_rank(exog, Z_iv)

    clusters_iv = (
        pd.to_numeric(df2.loc[all_parts.index, "store"], errors="coerce")
          .astype(int)
          .to_numpy()
    )

    # 4) Estimation
    print(f"\n==============================")
    print(f"[OLS Nested Logit | {label}]")
    print("==============================")
    ols = sm.OLS(y_iv, X_iv).fit(
        cov_type="cluster",
        cov_kwds={"groups": clusters_iv}
    )
    print(ols.summary().tables[1])

    print(f"\n[IV Nested Logit | {label}]")
    iv = IV2SLS(
        dependent=y_iv,
        exog=exog,
        endog=X_iv[["price","ln_s_within"]],
        instruments=Z_iv
    ).fit(cov_type="clustered", clusters=clusters_iv)
    # compact print: just coeff table
    print(iv.summary)

    # quick elasticity diagnostic
    sigma = float(iv.params.get("ln_s_within", np.nan))
    alpha = float(iv.params.get("price", np.nan))

    sub = df2.loc[all_parts.index].copy()
    sbar = float(sub["prod_mkt_share"].mean())
    sjgbar = float(
        (sub["prod_mkt_share"] /
         sub.groupby(mkt + ["nest"], observed=True)["prod_mkt_share"].transform("sum")).mean()
    )
    pbar = float(sub["price"].mean())
    eps  = -alpha * pbar * (1 - sigma*(1 - sjgbar) - sbar)
    print(f"\nσ (nesting): {sigma:.3f} | implied avg own-price elasticity ≈ {eps:.2f}\n")

    return ols, iv

# ========================
# 0) data & keys
# ========================
df = final.copy()
df = df.rename(columns={"avg_pack_price": "price"})
df["month_dt"] = pd.to_datetime(df["month_dt"])

# ---- restrict to data up to and including 1993 ----
df = df[df["month_dt"].dt.year <= 1993].copy()

mkt = ["store","month_idx"]

# ---- merge external wholesale instruments from `merged` (market-level) ----
# assumes you have `merged` in memory with the listed columns
ext_iv_cols = [
    "wholesale_discount_pp1000",
    "wholesale_premium_pp1000",
    "wholesale_deep_discount_pp1000",
    "tobacco_price_index",
]
try:
    merged_iv = (
        merged[["store", "month_idx"] + ext_iv_cols]
        .drop_duplicates(subset=["store","month_idx"])
    )
    df = df.merge(merged_iv, on=["store","month_idx"], how="left")
except NameError:
    # if `merged` is not defined, we proceed with whatever is in `final`
    pass

# inside/outside shares
sum_inside = df.groupby(mkt, observed=True)["prod_mkt_share"].transform("sum")
df = df[(df["prod_mkt_share"] > 0) & (sum_inside < 1)].copy()
df["s0"] = np.clip(1.0 - sum_inside, 1e-12, 1 - 1e-12)

# ids
if "prod_id" not in df.columns:
    if "upc_norm" in df.columns:
        df["prod_id"] = df["upc_norm"].astype("string")
    elif "upc" in df.columns:
        df["prod_id"] = df["upc"].astype("string").str.replace(r"\D","", regex=True)
    else:
        df["prod_id"] = df.index.astype("string")

# brand vs generic; nest = branded/generic
brand_raw = df.get("brand", pd.Series("", index=df.index))
brand_key = (brand_raw.astype("string").str.strip().str.lower()
             .mask(lambda s: s.eq("") | s.isna(), "generic"))
is_generic = (series_flag(df, "generic_hardcoded") > 0) | brand_key.eq("generic")
brand_key  = brand_key.mask(is_generic, "generic")
df["nest"] = np.where(brand_key.eq("generic"), "generic", "branded")

# ========================
# 1) nested-logit shares (RAW)
# ========================
# ln s_j - ln s_0
df["ln_s"] = np.log(df["prod_mkt_share"]) - np.log(df["s0"])
# within-nest share s_{j|g} and its log
sg = df.groupby(mkt + ["nest"], observed=True)["prod_mkt_share"].transform("sum")
df = df[sg > 0].copy()
df["ln_s_within"] = np.log(df["prod_mkt_share"]) - np.log(sg)

# regressors
dummy_cols = [c for c in ["dlx","supslim","slim","value","premium","flavored","carton"] if c in df.columns]
cont_cols  = [c for c in ["tar_mean","nic_mean","co_mean"] if c in df.columns]
Xnames     = ["price","ln_s_within"] + dummy_cols + cont_cols

need = ["ln_s","price","store","month_idx","nest","prod_id"] + Xnames
df2  = df.dropna(subset=need).copy()
df2["brand_for_iv"] = brand_key.loc[df2.index].astype("string")

# ========================
# 2) instruments (RAW) – Hausman + BLP(cont) + Nest(cont)
# ========================
# --- Hausman (coalesced z1 -> z2 -> z3) ---
g_uq = df2.groupby(["prod_id","month_idx"], observed=True)
cnt_uq = g_uq["price"].transform("count"); sum_uq = g_uq["price"].transform("sum")
z1_raw = np.where(cnt_uq.gt(1), (sum_uq - df2["price"]) / (cnt_uq - 1), np.nan)

g_u  = df2.groupby(["prod_id"], observed=True)
cnt_u = g_u["price"].transform("count"); sum_u = g_u["price"].transform("sum")
g_us = df2.groupby(["prod_id","store"], observed=True)
cnt_us = g_us["price"].transform("count"); sum_us = g_us["price"].transform("sum")
z2_raw = np.where((cnt_u - cnt_us).gt(0), (sum_u - sum_us) / (cnt_u - cnt_us), np.nan)

gbm  = df2.groupby(["brand_for_iv","month_idx"], observed=True)
cnt_bm = gbm["price"].transform("count"); sum_bm = gbm["price"].transform("sum")
gbms = df2.groupby(["brand_for_iv","month_idx","store"], observed=True)
cnt_bms = gbms["price"].transform("count"); sum_bms = gbms["price"].transform("sum")
z3_raw = np.where((cnt_bm - cnt_bms).gt(0), (sum_bm - sum_bms) / (cnt_bm - cnt_bms), np.nan)

z_haus_raw = pd.Series(z1_raw, index=df2.index)
z_haus_raw = z_haus_raw.where(z_haus_raw.notna(), pd.Series(z2_raw, index=df2.index))
z_haus_raw = z_haus_raw.where(z_haus_raw.notna(), pd.Series(z3_raw, index=df2.index))

Z_base_raw = pd.DataFrame({"z_haus": z_haus_raw}, index=df2.index)

# --- BLP(cont) by brand (as in simple logit) ---
gm  = df2.groupby(mkt, observed=True)
gfb = df2.groupby(mkt + ["brand_for_iv"], observed=True)
for c in cont_cols:
    tot = gm[c].transform("sum")
    own = gfb[c].transform("sum")
    Z_base_raw[f"iv_brand_riv_sum_{c}"] = tot - own
    Z_base_raw[f"iv_brand_own_sum_{c}"] = own - df2[c]

# rival count in market (helps 1st stage)
Z_base_raw["iv_rival_count"] = gm["price"].transform("size") - gfb["price"].transform("size")

# --- Nest(cont) proxies for ln_s_within (same-nest & other-nest sums/counts) ---
gmn = df2.groupby(mkt + ["nest"], observed=True)
for c in cont_cols:
    nest_tot = gmn[c].transform("sum")
    Z_base_raw[f"iv_nest_same_sum_{c}"]  = nest_tot - df2[c]
    Z_base_raw[f"iv_nest_other_sum_{c}"] = gm[c].transform("sum") - nest_tot

Z_base_raw["iv_nest_same_cnt"]  = gmn["price"].transform("size") - 1
Z_base_raw["iv_nest_other_cnt"] = gm["price"].transform("size") - gmn["price"].transform("size")

# ========================
# 2b) AUGMENT Z with external wholesale instruments
# ========================
# keep only external IVs that actually exist in df2
ext_iv_cols_in_df = [c for c in ext_iv_cols if c in df2.columns]
df2[ext_iv_cols_in_df] = df2[ext_iv_cols_in_df].apply(pd.to_numeric, errors="coerce")

# prefix to avoid name clashes
Z_aug_raw = pd.concat(
    [Z_base_raw, df2[ext_iv_cols_in_df].add_prefix("iv_ext_")],
    axis=1
)

Z_aug_only = pd.concat(
    [ df2[ext_iv_cols_in_df].add_prefix("iv_ext_")],
    axis=1
)

Z_aug_nest = pd.concat(
    [Z_base_raw["iv_nest_other_cnt"],  Z_base_raw["iv_nest_same_cnt"] ,  df2[ext_iv_cols_in_df].add_prefix("iv_ext_")],
    axis=1
)
# =========================================================
# 2c) INTERACT EXTERNAL WHOLESALE IVs WITH PRODUCT FEATURES
# =========================================================

# choose product characteristics that vary within a market
interaction_vars = ["tar_mean", "nic_mean", "co_mean", "value", "premium", "carton"]

# keep external IVs that actually exist
ext_iv_cols_in_df = [c for c in ext_iv_cols if c in df2.columns]

# raw external IV DF (still market-level)
extIV = df2[ext_iv_cols_in_df].rename(columns=lambda s: f"ext_{s}")

# interaction DF
interIV = pd.DataFrame(index=df2.index)

for iv in extIV.columns:
    for v in interaction_vars:
        interIV[f"{iv}_x_{v}"] = extIV[iv] * df2[v]

# full augmented IV set:
# baseline BLP/Hausman/Nest + external IVs + interactions
Z_interact_raw = pd.concat([Z_base_raw, extIV, interIV], axis=1)



# =========================================================
# 3) RUN BASELINE + EXTERNAL IVs ***WITHOUT FE***
# =========================================================

def run_nested_logit_no_FE(df2, Xnames, Z_raw, label):
    """
    Same model but WITHOUT market fixed effects.
    No demeaning. No FE removal. No clustering needed per market.
    """

    # dependent and regressors
    y = df2["ln_s"].replace([np.inf, -np.inf], np.nan)
    X = df2[Xnames].replace([np.inf, -np.inf], np.nan)

    Z = Z_raw.replace([np.inf, -np.inf], np.nan)

    # merge & clean
    all_parts = pd.concat([y, X, Z], axis=1).dropna()
    y_iv = all_parts["ln_s"]
    X_iv = all_parts[Xnames]
    Z_iv = Z.loc[all_parts.index]

    # drop zero-variance & standardize instruments
    X_iv = X_iv.loc[:, X_iv.apply(lambda s: np.nanstd(s.to_numpy()) > 0)]
    Z_iv = Z_iv.loc[:, Z_iv.apply(lambda s: np.nanstd(s.to_numpy()) > 0)]
    Z_iv = standardize_cols(Z_iv)

    # prune for rank
    endog = X_iv[["price", "ln_s_within"]]
    exog  = X_iv.drop(columns=["price", "ln_s_within"])
    Z_iv  = prune_instruments_for_full_rank(exog, Z_iv)

    # OLS
    print("\n==============================================")
    print(f"[OLS Nested Logit | {label}]")
    print("==============================================")
    ols = sm.OLS(y_iv, X_iv).fit()
    print(ols.summary().tables[1])

    # IV
    print("\n[IV Nested Logit | {label}]")
    iv = IV2SLS(
        dependent=y_iv,
        exog=exog,
        endog=endog,
        instruments=Z_iv
    ).fit()
    print(iv.summary)

    # elasticities
    sigma = float(iv.params.get("ln_s_within", np.nan))
    alpha = float(iv.params.get("price", np.nan))
    sub = df2.loc[all_parts.index].copy()
    sbar = float(sub["prod_mkt_share"].mean())
    sg = sub.groupby(["store","month_idx","nest"], observed=True)["prod_mkt_share"].transform("sum")
    sjgbar = float((sub["prod_mkt_share"]/sg).mean())
    pbar = float(sub["price"].mean())
    eps  = -alpha * pbar * (1 - sigma*(1 - sjgbar) - sbar)

    print(f"\nσ (nesting): {sigma:.3f} | implied avg own-price elasticity ≈ {eps:.2f}\n")

    return ols, iv

# =========================================================
# STORE FIXED EFFECTS ONLY
# =========================================================
def run_nested_logit_store_FE(df2, Xnames, Z_raw, label):
    """
    Demean within store only (remove permanent store effects).
    Time variation (month_idx) remains → external IVs survive.
    """

    FEkey = ["store"]

    # Demean Y, X, Z within store only
    X_tilde = demean_within(df2, Xnames, FEkey)
    y_tilde = df2["ln_s"] - df2.groupby(FEkey, observed=True)["ln_s"].transform("mean")
    Z_tilde = demean_within(
        pd.concat([df2[FEkey], Z_raw], axis=1),
        list(Z_raw.columns),
        FEkey
    )

    all_parts = (
        pd.concat([y_tilde, X_tilde, Z_tilde], axis=1)
          .replace([np.inf, -np.inf], np.nan)
          .dropna()
    )

    y_iv = all_parts["ln_s"]
    X_iv = all_parts[Xnames]
    Z_iv = all_parts[Z_tilde.columns]

    # Standard cleanup exactly as before
    X_iv = X_iv.loc[:, X_iv.apply(lambda s: np.nanstd(s.to_numpy()) > 0)]
    Z_iv = Z_iv.loc[:, Z_iv.apply(lambda s: np.nanstd(s.to_numpy()) > 0)]
    Z_iv = standardize_cols(Z_iv)
    exog = X_iv.drop(columns=["price", "ln_s_within"])
    Z_iv = prune_instruments_for_full_rank(exog, Z_iv)

    clusters = df2.loc[all_parts.index, "store"].astype(int).to_numpy()

    print("\n==============================================")
    print(f"[OLS Nested Logit | {label}]")
    print("==============================================")
    ols = sm.OLS(y_iv, X_iv).fit(cov_type="cluster", cov_kwds={"groups": clusters})
    print(ols.summary().tables[1])

    print("\n[IV Nested Logit | {label}]")
    iv = IV2SLS(
        dependent=y_iv,
        exog=exog,
        endog=X_iv[["price","ln_s_within"]],
        instruments=Z_iv
    ).fit(cov_type="clustered", clusters=clusters)
    print(iv.summary)

    # Elasticity diagnostic
    sigma = float(iv.params.get("ln_s_within", np.nan))
    alpha = float(iv.params.get("price", np.nan))
    sub = df2.loc[all_parts.index].copy()
    sbar = float(sub["prod_mkt_share"].mean())
    sg = sub.groupby(["store","month_idx","nest"], observed=True)["prod_mkt_share"].transform("sum")
    sjgbar = float((sub["prod_mkt_share"]/sg).mean())
    pbar = float(sub["price"].mean())
    eps = -alpha * pbar * (1 - sigma*(1 - sjgbar) - sbar)

    print(f"\nσ (nesting): {sigma:.3f} | implied avg own-price elasticity ≈ {eps:.2f}\n")

    return ols, iv


In [70]:
# =========================================================
# YEAR FIXED EFFECTS ONLY
# =========================================================
def run_nested_logit_year_FE(df2, Xnames, Z_raw, label):

    # create YEAR variable from month_dt
    df2 = df2.copy()
    df2["year_FE"] = df2["month_dt"].dt.year

    FEkey = ["year_FE"]  # <-- KEY CHANGE

    # demeaning within YEAR
    X_tilde = demean_within(df2, Xnames, FEkey)
    y_tilde = df2["ln_s"] - df2.groupby(FEkey, observed=True)["ln_s"].transform("mean")
    Z_tilde = demean_within(
        pd.concat([df2[FEkey], Z_raw], axis=1),
        list(Z_raw.columns),
        FEkey
    )

    all_parts = (
        pd.concat([y_tilde, X_tilde, Z_tilde], axis=1)
          .replace([np.inf, -np.inf], np.nan)
          .dropna()
    )

    y_iv = all_parts["ln_s"]
    X_iv = all_parts[Xnames]
    Z_iv = all_parts[Z_tilde.columns]

    # clean
    X_iv = X_iv.loc[:, X_iv.apply(lambda s: np.nanstd(s) > 0)]
    Z_iv = Z_iv.loc[:, Z_iv.apply(lambda s: np.nanstd(s) > 0)]
    Z_iv = standardize_cols(Z_iv)

    exog = X_iv.drop(columns=["price","ln_s_within"])
    Z_iv = prune_instruments_for_full_rank(exog, Z_iv)

    # cluster by store (robust)
    clusters = df2.loc[all_parts.index,"store"].astype(int).to_numpy()

    print("\n==============================================")
    print(f"[OLS Nested Logit | {label}]")
    print("==============================================")
    ols = sm.OLS(y_iv, X_iv).fit(
        cov_type="cluster", 
        cov_kwds={"groups": clusters}
    )
    print(ols.summary().tables[1])

    print("\n[IV Nested Logit | {label}]")
    iv = IV2SLS(
        dependent=y_iv,
        exog=exog,
        endog=X_iv[["price","ln_s_within"]],
        instruments=Z_iv
    ).fit(cov_type="clustered", clusters=clusters)
    print(iv.summary)

    # elasticity diagnostic
    sigma = float(iv.params.get("ln_s_within", np.nan))
    alpha = float(iv.params.get("price", np.nan))

    sub = df2.loc[all_parts.index].copy()
    sbar = float(sub["prod_mkt_share"].mean())
    sg = sub.groupby(["store","month_idx","nest"], observed=True)["prod_mkt_share"].transform("sum")
    sjgbar = float((sub["prod_mkt_share"]/sg).mean())
    pbar = float(sub["price"].mean())
    eps  = -alpha * pbar * (1 - sigma*(1 - sjgbar) - sbar)

    print(f"\nσ (nesting): {sigma:.3f} | implied avg own-price elasticity ≈ {eps:.2f}\n")

    return ols, iv


In [72]:
def run_nested_logit_store_year_nest_FE(df2, Xnames, Z_raw, label):

    df2 = df2.copy()
    df2["year_FE"] = df2["month_dt"].dt.year

    # define FE groups
    FEkey = ["store", "year_FE", "nest"]

    # demean all relevant variables by store-year-nest cells
    X_tilde = demean_within(df2, Xnames, FEkey)
    y_tilde = df2["ln_s"] - df2.groupby(FEkey)["ln_s"].transform("mean")

    Z_tilde = demean_within(
        pd.concat([df2[FEkey], Z_raw], axis=1),
        list(Z_raw.columns),
        FEkey
    )

    all_parts = (
        pd.concat([y_tilde, X_tilde, Z_tilde], axis=1)
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )

    y_iv = all_parts["ln_s"]
    X_iv = all_parts[Xnames]
    Z_iv = all_parts[Z_tilde.columns]

    # keep only columns with real variation
    X_iv = X_iv.loc[:, X_iv.apply(lambda s: np.nanstd(s) > 0)]
    Z_iv = Z_iv.loc[:, Z_iv.apply(lambda s: np.nanstd(s) > 0)]
    Z_iv = standardize_cols(Z_iv)

    exog = X_iv.drop(columns=["price","ln_s_within"])
    Z_iv = prune_instruments_for_full_rank(exog, Z_iv)

    clusters = df2.loc[all_parts.index, "store"].astype(int).to_numpy()

    print("\n==============================================")
    print(f"[OLS Nested Logit | {label}]")
    print("==============================================")
    ols = sm.OLS(y_iv, X_iv).fit(
        cov_type="cluster",
        cov_kwds={"groups": clusters}
    )
    print(ols.summary())

    print("\n[IV Nested Logit | {label}]")
    iv = IV2SLS(
        dependent=y_iv,
        exog=exog,
        endog=X_iv[["price","ln_s_within"]],
        instruments=Z_iv
    ).fit(cov_type="clustered", clusters=clusters)
    print(iv.summary)

    sigma = float(iv.params.get("ln_s_within", np.nan))
    alpha = float(iv.params.get("price", np.nan))

    sub = df2.loc[all_parts.index].copy()
    sbar = float(sub["prod_mkt_share"].mean())
    sg = sub.groupby(["store","month_idx","nest"])["prod_mkt_share"].transform("sum")
    sjgbar = float((sub["prod_mkt_share"]/sg).mean())
    pbar = float(sub["price"].mean())
    eps  = -alpha * pbar * (1 - sigma*(1 - sjgbar) - sbar)

    print(f"\nσ (nesting): {sigma:.3f} | implied own-price elasticity ≈ {eps:.2f}\n")

    return ols, iv


In [88]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from linearmodels.iv import IV2SLS

def demean_within(df, cols, fe_keys):
    means = df.groupby(fe_keys, observed=True)[cols].transform("mean")
    return df[cols] - means

def prune_instruments_for_full_rank(exog_df, Z_df, tol=1e-10):
    W = exog_df.copy()
    keep = []
    for c in Z_df.columns:
        r0 = np.linalg.matrix_rank(W.to_numpy(), tol)
        r1 = np.linalg.matrix_rank(pd.concat([W, Z_df[[c]]], axis=1).to_numpy(), tol)
        if r1 > r0:
            keep.append(c)
            W = pd.concat([W, Z_df[[c]]], axis=1)
    return Z_df[keep]

def run_nested_logit_store_month_FE(df2, Xnames, Z_raw, label):
    FEkeys = ["store", "month_idx"]        # <<<<<< STORE × MONTH FE
    
    # Demean everything within FE
    y_tilde = df2["ln_s"] - df2.groupby(FEkeys)["ln_s"].transform("mean")
    X_tilde = demean_within(df2, Xnames, FEkeys)
    Z_tilde = demean_within(pd.concat([df2[FEkeys], Z_raw], axis=1),
                            Z_raw.columns, FEkeys)

    # Drop NA rows created by demeaning
    data = pd.concat([y_tilde, X_tilde, Z_tilde], axis=1)
    data = data.replace([np.inf, -np.inf], np.nan).dropna()

    y = data["ln_s"]
    X = data[Xnames]

    # endogenous
    endog = X[["price", "ln_s_within"]]
    exog  = X.drop(columns=["price", "ln_s_within"])

    # prune for rank
    Z = data[Z_tilde.columns]
    Z = prune_instruments_for_full_rank(exog, Z)

    clusters = df2.loc[data.index, "store"]

    print("\n==============================")
    print(f"[OLS Nested Logit | {label}]")
    print("==============================")
    ols = sm.OLS(y, X).fit(
        cov_type="cluster",
        cov_kwds={"groups": clusters}
    )
    print(ols.summary().tables[1])

    print("\n[IV Nested Logit | {label}]")
    iv = IV2SLS(
        dependent=y,
        exog=exog,
        endog=endog,
        instruments=Z
    ).fit(cov_type="clustered", clusters=clusters)
    print(iv.summary.table)

    # elasticities
    alpha = float(iv.params.get("price", np.nan))
    sigma = float(iv.params.get("ln_s_within", np.nan))

    sub = df2.loc[data.index]
    pbar = float(sub["price"].mean())
    sj   = float(sub["prod_mkt_share"].mean())
    sg   = float((sub["prod_mkt_share"] /
                  sub.groupby(["store","month_idx","nest"])
                     ["prod_mkt_share"].transform("sum")).mean())

    elastic = -alpha * pbar * (1 - sigma*(1 - sg) - sj)
    print(f"\nσ = {sigma:.3f} | implied elasticity ≈ {elastic:.2f}\n")

    return ols, iv


In [98]:
# ===== 1) BASELINE (exactly your old IV set, with FE, ≤ 1993) =====
ols_combo, iv_combo = run_nested_logit_block(
    df2=df2,
    Xnames=Xnames,
    mkt=mkt,
    Z_raw=Z_base_raw,
    label="BLP + Hausmann+ nest | STORE + MONTH + NEST FE"
)




[OLS Nested Logit | BLP + Hausmann+ nest | STORE + MONTH + NEST FE]
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
price          -1.0826      0.158     -6.835      0.000      -1.393      -0.772
ln_s_within     0.7968      0.015     52.555      0.000       0.767       0.827
value          -0.4775      0.646     -0.739      0.460      -1.743       0.788
carton         -0.1495      0.066     -2.263      0.024      -0.279      -0.020
tar_mean        0.2113      0.049      4.313      0.000       0.115       0.307
nic_mean       -4.1859      2.427     -1.725      0.085      -8.942       0.570
co_mean        -0.1952      0.290     -0.672      0.501      -0.764       0.374

[IV Nested Logit | BLP + Hausmann+ nest | STORE + MONTH + NEST FE]
                          IV-2SLS Estimation Summary                          
Dep. Variable:                   ln_s   R-squared:              

In [100]:

ols_combo, iv_combo = run_nested_logit_no_FE(
    df2=df2,
    Xnames=Xnames,
    Z_raw=Z_aug_nest,
    label=" Wholesale + nest | NO FE "
)



[OLS Nested Logit |  Wholesale + nest | NO FE ]
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
price          -0.9130      0.037    -24.701      0.000      -0.985      -0.841
ln_s_within     0.4244      0.015     28.059      0.000       0.395       0.454
value          -0.3492      0.058     -6.043      0.000      -0.462      -0.236
carton          0.4164      0.036     11.678      0.000       0.347       0.486
tar_mean        0.3932      0.015     25.611      0.000       0.363       0.423
nic_mean       -6.8206      0.815     -8.373      0.000      -8.417      -5.224
co_mean        -0.3117      0.055     -5.657      0.000      -0.420      -0.204

[IV Nested Logit | {label}]
                          IV-2SLS Estimation Summary                          
Dep. Variable:                   ln_s   R-squared:                      0.9777
Estimator:                    IV-2SLS   Adj.