# Packages & Setup

In [None]:
import os
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from statsmodels.robust import scale, norms
import pyreadstat
import statsmodels.api as sm
from patsy import dmatrices
import nbformat
from nbconvert import PythonExporter


# All data/output paths below are relative to this working directory.
# Set the MMB_DATA_DIR environment variable to run on another machine; when it
# is unset the original location is used.
os.chdir(os.environ.get(
    'MMB_DATA_DIR',
    '/Users/connorbrennan/OneDrive - The University of Chicago/mmb/data'
))

# Bringing in data

In [2]:
# Read the regression-format dataset together with its Stata metadata.
df, meta = pyreadstat.read_dta('derived/MMB_reg_format.dta')

# Lookup table pairing each column name with its Stata variable label.
df_labels = pd.DataFrame({
    "Variable": meta.column_names,
    "Description": meta.column_labels
})
print(df_labels)

# Keep only rows where both timing maxima are below 99.
timing_ok = (df['y_timing_max'] < 99) & (df['piq_timing_max'] < 99)
df = df.loc[timing_ok]

# Complements of the indexation columns, used in interaction terms.
df = df.assign(
    not_pr_ndx=1 - df['pr_ndx'],
    not_wg_ndx=1 - df['wg_ndx'],
)

# Disabled trim: keep only rows with sacratio20 below its 98th percentile.
#df = df.loc[df['sacratio20'] < df['sacratio20'].quantile(0.98)]

# Sub-samples split on the 'estimated' / 'calibrated' flags.
df_estimated = df.loc[df['estimated'] == 1]
df_calibrated = df.loc[df['calibrated'] == 1]

           Variable           Description
0             model         (first) model
1              rule          (first) rule
2           rule_tr           Taylor Rule
3          rule_itr  Inertial Taylor Rule
4            rule_g           Growth Rule
..              ...                   ...
129   stky_wg_nondx                  None
130        stky_all                  None
131  stky_pr_wg_ndx                  None
132         ndx_all    P and W Indexation
133          ln_neq       ln(Num. of Eq.)

[134 rows x 2 columns]


# Parameterization & specifying variables

In [3]:
# ---------------------------------------------------------------------------
# Stepwise selection settings
# ---------------------------------------------------------------------------
# p-value cutoffs used by the stepwise routine: 'enter' governs adding a
# candidate term, 'exit' governs dropping a term that is already in the model.
alphas = dict(enter=0.1, exit=0.15)

# Horizons appended to each dependent-variable stem, e.g. 'IScurve' + '20'
# (alternative setting: [20, 40]).
horizons = [20]

# Dependent-variable stems.
depvars = ['IScurve', 'infl_per_rr', 'sacratio']

# Regressors shared by every specification.
fixed_vars = ['rule_g', 'rule_itr', 'estimated']

# ---------------------------------------------------------------------------
# Candidate regressors. 'a:b' is a patsy interaction term.
# Earlier specifications that split price stickiness into Calvo / Rotemberg /
# other and wage indexation into sub-types are no longer listed here.
# ---------------------------------------------------------------------------

# Full candidate pool: nominal rigidities.
all_nomrig_vars = [
    'stky_pr',
    'stky_wg',
    'pr_ndx',
    'wg_ndx',
    'stky_pr:pr_ndx',
    'stky_wg:wg_ndx',
    'stky_pr:not_pr_ndx',
    'stky_wg:not_wg_ndx',
    'stky_pr:wg_ndx',
    'stky_wg:pr_ndx',
    'stky_pr:not_wg_ndx',
    'stky_wg:not_pr_ndx',
]

# Full candidate pool: real rigidities and all their pairwise interactions.
all_realrig_vars = [
    'wlth', 'ntwrth', 'bnkcrdit', 'open', 'learning',
    'wlth:ntwrth', 'wlth:bnkcrdit', 'wlth:open', 'wlth:learning',
    'ntwrth:bnkcrdit', 'ntwrth:open', 'ntwrth:learning',
    'bnkcrdit:open', 'bnkcrdit:learning',
    'open:learning',
]

# Full candidate pool: non-model attributes and selected interactions.
all_nonmod_vars = [
    'cb_authors_ext', 'ln_neq',
    'vint_early', 'vint_mid', 'vint_late',
    'est_early', 'est_late',
    'cb_authors_ext:ln_neq',
    'cb_authors_ext:vint_early', 'cb_authors_ext:vint_mid', 'cb_authors_ext:vint_late',
    'cb_authors_ext:est_early', 'cb_authors_ext:est_late',
    'ln_neq:vint_early', 'ln_neq:vint_mid', 'ln_neq:vint_late',
    'ln_neq:est_early', 'ln_neq:est_late',
    'vint_early:est_early', 'vint_early:est_late',
    'vint_mid:est_early', 'vint_mid:est_late',
    'vint_late:est_early', 'vint_late:est_late',
]

all_all_vars = all_nomrig_vars + all_realrig_vars + all_nonmod_vars

# Restricted candidate sets tied to specific tables.
# Table 7
nomrig_vars = ['stky_wg:wg_ndx', 'stky_wg:not_wg_ndx', 'pr_ndx']

# Table 8
realrig_vars = [
    'wlth', 'bnkcrdit', 'ntwrth', 'open',
    'wlth:bnkcrdit', 'ntwrth:bnkcrdit', 'wlth:ntwrth', 'wlth:open',
]

# Table 10
nonmod_vars = ['cb_authors_ext', 'ln_neq', 'vint_early', 'vint_mid', 'est_early']

all_vars = nomrig_vars + realrig_vars + nonmod_vars

# Stepwise Regression function (Do not Touch)

In [4]:
def stepwise_reg(depvar, covariates, data, alphas, max_steps=200, reenter_factor=0.5, eps=1e-6):
    """
    Bidirectional stepwise covariate selection using robust regression (RLM).

    Each iteration performs a forward step followed by a backward step:

    * Forward: every remaining candidate is added, one at a time, to the
      currently entered terms and the model is refit. The candidate with the
      smallest p-value that is below the entry threshold is entered.
    * Backward: the model is refit with the entered terms, and the term with
      the largest p-value (ignoring the intercept and ``fixed_vars``) is
      removed if that p-value exceeds the exit threshold.

    Iteration stops when neither step changed the model, or after
    ``max_steps`` iterations. All fits use a Tukey biweight norm (c=4.685),
    ``HuberScale`` scale estimation, ``cov='H1'`` and ``conv='coefs'``.

    NOTE: the module-level list ``fixed_vars`` is read as a global. Its terms
    are appended to every formula and are never considered for removal.

    Parameters
    ----------
    depvar : str
        Left-hand side of the regression formula.
    covariates : iterable of str
        Candidate right-hand-side terms, in patsy formula syntax.
    data : pandas.DataFrame
        Data passed to patsy and statsmodels.
    alphas : dict
        Must contain 'enter' and 'exit' p-value thresholds.
    max_steps : int, default 200
        Upper bound on the number of forward/backward iterations.
    reenter_factor : float, default 0.5
        Multiplier applied to the entry threshold for a term that was
        previously removed in a backward step.
    eps : float, default 1e-6
        Relative buffer: the entry threshold is scaled by ``1 - eps`` and the
        exit threshold by ``1 + eps``.

    Returns
    -------
    The fitted RLM results for the final set of entered terms plus
    ``fixed_vars``.
    """
    potential_covariates = list(covariates)
    entered_vars = []
    # Terms removed in a backward step. They may only re-enter under the
    # stricter threshold enter_thr * reenter_factor. Entries are never cleared
    # (the discard call in the forward step is commented out).
    removed_lockout = set()
    keep_stepping = True

    # add small buffer to thresholds
    enter_thr = alphas['enter'] * (1 - eps)
    exit_thr  = alphas['exit']  * (1 + eps)

    def build_formula(vars_):
        # Formula is "<depvar> ~ <vars_> + <fixed_vars>"; falls back to an
        # intercept-only right-hand side when both lists are empty.
        rhs = []
        if vars_:
            rhs.append("+".join(vars_))
        if fixed_vars:
            rhs.append("+".join(fixed_vars))
        return f"{depvar} ~ " + (" + ".join(rhs) if rhs else "1")

    step = 0
    while keep_stepping and step < max_steps:
        step += 1
        # Set back to True below only if a term enters or exits this iteration.
        keep_stepping = False

        # ----- FORWARD STEP -----
        best_pval_in_step = 1.0
        entering_var = None

        for var in list(potential_covariates):
            # prevent immediate re-entry unless much stronger
            if var in removed_lockout:
                allow_back = False
            else:
                allow_back = True

            trial_vars = entered_vars + [var]
            y, X = dmatrices(build_formula(trial_vars), data=data, return_type='dataframe')
            # Skip candidates that make the design matrix rank-deficient or
            # ill-conditioned (condition number above 1000).
            rank_X, cond_num = np.linalg.matrix_rank(X), np.linalg.cond(X)
            if (rank_X < X.shape[1]) or (cond_num > 1000):
                continue

            # robreg = smf.rlm(build_formula(trial_vars),
            #                  M=sm.robust.norms.TukeyBiweight(),
            #                  data=data).fit(scale_est='mad', cov='H1', update_scale=True, conv='coefs')
            robreg = smf.rlm(
                build_formula(trial_vars),
                M = norms.TukeyBiweight(c=4.685),
                data = data
            ).fit(
                scale_est = scale.HuberScale(),
                update_scale = True,
                cov = 'H1',
                conv='coefs'
            )

            # p-value of the candidate term; NaN (and therefore skipped) when
            # the fit has no coefficient under exactly this name.
            pval = robreg.pvalues.get(var, np.nan)
            if np.isnan(pval):
                continue

            # stricter criterion if trying to re-enter
            ok_to_enter = (pval < enter_thr) if allow_back else (pval < enter_thr * reenter_factor)
            # Track the qualifying candidate with the smallest p-value.
            if ok_to_enter and pval < best_pval_in_step:
                best_pval_in_step = pval
                entering_var = var

        if entering_var is not None:
            entered_vars.append(entering_var)
            potential_covariates.remove(entering_var)
            #removed_lockout.discard(entering_var)   # clear lockout on re-entry
            keep_stepping = True

        # ----- BACKWARD STEP -----
        # Refit with the current entered terms (plus fixed_vars) and test
        # whether the weakest removable term should be dropped.
        # robreg = smf.rlm(build_formula(entered_vars),
        #                  M=sm.robust.norms.TukeyBiweight(),
        #                  data=data).fit(scale_est='mad', cov='H1', update_scale=True, conv='coefs')
        robreg = smf.rlm(
            build_formula(entered_vars),
            M = norms.TukeyBiweight(c=4.685),
            data = data
        ).fit(
            scale_est = scale.HuberScale(),
            update_scale = True,
            cov = 'H1',
            conv='coefs'
        )

        # The intercept and the fixed regressors are never removal candidates.
        removal_exclude = set(fixed_vars) | {'Intercept'}
        cand = robreg.pvalues[~robreg.pvalues.index.isin(removal_exclude)].dropna()

        if not cand.empty:
            worst_var = cand.idxmax()
            if cand.loc[worst_var] > exit_thr:
                # Only act when the coefficient name matches an entered term.
                if worst_var in entered_vars:
                    entered_vars.remove(worst_var)
                    removed_lockout.add(worst_var)     # <-- lock it out to stop immediate re-entry
                    # Return the term to the candidate pool so it can be
                    # re-tested under the stricter re-entry threshold.
                    if worst_var not in potential_covariates:
                        potential_covariates.append(worst_var)
                    keep_stepping = True

    # Final regression
    # final_reg = smf.rlm(build_formula(entered_vars),
    #                     M=sm.robust.norms.TukeyBiweight(),
    #                     data=data).fit(scale_est='mad', cov='H1', update_scale=True, conv='coefs')
    final_reg = smf.rlm(
        build_formula(entered_vars),
        data = data,
        M = norms.TukeyBiweight(c=4.685)
    ).fit(
        scale_est = scale.HuberScale(),
        update_scale = True,
        cov = 'H1',
        conv='coefs'
    )

    return final_reg


def get_r2(orig_reg, depvar, data):
    """
    Adjusted R^2 from a WLS refit that reuses a robust fit's final weights.

    The regressors are taken from ``orig_reg.params`` (all terms except
    'Intercept'), the same formula is refit by weighted least squares using
    ``orig_reg.weights`` as observation weights, and the adjusted R^2 of that
    refit is returned.

    Parameters
    ----------
    orig_reg : fitted results exposing ``params`` and ``weights``
    depvar : str
        Left-hand side of the regression formula.
    data : pandas.DataFrame
        Data the original model was fit on.
    """
    regressors = [name for name in orig_reg.params.index if name != 'Intercept']
    formula_str = f'{depvar} ~ ' + ' + '.join(regressors)

    # The design matrix's index identifies the rows that made it into the
    # design; weights and data are aligned to exactly those rows.
    _, design = dmatrices(formula_str, data=data, return_type='dataframe')
    kept_rows = design.index

    refit = smf.wls(
        formula_str,
        data=data.loc[kept_rows],
        weights=orig_reg.weights.loc[kept_rows],
    ).fit(cov='H1')
    return refit.rsquared_adj

In [None]:
# Display labels for regression terms, keyed by the term name as it appears in
# a fitted model's parameter index. Values are LaTeX fragments.
# Each key appears exactly once: duplicate entries for 'stky_wg' and
# 'stky_wg:wg_ndx' were removed, and 'pr_ndx' now reads "Price Idx." to match
# the abbreviation used by every other indexation label.
var_labels = {
    'Intercept': "Constant",
    'rule_g': "Rule: Growth",
    'rule_itr': "Rule: Inert. Taylor",

    # nominal rigidities (detailed price-stickiness / wage-indexation types)
    'stky_pr_calvo': "Sticky Prices (Calvo)",
    'stky_pr_rotemberg': "Sticky Prices (Rotemberg)",
    'stky_pr_other': "Sticky Prices (Other)",
    'stky_wg': "Sticky Wages",
    'pr_ndx': "Price Idx.",
    'wg_ndx': "Wage Idx.",
    'wg_ndx_prprice': "Wage Idx. (Prev. Price)",
    'wg_ndx_mult': "Wage Idx. (Mult. Price)",
    'wg_ndx_other': "Wage Idx. (Other)",

    # interactions (nomrig, detailed types)
    'stky_pr_calvo:pr_ndx': "Sticky Price (Calvo) $\\times$ Price Idx.",
    'stky_pr_rotemberg:pr_ndx': "Sticky Price (Rotemberg) $\\times$ Price Idx.",
    'stky_pr_other:pr_ndx': "Sticky Price (Other) $\\times$ Price Idx.",
    'stky_wg:wg_ndx': "Sticky Wages $\\times$ Wage Idx.",
    'stky_wg:wg_ndx_prprice': "Sticky Wages $\\times$ Wage Idx. (Prev. Price)",
    'stky_wg:wg_ndx_mult': "Sticky Wages $\\times$ Wage Idx. (Mult. Price)",
    'stky_wg:wg_ndx_other': "Sticky Wages $\\times$ Wage Idx. (Other)",

    # nominal rigidities (aggregate price stickiness and its interactions)
    'stky_pr': "Sticky Prices",
    'stky_pr:pr_ndx': "Sticky Prices $\\times$ Price Idx.",
    'stky_pr:not_pr_ndx': "Sticky Prices $\\times$ Not Price Idx.",
    'stky_wg:not_wg_ndx': "Sticky Wages $\\times$ Not Wage Idx.",
    'stky_pr:wg_ndx': "Sticky Prices $\\times$ Wage Idx.",
    'stky_wg:pr_ndx': "Sticky Wages $\\times$ Price Idx.",
    'stky_pr:not_wg_ndx': "Sticky Prices $\\times$ Not Wage Idx.",
    'stky_wg:not_pr_ndx': "Sticky Wages $\\times$ Not Price Idx.",

    # real rigidities
    'wlth': "Wealth Channel",
    'ntwrth': "Net Worth Channel",
    'bnkcrdit': "Bank Credit Channel",
    'open': "Open Economy",
    'learning': "Learning Channel",

    'wlth:ntwrth': "Wealth $\\times$ Net Worth",
    'wlth:bnkcrdit': "Wealth $\\times$ Bank Credit",
    'wlth:open': "Wealth $\\times$ Open Economy",
    'wlth:learning': "Wealth $\\times$ Learning",
    'ntwrth:bnkcrdit': "Net Worth $\\times$ Bank Credit",
    'ntwrth:open': "Net Worth $\\times$ Open Economy",
    'ntwrth:learning': "Net Worth $\\times$ Learning",
    'bnkcrdit:open': "Bank Credit $\\times$ Open Economy",
    'bnkcrdit:learning': "Bank Credit $\\times$ Learning",
    'open:learning': "Open Economy $\\times$ Learning",

    # non-modeling vars
    'estimated': "Estimated",
    'est_early': "Early Data",
    'est_late': "Late Data",
    'vint_early': "Early Vintage",
    'vint_mid': "Mid Vintage",
    'vint_late': "Late Vintage",
    'cb_authors_ext': "Central Bank Author",
    'ln_neq': "$\\log(\\text{Num. of Eqs.})$",

    # interactions (nonmod)
    'cb_authors_ext:estimated': "Central Bank Author $\\times$ Estimated",
    'cb_authors_ext:ln_neq': "Central Bank Author $\\times$ $\\log($Num. of Eqs.$)$",
    'cb_authors_ext:vint_early': "Central Bank Author $\\times$ Early Vintage",
    'cb_authors_ext:vint_mid': "Central Bank Author $\\times$ Mid Vintage",
    'cb_authors_ext:vint_late': "Central Bank Author $\\times$ Late Vintage",
    'cb_authors_ext:est_early': "Central Bank Author $\\times$ Early Data",
    'cb_authors_ext:est_late': "Central Bank Author $\\times$ Late Data",

    'estimated:ln_neq': "Estimated $\\times$ $\\log($Num. of Eqs.$)$",
    'estimated:vint_early': "Estimated $\\times$ Early Vintage",
    'estimated:vint_mid': "Estimated $\\times$ Mid Vintage",
    'estimated:vint_late': "Estimated $\\times$ Late Vintage",
    'estimated:est_early': "Estimated $\\times$ Early Data",
    'estimated:est_late': "Estimated $\\times$ Late Data",

    'ln_neq:vint_early': "$\\log($Num. of Eqs.$)$ $\\times$ Early Vintage",
    'ln_neq:vint_mid': "$\\log($Num. of Eqs.$)$ $\\times$ Mid Vintage",
    'ln_neq:vint_late': "$\\log($Num. of Eqs.$)$ $\\times$ Late Vintage",
    'ln_neq:est_early': "$\\log($Num. of Eqs.$)$ $\\times$ Early Data",
    'ln_neq:est_late': "$\\log($Num. of Eqs.$)$ $\\times$ Late Data",

    'vint_early:est_early': "Early Vintage $\\times$ Early Data",
    'vint_early:est_late': "Early Vintage $\\times$ Late Data",
    'vint_mid:est_early': "Mid Vintage $\\times$ Early Data",
    'vint_mid:est_late': "Mid Vintage $\\times$ Late Data",
    'vint_late:est_early': "Late Vintage $\\times$ Early Data",
    'vint_late:est_late': "Late Vintage $\\times$ Late Data"
}



# Unused earlier definition, kept for reference only:
# properties = ['estimated', 'est_early', 'est_late', 'vint_mid', 'bnkcrdit', 'ntwrth', 'wlth',
#               'open', 'other_channel', 'cb_authors_ext', 'ln_neq']


# Display labels for the dependent-variable stems used in table headers.
depvar_labels = {
    'IScurve': 'IS Curve',
    'infl_per_rr': 'Pi Curve',
    'sacratio': 'Sacrifice Ratio',
}


def significance_stars(pval):
    """
    Map a p-value to its star marker: '***' below 0.01, '**' below 0.05,
    '*' below 0.10, and an empty string otherwise.
    """
    # Cutoffs are checked from strictest to loosest; the first match wins.
    for cutoff, marker in ((0.01, "***"), (0.05, "**"), (0.10, "*")):
        if pval < cutoff:
            return marker
    return ""


def format_coef(param, pval):
    """
    Render a coefficient to three decimals, followed by the significance
    stars implied by its p-value.
    """
    rendered = format(param, ".3f")
    return rendered + significance_stars(pval)


def format_se(std_err):
    """
    Render a standard error to three decimals, wrapped in parentheses.
    """
    return "(" + format(std_err, ".3f") + ")"


def generate_latex_tables(stepwise_regs, r2_values, depvars, horizons, var_labels, depvar_labels, outfile=None):
    """
    Build one LaTeX table per dependent variable, with one column per horizon.

    Each regressor takes two rows: the coefficient (with significance stars)
    next to a multirow label cell, then the standard error in parentheses.
    Cells are blank where a term is absent from a horizon's model. The table
    ends with an observations row and an R^2 row. The generated code uses the
    LaTeX commands multirow and resizebox.

    Parameters
    ----------
    stepwise_regs : dict
        Fitted results keyed by f"{depvar}{horizon}"; each must expose
        ``params``, ``pvalues``, ``bse`` and ``nobs``.
    r2_values : dict
        R^2 values under the same keys; missing keys give a blank cell.
    depvars : list of str
        Dependent-variable stems; a stem with no fitted model is skipped.
    horizons : list
        Horizons, in column order.
    var_labels : dict
        Term name -> display label; unknown terms fall back to the raw name.
    depvar_labels : dict
        Stem -> display label; unknown stems fall back to the raw stem.
    outfile : str or None
        Path to write all tables to. When None the tables are printed.

    Returns
    -------
    None
    """
    latex_pieces = []
    for depvar in depvars:
        # Models available for this dependent variable, keyed by horizon.
        these_models = {}
        for h in horizons:
            key = f"{depvar}{h}"
            if key in stepwise_regs:
                these_models[h] = stepwise_regs[key]

        if not these_models:
            continue

        # Union of term names across horizons: 'Intercept' first, the rest
        # alphabetical.
        varset = set()
        for model in these_models.values():
            varset.update(model.params.index)
        varlist = sorted(varset, key=lambda v: (v != 'Intercept', v))

        # Fall back to the raw stem (with underscores escaped) when no label
        # is supplied, as the master-table generator does.
        depvar_label = depvar_labels.get(depvar, depvar).replace("_", "\\_")

        latex_str = []
        latex_str.append("\\begin{table}[h!]")
        latex_str.append("\\centering")
        latex_str.append("\\resizebox{0.8\\textwidth}{!}{%")
        latex_str.append("\\begin{tabular}{l" + "c" * len(horizons) + "}")
        latex_str.append("\\hline")

        # Title row spanning the label column plus every horizon column.
        latex_str.append(
            f"\\multicolumn{{{len(horizons)+1}}}{{l}}{{\\textbf{{Dependent Variable: {depvar_label}}}}} \\\\"
        )
        latex_str.append("\\hline")

        # Horizon header. This is a plain string, so braces are single.
        horizon_header = "\\textbf{Horizon} & " + " & ".join(str(h) for h in horizons) + " \\\\"
        latex_str.append(horizon_header)
        latex_str.append("\\hline")

        # Two rows per term: coefficients, then standard errors.
        for var in varlist:
            coeffs = []
            std_errs = []
            for h in horizons:
                model = these_models.get(h)
                if model is not None and var in model.params.index:
                    coeffs.append(format_coef(model.params[var], model.pvalues[var]))
                    std_errs.append(format_se(model.bse[var]))
                else:
                    coeffs.append("")
                    std_errs.append("")

            # Escape underscores so raw term names are valid LaTeX text.
            varname_latex = var_labels.get(var, var).replace("_", "\\_")

            latex_str.append(f"\\multirow{{2}}{{*}}{{{varname_latex}}} & " + " & ".join(coeffs) + " \\\\")
            latex_str.append(" & " + " & ".join(std_errs) + " \\\\")

        latex_str.append("\\hline")

        # Observations row.
        nobs_list = []
        for h in horizons:
            model = these_models.get(h)
            nobs_list.append("" if model is None else str(int(model.nobs)))
        latex_str.append("Observations & " + " & ".join(nobs_list) + " \\\\")

        # R^2 row, from the caller-supplied values (three decimals, matching
        # the coefficient formatting).
        r2_list = []
        for h in horizons:
            r2 = r2_values.get(f'{depvar}{h}')
            r2_list.append("" if r2 is None else f"{r2:.3f}")
        latex_str.append("$R^2$ from Final Weights & " + " & ".join(r2_list) + " \\\\")

        latex_str.append("\\hline")
        latex_str.append("\\end{tabular}")
        latex_str.append("}")  # closes \resizebox
        latex_str.append(f"\\caption{{Stepwise RLM results for {depvar_label}}}")
        latex_str.append("\\end{table}")
        latex_str.append("\\newpage")

        latex_pieces.append("\n".join(latex_str))

    all_tables = "\n\n".join(latex_pieces)
    if outfile is not None:
        with open(outfile, "w", encoding="utf-8") as f:
            f.write(all_tables)
        print(f"Saved all tables to {outfile}")
    else:
        print(all_tables)
        print("\n\n")

In [6]:
def generate_latex_master_table(stepwise_regs, r2_values, depvars, horizons,
                                var_labels, depvar_labels, outfile=None,
                                table_caption="Stepwise RLM results (all models)"):
    """
    Build ONE LaTeX table with a column for each (depvar, horizon) model.

    Rows are the union of terms across all models ('Intercept' first, then
    alphabetical). Each term has two rows: coefficients, then standard
    errors. Cells are blank where a term is absent from a model. The table
    ends with an observations row and an R^2 row.

    Parameters
    ----------
    stepwise_regs : dict
        Fitted results keyed by f"{depvar}{horizon}"; each must expose
        ``params``, ``pvalues`` and ``bse``, and normally ``nobs``.
    r2_values : dict
        R^2 values under the same keys; missing keys give a blank cell.
    depvars : list of str
        Dependent-variable stems, in column-group order.
    horizons : list
        Horizons, in column order within each group.
    var_labels, depvar_labels : dict
        Display labels; unknown names fall back to the raw name. Underscores
        are escaped for LaTeX.
    outfile : str or None
        Path to write the table to. When None the table is printed.
    table_caption : str
        Caption placed under the table.

    Returns
    -------
    None. If no requested model exists in ``stepwise_regs`` a message is
    printed and nothing is written.

    Requires the module-level helpers ``format_coef(beta, pval)`` and
    ``format_se(stderr)``.
    """

    # 1) Columns that actually exist, in depvar-then-horizon order.
    columns = []
    for depvar in depvars:
        for h in horizons:
            key = f"{depvar}{h}"
            if key in stepwise_regs:
                columns.append((depvar, h, key))

    if not columns:
        print("No models found in stepwise_regs for the provided depvars/horizons.")
        return

    # 2) Union of term names across all models; Intercept on top.
    varset = set()
    for _, _, key in columns:
        varset |= set(stepwise_regs[key].params.index)
    varlist = sorted(varset, key=lambda v: (v != 'Intercept', v))

    # 3) Preamble.
    latex = []
    latex.append("\\begin{table}[h!]")
    latex.append("\\centering")
    latex.append("\\resizebox{0.95\\textwidth}{!}{%")
    latex.append("\\setlength{\\tabcolsep}{8pt}%")
    latex.append("\\begin{tabular}{l" + "c" * len(columns) + "}")
    latex.append("\\hline")

    # 4) Top header: one \multicolumn per dependent variable, spanning the
    #    horizons present for it. A plain dict keeps insertion order.
    depvar_to_count = {}
    for depvar, _, _ in columns:
        depvar_to_count[depvar] = depvar_to_count.get(depvar, 0) + 1

    header_cells = ["\\textbf{Variable}"]
    for depvar, count in depvar_to_count.items():
        label = depvar_labels.get(depvar, depvar).replace("_", "\\_")
        header_cells.append(f"\\multicolumn{{{count}}}{{c}}{{\\textbf{{{label}}}}}")
    latex.append(" & ".join(header_cells) + " \\\\")
    latex.append("\\hline")

    # 5) Second header: horizon under each column. `columns` was built by
    #    iterating `horizons` in order, so no re-sorting is needed.
    subhdr = ["\\textbf{Horizon}"] + [str(h) for _, h, _ in columns]
    latex.append(" & ".join(subhdr) + " \\\\")
    latex.append("\\hline")

    # 6) Body: coefficient row and standard-error row per term.
    for var in varlist:
        coef_cells = [var_labels.get(var, var).replace("_", "\\_")]
        se_cells = [""]  # empty first cell under the term label
        for _, _, key in columns:
            model = stepwise_regs[key]
            if var not in model.params.index:
                coef_cells.append("")
                se_cells.append("")
                continue

            beta = model.params[var]
            pval = model.pvalues.get(var, None)
            se = model.bse.get(var, None)
            # A missing p-value or standard error yields an unstarred
            # coefficient / blank cell instead of a formatting error.
            coef_cells.append(f"{beta:.3f}" if pval is None else format_coef(beta, pval))
            se_cells.append("" if se is None else format_se(se))

        latex.append(" & ".join(coef_cells) + " \\\\")
        latex.append(" & ".join(se_cells) + " \\\\")

    latex.append("\\hline")

    # 7) Observations row; blank when a result object has no `nobs`.
    obs_cells = ["Observations"]
    for _, _, key in columns:
        nobs = getattr(stepwise_regs[key], "nobs", None)
        obs_cells.append("" if nobs is None else str(int(nobs)))
    latex.append(" & ".join(obs_cells) + " \\\\")

    # 8) R^2 row from the caller-supplied values.
    r2_cells = ["$R^2$ from Final Weights"]
    for _, _, key in columns:
        r2 = r2_values.get(key, None)
        r2_cells.append("" if r2 is None else f"{r2:.3f}")
    latex.append(" & ".join(r2_cells) + " \\\\")

    latex.append("\\hline")
    latex.append("\\end{tabular}")
    latex.append("}% end resizebox")
    latex.append(f"\\caption{{{table_caption}}}")
    latex.append("\\end{table}")
    latex.append("\\newpage")

    table_code = "\n".join(latex)

    if outfile is not None:
        with open(outfile, "w", encoding="utf-8") as f:
            f.write(table_code)
        print(f"Saved master table to {outfile}")
    else:
        print(table_code)

# Actual Running of Code

## Variables from only Tables 7 (nominal rigidities), 8 (real rigidities), and 10 (nonmodel attributes)

### Nominal Rigidities

In [7]:
# Stepwise selection over the Table 7 nominal-rigidity candidates: one model
# per (dependent variable, horizon), stored under keys such as 'IScurve20'.
stepwise_nomrig, r2_nomrig = {}, {}
for depvar in depvars:
    for horizon in horizons:
        reg_key = f'{depvar}{horizon}'
        reg_fit = stepwise_reg(reg_key, nomrig_vars, df, alphas)
        stepwise_nomrig[reg_key] = reg_fit
        r2_nomrig[reg_key] = get_r2(reg_fit, reg_key, df)
        print(f'Completed nominal rigidities stepwise regressions for {reg_key}!')

# Single combined table; generate_latex_tables takes the same arguments and
# writes one table per dependent variable instead.
generate_latex_master_table(
    stepwise_nomrig, r2_nomrig, depvars, horizons, var_labels, depvar_labels,
    '../output/stepwise_regressions/nominal_rigidities.txt'
)

Completed nominal rigidities stepwise regressions for IScurve20!
Completed nominal rigidities stepwise regressions for infl_per_rr20!
Completed nominal rigidities stepwise regressions for sacratio20!
Saved master table to ../output/stepwise_regressions/nominal_rigidities.txt


### Real Rigidities

In [8]:
# Stepwise selection over the Table 8 real-rigidity candidates: one model per
# (dependent variable, horizon), stored under keys such as 'IScurve20'.
stepwise_realrig, r2_realrig = {}, {}
for depvar in depvars:
    for horizon in horizons:
        reg_key = f'{depvar}{horizon}'
        reg_fit = stepwise_reg(reg_key, realrig_vars, df, alphas)
        stepwise_realrig[reg_key] = reg_fit
        r2_realrig[reg_key] = get_r2(reg_fit, reg_key, df)
        print(f'Completed real rigidities stepwise regressions for {reg_key}!')

# Single combined table; generate_latex_tables takes the same arguments and
# writes one table per dependent variable instead.
generate_latex_master_table(
    stepwise_realrig, r2_realrig, depvars, horizons, var_labels, depvar_labels,
    '../output/stepwise_regressions/real_rigidities.txt'
)

Completed real rigidities stepwise regressions for IScurve20!
Completed real rigidities stepwise regressions for infl_per_rr20!
Completed real rigidities stepwise regressions for sacratio20!
Saved master table to ../output/stepwise_regressions/real_rigidities.txt


### Nonmodel Attributes

In [9]:
# Stepwise selection over the Table 10 non-model-attribute candidates: one
# model per (dependent variable, horizon), keyed e.g. 'IScurve20'.
stepwise_nonmod, r2_nonmod = {}, {}
for depvar in depvars:
    for horizon in horizons:
        reg_key = f'{depvar}{horizon}'
        reg_fit = stepwise_reg(reg_key, nonmod_vars, df, alphas)
        stepwise_nonmod[reg_key] = reg_fit
        r2_nonmod[reg_key] = get_r2(reg_fit, reg_key, df)
        print(f'Completed nonmodel attributes stepwise regressions for {reg_key}!')

# Single combined table; generate_latex_tables takes the same arguments and
# writes one table per dependent variable instead.
generate_latex_master_table(
    stepwise_nonmod, r2_nonmod, depvars, horizons, var_labels, depvar_labels,
    '../output/stepwise_regressions/nonmod_attributes.txt'
)

Completed nonmodel attributes stepwise regressions for IScurve20!
Completed nonmodel attributes stepwise regressions for infl_per_rr20!
Completed nonmodel attributes stepwise regressions for sacratio20!
Saved master table to ../output/stepwise_regressions/nonmod_attributes.txt


### Everything!

In [10]:
# Stepwise selection over the pooled Table 7 + 8 + 10 candidates (all_vars):
# one model per (dependent variable, horizon), keyed e.g. 'IScurve20'.
stepwise_all, r2_all = {}, {}
for depvar in depvars:
    for horizon in horizons:
        reg_key = f'{depvar}{horizon}'
        reg_fit = stepwise_reg(reg_key, all_vars, df, alphas)
        stepwise_all[reg_key] = reg_fit
        r2_all[reg_key] = get_r2(reg_fit, reg_key, df)
        print(f'Completed all variables stepwise regressions for {reg_key}!')

# Single combined table; generate_latex_tables takes the same arguments and
# writes one table per dependent variable instead.
generate_latex_master_table(
    stepwise_all, r2_all, depvars, horizons, var_labels, depvar_labels,
    '../output/stepwise_regressions/all.txt'
)

Completed all variables stepwise regressions for IScurve20!
Completed all variables stepwise regressions for infl_per_rr20!
Completed all variables stepwise regressions for sacratio20!
Saved master table to ../output/stepwise_regressions/all.txt


## All variables and combinations in Nominal Rigidities, Real Rigidities, and Nonmodel Attributes

### Nominal Rigidities

In [11]:
# Stepwise selection over the full nominal-rigidity pool (all_nomrig_vars).
# Note: this rebinds stepwise_nomrig / r2_nomrig to new dictionaries.
stepwise_nomrig, r2_nomrig = {}, {}
for depvar in depvars:
    for horizon in horizons:
        reg_key = f'{depvar}{horizon}'
        reg_fit = stepwise_reg(reg_key, all_nomrig_vars, df, alphas)
        stepwise_nomrig[reg_key] = reg_fit
        r2_nomrig[reg_key] = get_r2(reg_fit, reg_key, df)
        print(f'Completed nominal rigidities stepwise regressions for {reg_key}!')

# Single combined table; generate_latex_tables takes the same arguments and
# writes one table per dependent variable instead.
generate_latex_master_table(
    stepwise_nomrig, r2_nomrig, depvars, horizons, var_labels, depvar_labels,
    '../output/stepwise_regressions/all_nominal_rigidities.txt'
)

Completed nominal rigidities stepwise regressions for IScurve20!
Completed nominal rigidities stepwise regressions for infl_per_rr20!
Completed nominal rigidities stepwise regressions for sacratio20!
Saved master table to ../output/stepwise_regressions/all_nominal_rigidities.txt


### Real Rigidities

In [12]:
# Real-rigidities specification: run the stepwise selection with the
# candidate regressors in `all_realrig_vars`, once per (dependent variable,
# horizon) pair. Both result dicts are keyed by the outcome column name.
stepwise_realrig, r2_realrig = {}, {}
for depvar in depvars:
    for horizon in horizons:
        outcome_col = f'{depvar}{horizon}'
        stepwise_fit = stepwise_reg(outcome_col, all_realrig_vars, df, alphas)
        stepwise_realrig[outcome_col] = stepwise_fit
        r2_realrig[outcome_col] = get_r2(stepwise_fit, outcome_col, df)
        print(f'Completed real rigidities stepwise regressions for {depvar}{horizon}!')

# Export every fitted specification into one master table on disk.
generate_latex_master_table(
    stepwise_realrig,
    r2_realrig,
    depvars,
    horizons,
    var_labels,
    depvar_labels,
    '../output/stepwise_regressions/all_real_rigidities.txt',
)
# Disabled alternative exporter, kept for reference (note: it targets a different file name):
# generate_latex_tables(stepwise_realrig, r2_realrig, depvars, horizons, var_labels, depvar_labels, '../output/stepwise_regressions/real_rigidities.txt')

Completed real rigidities stepwise regressions for IScurve20!
Completed real rigidities stepwise regressions for infl_per_rr20!
Completed real rigidities stepwise regressions for sacratio20!
Saved master table to ../output/stepwise_regressions/all_real_rigidities.txt


### Nonmodel Attributes

In [13]:
# Non-model-attributes specification: run the stepwise selection with the
# candidate regressors in `all_nonmod_vars`, once per (dependent variable,
# horizon) pair. Both result dicts are keyed by the outcome column name.
# NOTE(review): `stepwise_nonmod` / `r2_nonmod` are also referenced in an
# earlier section of the notebook; if an earlier cell binds them, this cell
# replaces those results -- confirm that is intended.
stepwise_nonmod, r2_nonmod = {}, {}
for depvar in depvars:
    for horizon in horizons:
        outcome_col = f'{depvar}{horizon}'
        stepwise_fit = stepwise_reg(outcome_col, all_nonmod_vars, df, alphas)
        stepwise_nonmod[outcome_col] = stepwise_fit
        r2_nonmod[outcome_col] = get_r2(stepwise_fit, outcome_col, df)
        print(f'Completed nonmodel attributes stepwise regressions for {depvar}{horizon}!')

# Export every fitted specification into one master table on disk.
generate_latex_master_table(
    stepwise_nonmod,
    r2_nonmod,
    depvars,
    horizons,
    var_labels,
    depvar_labels,
    '../output/stepwise_regressions/all_nonmod_attributes.txt',
)
# Disabled alternative exporter, kept for reference (note: it targets a different file name):
# generate_latex_tables(stepwise_nonmod, r2_nonmod, depvars, horizons, var_labels, depvar_labels, '../output/stepwise_regressions/nonmod_attributes.txt')

Completed nonmodel attributes stepwise regressions for IScurve20!
Completed nonmodel attributes stepwise regressions for infl_per_rr20!
Completed nonmodel attributes stepwise regressions for sacratio20!
Saved master table to ../output/stepwise_regressions/all_nonmod_attributes.txt


### Everything!

In [14]:
# "Everything" specification over `all_all_vars`: run the stepwise selection
# once per (dependent variable, horizon) pair. Both result dicts are keyed by
# the outcome column name.
# NOTE: this rebinds `stepwise_all` and `r2_all`, so the results produced by
# the earlier "Everything!" cell (the one using `all_vars`) are no longer
# reachable through these names after this cell runs.
stepwise_all, r2_all = {}, {}
for depvar in depvars:
    for horizon in horizons:
        outcome_col = f'{depvar}{horizon}'
        stepwise_fit = stepwise_reg(outcome_col, all_all_vars, df, alphas)
        stepwise_all[outcome_col] = stepwise_fit
        r2_all[outcome_col] = get_r2(stepwise_fit, outcome_col, df)
        print(f'Completed all variables stepwise regressions for {depvar}{horizon}!')

# Export every fitted specification into one master table on disk.
generate_latex_master_table(
    stepwise_all,
    r2_all,
    depvars,
    horizons,
    var_labels,
    depvar_labels,
    '../output/stepwise_regressions/all_all.txt',
)
# Disabled alternative exporter, kept for reference (note: it targets a different file name):
# generate_latex_tables(stepwise_all, r2_all, depvars, horizons, var_labels, depvar_labels, '../output/stepwise_regressions/all.txt')

Completed all variables stepwise regressions for IScurve20!
Completed all variables stepwise regressions for infl_per_rr20!
Completed all variables stepwise regressions for sacratio20!
Saved master table to ../output/stepwise_regressions/all_all.txt


## Old Simple vs Detailed

In [15]:
# stepwise_regs_smp = {}
# r2_values_smp = {}
# for depvar in depvars:
#     for horizon in horizons:
#         stepwise_regs_smp[f'{depvar}{horizon}'] = stepwise_reg(f'{depvar}{horizon}', indepvars_simple, df, alphas)
#         r2_values_smp[f'{depvar}{horizon}'] = get_r2(stepwise_regs_smp[f'{depvar}{horizon}'], f'{depvar}{horizon}', df)
#         print(f'Completed simple stepwise regressions for {depvar}{horizon}!')


# stepwise_regs_dtd = {}
# r2_values_dtd = {}
# for depvar in depvars:
#     for horizon in horizons:
#         stepwise_regs_dtd[f'{depvar}{horizon}'] = stepwise_reg(f'{depvar}{horizon}', indepvars_detailed, df, alphas)
#         r2_values_dtd[f'{depvar}{horizon}'] = get_r2(stepwise_regs_dtd[f'{depvar}{horizon}'], f'{depvar}{horizon}', df)
#         print(f'Completed detailed stepwise regressions for {depvar}{horizon}!')

In [16]:
# generate_latex_tables(stepwise_regs_dtd, r2_values_dtd, depvars, horizons, var_labels, depvar_labels, '../output/stepwise_regressions/stepwise_together_detailed.txt')

In [17]:
# generate_latex_tables(stepwise_regs_smp, r2_values_smp, depvars, horizons, var_labels, depvar_labels, '../output/stepwise_regressions/stepwise_together_simple.txt')

In [18]:
# stepwise_regs_frics_smp = {}
# r2_values_frics_smp = {}
# for depvar in depvars:
#     for horizon in horizons:
#         stepwise_regs_frics_smp[f'{depvar}{horizon}'] = stepwise_reg(f'{depvar}{horizon}', frictions_simple, df, alphas)
#         r2_values_frics_smp[f'{depvar}{horizon}'] = get_r2(stepwise_regs_frics_smp[f'{depvar}{horizon}'], f'{depvar}{horizon}', df)
#         print(f'Completed frictions simple stepwise regressions for {depvar}{horizon}!')

# generate_latex_tables(stepwise_regs_frics_smp, r2_values_frics_smp, depvars, horizons, var_labels, depvar_labels, '../output/stepwise_regressions/stepwise_frictions_simple.txt')


# stepwise_regs_frics_dtd = {}
# r2_values_frics_dtd = {}
# for depvar in depvars:
#     for horizon in horizons:
#         stepwise_regs_frics_dtd[f'{depvar}{horizon}'] = stepwise_reg(f'{depvar}{horizon}', frictions_detailed, df, alphas)
#         r2_values_frics_dtd[f'{depvar}{horizon}'] = get_r2(stepwise_regs_frics_dtd[f'{depvar}{horizon}'], f'{depvar}{horizon}', df)
#         print(f'Completed frictions detailed stepwise regressions for {depvar}{horizon}!')

# generate_latex_tables(stepwise_regs_frics_dtd, r2_values_frics_dtd, depvars, horizons, var_labels, depvar_labels, '../output/stepwise_regressions/stepwise_frictions_detailed.txt')

In [19]:
# stepwise_regs_props_smp = {}
# r2_values_props_smp = {}
# for depvar in depvars:
#     for horizon in horizons:
#         stepwise_regs_props_smp[f'{depvar}{horizon}'] = stepwise_reg(f'{depvar}{horizon}', properties, df, alphas)
#         r2_values_props_smp[f'{depvar}{horizon}'] = get_r2(stepwise_regs_props_smp[f'{depvar}{horizon}'], f'{depvar}{horizon}', df)
#         print(f'Completed properties stepwise regressions for {depvar}{horizon}!')

# generate_latex_tables(stepwise_regs_props_smp, r2_values_props_smp, depvars, horizons, var_labels, depvar_labels, '../output/stepwise_regressions/stepwise_properties.txt')

In [20]:
# 
# notebook_path = "../code/MMB_robreg.ipynb"  # replace with your actual filename

# # Load the notebook
# with open(notebook_path, 'r', encoding='utf-8') as f:
#     nb_node = nbformat.read(f, as_version=4)

# # Convert to Python script
# exporter = PythonExporter()
# source, _ = exporter.from_notebook_node(nb_node)

# # Save as .py file
# py_path = notebook_path.replace(".ipynb", ".py")
# py_path = py_path.replace("MMB_robreg", "do_not_change_this_file_plz_make_all_changes_to_ipynb_version_MMB_robreg")
# with open(py_path, 'w', encoding='utf-8') as f:
#     f.write(source)

# print(f"Notebook exported as {py_path}")
# 

