# Load necessary packages

In [7]:
# import of packages
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer
from random import choices  # To randomly choose clusters
from sklearn.utils import resample 


# Import raw data

In [8]:
# import data
baseline = pd.read_stata('data/baseline.dta')
bok_inflation = pd.read_stata('data/BOK_inflation.dta')
cleanpricedata_y1y2 = pd.read_stata('data/cleanPriceData_Y1Y2.dta')
intensity_obs_short = pd.read_stata('data/intensity_obs_short.dta')
lrfu_select_dataset = pd.read_stata('data/LRFU_select_dataset.dta')
ms1ms2_pooled = pd.read_stata('data/MS1MS2_pooled.dta')
repayment_datay1 = pd.read_stata('data/repayment_dataY1.dta')

# Creating the Tables

## Create Table 1

We start by cleaning the ms1ms2_pooled and baseline data.

In [9]:
# clean ms1ms2_pooled (drop if MS !=2, keep columns oafid and treatMS1MS2, group by oafid and take mean and rename)
ms1ms2_pooled_tab1 = ms1ms2_pooled[ms1ms2_pooled['MS']==2]
ms1ms2_pooled_tab1 = ms1ms2_pooled_tab1[['oafid', 'treatMS1MS2']]
ms1ms2_pooled_tab1 = ms1ms2_pooled_tab1.groupby('oafid', as_index=False).mean()
ms1ms2_pooled_tab1.rename(columns={'treatMS1MS2': 'treat13'}, inplace=True)
print(ms1ms2_pooled_tab1.shape[0]) # checking we have the right number of observations as described in the original article

1019


For the baseline data we note that some of the columns have already been renamed with the suffix `_base` however and thus need to account for this. We however, assume that the data have not been altered in any other way compared to what the do in the `do` file.

In [10]:
# clean baseline data (the stata code indicates that the variables columns 'businessprofitmonth' and 'delta' should be kept, however they have already been renamed to 'businessprofitmonth_base' and 'delta_base')
base_cols = ['oafid', 'logtotcons_base', 'male', 'num_adults', 'num_schoolchildren', 'finished_primary',
                   'finished_secondary', 'cropland', 'num_rooms', 'schoolfees', 'totcons_base', 'logpercapcons_base',
                   'total_cash_savings_base', 'total_cash_savings_trimmed', 'has_savings_acct', 'taken_bank_loan',
                   'taken_informal_loan', 'liquidWealth', 'wagepay', 'businessprofitmonth_base', 'price_avg_diff_pct',
                   'price_expect_diff_pct', 'harvest2011', 'netrevenue2011', 'netseller2011', 'autarkic2011',
                   'maizelostpct2011', 'harvest2012', 'correct_interest', 'digit_recall', 'maizegiver', 'delta_base', 'treatment']
baseline_clean = baseline[base_cols].copy()

# rename columns
baseline_clean.columns = [col + '_base' if not col.endswith('_base') and col != 'oafid' and col != 'treatment' else col for col in baseline_clean.columns]
baseline_clean.rename(columns={'treatment': 'treatment2012'}, inplace=True)

# generate treat12 as bool for treatment and control in 2012
baseline_clean['treat12'] = baseline_clean['treatment2012'].apply(lambda x: x in ['T1', 'T2'])
baseline_clean.loc[baseline_clean['treatment2012'] == '', 'treat12'] = np.nan

  baseline_clean.loc[baseline_clean['treatment2012'] == '', 'treat12'] = np.nan


Now we can merge the two datasets.

In [11]:
# merge baseline_clean and ms1ms2_pooled_clean on oafid
base_ms1ms2_pool = pd.merge(baseline_clean, ms1ms2_pooled_tab1, on='oafid', how='left')

### Create Table 1.

In [12]:
# create table 1
# copy in case we need this later
df_tab1 = base_ms1ms2_pool.copy()
df_tab1['schoolfees_base'] = df_tab1['schoolfees_base']*1000

# var list for table 1
vars_list = [
    "male_base", "num_adults_base", "num_schoolchildren_base", "finished_primary_base",
    "finished_secondary_base", "cropland_base", "num_rooms_base", "schoolfees_base",
    "totcons_base", "logpercapcons_base", "total_cash_savings_base",
    "total_cash_savings_trimmed_base", "has_savings_acct_base", "taken_bank_loan_base",
    "taken_informal_loan_base", "liquidWealth_base", "wagepay_base",
    "businessprofitmonth_base", "price_avg_diff_pct_base",
    "price_expect_diff_pct_base", "harvest2011_base", "netrevenue2011_base",
    "netseller2011_base", "autarkic2011_base", "maizelostpct2011_base",
    "harvest2012_base", "correct_interest_base", "digit_recall_base",
    "maizegiver_base"
]

renaming = {
    "male_base": "Male",
    "num_adults_base": "Number of adults",
    "num_schoolchildren_base": "Children in school",
    "finished_primary_base": "Finished primary school",
    "finished_secondary_base": "Finished secondary school",
    "cropland_base": "Total cropland (acres)",
    "num_rooms_base": "Number of rooms in household",
    "schoolfees_base": "Total school fees",
    "totcons_base": "Average monthly consumption (Ksh)",
    "logpercapcons_base": "Average monthly consumption/capita (log)",
    "total_cash_savings_base": "Total cash savings (Ksh)",
    "total_cash_savings_trimmed_base": "Total cash savings (trim)",
    "has_savings_acct_base": "Has bank savings acct",
    "taken_bank_loan_base": "Taken bank loan",
    "taken_informal_loan_base": "Taken informal loan",
    "liquidWealth_base": "Liquid wealth (Ksh)",
    "wagepay_base": "Off-farm wages (Ksh)",
    "businessprofitmonth_base": "Business profit (Ksh)",
    "price_avg_diff_pct_base": "Avg $\%\Delta$ price Sep-Jun",
    "price_expect_diff_pct_base": "Expect $\%\Delta$ price Sep12-Jun13",
    "harvest2011_base": "2011 LR harvest (bags)",
    "netrevenue2011_base": "Net revenue 2011 (Ksh)",
    "netseller2011_base": "Net seller 2011",
    "autarkic2011_base": "Autarkic 2011",
    "maizelostpct2011_base": "\% maize lost 2011",
    "harvest2012_base": "2012 LR harvest (bags)",
    "correct_interest_base": "Calculated interest correctly",
    "digit_recall_base": "Digit span recall",
    "maizegiver_base": "Maize giver"
}

# function to perform t-tests
def t_test_by_group(df, var, group_var='treat12'):
    group1 = df[df[group_var] == 0][var].dropna()
    group2 = df[df[group_var] == 1][var].dropna()
    t_stat, p_val = stats.ttest_ind(group1, group2, equal_var=False)
    return group1.mean(), group2.mean(), len(group1) + len(group2), t_stat, p_val

# applying t-tests and collecting results
results = []
for var in vars_list:
    control_mean, treat_mean, obs, t_stat, p_val = t_test_by_group(df_tab1, var)
    std_diff = (treat_mean - control_mean) / np.sqrt(((len(df_tab1[df_tab1['treat12'] == 0][var]) - 1) * np.std(df_tab1[df_tab1['treat12'] == 0][var], ddof=1) ** 2 + (len(df_tab1[df_tab1['treat12'] == 1][var]) - 1) * np.std(df_tab1[df_tab1['treat12'] == 1][var], ddof=1) ** 2) / (len(df_tab1[df_tab1['treat12'] == 0][var]) + len(df_tab1[df_tab1['treat12'] == 1][var]) - 2))
    results.append([var, treat_mean, control_mean, obs, std_diff, p_val])

# convert results to a df to use pandas output to latex
results_df = pd.DataFrame(results, columns=['Variable', 'Treat Mean', 'Control Mean', 'Observations', 'Std Diff', 'P-value'])
results_df['Variable'] = results_df['Variable'].map(renaming)
results_df = results_df.rename(columns={
    'Variable':'Baseline characteristic', 
    'Treat Mean':'Treat', 
    'Control Mean':'Control', 
    'Observations':'Obs', 
    'Std Diff':'Std diff', 
    'P-value':'P-val'})

latex_table1 = results_df.to_latex(index=False, float_format="%.3f")
print(latex_table1)

\begin{tabular}{lrrrrr}
\toprule
Baseline characteristic & Treat & Control & Obs & Std diff & P-val \\
\midrule
Male & 0.296 & 0.334 & 1589 & -0.083 & 0.109 \\
Number of adults & 3.004 & 3.196 & 1510 & -0.099 & 0.067 \\
Children in school & 2.998 & 3.072 & 1589 & -0.038 & 0.454 \\
Finished primary school & 0.718 & 0.772 & 1490 & -0.122 & 0.019 \\
Finished secondary school & 0.253 & 0.270 & 1490 & -0.039 & 0.460 \\
Total cropland (acres) & 2.441 & 2.398 & 1512 & 0.014 & 0.796 \\
Number of rooms in household & 3.073 & 3.252 & 1511 & -0.072 & 0.219 \\
Total school fees & 27239.693 & 29813.631 & 1589 & -0.068 & 0.191 \\
Average monthly consumption (Ksh) & 14970.862 & 15371.378 & 1437 & -0.032 & 0.550 \\
Average monthly consumption/capita (log) & 7.975 & 7.963 & 1434 & 0.019 & 0.721 \\
Total cash savings (Ksh) & 5157.396 & 8021.499 & 1572 & -0.128 & 0.028 \\
Total cash savings (trim) & 4731.623 & 5389.836 & 1572 & -0.050 & 0.343 \\
Has bank savings acct & 0.419 & 0.425 & 1589 & -0.012 & 0.8

## Creating Table 5

In [None]:
ms1ms2_pooled_tab5 = ms1ms2_pooled.copy(deep=True)

## Creating Table 6

In [230]:
cleanpricedata_y1y2_tab6 = cleanpricedata_y1y2.copy(deep=True)
cleanpricedata_y1y2_tab6 = cleanpricedata_y1y2_tab6[['salesPrice_trim','hi_1km_wt','hi_3km_wt','hi_5km_wt','monthnum','subloc_1km_wt_grp','subloc_3km_wt_grp','subloc_5km_wt_grp', 'in_sample','MS','lean']]
cleanpricedata_y1y2_tab6['hi'] = pd.NA
cleanpricedata_y1y2_tab6['interact'] = pd.NA
cleanpricedata_y1y2_tab6['interact_lean'] = pd.NA

In [242]:
results = {}
for dist in ['1km_wt', '3km_wt', '5km_wt']:
    df = cleanpricedata_y1y2_tab6.copy(deep=True)
    df.dropna(subset=[f'hi_{dist}','salesPrice_trim','monthnum'], inplace=True)
    mean_price = df[(df['monthnum'] == 0) & (df[f'hi_{dist}'] == 0)]['salesPrice_trim'].mean()
    norm = 100 / mean_price

    # normalize price
    df['salesPrice_trim_norm'] = df['salesPrice_trim'] * norm

    # create hi variable
    df['hi'] = df[f'hi_{dist}']
    df['interact'] = df['monthnum'] * df['hi']

    # regression
    formula = 'salesPrice_trim_norm ~ hi + monthnum + interact'

    for ms in [1,2,3]: # 3 is pooled
        if ms == 3:
            df_filt = df[(df['in_sample'] == 1)]
        else:
            df_filt = df[(df['MS'] == ms) & (df['in_sample'] == 1)]
        model = smf.ols(formula=formula, data=df_filt).fit(cov_type='cluster', cov_kwds={'groups': df_filt[f'subloc_{dist}_grp']})
        results[(dist, ms)] = model

In [292]:
pvals = pd.DataFrame()
# storeing pval in a df
for dv in ['hi', 'monthnum', 'interact']:
    val = {(k[0], k[1]): np.round(v.pvalues[dv],3) for k, v in results.items()}
    pvals[dv] = pd.Series(val)
    
# keep only columns 3km_wt and 3rd column in 1km_wt and 5km_wt
pvals = pvals.T
pvals = pvals[[('3km_wt', 1), ('3km_wt', 2), ('3km_wt', 3), ('1km_wt', 3), ('5km_wt', 3)]]

### Bootstraping

In [167]:
def wild_bootstrap(data, model, n_bootstraps, dv, clust_var):
    """
    Wild Cluster Bootstrap-t with random signs within clusters
    """
    cluster_var = data[clust_var]
    unique_clusters = cluster_var.unique()
    boot_results = []
    
    for _ in range(n_bootstraps):
        boot_data = data.copy()
        # resample residuals within each cluster
        for cluster in unique_clusters:
            cluster_indices = data[cluster_var == cluster].index

            # multiply residuals by random signs, either -1 or 1, within each cluster
            signs = np.random.choice([-1, 1], size=len(cluster_indices))
            boot_data.loc[cluster_indices, dv] = model.predict(data.loc[cluster_indices]) + signs * model.resid.loc[cluster_indices]

        # Refit model on bootstrapped data
        boot_model = smf.ols(model.model.formula, data=boot_data).fit()
        boot_results.append(boot_model.params)
    
    return np.array(boot_results)[:,1:4]


In [290]:
bootstrap_res = {}
bootstrap_pvals = pd.DataFrame(index=pd.MultiIndex.from_product([['1km_wt', '3km_wt', '5km_wt'], [1, 2, 3]], names=['dist', 'ms']), columns=['hi', 'monthnum', 'interact'])
n_bootstraps = 999

for dist  in ['1km_wt', '3km_wt', '5km_wt']:
    df = cleanpricedata_y1y2_tab6.copy(deep=True)
    df.dropna(subset=[f'hi_{dist}','salesPrice_trim','monthnum'], inplace=True)
    mean_price = df[(df['monthnum'] == 0) & (df[f'hi_{dist}'] == 0)]['salesPrice_trim'].mean()
    norm = 100 / mean_price

    # normalize price
    df['salesPrice_trim_norm'] = df['salesPrice_trim'] * norm
    df['salesPrice_trim_norm'] = df['salesPrice_trim_norm'].astype(float)

    # create hi variable
    df['hi'] = df[f'hi_{dist}']
    df['interact'] = df['monthnum'] * df['hi']

    # regression
    formula = 'salesPrice_trim_norm ~ hi + monthnum + interact'

    for ms in [1,2,3]: # 3 is pooled
        if ms == 3:
            df_filt = df[(df['in_sample'] == 1)]
        else:
            df_filt = df[(df['MS'] == ms) & (df['in_sample'] == 1)]
        res = wild_bootstrap(df_filt, results[(dist, ms)], n_bootstraps, 'salesPrice_trim_norm', f'subloc_{dist}_grp')
        bootstrap_res[(dist,ms)] = res

        model = results[(dist, ms)]

        for i, var in enumerate(['hi', 'monthnum', 'interact']):
            observed_coef = model.params[var]
            # calculating p-values as proportion of bootstrap coefs where abs(boot_coef) >= abs(obs_coef)
            p_value = np.round(np.mean(np.abs(bootstrap_res[(dist,ms)][:,i]) >= np.abs(observed_coef)),3)
            
            # store p-value in df 
            bootstrap_pvals.loc[(dist,ms),var] = p_value

In [296]:
bootstrap_pvals = bootstrap_pvals.T
bootstrap_pvals = bootstrap_pvals[[('3km_wt', 1), ('3km_wt', 2), ('3km_wt', 3), ('1km_wt', 3), ('5km_wt', 3)]]

In [301]:
# use stargazer to create a table
result_list = [results[('3km_wt', 1)], results[('3km_wt', 2)], results[('3km_wt', 3)], results[('1km_wt', 3)], results[('5km_wt', 3)]]
stargazer = Stargazer(result_list)

# configure Stargazer object for output
stargazer.custom_columns(['Main Specification (3km)', 'Robustness (Pooled)'], [3, 2])
stargazer.rename_covariates({'hi': 'High', 'monthnum': 'Month', 'interact': 'High x Month'})
stargazer.show_degrees_of_freedom(False)
stargazer.significant_digits(3)
stargazer.covariate_order(['hi', 'monthnum', 'interact'])
# add p-values as a rows 
stargazer.add_line('P-value High', pvals.loc['hi'].values.tolist())
stargazer.add_line('P-value Treat Bootstrap', bootstrap_pvals.loc['hi'].values.tolist())
stargazer.add_line('P-value Month', pvals.loc['monthnum'].values.tolist())
stargazer.add_line('P-value High Bootstrap', bootstrap_pvals.loc['monthnum'].values.tolist())
stargazer.add_line('P-value High x Month', pvals.loc['interact'].values.tolist())
stargazer.add_line('P-value Treat x High Bootstrap', bootstrap_pvals.loc['interact'].values.tolist())


latex_table6 = stargazer.render_latex()

# edit the latex table to add row for telling if Y1 Y2 or Pooled after \\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) \\
latex_table6 = latex_table6.replace("\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) \\",
                                "\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) \n \\\ & Y1 & Y2 & Pooled & 1km & 5km \\")
latex_table6 = latex_table6.replace("Adjusted $R^2$", "% Adjusted $R^2$")
latex_table6 = latex_table6.replace("Residual Std. Error", "% Residual Std. Error")
latex_table6 = latex_table6.replace("F Statistic", "% F Statistic")
latex_table6 = latex_table6.replace("\\textit{Note","% \\textit{Note")
latex_table6 = latex_table6.replace("\\begin{table}[!htbp] \\centering", "")
latex_table6 = latex_table6.replace("\\end{table}", "")
latex_table6 = latex_table6.replace('salesPrice_trim_norm', 'Normalized sales price')

print(latex_table6)


\begin{tabular}{@{\extracolsep{5pt}}lccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{5}{c}{\textit{Dependent variable: Normalized sales price}} \
\cr \cline{2-6}
\\[-1.8ex] & \multicolumn{3}{c}{Main Specification (3km)} & \multicolumn{2}{c}{Robustness (Pooled)}  \\
\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) 
 \\ & Y1 & Y2 & Pooled & 1km & 5km \\
\hline \\[-1.8ex]
 High & 4.410$^{**}$ & 2.855$^{}$ & 3.970$^{**}$ & 2.787$^{}$ & 3.766$^{**}$ \\
& (2.091) & (1.992) & (1.817) & (1.719) & (1.822) \\
 Month & 1.189$^{***}$ & 1.224$^{***}$ & 1.364$^{***}$ & 1.327$^{***}$ & 1.537$^{***}$ \\
& (0.363) & (0.377) & (0.350) & (0.339) & (0.291) \\
 High x Month & -0.574$^{}$ & -0.476$^{}$ & -0.573$^{}$ & -0.520$^{}$ & -0.835$^{**}$ \\
& (0.422) & (0.459) & (0.386) & (0.390) & (0.366) \\
 P-value High & 0.035 & 0.152 & 0.029 & 0.105 & 0.039 \\
 P-value Treat Bootstrap & 0.505 & 0.537 & 0.477 & 0.493 & 0.494 \\
 P-value Month & 0.001 & 0.001 & 0.0 & 0.0 & 0.0 \\
 P-value High Bootstrap & 0.486

## Creating Table 7

In [14]:
# copy the raw data and create columns for treatment and interaction variable
ms1ms2_pooled_tab7 = ms1ms2_pooled.copy(deep=True)
# filter relevant columns
ms1ms2_pooled_tab7 = ms1ms2_pooled_tab7[['oafid', # id
                                         'treat12', 'treat13', 'treatMS1MS2', # treatment variables
                                         'inventory_trim', 'netrevenue_trim', 'logtotcons_trim', # outcome variables
                                         'Y1round2', 'Y1round3', 'Y2round1', 'Y2round2', 'Y2round3','hi','subloc','interviewdate']] # independent variables

ms1ms2_pooled_tab7.sort_index(inplace=True)
ms1ms2_pooled_tab7['z'] = pd.NA
ms1ms2_pooled_tab7['z_hi'] = pd.NA

### Running the first set of regressions

In [225]:
# list of treaments
treatments = ['treat12', 'treat13', 'treatMS1MS2']

# list of dependent variables
dependent_vars = ['inventory_trim', 'netrevenue_trim', 'logtotcons_trim']

# list of changeing independent variables depending on the treatment
independent_vars = {
    'treat12': 'Y1round2 + Y1round3',
    'treat13': 'Y2round2 + Y2round3',
    'treatMS1MS2': 'Y1round2 + Y1round3 + Y2round1 + Y2round2 + Y2round3'
    }

# empty dictionary to store results
results = {}
pvals = {var: [] for var in ['z', 'hi', 'z_hi','z+z_hi']}

# Simulating the loop to replace variables and run regressions
for dv in dependent_vars:
    for treat in treatments:
        # Stata automatically omits the missing values in the regression – here we have to do it manually so we copy the data and drop variables
        df = ms1ms2_pooled_tab7.copy(deep=True)
        df = df.dropna(subset=[dv, treat, 'hi', 'subloc','interviewdate'])
        # setting treament variable
        df['z'] = df[treat] # setting z to the treatment variable
        
        # setting interaction variable
        df['z_hi'] = df[treat]*df['hi'] # setting z_hi to the interaction of the treatment hi saturation
        
        # setting the formula to run the regression
        formula = f'{dv} ~ z + hi + z_hi + interviewdate + {independent_vars[treat]}'

        # Run the regression
        model_key = f'model_{dependent_vars.index(dv)*len(treatments) + treatments.index(treat)}'
        results[model_key] = smf.ols(formula, data=df).fit(cov_type='cluster', cov_kwds={'groups': df['subloc']})
        # print(results[f'model_{i}'].summary())

        # test the hypothesis that z + z_hi = 0
        hypothesis = 'z + z_hi = 0'
        t_test = results[model_key].t_test(hypothesis)

        # store p-value round to 3 decimals
        pvals['z+z_hi'].append(np.round(t_test.pvalue,3))
        pvals['z'].append(np.round(results[model_key].pvalues['z'],3))
        pvals['hi'].append(np.round(results[model_key].pvalues['hi'],3))
        pvals['z_hi'].append(np.round(results[model_key].pvalues['z_hi'],3))

### Running bootstrap regressions

In [226]:
def wild_bootstrap(data, model, n_bootstraps, dv, clust_var):
    """
    Wild Cluster Bootstrap-t with random signs within clusters
    """
    cluster_var = data[clust_var]
    unique_clusters = cluster_var.unique()
    boot_results = []
    
    for _ in range(n_bootstraps):
        boot_data = data.copy()
        # resample residuals within each cluster
        for cluster in unique_clusters:
            cluster_indices = data[cluster_var == cluster].index

            # multiply residuals by random signs, either -1 or 1, within each cluster
            signs = np.random.choice([-1, 1], size=len(cluster_indices))
            boot_data.loc[cluster_indices, dv] = model.predict(data.loc[cluster_indices]) + signs * model.resid.loc[cluster_indices]

        # Refit model on bootstrapped data
        boot_model = smf.ols(model.model.formula, data=boot_data).fit()
        boot_results.append(boot_model.params)
    
    return np.array(boot_results)[:,1:4]

In [227]:
n_bootstraps = 10  # reported data is based on 1000 iterations
bootstrap_res = {}
bootstrap_pvals = {var: [] for var in ['z', 'hi', 'z_hi']}

for dv in dependent_vars:
    for treat in treatments:
        df = ms1ms2_pooled_tab7.copy(deep=True)
        df = df.dropna(subset=[dv, treat, 'hi', 'subloc', 'interviewdate'])
        df['z'] = df[treat]
        df['z_hi'] = df[treat] * df['hi']
        df[dv] = df[dv].astype(float)
        
        formula = f'{dv} ~ z + hi + z_hi + interviewdate + {independent_vars[treat]}'
        model_key = f'model_{dependent_vars.index(dv)*len(treatments) + treatments.index(treat)}'
        model = results[model_key]

        # Wild bootstrap
        res = wild_bootstrap(df, model, n_bootstraps, dv, 'subloc')
        bootstrap_res[model_key] = res

        for i, var in enumerate(['z', 'hi', 'z_hi']):
            observed_coef = model.params[var]
            # calculating p-values as proportion of bootstrap coefs where abs(boot_coef) >= abs(obs_coef)
            p_value = np.mean(np.abs(bootstrap_res[model_key][:,i]) >= np.abs(observed_coef))
            bootstrap_pvals[var].append(p_value)



### Output code to LaTeX

In [100]:
# use stargazer to create a table
result_list = list(results.values())
stargazer = Stargazer(result_list)

# configure Stargazer object for output
stargazer.custom_columns(['Inventory', 'Net Revenues', 'Consumption'], [3, 3, 3])
stargazer.rename_covariates({'z': 'Treat', 'hi': 'High', 'z_hi': 'Treat x High'})
stargazer.show_degrees_of_freedom(False)
stargazer.significant_digits(3)
stargazer.covariate_order(['z', 'hi', 'z_hi'])
# add p-values as a rows 
stargazer.add_line('P-value T + TH = 0', pvals['z+z_hi'])
stargazer.add_line('P-value Treat', pvals['z'])
stargazer.add_line('P-value Treat Bootstrap', bootstrap_pvals['z'])
stargazer.add_line('P-value High', pvals['hi'])
stargazer.add_line('P-value High Bootstrap', bootstrap_pvals['hi'])
stargazer.add_line('P-value Treat x High', pvals['z_hi'])
stargazer.add_line('P-value Treat x High Bootstrap', bootstrap_pvals['z_hi'])


latex_table7 = stargazer.render_latex()

# edit the latex table to add row for telling if Y1 Y2 or Pooled after \\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) \\
latex_table7 = latex_table7.replace("\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) \\",
                                "\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) \n \\\ & Y1 & Y2 & Pooled & Y1 & Y2 & Pooled & Y1 & Y2 & Pooled \\")
latex_table7 = latex_table7.replace("Adjusted $R^2$", "% Adjusted $R^2$")
latex_table7 = latex_table7.replace("Residual Std. Error", "% Residual Std. Error")
latex_table7 = latex_table7.replace("F Statistic", "% F Statistic")
latex_table7 = latex_table7.replace("\\textit{","% \\textit{")
latex_table7 = latex_table7.replace("\\begin{table}[!htbp] \\centering", "")
latex_table7 = latex_table7.replace("\\end{table}", "")

print(latex_table7)


\begin{tabular}{@{\extracolsep{5pt}}lccccccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
\\[-1.8ex] & \multicolumn{3}{c}{Inventory} & \multicolumn{3}{c}{Net Revenues} & \multicolumn{3}{c}{Consumption}  \\
\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) 
 \\ & Y1 & Y2 & Pooled & Y1 & Y2 & Pooled & Y1 & Y2 & Pooled \\
\hline \\[-1.8ex]
 Treat & 0.759$^{***}$ & 0.546$^{***}$ & 0.740$^{***}$ & 1059.602$^{**}$ & 1193.768$^{*}$ & 1101.389$^{**}$ & 0.012$^{}$ & -0.051$^{}$ & -0.011$^{}$ \\
& (0.189) & (0.185) & (0.155) & (437.732) & (685.048) & (430.091) & (0.040) & (0.040) & (0.023) \\
 High & 0.124$^{}$ & -0.028$^{}$ & 0.017$^{}$ & 533.903$^{}$ & -152.603$^{}$ & 164.936$^{}$ & -0.003$^{}$ & -0.084$^{}$ & -0.047$^{}$ \\
& (0.355) & (0.219) & (0.241) & (551.179) & (558.948) & (479.685) & (0.051) & (0.053) & (0.043) \\
 Treat*High & -0.333$^{}$ & -0.065$^{}$ & -0.291$^{}$ & -1114.628$^{**}$ & -555.215$^{}$ & -816.770$^{}$ & -0.013$^{}$ & 0.174$^{***}$ & 0.067$^{*}$ \\
& (0.229) & (