# Table 1

Install the required packages before running this notebook: `pip install pandas statsmodels linearmodels`

In [37]:
import pandas as pd
from linearmodels.panel import PanelOLS

# Load the Stata panel, index it by (fips, year), and add a column of ones
# that the regressions below use as their intercept term.
df = (
    pd.read_stata('dataset_for_aer_pnp.dta')
    .set_index(['fips', 'year'])
    .assign(constant=1)
)

# Control variables for the Black-household income regressions.
independent_vars_black = [
    'b_urban',
    'b_highschoolgrad',
    'b_femalehead',
    'b_age',
    'b_agesq',
]

# Control variables for the white-household income regressions.
independent_vars_white = [
    'w_urban',
    'w_highschoolgrad',
    'w_femalehead',
    'w_age',
    'w_agesq',
]

# Control variables for the income-gap regressions.
independent_vars_gap = [
    'black_white_urban',
    'black_white_highschool',
    'black_white_female',
    'black_white_mean_age',
    'black_white_mean_agesq',
]

# Run panel regression
def run_regression(dependent_var, independent_vars, data):
    """Fit a panel regression with entity and time effects and print its summary.

    The regressors are ``independent_vars`` followed by the ``'free'`` and
    ``'constant'`` columns. Standard errors are clustered by entity.

    Parameters
    ----------
    dependent_var : str
        Name of the outcome column in ``data``.
    independent_vars : list of str
        Names of the control columns in ``data``.
    data : pandas.DataFrame
        Panel data containing every referenced column; it is passed straight
        to ``PanelOLS``, so it must carry the index that estimator expects.

    Returns
    -------
    None
        The estimation summary is printed rather than returned.
    """
    regressors = independent_vars + ['free', 'constant']
    outcome = data[dependent_var]
    design = data[regressors]

    estimator = PanelOLS(
        outcome,
        design,
        entity_effects=True,
        time_effects=True,
    )
    fitted = estimator.fit(cov_type='clustered', cluster_entity=True)
    print(fitted.summary)

# Filter out states with low black populations
def filter_low_black_population(data):
    """Return the rows of ``data`` whose ``lowblack2pct`` value is not 1.

    Rows where ``lowblack2pct`` is missing compare as "not equal to 1" and
    are therefore retained.
    """
    keep_rows = data['lowblack2pct'].ne(1)
    return data.loc[keep_rows]

# Each Table 1 specification pairs an outcome with its own set of controls.
table1_specs = [
    ('ln_realb_medianhhincome', independent_vars_black),
    ('ln_realw_medianhhincome', independent_vars_white),
    ('ln_gap', independent_vars_gap),
]

# Columns 1, 2, and 3: all states
print("Running regressions for all states")
for outcome, controls in table1_specs:
    run_regression(outcome, controls, df)

# Columns 4, 5, and 6: states with low black populations excluded
df_filtered = filter_low_black_population(df)
print("Running regressions excluding low black population states")
for outcome, controls in table1_specs:
    run_regression(outcome, controls, df_filtered)

Running regressions for all states
                             PanelOLS Estimation Summary                             
Dep. Variable:     ln_realb_medianhhincome   R-squared:                        0.0622
Estimator:                        PanelOLS   R-squared (Between):             -0.1317
No. Observations:                      200   R-squared (Within):              -0.1236
Date:                     Fri, May 24 2024   R-squared (Overall):             -0.1289
Time:                             13:46:56   Log-likelihood                    158.86
Cov. Estimator:                  Clustered                                           
                                             F-statistic:                      1.5585
Entities:                               50   P-value                           0.1636
Avg Obs:                            4.0000   Distribution:                   F(6,141)
Min Obs:                            4.0000                                           
Max Obs:           

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
