In [None]:

# Importing necessary libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Enhanced function for downloading data from a URL, returning a pandas DataFrame
def download_dataset(url):
    """
    Read a CSV resource into a pandas DataFrame.

    The argument is handed to ``pd.read_csv`` unchanged, with default parsing options.

    Parameters:
    - url (str): Location of the CSV file to load.

    Returns:
    - DataFrame: The parsed contents of the CSV file.
    """
    frame = pd.read_csv(url)
    return frame

# Dataset name -> raw CSV location. All four files are read from the repository's `main` branch.
# NOTE(review): later cells read the same file names from a pinned commit instead — confirm
# which revision is intended.
dataset_urls = dict(
    data_1980_ages_30_39="https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/main/1980_30_39_all_data.csv",
    data_1980_ages_40_49_base="https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/main/1980_40_49_base_data.csv",
    data_1980_ages_40_49_extra="https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/main/1980_40_49_extra_data.csv",
    data_1980_ages_50_59_base="https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/main/1980_50_59_base_data.csv",
)

# Download every file once, in declaration order, and key the resulting frames by dataset name
datasets = dict(zip(dataset_urls.keys(), map(download_dataset, dataset_urls.values())))

# Pull one frame out of the dictionary as a usage example
df_example = datasets["data_1980_ages_30_39"]

In [None]:
# Assuming pandas (pd) and matplotlib.pyplot (plt) have already been imported

# Function to load and preprocess the dataset
def preprocess_data(url):
    """
    Read the CSV at `url` and collapse it to one row per (BIRTHYR, BIRTHQTR) cell
    holding the mean of YRSED for that cell.

    Parameters:
    - url (str): Location of the CSV file to load.

    Returns:
    - DataFrame: Columns BIRTHYR, BIRTHQTR, YRSED (cell mean) and YearQuarterIndex,
      where YearQuarterIndex = BIRTHYR + (BIRTHQTR - 1) / 4.
    """
    raw = pd.read_csv(url)
    cell_means = raw.groupby(['BIRTHYR', 'BIRTHQTR'])['YRSED'].mean()
    summary = cell_means.reset_index()
    # Quarter 1 maps to +0.00, quarter 2 to +0.25, and so on, giving a continuous x-axis value
    quarter_offset = (summary['BIRTHQTR'] - 1) / 4
    summary['YearQuarterIndex'] = summary['BIRTHYR'] + quarter_offset
    return summary

# Function to plot the data
def plot_education_by_birth_quarter(data):
    """
    Draw YRSED against YearQuarterIndex as a connected line with point markers.

    Every marker is tagged with 'Q' followed by the integer birth quarter of its row.

    Parameters:
    - data (DataFrame): Must provide the columns YearQuarterIndex, YRSED and BIRTHQTR.
    """
    x_values = data['YearQuarterIndex']
    y_values = data['YRSED']

    plt.figure(figsize=(12, 8))
    plt.plot(x_values, y_values, '-o', label='Average Years of Education')

    # Tag each marker with its quarter; the text is offset 5 points above the data point
    for x_pos, y_pos, quarter in zip(x_values, y_values, data['BIRTHQTR']):
        plt.annotate(f'Q{int(quarter)}', (x_pos, y_pos), textcoords="offset points", xytext=(0,5), ha='center')

    plt.title('Average Years of Education by Birth Quarter and Year')
    plt.xlabel('Year and Quarter')
    plt.ylabel('Average Years of Education')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# CSV to chart: the 1980_30_39 file, read from a pinned commit of the replication repository
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'

# Collapse to birth-year/quarter means of YRSED, then draw the annotated line chart
data = preprocess_data(url)
plot_education_by_birth_quarter(data)


In [None]:

# Assuming the preprocess_data and plot_education_by_birth_quarter functions have already been defined

# CSV to chart: the 1980_40_49 base file, read from the same pinned commit
# (presumably the 1940-1949 birth cohorts — TODO confirm against the data)
url_new_dataset = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_40_49_base_data.csv'

# Same pipeline as before: birth-year/quarter means of YRSED, then the annotated line chart
data_new = preprocess_data(url_new_dataset)
plot_education_by_birth_quarter(data_new)


In [None]:

# Assuming the preprocess_data and plot_education_by_birth_quarter functions have already been defined

# CSV to chart: the 1980_50_59 base file (birth years 1950-1959), read from the same pinned commit
url_1950s_dataset = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_50_59_base_data.csv'

# Same pipeline as before: birth-year/quarter means of YRSED, then the annotated line chart
data_1950s = preprocess_data(url_1950s_dataset)
plot_education_by_birth_quarter(data_1950s)


In [None]:

# Assuming pandas (pd), numpy (np), and statsmodels (sm) have already been imported

def load_and_prepare_data(url):
    """Read the CSV at `url` and discard rows lacking BIRTHQTR, WEEKERN or YRSED."""
    required_columns = ['BIRTHQTR', 'WEEKERN', 'YRSED']
    frame = pd.read_csv(url)
    # Only these three columns are checked; missing values in other columns are kept
    frame.dropna(subset=required_columns, inplace=True)
    return frame

def calculate_means(df):
    """Compare rows with BIRTHQTR == 1 against rows with BIRTHQTR in {2, 3, 4}.

    Returns a 4-tuple ordered as: mean log(WEEKERN) for quarter 1, mean log(WEEKERN)
    for quarters 2-4, mean YRSED for quarter 1, mean YRSED for quarters 2-4.
    """
    quarter = df['BIRTHQTR']
    born_q1 = df[quarter == 1]
    born_later = df[quarter.isin([2, 3, 4])]

    log_wage_q1 = np.log(born_q1['WEEKERN']).mean()
    log_wage_later = np.log(born_later['WEEKERN']).mean()
    schooling_q1 = born_q1['YRSED'].mean()
    schooling_later = born_later['YRSED'].mean()
    return log_wage_q1, log_wage_later, schooling_q1, schooling_later

def calculate_wald_estimate(mean_ln_earnings_fq, mean_ln_earnings_l3q, mean_education_fq, mean_education_l3q):
    """Form the Wald ratio: the log-earnings gap divided by the education gap.

    Both gaps are taken as first quarter minus last three quarters.
    Returns (ratio, log-earnings gap, education gap).
    """
    earnings_gap = mean_ln_earnings_fq - mean_ln_earnings_l3q
    schooling_gap = mean_education_fq - mean_education_l3q
    return earnings_gap / schooling_gap, earnings_gap, schooling_gap

def perform_ols_regression(df):
    """Regress log(WEEKERN) on YRSED plus an intercept and return the YRSED slope.

    Note: a later cell rebinds this name to a three-argument variant, so this
    definition is only in effect until that cell runs.
    """
    log_wage = np.log(df['WEEKERN'])
    design = sm.add_constant(df['YRSED'])  # adds the intercept column
    fitted = sm.OLS(log_wage, design).fit()
    return fitted.params['YRSED']

# Load the 1980_30_39 file (pinned commit) and drop rows missing BIRTHQTR, WEEKERN or YRSED
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'
df = load_and_prepare_data(url)

# Group means (quarter 1 vs. quarters 2-4), their differences, and the Wald ratio of those differences
mean_ln_earnings_fq, mean_ln_earnings_l3q, mean_education_fq, mean_education_l3q = calculate_means(df)
wald_estimate, difference_ln_earnings, difference_education = calculate_wald_estimate(mean_ln_earnings_fq, mean_ln_earnings_l3q, mean_education_fq, mean_education_l3q)

# Slope from regressing log(WEEKERN) on YRSED, for comparison with the Wald ratio
ols_estimate = perform_ols_regression(df)

# One row per statistic. The two estimates have no per-group value, so those cells hold '-'
# and the estimates themselves are placed in the 'Difference' column.
results = pd.DataFrame({
    'Statistic': ['ln Weekly Wage', 'Education', 'Wald Estimate', 'OLS Estimate'],
    'First Quarter': [mean_ln_earnings_fq, mean_education_fq, '-', '-'],
    'Last Three Quarters': [mean_ln_earnings_l3q, mean_education_l3q, '-', '-'],
    'Difference': [difference_ln_earnings, difference_education, wald_estimate, ols_estimate]
})

# Display results
print(results)


In [None]:
"""

*OLS*
"""

In [None]:
# Assuming pandas (pd), numpy (np), and statsmodels.api (sm) have already been imported

def load_and_clean_data(url):
    """Read the CSV at `url` and keep only rows that are complete in every column."""
    frame = pd.read_csv(url)
    # A missing value in any column removes the whole row
    frame.dropna(axis=0, how='any', inplace=True)
    return frame

def add_birth_year_dummies(df, start_year, end_year):
    """Attach one 0/1 column per birth year in [start_year, end_year] to `df`, in place.

    Columns are named 'BirthYearDummy_<year>' and flag rows whose BIRTHYR equals
    that year. Nothing is returned.
    """
    for cohort_year in range(start_year, end_year + 1):
        is_cohort = df['BIRTHYR'].eq(cohort_year)
        df[f'BirthYearDummy_{cohort_year}'] = is_cohort.astype(int)

def perform_ols_regression(df, dependent_var, independent_vars):
    """Fit OLS of log(dependent_var) on independent_vars plus an intercept.

    Returns the YRSED coefficient and its standard error, each rounded to
    4 decimals. 'YRSED' must therefore be one of independent_vars.
    """
    log_outcome = np.log(df[dependent_var])
    design = sm.add_constant(df[independent_vars])
    fitted = sm.OLS(log_outcome, design).fit()
    slope = round(fitted.params['YRSED'], 4)
    slope_se = round(fitted.bse['YRSED'], 4)
    return slope, slope_se

# Load the 1980_30_39 file (pinned commit) and keep complete rows only
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'
df = load_and_clean_data(url)

# Add one 0/1 column per birth year, 1930 through 1939 (mutates df in place)
add_birth_year_dummies(df, 1930, 1939)

# Outcome is WEEKERN (logged inside perform_ols_regression); regressors are YRSED plus the ten year dummies.
# NOTE(review): if every row has BIRTHYR in 1930-1939, the ten dummies sum to the constant that
# perform_ols_regression adds — confirm whether a reference year should be left out.
dependent_var = 'WEEKERN'
independent_vars = ['YRSED'] + [f'BirthYearDummy_{year}' for year in range(1930, 1940)]

# Fit and keep the rounded YRSED coefficient and standard error
YRSED_coef_ols, YRSED_std_err_ols = perform_ols_regression(df, dependent_var, independent_vars)

# Print the results
print(f'Coefficient for YRSED: {YRSED_coef_ols}, Standard Error: {YRSED_std_err_ols}')


In [None]:
# Assuming pandas (pd), numpy (np), and statsmodels.api (sm) have already been imported

def load_clean_data(url):
    """Read the CSV at `url` and drop every row that has a missing value in any column."""
    frame = pd.read_csv(url)
    frame.dropna(axis=0, how='any', inplace=True)
    return frame

def first_stage_regression(df, dependent_var, instrument_var):
    """Run the TSLS first stage: OLS of `dependent_var` on `instrument_var` plus an intercept.

    The fit is requested with cov_type='HC0'. Its fitted values are written into
    `df` as '<dependent_var>_predicted', and that same frame is returned.
    NOTE(review): only the instrument enters this stage; any controls used in the
    second stage are not included here — confirm that is intended.
    """
    design = sm.add_constant(df[instrument_var])
    fitted = sm.OLS(df[dependent_var], design).fit(cov_type='HC0')
    fitted_column = f'{dependent_var}_predicted'
    df[fitted_column] = fitted.predict()
    return df

def second_stage_regression(df, dependent_var, independent_vars):
    """Run the TSLS second stage: OLS of log(dependent_var) on independent_vars plus an intercept.

    Returns the coefficient and standard error (cov_type='HC0') of the FIRST entry
    of independent_vars, both rounded to 4 decimals.
    NOTE(review): the standard error comes from an ordinary OLS fit on a generated
    regressor — confirm whether a 2SLS correction is required.
    """
    log_outcome = np.log(df[dependent_var])
    design = sm.add_constant(df[independent_vars])
    fitted = sm.OLS(log_outcome, design).fit(cov_type='HC0')
    focus = f'{independent_vars[0]}'
    return round(fitted.params[focus], 4), round(fitted.bse[focus], 4)

# Load the 1980_30_39 file (pinned commit) and keep complete rows only
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'
df = load_clean_data(url)

# First stage: fitted YRSED from a regression on BIRTHQTR, stored as 'YRSED_predicted'
df = first_stage_regression(df, 'YRSED', 'BIRTHQTR')

# Add birth year dummies (1930 through 1939)
for year in range(1930, 1940):
    df[f'BirthYearDummy_{year}'] = (df['BIRTHYR'] == year).astype(int)

# Second stage: log(WEEKERN) on fitted schooling and the birth year dummies
YRSED_coef, YRSED_std_err = second_stage_regression(df, 'WEEKERN', ['YRSED_predicted'] + [f'BirthYearDummy_{year}' for year in range(1930, 1940)])

# Prepare and display results
data = {
    'Coefficient': [YRSED_coef],
    'Standard Error': [YRSED_std_err]
}
results_df = pd.DataFrame(data, index=['TSLS Estimate'])
print(results_df)

# Specification (1) table in the 12-row layout used by df_2_output / df_3_output.
# The cells that combine all specifications read `df_1_output`, so it is built here, where
# both the OLS estimate (YRSED_coef_ols, from the preceding OLS cell) and the TSLS estimate
# are in scope. Specification (1) has no AGE / marital / race / place-of-work controls,
# hence the '-' placeholders in those rows.
df_1_output = pd.DataFrame(
    {
        'OLS': [YRSED_coef_ols, YRSED_std_err_ols] + ['-'] * 10,
        'TSLS': [YRSED_coef, YRSED_std_err] + ['-'] * 10,
    },
    index=['Years of Education', '', 'AGE', '', 'AGESQR', '', 'Married (1= married)', '', 'Race (1= black)', '', 'Place of Work (1= center city)', ''],
)


In [None]:
import statsmodels.api as sm

# Load the 1980_30_39 file from the pinned commit
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'
df = pd.read_csv(url)

# Keep complete rows only
df.dropna(inplace=True)

# One 0/1 indicator per birth year, 1930 through 1939
for year in range(1930, 1940):
    df[f'BirthYearDummy_{year}'] = df['BIRTHYR'].eq(year).astype(int)

# Regressors: schooling, then the birth year indicators, then age and its square
independent_vars = ['YRSED']
independent_vars += [f'BirthYearDummy_{year}' for year in range(1930, 1940)]
independent_vars += ['AGE', 'AGESQR']

# Outcome is log weekly earnings; the design matrix gets an intercept column
Y = np.log(df['WEEKERN'])
X = sm.add_constant(df[independent_vars])

# Fit the OLS regression model (default, non-robust covariance)
ols_model = sm.OLS(Y, X).fit()
ols_params, ols_bse = ols_model.params, ols_model.bse

# Schooling coefficient and standard error, rounded to 4 decimals
YRSED_coef_ols_2 = round(ols_params['YRSED'], 4)
YRSED_std_err_ols_2 = round(ols_bse['YRSED'], 4)

# Age and age-squared coefficients and standard errors, rounded to 4 decimals
age_coef = round(ols_params['AGE'], 4)
age_std_err = round(ols_bse['AGE'], 4)

agesqr_coef = round(ols_params['AGESQR'], 4)
agesqr_std_err = round(ols_bse['AGESQR'], 4)

# These rounded values fill the 'OLS' column when the OLS/TSLS table is assembled in a later cell.


In [None]:
# Load the 1980_30_39 file from the pinned commit
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'
df = pd.read_csv(url)

# Keep complete rows only
df.dropna(inplace=True)

# First stage: OLS of YRSED on an intercept and BIRTHQTR (the instrument), HC0 covariance.
# NOTE(review): AGE, AGESQR and the birth year indicators used below are not part of this
# stage — confirm that is intended.
endog_first_stage = df['YRSED']
exog_first_stage = sm.add_constant(df['BIRTHQTR'])
first_stage_model = sm.OLS(endog_first_stage, exog_first_stage).fit(cov_type='HC0')

# Keep the first-stage fitted schooling for use as a regressor
df['YRSED_predicted'] = first_stage_model.predict()

# One 0/1 indicator per birth year, 1930 through 1939
for year in range(1930, 1940):
    df[f'BirthYearDummy_{year}'] = df['BIRTHYR'].eq(year).astype(int)

# Second stage: log(WEEKERN) on fitted schooling, age, age squared and the birth year indicators
covariate_columns = ['YRSED_predicted', 'AGE', 'AGESQR']
covariate_columns += [f'BirthYearDummy_{year}' for year in range(1930, 1940)]
endog_second_stage = np.log(df['WEEKERN'])
exog_second_stage = sm.add_constant(df[covariate_columns])
second_stage_model = sm.OLS(endog_second_stage, exog_second_stage).fit(cov_type='HC0')
tsls_params, tsls_bse = second_stage_model.params, second_stage_model.bse

# Fitted-schooling coefficient and standard error, rounded to 4 decimals
YRSED_coef_second_stage_2 = round(tsls_params['YRSED_predicted'], 4)
YRSED_std_err_second_stage_2 = round(tsls_bse['YRSED_predicted'], 4)

# Age and age-squared coefficients and standard errors, rounded to 4 decimals
age_coef_second_stage = round(tsls_params['AGE'], 4)
age_std_err_second_stage = round(tsls_bse['AGE'], 4)
agesqr_coef_second_stage = round(tsls_params['AGESQR'], 4)
agesqr_std_err_second_stage = round(tsls_bse['AGESQR'], 4)

# Twelve-row table: a coefficient row followed by a standard-error row per regressor.
# The last six rows (marital status, race, place of work) are not in this specification.
data = {
    'OLS': [YRSED_coef_ols_2, YRSED_std_err_ols_2, age_coef, age_std_err, agesqr_coef, agesqr_std_err] + ["-"] * 6,
    'TSLS': [YRSED_coef_second_stage_2, YRSED_std_err_second_stage_2, age_coef_second_stage, age_std_err_second_stage, agesqr_coef_second_stage, agesqr_std_err_second_stage] + ["-"] * 6,
}
index = ['Years of Education', '', 'AGE', '', 'AGESQR', '', 'Married (1= married)', '', 'Race (1= black)', '', 'Place of Work (1= center city)', '']
df_2_output = pd.DataFrame(data, index=index)

# Display the DataFrame
print(df_2_output)


In [None]:
# Load the 1980_30_39 file from the pinned commit
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'
df = pd.read_csv(url)

# Drop rows with missing values
df.dropna(inplace=True)

# Create birth year dummy variables (1930 through 1939)
for year in range(1930, 1940):
    df[f'BirthYearDummy_{year}'] = (df['BIRTHYR'] == year).astype(int)

# Create dummy variables for marital status, race, place of work type, and region
df['MARST_dummy'] = (df['MARST'] == 1).astype(int)
df['Race_dummy'] = (df['RACE'] == 2).astype(int)
df['PWTYPE_dummy'] = df['PWTYPE'].isin([1, 2, 3]).astype(int)
df['MW_dummy'] = (df['REGION2'] == 'MW').astype(int)
df['South_dummy'] = (df['REGION2'] == 'South').astype(int)
df['NE_dummy'] = (df['REGION2'] == 'NE').astype(int)
df['West_dummy'] = (df['REGION2'] == 'West').astype(int)

# First stage: regress 'YRSED' on a constant and 'BIRTHQTR' (the instrument), HC0 covariance
endog_first_stage = df['YRSED']
exog_first_stage = sm.add_constant(df['BIRTHQTR'])
first_stage_model = sm.OLS(endog_first_stage, exog_first_stage).fit(cov_type='HC0')

# Predict 'YRSED' using the fitted first-stage model
df['YRSED_predicted'] = first_stage_model.predict()

# Second stage: log(WEEKERN) on predicted 'YRSED' and the control dummies
covariate_columns = ['YRSED_predicted', 'MARST_dummy', 'Race_dummy', 'PWTYPE_dummy', 'MW_dummy', 'South_dummy', 'NE_dummy', 'West_dummy'] + [f'BirthYearDummy_{year}' for year in range(1930, 1940)]
exog_second_stage = sm.add_constant(df[covariate_columns])
endog_second_stage = np.log(df['WEEKERN'])  # Natural logarithm of 'WEEKERN'
second_stage_model = sm.OLS(endog_second_stage, exog_second_stage).fit(cov_type='HC0')

# TSLS coefficients and standard errors, rounded to 4 decimals
YRSED_coef_second_stage_3 = round(second_stage_model.params['YRSED_predicted'], 4)
YRSED_std_err_second_stage_3 = round(second_stage_model.bse['YRSED_predicted'], 4)
MARST_dummy_coef_1 = round(second_stage_model.params['MARST_dummy'], 4)
MARST_dummy_std_err_1 = round(second_stage_model.bse['MARST_dummy'], 4)
Race_dummy_coef_1 = round(second_stage_model.params['Race_dummy'], 4)
Race_dummy_std_err_1 = round(second_stage_model.bse['Race_dummy'], 4)
PWTYPE_dummy_coef_1 = round(second_stage_model.params['PWTYPE_dummy'], 4)
PWTYPE_dummy_std_err_1 = round(second_stage_model.bse['PWTYPE_dummy'], 4)

# OLS counterpart of this specification: the same controls with observed 'YRSED' in place of
# the first-stage fitted values. The 'OLS' column of the table below needs these eight values,
# and no other cell assigns them. The fit uses the default covariance, matching the other OLS
# cells in this notebook.
ols_covariate_columns = ['YRSED'] + covariate_columns[1:]
ols_model_3 = sm.OLS(endog_second_stage, sm.add_constant(df[ols_covariate_columns])).fit()
YRSED_coef_ols_3 = round(ols_model_3.params['YRSED'], 4)
YRSED_std_err_ols_3 = round(ols_model_3.bse['YRSED'], 4)
MARST_dummy_coef_ols_1 = round(ols_model_3.params['MARST_dummy'], 4)
MARST_dummy_std_err_ols_1 = round(ols_model_3.bse['MARST_dummy'], 4)
Race_dummy_coef_ols_1 = round(ols_model_3.params['Race_dummy'], 4)
Race_dummy_std_err_ols_1 = round(ols_model_3.bse['Race_dummy'], 4)
PWTYPE_dummy_coef_ols_1 = round(ols_model_3.params['PWTYPE_dummy'], 4)
PWTYPE_dummy_std_err_ols_1 = round(ols_model_3.bse['PWTYPE_dummy'], 4)

# Twelve-row table: a coefficient row followed by a standard-error row per regressor.
# AGE and AGESQR are not part of this specification, hence the '-' placeholders.
data = {
    'OLS': [YRSED_coef_ols_3, YRSED_std_err_ols_3, "-", "-", "-", "-", MARST_dummy_coef_ols_1, MARST_dummy_std_err_ols_1, Race_dummy_coef_ols_1, Race_dummy_std_err_ols_1, PWTYPE_dummy_coef_ols_1, PWTYPE_dummy_std_err_ols_1],
    'TSLS': [YRSED_coef_second_stage_3, YRSED_std_err_second_stage_3, "-", "-", "-", "-", MARST_dummy_coef_1, MARST_dummy_std_err_1, Race_dummy_coef_1, Race_dummy_std_err_1, PWTYPE_dummy_coef_1, PWTYPE_dummy_std_err_1]
}
index = ['Years of Education', '', 'AGE', '', 'AGESQR', '', 'Married (1= married)', '', 'Race (1= black)', '', 'Place of Work (1= center city)', '']
df_3_output = pd.DataFrame(data, index=index)

# Display DataFrame
print(df_3_output)


In [None]:
# Load the 1980_30_39 file from the pinned commit
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'
df = pd.read_csv(url)

# Keep complete rows only
df.dropna(inplace=True)

# One 0/1 indicator per birth year, 1930 through 1939
for year in range(1930, 1940):
    df[f'BirthYearDummy_{year}'] = df['BIRTHYR'].eq(year).astype(int)

# Binary controls: MARST equal to 1, RACE equal to 2, PWTYPE in {1, 2, 3}
df['MARST_dummy'] = df['MARST'].eq(1).astype(int)
df['Race_dummy'] = df['RACE'].eq(2).astype(int)
df['PWTYPE_dummy'] = df['PWTYPE'].isin([1, 2, 3]).astype(int)

# Regressors: schooling, birth year indicators, the three binary controls, age and age squared
independent_vars = ['YRSED']
independent_vars += [f'BirthYearDummy_{year}' for year in range(1930, 1940)]
independent_vars += ['MARST_dummy', 'Race_dummy', 'PWTYPE_dummy', 'AGE', 'AGESQR']

# Outcome is log weekly earnings; the design matrix gets an intercept column
Y = np.log(df['WEEKERN'])
X = sm.add_constant(df[independent_vars])

# Fit the OLS regression model (default, non-robust covariance)
ols_model = sm.OLS(Y, X).fit()
ols_params, ols_bse = ols_model.params, ols_model.bse

# Schooling coefficient and standard error, rounded to 4 decimals
YRSED_coef_ols_4 = round(ols_params['YRSED'], 4)
YRSED_std_err_ols_4 = round(ols_bse['YRSED'], 4)

# Age and age-squared coefficients and standard errors
age_coef_2 = round(ols_params['AGE'], 4)
age_std_err_2 = round(ols_bse['AGE'], 4)
agesqr_coef_2 = round(ols_params['AGESQR'], 4)
agesqr_std_err_2 = round(ols_bse['AGESQR'], 4)

# Binary-control coefficients and standard errors
MARST_dummy_coef_ols_2 = round(ols_params['MARST_dummy'], 4)
MARST_dummy_std_err_ols_2 = round(ols_bse['MARST_dummy'], 4)
Race_dummy_coef_ols_2 = round(ols_params['Race_dummy'], 4)
Race_dummy_std_err_ols_2 = round(ols_bse['Race_dummy'], 4)
PWTYPE_dummy_coef_ols_2 = round(ols_params['PWTYPE_dummy'], 4)
PWTYPE_dummy_std_err_ols_2 = round(ols_bse['PWTYPE_dummy'], 4)


In [None]:
# Load the dataset: the same pinned 1980_30_39 file that the other regression cells read.
# NOTE(review): confirm this is the file intended for this specification.
url = 'https://raw.githubusercontent.com/mgstevens02/Compulsory-Education-IV-Replication/cb1cddf3b7ed8b127357f5aea2caf64b97b5ef7b/1980_30_39_all_data.csv'
df = pd.read_csv(url)

# Drop rows with missing values
df.dropna(inplace=True)

# Function to create dummy variables
def create_dummies(df, column_name, prefix):
    """Add one 0/1 column per distinct value of `column_name` to `df`, in place.

    Each new column is named '<prefix>_<value>' and flags the rows whose
    `column_name` equals that value. Nothing is returned.
    """
    for level in df[column_name].unique():
        matches_level = df[column_name].eq(level)
        df[f'{prefix}_{level}'] = matches_level.astype(int)

# Create one 0/1 column per distinct value of each categorical feature
create_dummies(df, 'BIRTHYR', 'BirthYearDummy')
create_dummies(df, 'MARST', 'MARST_dummy')
create_dummies(df, 'RACE', 'Race_dummy')
create_dummies(df, 'PWTYPE', 'PWTYPE_dummy')
create_dummies(df, 'REGION2', 'Region_dummy')

# First stage regression: 'YRSED' on a constant and 'BIRTHQTR' (the instrument)
endog_first_stage = df['YRSED']
exog_first_stage = sm.add_constant(df['BIRTHQTR'])

# Fit the first stage model
first_stage_model = sm.OLS(endog_first_stage, exog_first_stage).fit(cov_type='HC0')

# Predict YRSED using the fitted model
df['YRSED_predicted'] = first_stage_model.predict()

# Second stage regression: log(WEEKERN) on predicted YRSED and every generated dummy column.
# The match is case-insensitive because the birth year columns are spelled 'BirthYearDummy_*'
# (capital D); a plain 'dummy' substring test would leave them out of the model.
# NOTE(review): a full set of category dummies is entered for each feature together with a
# constant — confirm whether one reference category per feature should be dropped.
Y = np.log(df['WEEKERN'])
independent_vars = ['YRSED_predicted'] + [col for col in df.columns if 'dummy' in col.lower()]
X = sm.add_constant(df[independent_vars])

# Fit the second stage model
second_stage_model = sm.OLS(Y, X).fit(cov_type='HC0')

# Coefficients and standard errors for every regressor, rounded to 4 decimals
results = {
    'Coefficient': second_stage_model.params.round(4),
    'Standard Error': second_stage_model.bse.round(4)
}
results_df = pd.DataFrame(results)

# Display the results
print(results_df)


In [None]:
# Collect the per-specification tables, in specification order.
# NOTE(review): every df_N_output listed here must have been built by an earlier cell;
# a missing one raises NameError on this line.
spec_tables = [df_1_output, df_2_output, df_3_output, df_4_output]

# Tag each table's 'OLS' / 'TSLS' columns with its specification number and replace its row
# labels with 0..n-1 so the tables line up positionally. rename() and reset_index() return new
# frames here, so the original df_N_output tables keep their column names and row labels.
labelled_tables = [
    table.rename(columns={'OLS': f'OLS ({spec})', 'TSLS': f'TSLS ({spec})'}).reset_index(drop=True)
    for spec, table in enumerate(spec_tables, start=1)
]

# Concatenate side by side (axis=1) to create the combined table
combined_df = pd.concat(labelled_tables, axis=1)

# Restore readable row names: a coefficient row followed by an unlabeled standard-error row
row_names = ['Years of Education', '', 'AGE', '', 'AGESQR', '', 'Married (1= married)', '', 'Race (1= black)', '', 'Place of Work (1= center city)', '']
combined_df.index = row_names[:len(combined_df.index)]

# Print the combined DataFrame to display results from all models and specifications
print(combined_df)


In [None]:
# Function to rename columns and reset index for a given dataframe
def prepare_dataframe(df, ols_suffix, tsls_suffix):
    """
    Tag the 'OLS' and 'TSLS' columns with a suffix and replace the index with 0..n-1.

    Both changes are applied to `df` itself; other columns are left as they are.

    Parameters:
    - df: The dataframe to modify in place.
    - ols_suffix (str): Text appended (after a space) to the 'OLS' column name.
    - tsls_suffix (str): Text appended (after a space) to the 'TSLS' column name.

    Returns:
    - DataFrame: The same object that was passed in, after modification.
    """
    new_labels = {
        'OLS': f'OLS {ols_suffix}',
        'TSLS': f'TSLS {tsls_suffix}',
    }
    df.rename(columns=new_labels, inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

# Requires df_1_output .. df_4_output to exist already; a missing one raises NameError here.
# prepare_dataframe renames the 'OLS'/'TSLS' columns and resets the index of each table IN PLACE,
# so each df_N_prepared is the same object as the corresponding df_N_output.
df_1_prepared = prepare_dataframe(df_1_output, '(1)', '(1)')
df_2_prepared = prepare_dataframe(df_2_output, '(2)', '(2)')
df_3_prepared = prepare_dataframe(df_3_output, '(3)', '(3)')
df_4_prepared = prepare_dataframe(df_4_output, '(4)', '(4)')

# Place the four tables side by side (axis=1); rows line up on the reset 0..n-1 index
combined_df = pd.concat([df_1_prepared, df_2_prepared, df_3_prepared, df_4_prepared], axis=1)

# Readable row names: each regressor label is followed by an unlabeled standard-error row.
# The slice guards against the combined table having fewer than twelve rows.
row_names = ['Years of Education', '', 'AGE', '', 'AGESQR', '', 'Married (1= married)', '', 'Race (1= black)', '', 'Place of Work (1= center city)', '']
combined_df.index = row_names[:len(combined_df.index)]

# Display the combined dataframe
print(combined_df)


