
# Exporting Tables and Figures for Papers & Reports

This notebook demonstrates how to:
- Run regressions
- Create publication-ready regression tables
- Export tables to LaTeX and Excel
- Export figures to PDF and PNG

Audience: Economists with basic Python knowledge


In [None]:

# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from pathlib import Path



## Create Sample Data
Synthetic income-style data for demonstration purposes.


In [None]:

# Fix the global NumPy seed so every run regenerates the same sample.
np.random.seed(0)
n = 400

# Draw the covariates one at a time, in this exact order: the seeded random
# stream is consumed sequentially, so the order determines the values.
education_draws = np.random.normal(13, 2, n)
age_draws = np.random.randint(25, 60, n)      # integers 25..59 (upper bound exclusive)
hours_draws = np.random.normal(40, 5, n)
male_draws = np.random.binomial(1, 0.5, n)    # 0/1 indicator

df = pd.DataFrame({
    "education_years": education_draws,
    "age": age_draws,
    "hours": hours_draws,
    "male": male_draws,
})

# Latent index: linear in the covariates plus Gaussian noise.
noise = np.random.normal(0, 0.5, n)
latent = (
    0.06 * df["education_years"]
    + 0.015 * df["age"]
    + 0.02 * df["hours"]
    + 0.10 * df["male"]
    + noise
)

# Binary outcome (1 when the latent index is above its sample mean) and a
# log-transformed hours regressor.
df = df.assign(
    high_income=(latent > latent.mean()).astype(int),
    log_hours=np.log(df["hours"]),
)

df.head()



## Estimate Regression Models


In [None]:

# Three nested specifications of the same outcome, from the bivariate model
# to the full set of controls. high_income is a 0/1 variable, so each OLS fit
# is a linear probability model.
formulas = [
    "high_income ~ education_years",
    "high_income ~ education_years + age + male",
    "high_income ~ education_years + age + male + log_hours",
]

# cov_type="HC1" requests heteroskedasticity-robust standard errors.
m1, m2, m3 = [smf.ols(spec, data=df).fit(cov_type="HC1") for spec in formulas]



## Create Regression Table with summary_col


In [None]:

# Combine the three fitted models into one side-by-side regression table.
# - model_names: column headers, numbered as is conventional in papers
# - stars=True: append significance stars to the coefficients
# - info_dict: extra summary rows; each callable receives a fitted result
# - regressor_order: row order of the regressors in the table
table = summary_col(
    results=[m1, m2, m3],
    model_names=["(1)", "(2)", "(3)"],
    stars=True,
    info_dict={
        "N": lambda x: f"{int(x.nobs)}",
        # Label was previously the mojibake "RÂ²" (UTF-8 bytes of "R²"
        # decoded as Latin-1), which rendered garbled in the table.
        "R²": lambda x: f"{x.rsquared:.3f}"
    },
    regressor_order=[
        "education_years",
        "age",
        "male",
        "log_hours"
    ]
)

print(table)



## Export Regression Table


In [None]:

# Save the regression table as a LaTeX file that a paper can \input.
tables_dir = Path("outputs/tables")
tables_dir.mkdir(parents=True, exist_ok=True)  # safe to re-run: no error if it exists

latex_table = table.as_latex()

# Write with an explicit UTF-8 encoding. Without it Python falls back to the
# platform default (e.g. cp1252 on Windows), which can garble or reject
# non-ASCII characters in the table labels.
tex_path = tables_dir / "regression_table.tex"
tex_path.write_text(latex_table, encoding="utf-8")

print(f"LaTeX table saved to {tex_path.as_posix()}")



## Create and Export Coefficient Plot


In [None]:

# Point estimates and standard errors from the full specification (m3).
coefs, ses = m3.params, m3.bse

# One row per model term; the intercept is dropped so the plot shows only
# the regressors.
all_terms = pd.DataFrame({"coef": coefs, "se": ses})
coef_df = all_terms.drop(index="Intercept")


In [None]:

# Coefficient plot: one point per regressor with a horizontal 95% interval.
# The explicit fig/ax interface keeps every call tied to this figure.
fig, ax = plt.subplots(figsize=(6, 4))

ax.errorbar(
    coef_df["coef"],
    coef_df.index,
    xerr=1.96 * coef_df["se"],  # 1.96 = normal critical value for a 95% interval
    fmt="o"                     # markers only, no connecting line
)

# Dashed grey zero line so it is distinguishable from the plotted estimates.
ax.axvline(0, color="gray", linestyle="--", linewidth=1)
ax.set_title("Regression Coefficients with 95% CI")
ax.set_xlabel("Coefficient estimate")
ax.set_ylabel("Regressor")
fig.tight_layout()

# Export a vector version (PDF) and a high-resolution raster version (PNG).
figures_dir = Path("outputs/figures")
figures_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(figures_dir / "coefficients_plot.pdf")
fig.savefig(figures_dir / "coefficients_plot.png", dpi=300)
plt.show()



## Export Data and Results to Excel


In [None]:
# Import piplite and install libraries for learning environment
import piplite
await piplite.install('openpyxl')

In [None]:

# Assemble the full-specification (m3) results, one row per model term.
result_columns = {
    "Coefficient": m3.params,
    "Std. Error": m3.bse,
    "p-value": m3.pvalues,
}
reg_df = pd.DataFrame(result_columns)

# One workbook, two sheets: the data set and the regression results.
# The data sheet omits the row index; the results sheet keeps it because the
# index holds the term names.
with pd.ExcelWriter("results.xlsx") as workbook:
    df.to_excel(workbook, sheet_name="Clean Data", index=False)
    reg_df.to_excel(workbook, sheet_name="Regression Results")

print("Excel file saved to results.xlsx")


#### To view the Excel file, download `results.xlsx` from the sidebar and open it with Microsoft Excel or another spreadsheet program of your choice.


## Key Takeaway

All tables and figures used in a paper should be **generated by code**.
If the data or specification changes, outputs update automatically.
