In [1]:
# summary_col author:  @adrianmoss 
# author of this file: @donbowen, adapting code by @adrianmoss

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
from summary_colFE import summary_col # available at https://github.com/LeDataSciFi/ledatascifi-2024/tree/main/community_codebook
                                        # overrides, so don't use this: from statsmodels.iolib.summary2 import summary_col
                                        # pending PR in statsmodels: https://github.com/statsmodels/statsmodels/pull/9191 

# Fetch the "diamonds" example dataset through seaborn.
diamonds = sns.load_dataset("diamonds")

# One (formula, column label) pair per model. Every model has log(price) as
# the dependent variable; `np` inside a formula refers to the numpy import at
# the top of the file, so that import must stay even though it looks unused.
model_specs = [
    ("np.log(price) ~ carat", "log(Price) ~ Carat"),
    ("np.log(price) ~ np.log(carat)", "log(Price) ~ log(Carat)"),
    ("np.log(price) ~ C(cut)", "log(Price) ~ C(Cut)"),
    ("np.log(price) ~ C(clarity)", "log(Price) ~ C(Clarity)"),
    (
        "np.log(price) ~ carat + C(cut) + C(clarity)",
        "log(Price) ~ Carat + C(Cut) + C(Clarity)",
    ),
]

# Fit each spec by OLS, keeping the fitted result paired with its label:
# `regressions` is a list of (fitted results, label) tuples.
regressions = [
    (smf.ols(formula, data=diamonds).fit(), label)
    for formula, label in model_specs
]

# Extra summary rows passed to summary_col: each key is the row label and each
# value is a callable that receives a fitted result and returns the cell text.
# `nobs` is cast to int so the count is rendered without a decimal part.
info_dict = {
    "No. observations": lambda x: format(int(x.nobs), "d"),
}

# Split the (fitted model, label) pairs into the two parallel lists handed to
# summary_col. Both are built as real lists, matching the original call.
fitted_models = [model for model, _ in regressions]
column_titles = [
    f"{position}. " + label
    for position, (_, label) in enumerate(regressions, start=1)  # 1-based numbering
]

# Build the side-by-side regression table with significance stars and the
# extra rows from info_dict. NOTE(review): judging by the rendered table,
# `fixed_effects` collapses the C(cut) / C(clarity) dummies into "cut FE" /
# "clarity FE" Yes-rows -- confirm against the summary_colFE implementation.
summary = summary_col(
    fitted_models,
    model_names=column_titles,
    stars=True,
    info_dict=info_dict,
    fixed_effects=["cut", "clarity"],
)
summary  # bare trailing expression so the notebook displays the table

0,1,2,3,4,5
,1. log(Price) ~ Carat,2. log(Price) ~ log(Carat),3. log(Price) ~ C(Cut),4. log(Price) ~ C(Clarity),5. log(Price) ~ Carat + C(Cut) + C(Clarity)
Intercept,6.2150***,8.4487***,7.6395***,7.4052***,6.3613***
,(0.0033),(0.0014),(0.0068),(0.0234),(0.0090)
carat,1.9698***,,,,2.0851***
,(0.0036),,,,(0.0036)
np.log(carat),,1.6758***,,,
,,(0.0019),,,
No. observations,53940,53940,53940,53940,53940
cut FE,,,Yes,,Yes
clarity FE,,,,Yes,Yes
