In [1]:
import io
import pickle
import re

import numpy as np
import pandas as pd

In [2]:
# Load the saved model summaries; later cells unpack each entry as a (path, model) pair.
# NOTE(review): pickle executes arbitrary code on load -- only open files produced by this project.
with open('../outputs/model_summaries.pickle', 'rb') as pickle_file:
    model_summaries = pickle.load(pickle_file)

In [3]:
# List every stored model: the text before the last "_" of its path is the
# output variable, the text after it (up to the first ".") is the input structure.
for model_path, model in model_summaries:
    path_parts = model_path.rpartition("_")
    output_var = path_parts[0]
    input_structure = path_parts[2].partition(".")[0]

    print(output_var, input_structure, sep="\n")
    

HHVEHCNT_P_CAP_mean_norm
BE and NHTS linear
HHVEHCNT_P_CAP_mean_norm
BE and NHTS quadratic
HHVEHCNT_P_CAP_mean_norm
BE linear
HHVEHCNT_P_CAP_mean_norm
BE quadratic
HHVEHCNT_P_CAP_mean_norm
NHTS linear
HHVEHCNT_P_CAP_mean_norm
NHTS quadratic
HHVEHCNT_mean_norm
BE and NHTS linear
HHVEHCNT_mean_norm
BE and NHTS quadratic
HHVEHCNT_mean_norm
BE linear
HHVEHCNT_mean_norm
BE quadratic
HHVEHCNT_mean_norm
NHTS linear
HHVEHCNT_mean_norm
NHTS quadratic
TRPTRANS_1_mean_norm
BE and NHTS linear
TRPTRANS_1_mean_norm
BE and NHTS quadratic
TRPTRANS_1_mean_norm
BE linear
TRPTRANS_1_mean_norm
BE quadratic
TRPTRANS_1_mean_norm
NHTS linear
TRPTRANS_1_mean_norm
NHTS quadratic
TRPTRANS_2_mean_norm
BE and NHTS linear
TRPTRANS_2_mean_norm
BE and NHTS quadratic
TRPTRANS_2_mean_norm
BE linear
TRPTRANS_2_mean_norm
BE quadratic
TRPTRANS_2_mean_norm
NHTS linear
TRPTRANS_2_mean_norm
NHTS quadratic
TRPTRANS_3_mean_norm
BE and NHTS linear
TRPTRANS_3_mean_norm
BE and NHTS quadratic
TRPTRANS_3_mean_norm
BE linear
TRPTRA

In [4]:
def results_summary_to_dataframe(results):
    '''Collect the key statistics of a statsmodels results object in a DataFrame.

    Parameters
    ----------
    results : object
        Anything exposing ``params``, ``pvalues`` and ``conf_int()``.  The
        attributes may be pandas objects or plain NumPy arrays; ``conf_int()``
        must yield a two-column table (lower bound, upper bound), one row per
        parameter.

    Returns
    -------
    pandas.DataFrame
        One row per parameter with the columns ``coeff``, ``pvals``,
        ``conf_lower`` and ``conf_higher`` (in that order).  The row index is
        taken from ``results.params`` when it has one, otherwise it is a
        default integer index.
    '''
    coeff = results.params
    pvals = results.pvalues

    # Evaluate the interval once and index it positionally: ``conf_int()[0]``
    # selects the lower-bound *column* only for a DataFrame; on an ndarray it
    # would pick the first *row* instead.
    conf = np.asarray(results.conf_int())

    return pd.DataFrame(
        {
            "coeff": np.asarray(coeff),
            "pvals": np.asarray(pvals),
            "conf_lower": conf[:, 0],
            "conf_higher": conf[:, 1],
        },
        index=getattr(coeff, "index", None),
        columns=["coeff", "pvals", "conf_lower", "conf_higher"],
    )

In [5]:
def condense_df(model_df):
    """Condense a coefficient table into two formatted text columns.

    Reads the 'coef', 'std err' and 'P>|t|' columns of ``model_df`` row by
    row and returns a new frame (default integer index) with:

    * 'coef' -- the coefficient with three decimals followed by significance
      stars: ``***`` for p < 0.01, ``**`` for p < 0.05, ``*`` for p < 0.1;
    * 'std'  -- the 'std err' value with three decimals, in parentheses.
    """
    def _stars(p_value):
        # Thresholds are checked from the strictest to the loosest.
        for threshold, marker in ((0.01, "***"), (0.05, "**"), (0.1, "*")):
            if p_value < threshold:
                return marker
        return ""

    def _three_decimals(number):
        return '{:.3f}'.format(np.round(number, decimals = 3))

    def _format_row(row):
        coef_text = _three_decimals(model_df['coef'].values[row])
        coef_text = coef_text + _stars(model_df['P>|t|'].values[row])
        std_text = '(' + _three_decimals(model_df['std err'].values[row]) + ')'
        return coef_text, std_text

    formatted_rows = [_format_row(row) for row in range(model_df.shape[0])]
    coef_cells = [cells[0] for cells in formatted_rows]
    std_cells = [cells[1] for cells in formatted_rows]

    # Two rows (coef / std) transposed into two columns.
    return pd.DataFrame([coef_cells, std_cells], index = ['coef', 'std']).T

In [6]:
def to_latex_table(model_df):
    """Render ``model_df`` as a LaTeX tabular with vertical and horizontal rules.

    The frame is rounded to two decimals (``DataFrame.round`` leaves
    non-numeric cells such as pre-formatted strings untouched) and exported
    with ``to_latex(index=True)``.  The output is then post-processed:

    * the column specification of ``\\begin{tabular}{...}`` is replaced by
      ``| l | r | ... | r |`` -- first column left-aligned, every other
      column right-aligned -- for any number of columns;
    * ``\\hline`` is appended to ``\\toprule``, ``\\midrule`` and
      ``\\bottomrule``.

    Column specifications containing anything other than ``l``, ``r`` or
    ``c`` are left as they are.

    Returns
    -------
    str
        The LaTeX source of the table.
    """
    latex = model_df.round(decimals=2).to_latex(index=True)

    def _boxed_spec(match):
        # Same layout the fixed 7- and 11-column replacements used to produce,
        # but derived from the actual number of columns.
        n_columns = len(match.group(2))
        return match.group(1) + "| l |" + " r |" * (n_columns - 1) + match.group(3)

    latex = re.sub(r"(\\begin\{tabular\}\{)([lrc]+)(\})", _boxed_spec, latex, count=1)

    for rule in ("\\toprule", "\\midrule", "\\bottomrule"):
        latex = latex.replace(rule, rule + "\\hline")
    return latex

In [10]:
# Row labels for the condensed tables, keyed by positional row index
# (condense_df returns a default integer index).  The combined specification
# lists the NHTS terms first, followed by the BE terms without a second const.
be_names = ["const", "Density", "Diversity", "Design"]
nhts_names = ["const", "Age", "Household Size", "Household Income", "Renter Housed",
              "Population Density", "Housing Density", "Female",
              "College and Higher Edu", "Minority Group",
              "Home Owner", "Home Renter",
              "Rural Area", "Suburban Area", "Small Town Area", "Urban Area"]
name_map = {
    "BE linear": dict(enumerate(be_names)),
    "NHTS linear": dict(enumerate(nhts_names)),
    "BE and NHTS linear": dict(enumerate(nhts_names + be_names[1:])),
}

# input structure -> list of (output variable, condensed coefficient table)
combined_mapper = {structure: [] for structure in name_map}

for model_path, model in model_summaries:
    # The text before the last "_" is the output variable; the text after it,
    # up to the first ".", is the input structure (e.g. "BE linear").
    path_parts = model_path.rpartition("_")
    output_var = path_parts[0]
    input_structure = path_parts[2].split(".")[0]

    # Only the linear specifications listed in name_map are tabulated here.
    if input_structure not in name_map:
        continue

    print(output_var)
    print(input_structure)

    # tables[1] has to provide the 'coef', 'std err' and 'P>|t|' columns that
    # condense_df reads.  The HTML is wrapped in StringIO because passing a
    # literal HTML string to pd.read_html is deprecated.
    results_as_html = model.tables[1].as_html()
    coef_table = pd.read_html(io.StringIO(results_as_html), header=0, index_col=0)[0]
    model_df = condense_df(coef_table).rename(index=name_map[input_structure])

    print(model_df)
    print("_________________________")

    combined_mapper[input_structure].append((output_var, model_df))

HHVEHCNT_P_CAP_mean_norm
BE and NHTS linear
                             coef      std
const                    1.178***  (0.177)
Age                         0.000  (0.001)
Household Size          -0.462***  (0.012)
Household Income         0.003***  (0.000)
Renter Housed             -0.114*  (0.066)
Population Density        0.000**  (0.000)
Housing Density         -0.000***  (0.000)
Female                  -0.445***  (0.048)
College and Higher Edu      0.019  (0.041)
Minority Group          -0.110***  (0.032)
Home Owner                  0.236  (0.147)
Home Renter               -0.266*  (0.149)
Rural Area               0.353***  (0.042)
Suburban Area           -0.089***  (0.031)
Small Town Area            -0.025  (0.037)
Urban Area                 -0.027  (0.038)
Density                    -0.116  (0.076)
Diversity                  -0.016  (0.030)
Design                  -0.294***  (0.101)
_________________________
HHVEHCNT_P_CAP_mean_norm
BE linear
                coef      std
const

In [11]:
# Show each BE-linear table under the name of its output variable.
for output_var, be_table in combined_mapper["BE linear"]:
    print(output_var, be_table, sep="\n")

HHVEHCNT_P_CAP_mean_norm
                coef      std
const       0.361***  (0.019)
Density    -0.721***  (0.077)
Diversity  -0.196***  (0.033)
Design     -1.022***  (0.085)
HHVEHCNT_mean_norm
                coef      std
const       0.360***  (0.019)
Density    -0.555***  (0.077)
Diversity  -0.100***  (0.033)
Design     -1.165***  (0.086)
TRPTRANS_1_mean_norm
                coef      std
const      -0.269***  (0.019)
Density     1.632***  (0.077)
Diversity     -0.031  (0.033)
Design      0.956***  (0.085)
TRPTRANS_2_mean_norm
                coef      std
const       0.292***  (0.018)
Density    -1.951***  (0.075)
Diversity    0.077**  (0.033)
Design     -0.971***  (0.083)
TRPTRANS_3_mean_norm
                coef      std
const      -0.209***  (0.019)
Density     1.669***  (0.078)
Diversity   -0.080**  (0.034)
Design      0.515***  (0.087)


In [12]:
# Put the BE-linear tables of all output variables side by side
# (one coef/std column pair per output variable, in stored order).
be_linear_tables = [table for _name, table in combined_mapper["BE linear"]]
BE_linear_df = pd.concat(be_linear_tables, axis=1)

In [13]:
# Display the side-by-side BE-linear table (one coef/std column pair per output variable).
BE_linear_df

Unnamed: 0,coef,std,coef.1,std.1,coef.2,std.2,coef.3,std.3,coef.4,std.4
const,0.361***,(0.019),0.360***,(0.019),-0.269***,(0.019),0.292***,(0.018),-0.209***,(0.019)
Density,-0.721***,(0.077),-0.555***,(0.077),1.632***,(0.077),-1.951***,(0.075),1.669***,(0.078)
Diversity,-0.196***,(0.033),-0.100***,(0.033),-0.031,(0.033),0.077**,(0.033),-0.080**,(0.034)
Design,-1.022***,(0.085),-1.165***,(0.086),0.956***,(0.085),-0.971***,(0.083),0.515***,(0.087)


In [14]:
# Print the LaTeX source that to_latex_table produces for the BE-linear table.
print(to_latex_table(BE_linear_df))

\begin{tabular}{| l | r | r | r | r | r | r | r | r | r | r |}
\toprule\hline
{} &       coef &      std &       coef &      std &       coef &      std &       coef &      std &       coef &      std \\
\midrule\hline
const     &   0.361*** &  (0.019) &   0.360*** &  (0.019) &  -0.269*** &  (0.019) &   0.292*** &  (0.018) &  -0.209*** &  (0.019) \\
Density   &  -0.721*** &  (0.077) &  -0.555*** &  (0.077) &   1.632*** &  (0.077) &  -1.951*** &  (0.075) &   1.669*** &  (0.078) \\
Diversity &  -0.196*** &  (0.033) &  -0.100*** &  (0.033) &     -0.031 &  (0.033) &    0.077** &  (0.033) &   -0.080** &  (0.034) \\
Design    &  -1.022*** &  (0.085) &  -1.165*** &  (0.086) &   0.956*** &  (0.085) &  -0.971*** &  (0.083) &   0.515*** &  (0.087) \\
\bottomrule\hline
\end{tabular}



In [15]:
# Put the NHTS-linear tables of all output variables side by side
# (one coef/std column pair per output variable, in stored order).
nhts_linear_tables = [table for _name, table in combined_mapper["NHTS linear"]]
NHTS_linear_df = pd.concat(nhts_linear_tables, axis=1)

In [16]:
# Print the LaTeX source that to_latex_table produces for the NHTS-linear table.
print(to_latex_table(NHTS_linear_df))

\begin{tabular}{| l | r | r | r | r | r | r | r | r | r | r |}
\toprule\hline
{} &       coef &      std &       coef &      std &       coef &      std &       coef &      std &       coef &      std \\
\midrule\hline
const                  &   1.111*** &  (0.175) &  -0.583*** &  (0.175) &     -0.027 &  (0.189) &     -0.047 &  (0.178) &     -0.040 &  (0.193) \\
Age                    &      0.001 &  (0.001) &  -0.004*** &  (0.001) &  -0.004*** &  (0.001) &   0.005*** &  (0.001) &     -0.002 &  (0.001) \\
Household Size         &  -0.460*** &  (0.012) &   0.291*** &  (0.012) &  -0.058*** &  (0.013) &   0.059*** &  (0.012) &     0.022* &  (0.013) \\
Household Income       &   0.003*** &  (0.000) &   0.004*** &  (0.000) &   0.001*** &  (0.000) &  -0.002*** &  (0.000) &      0.000 &  (0.000) \\
Renter Housed          &    -0.130* &  (0.066) &  -0.206*** &  (0.066) &   0.416*** &  (0.071) &  -0.328*** &  (0.067) &     -0.028 &  (0.073) \\
Population Density     &     0.000* &  (0.000) &   

In [17]:
# Put the combined BE-and-NHTS linear tables of all output variables side by side
# (one coef/std column pair per output variable, in stored order).
be_nhts_linear_tables = [table for _name, table in combined_mapper["BE and NHTS linear"]]
BE_NHTS_linear_df = pd.concat(be_nhts_linear_tables, axis=1)

In [18]:
# Print the LaTeX source that to_latex_table produces for the combined BE-and-NHTS linear table.
print(to_latex_table(BE_NHTS_linear_df))

\begin{tabular}{| l | r | r | r | r | r | r | r | r | r | r |}
\toprule\hline
{} &       coef &      std &       coef &      std &       coef &      std &       coef &      std &       coef &      std \\
\midrule\hline
const                  &   1.178*** &  (0.177) &  -0.525*** &  (0.177) &     -0.048 &  (0.191) &     -0.064 &  (0.179) &      0.049 &  (0.194) \\
Age                    &      0.000 &  (0.001) &  -0.005*** &  (0.001) &  -0.004*** &  (0.001) &   0.004*** &  (0.001) &     -0.002 &  (0.001) \\
Household Size         &  -0.462*** &  (0.012) &   0.289*** &  (0.012) &  -0.056*** &  (0.013) &   0.057*** &  (0.012) &     0.022* &  (0.013) \\
Household Income       &   0.003*** &  (0.000) &   0.004*** &  (0.000) &   0.001*** &  (0.000) &  -0.001*** &  (0.000) &     -0.000 &  (0.000) \\
Renter Housed          &    -0.114* &  (0.066) &  -0.192*** &  (0.066) &   0.394*** &  (0.071) &  -0.309*** &  (0.067) &     -0.032 &  (0.073) \\
Population Density     &    0.000** &  (0.000) &   