In [10]:
from statsmodels.formula.api import ols
import statsmodels.formula.api as smf
import statsmodels.api as sm
from automatedFunction import dataSequence
import pandas as pd
from stargazer.stargazer import Stargazer

In [11]:
# Display labels for the regression tables, keyed by the coefficient names the
# formula API generates. Energy-label levels are contrasts against reference
# level 'd', so 'd' itself has no entry.
_ENERGY_LABEL_TERM = "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))"

nameDict = {"C(is_efficient)[T.True]": 'Is efficient'}
nameDict.update({
    # The table label is simply the upper-cased level name ('na-c' -> 'NA-C').
    f"{_ENERGY_LABEL_TERM}[T.{level}]": level.upper()
    for level in ('a', 'b', 'c', 'e', 'f', 'g', 'na', 'na-c')
})

In [2]:
# Load the prepared data set from the project helper.
data = dataSequence(yearBuildBuckets=10,coordinatBucketSize=5,is_age_bucket=True, imputationArea=False, kmeansCluster=400)
# Exclude rows whose categorizedEnergyLabel_simple is 'na'. query() already
# returns a new frame, so no defensive copy is needed beforehand.
# NOTE(review): the filter uses `categorizedEnergyLabel_simple`, while the
# energy-label models below use `categorizedEnergyLabel_simple_suplemended` --
# confirm this is the intended column.
data_noNA = data.query("categorizedEnergyLabel_simple != 'na'")

# Control terms shared by every baseline specification. Kept in one place so
# the ten formulas below cannot drift apart.
_BASELINE_CONTROLS = (
    'C(property_property_type)'
    ' + C(transaction_year, Treatment(reference=2017))'
    ' + C(building_age_at_transaction_bucket, Treatment(reference=0))'
    ' + C(renovated)'
    ' + C(mixedUseDummy)'
    ' + C(property_land_ownership)'
    ' + calculations_sum_area_log'
    ' + C(transactions_simplified)'
)

# Location term appended per variant; 'non' means no location term at all.
_LOCATION_TERMS = {
    'ml': 'C(kmeans_cluster)',
    'grid': 'C(coordinatBucket)',
    'city': 'C(address_city)',
    'pc4': 'C(pc4)',
    'non': None,
}


def _baseline_formula(regressor, location, sep=' + '):
    """Assemble one baseline formula for `price_per_meter2_log`.

    Parameters
    ----------
    regressor : str
        The term of interest, placed first on the right-hand side.
    location : str
        Key into `_LOCATION_TERMS` ('ml', 'grid', 'city', 'pc4' or 'non').
    sep : str
        Text placed between the regressor and the controls, and between the
        controls and the location term. It exists only so the generated
        strings stay byte-identical to the original hand-written literals.

    Returns
    -------
    str
        The formula string.

    Raises
    ------
    KeyError
        If `location` is not a key of `_LOCATION_TERMS`.
    """
    formula = 'price_per_meter2_log ~ ' + regressor + sep + _BASELINE_CONTROLS
    location_term = _LOCATION_TERMS[location]
    if location_term is not None:
        formula += sep + location_term
    return formula


def _tukey_rlm(formula, frame):
    """Construct (without fitting) an RLM using a fresh Tukey biweight norm."""
    return smf.rlm(formula, M=sm.robust.norms.TukeyBiweight(), data=frame)


_EFFICIENT_TERM = 'C(is_efficient)'
_ENERGY_LABEL_TERM_SPEC = "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))"

# Specifications with the binary efficiency indicator as regressor of interest.
modelspec_baseline_efficient_ml     = _baseline_formula(_EFFICIENT_TERM, 'ml')
modelspec_baseline_efficient_grid   = _baseline_formula(_EFFICIENT_TERM, 'grid')
modelspec_baseline_efficient_city   = _baseline_formula(_EFFICIENT_TERM, 'city')
modelspec_baseline_efficient_pc4    = _baseline_formula(_EFFICIENT_TERM, 'pc4')
modelspec_baseline_efficient_non    = _baseline_formula(_EFFICIENT_TERM, 'non')

# Specifications with the categorical energy label (reference level 'd').
# sep='+ ' reproduces the spacing of the original literals exactly.
modelspec_baseline_energyLabel_ml   = _baseline_formula(_ENERGY_LABEL_TERM_SPEC, 'ml', sep='+ ')
modelspec_baseline_energyLabel_grid = _baseline_formula(_ENERGY_LABEL_TERM_SPEC, 'grid', sep='+ ')
modelspec_baseline_energyLabel_city = _baseline_formula(_ENERGY_LABEL_TERM_SPEC, 'city', sep='+ ')
modelspec_baseline_energyLabel_pc4  = _baseline_formula(_ENERGY_LABEL_TERM_SPEC, 'pc4', sep='+ ')
modelspec_baseline_energyLabel_non = _baseline_formula(_ENERGY_LABEL_TERM_SPEC, 'non', sep='+ ')


# RLM (robust linear models). The models are only constructed here; nothing is
# estimated until .fit() is called on them.
# The efficiency models use data_noNA, the energy-label models the full data.
isEfficient_rml_ml = _tukey_rlm(modelspec_baseline_efficient_ml, data_noNA)
isEfficient_rml_grid = _tukey_rlm(modelspec_baseline_efficient_grid, data_noNA)
isEfficient_rml_city = _tukey_rlm(modelspec_baseline_efficient_city, data_noNA)
isEfficient_rml_pc4 = _tukey_rlm(modelspec_baseline_efficient_pc4, data_noNA)
isEfficient_rml_non = _tukey_rlm(modelspec_baseline_efficient_non, data_noNA)

energyLabel_rml_ml = _tukey_rlm(modelspec_baseline_energyLabel_ml, data)
energyLabel_rml_grid = _tukey_rlm(modelspec_baseline_energyLabel_grid, data)
energyLabel_rml_city = _tukey_rlm(modelspec_baseline_energyLabel_city, data)
energyLabel_rml_pc4 = _tukey_rlm(modelspec_baseline_energyLabel_pc4, data)
energyLabel_rml_non = _tukey_rlm(modelspec_baseline_energyLabel_non, data)

# OLS counterparts of the same specifications (also unfitted).
isEfficient_ols_ml = ols(modelspec_baseline_efficient_ml, data=data_noNA)
isEfficient_ols_grid = ols(modelspec_baseline_efficient_grid, data=data_noNA)
isEfficient_ols_city = ols(modelspec_baseline_efficient_city, data=data_noNA)
isEfficient_ols_pc4 = ols(modelspec_baseline_efficient_pc4, data=data_noNA)
isEfficient_ols_non = ols(modelspec_baseline_efficient_non, data=data_noNA)

energyLabel_ols_ml = ols(modelspec_baseline_energyLabel_ml, data=data)
energyLabel_ols_grid = ols(modelspec_baseline_energyLabel_grid, data=data)
energyLabel_ols_city = ols(modelspec_baseline_energyLabel_city, data=data)
energyLabel_ols_pc4 = ols(modelspec_baseline_energyLabel_pc4, data=data)
energyLabel_ols_non = ols(modelspec_baseline_energyLabel_non, data=data)

NameError: name 'dataSequence' is not defined

In [18]:
# Location comparison table: the five energy-label OLS specifications, one
# column per location variant (ml, grid, city, pc4, non), each fitted with the
# HC1 covariance type.
# To tabulate another model family instead, swap in the matching
# isEfficient_rml_*, isEfficient_ols_* or energyLabel_rml_* objects; the RLM
# ones are fitted with plain .fit(). For the is_efficient models, order on
# 'C(is_efficient)[T.True]' rather than the energy-label terms.
_table_models = (
    energyLabel_ols_ml,
    energyLabel_ols_grid,
    energyLabel_ols_city,
    energyLabel_ols_pc4,
    energyLabel_ols_non,
)
baselineComparisson = Stargazer([model.fit(cov_type='HC1') for model in _table_models])

# Show only the intercept and the energy-label contrasts, in label order.
_label_term = "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.{}]"
_shown_levels = ('a', 'b', 'c', 'e', 'f', 'g', 'na')
baselineComparisson.covariate_order(
    ['Intercept'] + [_label_term.format(level) for level in _shown_levels]
)
baselineComparisson.rename_covariates(nameDict)

# Optional "control variables" footer rows can be added with
# baselineComparisson.add_line(label, values), e.g. 'Property type dummy',
# 'Renovation dummy', 'Mixed use dummy', 'Landownership dummy',
# 'Transaction year dummy', '5 x 5 km dummy', 'Building age dummy',
# 'Summed area'.
# NOTE(review): the earlier draft passed six values per row while this table
# has five columns -- confirm the expected length before re-enabling.
print(baselineComparisson.render_latex())



\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{5}{c}{\textit{Dependent variable:}} \
\cr \cline{5-6}
\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) \\
\hline \\[-1.8ex]
 Intercept & 12.282$^{***}$ & 12.377$^{***}$ & 12.931$^{***}$ & 14.097$^{***}$ & 12.662$^{***}$ \\
  & (0.407) & (0.301) & (0.291) & (0.522) & (0.281) \\
 A & 0.139$^{}$ & 0.115$^{}$ & 0.051$^{}$ & 0.014$^{}$ & 0.288$^{***}$ \\
  & (0.108) & (0.112) & (0.109) & (0.154) & (0.107) \\
 B & 0.076$^{}$ & 0.095$^{}$ & 0.068$^{}$ & -0.009$^{}$ & 0.280$^{**}$ \\
  & (0.112) & (0.115) & (0.115) & (0.155) & (0.113) \\
 C & 0.214$^{*}$ & 0.259$^{**}$ & 0.199$^{*}$ & 0.078$^{}$ & 0.336$^{***}$ \\
  & (0.113) & (0.119) & (0.116) & (0.155) & (0.113) \\
 E & 0.200$^{}$ & 0.200$^{}$ & 0.199$^{}$ & 0.070$^{}$ & 0.392$^{***}$ \\
  & (0.151) & (0.150) & (0.151) & (0.198) & (0.142) \\
 F & -0.035$^{}$ & -0.081$^{}$ & -0.136$^{}$ & -0.168$^{}$ & 0.014$^{}$ \\
  & (0.14

