In [1]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer
from statsmodels.formula.api import ols

from automatedFunction import dataSequence, nameDict

In [3]:
data = dataSequence(yearBuildBuckets=10,coordinatBucketSize=5,is_age_bucket=True, imputationArea=False, kmeansCluster=400)
filtered = data.copy().query("categorizedEnergyLabel_simple != 'na'")

modelspec_baseline_efficient = ('price_per_meter2_log ~ C(is_efficient)+ C(property_property_type) + C(transaction_year, Treatment(reference=2017)) + C(building_age_at_transaction_bucket, Treatment(reference=0)) + C(renovated) + C(mixedUseDummy) + C(property_land_ownership) + calculations_sum_area_log + C(transactions_simplified)+ C(kmeans_cluster)')
modelspec_baseline_energyLabel =("price_per_meter2_log ~ C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))+ C(property_property_type) + C(transaction_year, Treatment(reference=2017)) + C(building_age_at_transaction_bucket, Treatment(reference=0)) + C(renovated) + C(mixedUseDummy) + C(property_land_ownership) + calculations_sum_area_log + C(transactions_simplified)+ C(kmeans_cluster)")

# RML
isEfficient_reg_all_rlm = smf.rlm(modelspec_baseline_efficient, M = sm.robust.norms.TukeyBiweight(), data=filtered)
energyLabel_reg_all_rlm = smf.rlm(modelspec_baseline_energyLabel,M = sm.robust.norms.TukeyBiweight(), data=data)
energyLabel_reg_withouNA_rlm = smf.rlm(modelspec_baseline_energyLabel,M = sm.robust.norms.TukeyBiweight(), data=filtered)

# Normal OLS
isEfficient_reg_all_ols = ols(modelspec_baseline_efficient, data=filtered)
energyLabel_reg_all_ols = ols(modelspec_baseline_energyLabel, data=data)
energyLabel_reg_withouNA_ols = ols(modelspec_baseline_energyLabel, data=filtered)

# Baseline Results Comparison
from stargazer.stargazer import Stargazer
baselineComparisson = Stargazer([
    isEfficient_reg_all_rlm.fit(),
    isEfficient_reg_all_ols.fit(cov_type='HC1'),
    energyLabel_reg_withouNA_rlm.fit(),
    energyLabel_reg_withouNA_ols.fit(cov_type='HC1'),
    energyLabel_reg_all_rlm.fit(),
    energyLabel_reg_all_ols.fit(cov_type='HC1'),
])

baselineComparisson.covariate_order([
    'Intercept',
    'C(is_efficient)[T.True]',
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.a]",
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.b]",
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.c]",
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.e]",
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.f]",
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.g]",
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.na]",
])
baselineComparisson.rename_covariates(nameDict)

baselineComparisson.add_line(r"\textit{Control variables}", ['','','','','',''])
baselineComparisson.add_line('Property type dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
baselineComparisson.add_line('Renovation dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
baselineComparisson.add_line('Mixed use dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
baselineComparisson.add_line('Landownership dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
baselineComparisson.add_line('Transaction year dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
baselineComparisson.add_line('ML Bucket', ['Yes','Yes','Yes','Yes','Yes','Yes'])
baselineComparisson.add_line('Building age dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
baselineComparisson.add_line('Log Summed area', ['Yes','Yes','Yes','Yes','Yes','Yes'])
print(baselineComparisson.render_latex())
baselineComparisson

\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lcccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{6}{c}{\textit{Dependent variable:}} \
\cr \cline{6-7}
\\[-1.8ex] & (1) & (2) & (3) & (4) & (5) & (6) \\
\hline \\[-1.8ex]
 Intercept & 11.503$^{***}$ & 12.183$^{***}$ & 11.497$^{***}$ & 12.117$^{***}$ & 11.490$^{***}$ & 12.282$^{***}$ \\
  & (0.493) & (0.430) & (0.502) & (0.453) & (0.418) & (0.407) \\
 Is efficient & 0.218$^{***}$ & 0.162$^{**}$ & & & & \\
  & (0.053) & (0.077) & & & & \\
 A & & & 0.179$^{**}$ & 0.179$^{}$ & 0.222$^{***}$ & 0.139$^{}$ \\
  & & & (0.081) & (0.118) & (0.067) & (0.108) \\
 B & & & 0.195$^{**}$ & 0.119$^{}$ & 0.189$^{***}$ & 0.076$^{}$ \\
  & & & (0.086) & (0.118) & (0.073) & (0.112) \\
 C & & & 0.235$^{***}$ & 0.289$^{**}$ & 0.184$^{***}$ & 0.214$^{*}$ \\
  & & & (0.084) & (0.121) & (0.071) & (0.113) \\
 E & & & 0.208$^{**}$ & 0.197$^{}$ & 0.198$^{**}$ & 0.200$^{}$ \\
  & & & (0.104) & (0.166) & (0.087) & (0.151) \\
 F & & & -0.11



0,1,2,3,4,5,6
,,,,,,
,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log
,,,,,,
,(1),(2),(3),(4),(5),(6)
,,,,,,
Intercept,11.503***,12.183***,11.497***,12.117***,11.490***,12.282***
,(0.493),(0.430),(0.502),(0.453),(0.418),(0.407)
Is efficient,0.218***,0.162**,,,,
,(0.053),(0.077),,,,
A,,,0.179**,0.179,0.222***,0.139
