In [1]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer
from statsmodels.formula.api import ols

from automatedFunction import dataSequence

In [12]:
# Maps patsy-generated coefficient names to the human-readable row labels used
# in the regression tables (passed to Stargazer.rename_covariates).
# Keys must match the coefficient names produced by the model formulas exactly,
# including the Treatment(reference=...) part and the quoting style.
nameDict = {
    # Variable of interest: binary efficiency indicator
    "C(is_efficient)[T.True]"                                                           :   'Is efficient',
    # Variable of interest: energy label (reference category: 'd')
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.a]"       :   'A',
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.b]"       :   'B',
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.c]"       :   'C',
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.e]"       :   'E',
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.f]"       :   'F',
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.g]"       :   'G',
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.na]"      :   'NA',
    "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.na-c]"    :   'NA-C',
    # Building age at transaction, bucketed (reference bucket: 0).
    # NOTE(review): the interval labels are hard-coded; presumably they mirror the
    # bucket edges produced by dataSequence(yearBuildBuckets=10) -- confirm after
    # any change to the bucketing.
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.1]'                :'age bin: (-2.0, -1.0]',
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.2]'                :'age bin: (-1.0, 0.0]',
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.3]'                :'age bin: (0.0, 9.0]',
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.4]'                :'age bin: (9.0, 17.0]',
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.5]'                :'age bin: (17.0, 23.0]',
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.6]'                :'age bin: (23.0, 29.0]',
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.7]'                :'age bin: (29.0, 41.0]',
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.8]'                :'age bin: (41.0, 68.3]',
    'C(building_age_at_transaction_bucket, Treatment(reference=0))[T.9]'                :'age bin: (68.3, 418.0]',
    # Property characteristics
    'C(mixedUseDummy)[T.True]'                                                          :'Mixed use',
    'C(property_land_ownership)[T.owner]'                                               :'Land ownership: Owner',
    'C(property_land_ownership)[T.perpetual_lease]'                                     :'Land ownership: Perpetual lease',
    'C(property_land_ownership)[T.prepaid_lease]'                                       :'Land ownership: prepaid lease',
    'C(property_property_type)[T.housing_care]'                                         :'Property type: Housing care',
    'C(property_property_type)[T.industrial]'                                           :'Property type: Industrial',
    'C(property_property_type)[T.office]'                                               :'Property type: Office',
    'C(property_property_type)[T.other]'                                                :'Property type: Other',
    'C(property_property_type)[T.shop]'                                                 :'Property type: Shop',
    'C(renovated)[T.True]'                                                              :'Renovated',
    # Transaction-year fixed effects (reference year: 2017)
    'C(transaction_year, Treatment(reference=2017))[T.2008]'                            :'Year: 2008',
    'C(transaction_year, Treatment(reference=2017))[T.2009]'                            :'Year: 2009',
    'C(transaction_year, Treatment(reference=2017))[T.2010]'                            :'Year: 2010',
    'C(transaction_year, Treatment(reference=2017))[T.2011]'                            :'Year: 2011',
    'C(transaction_year, Treatment(reference=2017))[T.2012]'                            :'Year: 2012',
    'C(transaction_year, Treatment(reference=2017))[T.2013]'                            :'Year: 2013',
    'C(transaction_year, Treatment(reference=2017))[T.2014]'                            :'Year: 2014',
    'C(transaction_year, Treatment(reference=2017))[T.2015]'                            :'Year: 2015',
    'C(transaction_year, Treatment(reference=2017))[T.2016]'                            :'Year: 2016',
    'C(transaction_year, Treatment(reference=2017))[T.2018]'                            :'Year: 2018',
    'C(transaction_year, Treatment(reference=2017))[T.2019]'                            :'Year: 2019',
    # Was mislabelled 'Year: 2010', which produced two "Year: 2010" rows in the table.
    'C(transaction_year, Treatment(reference=2017))[T.2020]'                            :'Year: 2020',
    'C(transaction_year, Treatment(reference=2017))[T.2021]'                            :'Year: 2021',
    'C(transaction_year, Treatment(reference=2017))[T.2022]'                            :'Year: 2022',
    'C(transaction_year, Treatment(reference=2017))[T.2023]'                            :'Year: 2023',
    # Transaction type
    'C(transactions_simplified)[T.purchase]'                                            :'Transaction type: Purchase',
    'C(transactions_simplified)[T.sale]'                                                :'Transaction type: Sale',
}

In [13]:
# --- Data ---------------------------------------------------------------------
data = dataSequence(
    yearBuildBuckets=10,
    coordinatBucketSize=5,
    is_age_bucket=True,
    imputationArea=False,
    kmeansCluster=400,
)
filtered = data.copy()

# Subsample without 'na' energy labels, computed once and shared by every model
# that excludes those rows.
# NOTE(review): the filter is on `categorizedEnergyLabel_simple`, while the models
# use `categorizedEnergyLabel_simple_suplemended` -- confirm this is intended.
filtered_labelled = filtered.query("categorizedEnergyLabel_simple != 'na'")

# --- Model specifications -------------------------------------------------------
# The categorical terms are defined once: patsy derives coefficient names from
# these exact strings, so the formulas and the table's covariate order below
# must use the same spelling.
ENERGY_LABEL_TERM = "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))"
YEAR_TERM = "C(transaction_year, Treatment(reference=2017))"
AGE_BUCKET_TERM = "C(building_age_at_transaction_bucket, Treatment(reference=0))"

# Controls shared by both baseline specifications; only the variable of
# interest (efficiency dummy vs. full energy label) differs between them.
baseline_controls = [
    "C(property_property_type)",
    YEAR_TERM,
    AGE_BUCKET_TERM,
    "C(renovated)",
    "C(mixedUseDummy)",
    "C(property_land_ownership)",
    "calculations_sum_area_log",
    "C(transactions_simplified)",
    "C(kmeans_cluster)",
]
modelspec_baseline_efficient = (
    "price_per_meter2_log ~ " + " + ".join(["C(is_efficient)", *baseline_controls])
)
modelspec_baseline_energyLabel = (
    "price_per_meter2_log ~ " + " + ".join([ENERGY_LABEL_TERM, *baseline_controls])
)

# --- Models ---------------------------------------------------------------------
# RLM: robust linear models with the Tukey biweight norm
isEfficient_reg_all_rlm = smf.rlm(
    modelspec_baseline_efficient,
    M=sm.robust.norms.TukeyBiweight(),
    data=filtered_labelled,
)
energyLabel_reg_all_rlm = smf.rlm(
    modelspec_baseline_energyLabel,
    M=sm.robust.norms.TukeyBiweight(),
    data=filtered,
)
energyLabel_reg_withouNA_rlm = smf.rlm(
    modelspec_baseline_energyLabel,
    M=sm.robust.norms.TukeyBiweight(),
    data=filtered_labelled,
)

# Plain OLS counterparts. They are constructed here but not fitted or shown in
# the comparison table below.
isEfficient_reg_all_ols = ols(modelspec_baseline_efficient, data=filtered_labelled)
energyLabel_reg_all_ols = ols(modelspec_baseline_energyLabel, data=filtered)
energyLabel_reg_withouNA_ols = ols(modelspec_baseline_energyLabel, data=filtered_labelled)

# --- Baseline results comparison ------------------------------------------------
# Keep the fitted results in named variables so they can be inspected or reused
# without refitting. Column order: (1) efficiency dummy, (2) energy label
# without 'na' rows, (3) energy label on the full sample.
isEfficient_res_all_rlm = isEfficient_reg_all_rlm.fit()
energyLabel_res_withouNA_rlm = energyLabel_reg_withouNA_rlm.fit()
energyLabel_res_all_rlm = energyLabel_reg_all_rlm.fit()

baselineComparisson = Stargazer([
    isEfficient_res_all_rlm,
    energyLabel_res_withouNA_rlm,
    energyLabel_res_all_rlm,
])

# Rows to show, in display order. Covariates not listed here (the area control,
# the k-means cluster dummies and the 'na-c' label level) are left out of the table.
energy_label_levels = ['a', 'b', 'c', 'e', 'f', 'g', 'na']   # 'd' is the reference
age_bucket_levels = range(1, 10)                              # 0 is the reference
transaction_years = [year for year in range(2008, 2024) if year != 2017]  # 2017 is the reference

baselineComparisson.covariate_order([
    'Intercept',
    'C(is_efficient)[T.True]',
    *[f"{ENERGY_LABEL_TERM}[T.{level}]" for level in energy_label_levels],
    *[f"{AGE_BUCKET_TERM}[T.{bucket}]" for bucket in age_bucket_levels],
    'C(mixedUseDummy)[T.True]',
    'C(property_land_ownership)[T.owner]',
    'C(property_land_ownership)[T.perpetual_lease]',
    'C(property_land_ownership)[T.prepaid_lease]',
    'C(property_property_type)[T.housing_care]',
    'C(property_property_type)[T.industrial]',
    'C(property_property_type)[T.office]',
    'C(property_property_type)[T.other]',
    'C(property_property_type)[T.shop]',
    'C(renovated)[T.True]',
    *[f"{YEAR_TERM}[T.{year}]" for year in transaction_years],
    'C(transactions_simplified)[T.purchase]',
    'C(transactions_simplified)[T.sale]',
])

# Replace the patsy coefficient names with readable row labels.
baselineComparisson.rename_covariates(nameDict)

baselineComparisson

0,1,2,3
,,,
,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log
,,,
,(1),(2),(3)
,,,
Intercept,11.504***,11.499***,11.490***
,(0.493),(0.502),(0.418)
Is efficient,0.218***,,
,(0.053),,
A,,0.179**,0.222***


In [16]:
# Render the baseline comparison table as LaTeX source and print it as plain text.
baseline_latex = baselineComparisson.render_latex()
print(baseline_latex)

\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{3}{c}{\textit{Dependent variable:}} \
\cr \cline{3-4}
\\[-1.8ex] & (1) & (2) & (3) \\
\hline \\[-1.8ex]
 Intercept & 11.504$^{***}$ & 11.499$^{***}$ & 11.490$^{***}$ \\
  & (0.493) & (0.502) & (0.418) \\
 Is efficient & 0.218$^{***}$ & & \\
  & (0.053) & & \\
 A & & 0.179$^{**}$ & 0.222$^{***}$ \\
  & & (0.081) & (0.067) \\
 B & & 0.195$^{**}$ & 0.189$^{***}$ \\
  & & (0.086) & (0.073) \\
 C & & 0.235$^{***}$ & 0.184$^{***}$ \\
  & & (0.084) & (0.071) \\
 E & & 0.208$^{**}$ & 0.198$^{**}$ \\
  & & (0.104) & (0.087) \\
 F & & -0.119$^{}$ & -0.102$^{}$ \\
  & & (0.124) & (0.106) \\
 G & & -0.158$^{}$ & -0.240$^{**}$ \\
  & & (0.116) & (0.097) \\
 NA & & & -0.083$^{}$ \\
  & & & (0.073) \\
 age bin: (-2.0, -1.0] & -0.123$^{}$ & -0.132$^{}$ & -0.157$^{***}$ \\
  & (0.112) & (0.113) & (0.055) \\
 age bin: (-1.0, 0.0] & -0.172$^{}$ & -0.185$^{}$ & -0.190$^{**}$ \\
  & (