In [1]:
from statsmodels.formula.api import ols
import statsmodels.formula.api as smf
import statsmodels.api as sm
from automatedFunction import dataSequence
import pandas as pd
from stargazer.stargazer import Stargazer

In [2]:
# Display labels for regression-table rows, keyed by the model coefficient
# names (this mapping is handed to Stargazer.rename_covariates further down).
# Every energy-label level shares one term prefix and is shown as its
# upper-cased code, so those entries are generated from a single template.
_ENERGY_LABEL_TERM = "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.{level}]"

nameDict = {"C(is_efficient)[T.True]": 'Is efficient'}
nameDict.update({
    _ENERGY_LABEL_TERM.format(level=lvl): lvl.upper()
    for lvl in ('a', 'b', 'c', 'e', 'f', 'g', 'na', 'na-c')
})

In [7]:
def _rlm_tukey(formula, frame):
    """Build an (unfitted) robust linear model with a Tukey biweight norm.

    Parameters
    ----------
    formula : str
        Model formula passed to ``smf.rlm``.
    frame : DataFrame
        Estimation sample passed as ``data``.

    Returns
    -------
    The unfitted model object returned by ``smf.rlm``; call ``.fit()`` on it
    to estimate. A fresh ``TukeyBiweight`` instance is created per model.
    """
    return smf.rlm(formula, M=sm.robust.norms.TukeyBiweight(), data=frame)


# Full estimation sample from the project helper.
# NOTE(review): argument semantics live in automatedFunction.dataSequence and
# are not visible here (kmeansCluster=400 is presumably the number of location
# clusters) - confirm there.
data = dataSequence(
    yearBuildBuckets=10,
    coordinatBucketSize=5,
    is_age_bucket=True,
    imputationArea=False,
    kmeansCluster=400,
)

# Sub-samples. DataFrame.query already returns a new frame, so no explicit
# .copy() of the full dataset is needed before filtering.
# NOTE(review): the NA filter is on `categorizedEnergyLabel_simple`, while the
# energy-label spec below uses `categorizedEnergyLabel_simple_suplemended` -
# confirm that filtering on the non-supplemented column is intended.
data_noShop = data.query("property_property_type != 'shop'")
data_noNA = data.query("categorizedEnergyLabel_simple != 'na'")
data_noNA_noShops = data.query("categorizedEnergyLabel_simple != 'na' & property_property_type != 'shop'")

# Model formulas. The two specs differ only in their first regressor
# (is_efficient dummy vs. energy-label category with reference level 'd');
# all remaining terms are the same controls. The literals are split across
# lines with implicit concatenation; the resulting strings are unchanged.
modelspec_baseline_efficient_ml = (
    'price_per_meter2_log ~ C(is_efficient)'
    ' + C(property_property_type)'
    ' + C(transaction_year, Treatment(reference=2017))'
    ' + C(building_age_at_transaction_bucket, Treatment(reference=0))'
    ' + C(renovated)'
    ' + C(mixedUseDummy)'
    ' + C(property_land_ownership)'
    ' + calculations_sum_area_log'
    ' + C(transactions_simplified)'
    ' + C(kmeans_cluster)'
)

modelspec_baseline_energyLabel_ml = (
    "price_per_meter2_log ~ C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))"
    "+ C(property_property_type)"
    " + C(transaction_year, Treatment(reference=2017))"
    " + C(building_age_at_transaction_bucket, Treatment(reference=0))"
    " + C(renovated)"
    " + C(mixedUseDummy)"
    " + C(property_land_ownership)"
    " + calculations_sum_area_log"
    " + C(transactions_simplified)"
    "+ C(kmeans_cluster)"
)

# RLM (robust linear models), unfitted; .fit() is called where the tables are built.
# The is_efficient models use the samples without 'na' labels ...
isEfficient_rml_ml = _rlm_tukey(modelspec_baseline_efficient_ml, data_noNA)
isEfficient_rml_ml_noShop = _rlm_tukey(modelspec_baseline_efficient_ml, data_noNA_noShops)

# ... while the energy-label models keep them ('na' is one of the label levels).
energyLabel_rml_ml = _rlm_tukey(modelspec_baseline_energyLabel_ml, data)
energyLabel_rml_ml_noShop = _rlm_tukey(modelspec_baseline_energyLabel_ml, data_noShop)

# OLS counterparts (currently disabled)
# isEfficient_ols_ml = ols(modelspec_baseline_efficient_ml  ,  data=data_noNA)
# energyLabel_ols_ml = ols(modelspec_baseline_energyLabel_ml, data=data)


In [8]:
# RLM location comparison: fit the four robust models and put them side by side.
# Column order: is_efficient (all / no shops), then energy label (all / no shops).
_fitted_baselines = [
    unfitted.fit()
    for unfitted in (
        isEfficient_rml_ml,
        isEfficient_rml_ml_noShop,
        energyLabel_rml_ml,
        energyLabel_rml_ml_noShop,
    )
]
baselineComparisson = Stargazer(_fitted_baselines)

# Table rows, in display order: intercept, the efficiency dummy, then the
# energy-label levels (all expressed relative to reference level 'd').
_label_term = "C(categorizedEnergyLabel_simple_suplemended, Treatment(reference='d'))[T.{}]"
_row_order = ['Intercept', 'C(is_efficient)[T.True]']
_row_order += [_label_term.format(level) for level in ('a', 'b', 'c', 'e', 'f', 'g', 'na')]
baselineComparisson.covariate_order(_row_order)

# Swap the raw coefficient names for the readable labels in nameDict.
baselineComparisson.rename_covariates(nameDict)

# Disabled control-variable footer lines.
# NOTE(review): each list below has six entries but this table has four
# models - presumably the lists must be trimmed to four before re-enabling;
# confirm against Stargazer.add_line.
# baselineComparisson.add_line(r"\textit{Control variables}", ['','','','','',''])
# baselineComparisson.add_line('Property type dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
# baselineComparisson.add_line('Renovation dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
# baselineComparisson.add_line('Mixed use dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
# baselineComparisson.add_line('Landownership dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
# baselineComparisson.add_line('Transaction year dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
# baselineComparisson.add_line('5 x 5 km dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
# baselineComparisson.add_line('Building age dummy', ['Yes','Yes','Yes','Yes','Yes','Yes'])
# baselineComparisson.add_line('Summed area', ['Yes','Yes','Yes','Yes','Yes','Yes'])
# print(baselineComparisson.render_latex())

# Bare last expression so the notebook renders the table.
baselineComparisson

0,1,2,3,4
,,,,
,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log,Dependent variable:price_per_meter2_log
,,,,
,(1),(2),(3),(4)
,,,,
Intercept,11.503***,10.029***,11.490***,10.449***
,(0.493),(0.323),(0.418),(0.356)
Is efficient,0.218***,0.125***,,
,(0.053),(0.035),,
A,,,0.222***,0.158***
