# SDES-2018 Building stock

In [1]:
import os
import pandas as pd
import json
import sys
from project.utils import plot_attribute, subplots_attributes, subplots_pie, plot_attribute2attribute


In [2]:
sys.path.append('..')
sys.path.append('../..')
from project.utils import reindex_mi

In [3]:
folder_input = 'sdes_2018'
folder_output = 'output'
if not os.path.isdir(folder_output):
    os.mkdir(folder_output)

# Reading input

## Main input
Sources: SDES-2018
Status: Confidential

In [4]:
replace_dict = {r'^(P)$': 'Owner-occupied',
                'Gaz': 'Natural gas',
                'Bois\*': 'Wood fuel',
                'Fioul domestique': 'Oil fuel',
                'Chauffage urbain': 'Urban heating',
                'MA': 'Single-family',
                'Maison': 'Single-family',
                'AP': 'Multi-family',
                'Appartement': 'Multi-family',
                'LP': 'Privately rented',
                'Autres.*': 'Others',
                'LS': 'Social-housing',
                '.?lectricit.*': 'Electricity',}

In [5]:
name_file = 'comptages_DPE.csv'
stock_buildings = pd.read_csv(os.path.join(folder_input, name_file), sep=',', header=[0], encoding='latin-1',
                        index_col=[0, 1, 2, 3, 4]).squeeze()
index_names = ['Housing type', 'Occupancy status', 'Income tenant', 'Heating energy', 'Energy performance']
stock_buildings.index.set_names(index_names, inplace=True)

stock_buildings = stock_buildings.reset_index().replace(replace_dict, regex=True).set_index(stock_buildings.index.names).squeeze()

print(stock_buildings.head().to_frame().style.format('{:.1f}'))


print('Total number of housing in this study {:,.0f}'.format(stock_buildings.sum()))

<pandas.io.formats.style.Styler object at 0x7fc9cacf8190>
Total number of housing in this study 28,620,348


### Remove gratuity

In [6]:
stock_buildings = stock_buildings.loc[stock_buildings.index.get_level_values('Occupancy status') != 'G']
print(stock_buildings.head().to_frame().style.format('{:.1f}'))
print('Total number of housing at this point {:,.0f}'.format(stock_buildings.sum()))

<pandas.io.formats.style.Styler object at 0x7fc9d82495b0>
Total number of housing at this point 28,366,454


### De-aggregate 'Others' to 'Wood fuel' and 'Oil fuel'

Using another source of data, we de-aggregate each rows where Heating energy == 'Others' to 'Wood fuel' and 'Oil fuel'.  
Rate depends on Housing type

#### Read data oil fuel and wood fuel

In [7]:
name_file = 'fuel_oil_wood_2018.xlsx'
data_fuel = pd.read_excel(os.path.join(folder_input, name_file), header=[0], index_col=[1, 0])
print(data_fuel.head(10))

data_fuel.index.set_names(['Heating energy', 'Housing type'], inplace=True)
data_fuel = data_fuel.reset_index().replace(replace_dict, regex=True).set_index(data_fuel.index.names).squeeze()

fuel_list = ['Wood fuel', 'Oil fuel']
data_fuel = data_fuel[data_fuel.index.get_level_values('Heating energy').isin(fuel_list)]
data_fuel = data_fuel.loc[:, 'Taux du parc en %']
print(data_fuel.head(10).to_frame().style.format('{:.2f}'))

                                                  Parc en milliers de logements  \
Energie principale de chauffage Type de logement                                  
Autres énergies                 Appartement                           19.155201   
Bois*                           Appartement                           51.225000   
Chauffage urbain                Appartement                         1234.451978   
Fioul domestique                Appartement                          585.193629   
Gaz de pétrole liquéfié         Appartement                           10.900000   
Autres énergies                 Maison                               168.656387   
Bois*                           Maison                              1197.667239   
Fioul domestique                Maison                              2867.700000   
Gaz de pétrole liquéfié         Maison                               391.261813   

                                                  Taux du parc en %  
Energie principa

#### De-aggregate stock_buildings rows

##### Preparing data_fuel to de-aggregate

In [8]:
data_fuel = data_fuel.to_frame().pivot_table(columns='Heating energy', index='Housing type')
data_fuel = pd.concat([data_fuel], keys=['Others'], names=['Heating energy'], axis=0)
print(data_fuel.style.format('{:.0%}'))

<pandas.io.formats.style.Styler object at 0x7fc9984688b0>


##### Combining data_fuel with stock_buildings

In [9]:
data_fuel_re = reindex_mi(data_fuel, stock_buildings.index)

# multiplication will remove other value than Others (need to be added back later)
stock_buildings_others = (stock_buildings * data_fuel_re.T).T
stock_buildings_others.dropna(inplace=True)
stock_buildings_others = stock_buildings_others.droplevel('Heating energy', axis=0).stack().iloc[:, 0]
stock_buildings_others = stock_buildings_others.reorder_levels(stock_buildings.index.names)
print(stock_buildings_others.head().to_frame().style.format('{:.0f}'))


stock_buildings = pd.concat((stock_buildings.loc[stock_buildings.index.get_level_values('Heating energy') != 'Others'], stock_buildings_others), axis=0)
print(stock_buildings.groupby('Heating energy').sum().to_frame().style.format('{:,.0f}'))
print('\n Total number of housing at this point {:,.0f}'.format(stock_buildings.sum()))


<pandas.io.formats.style.Styler object at 0x7fc9cadcafd0>
<pandas.io.formats.style.Styler object at 0x7fc9cadcaf10>

 Total number of housing at this point 26,698,691


### Add owner income as attribute for each building

Using another source of data, we add another level (or attribute) to building stocks: income owner.
Income owner is useful to determine socio-economic parameters like the interest rate or the investment duration.

#### Read data income owner

In [10]:
name_file = 'parclocatifprive_post48_revenusPB.csv'
data_income_owner = pd.read_csv(os.path.join(folder_input, name_file), sep=',', header=[0],
                                index_col=[2, 0, 3, 5, 6])
print(data_income_owner.head())

index_names = ['Housing type', 'Occupancy status', 'Income tenant', 'Heating energy', 'Energy performance']
data_income_owner.index.set_names(index_names, inplace=True)

data_income_owner.rename(columns={'DECILE_PB': 'Income owner'}, inplace=True)
data_income_owner.reset_index(inplace=True)
data_income_owner.set_index(index_names + ['Income owner'], inplace=True)

data_income_owner = data_income_owner.reset_index().replace(replace_dict, regex=True).set_index(data_income_owner.index.names).squeeze()


data_income_owner = data_income_owner.loc[data_income_owner.index.get_level_values('Income owner') != 'NC']
data_income_owner = data_income_owner.loc[:, 'NB_LOG']

print(data_income_owner.head())
print('\n Total number of housing at this point {:,.0f} - stock with income owner'.format(data_income_owner.sum()))

                                      DATECONST DECILE_PB    NB_LOG
NATLOC OCC DECILE ENERGIE ETIQUETTE                                
AP     LP  D1     Autres  A          Après 1948        D1  1.474775
MA     LP  D1     Autres  A          Après 1948        D1  9.673954
AP     LP  D2     Autres  A          Après 1948        D1  0.952759
MA     LP  D2     Autres  A          Après 1948        D1  9.703615
AP     LP  D3     Autres  A          Après 1948        D1  0.334222
Housing type   Occupancy status  Income tenant  Heating energy  Energy performance  Income owner
Multi-family   Privately rented  D1             Others          A                   D1              1.474775
Single-family  Privately rented  D1             Others          A                   D1              9.673954
Multi-family   Privately rented  D2             Others          A                   D1              0.952759
Single-family  Privately rented  D2             Others          A                   D1              

#### De-aggregate 'Others' to 'Wood fuel' and 'Oil fuel'

Exactly what it is done before for stock_buildings data.

In [11]:
data_fuel_re = reindex_mi(data_fuel, data_income_owner.index)
print(data_fuel_re.head().style.format('{:.0%}'))

# multiplication will remove other value than Others (need to be added back later)
data_income_owner_others = (data_income_owner * data_fuel_re.T).T
data_income_owner_others.dropna(inplace=True)
data_income_owner_others = data_income_owner_others.droplevel('Heating energy', axis=0).stack().iloc[:, 0]
data_income_owner_others = data_income_owner_others.reorder_levels(data_income_owner.index.names)


data_income_owner = pd.concat((data_income_owner.loc[data_income_owner.index.get_level_values('Heating energy') != 'Others'], data_income_owner_others), axis=0)
print(data_income_owner.head())

print('\n Total number of housing at this point {:,.0f} - stock with income owner'.format(data_income_owner.sum()))

<pandas.io.formats.style.Styler object at 0x7fc9d82495e0>
Housing type   Occupancy status  Income tenant  Heating energy  Energy performance  Income owner
Multi-family   Privately rented  D1             Electricity     A                   D1               34.508022
Single-family  Privately rented  D1             Electricity     A                   D1              101.310182
Multi-family   Privately rented  D2             Electricity     A                   D1               50.501777
Single-family  Privately rented  D2             Electricity     A                   D1              181.706836
Multi-family   Privately rented  D3             Electricity     A                   D1               52.694762
dtype: float64

 Total number of housing at this point 2,486,346 - stock with income owner


#### Get proportion of other attributes based on Income owner in order to add it to stock_buildings

In [12]:
data_income_owner_ = data_income_owner.copy()
data_income_owner_ = data_income_owner_.groupby(['Housing type', 'Occupancy status', 'Income owner', 'Income tenant']).sum()
share_income_tenant = (data_income_owner_.unstack('Income tenant').T / data_income_owner_.unstack('Income tenant').sum(axis=1)).T
print(share_income_tenant.head())
share_income_tenant.to_csv(os.path.join(folder_output, 'share_income_tenant.csv'))

Income tenant                                     D1       D10        D2  \
Housing type Occupancy status Income owner                                 
Multi-family Privately rented D1            0.149581  0.068497  0.149302   
                              D10           0.090687  0.105143  0.090000   
                              D2            0.175912  0.047592  0.164074   
                              D3            0.172281  0.034828  0.151458   
                              D4            0.147198  0.038975  0.107708   

Income tenant                                     D3        D4        D5  \
Housing type Occupancy status Income owner                                 
Multi-family Privately rented D1            0.103206  0.107108  0.093357   
                              D10           0.083294  0.093956  0.097789   
                              D2            0.110753  0.118910  0.119314   
                              D3            0.123091  0.116244  0.090400   
           

#### Get proportion of other attributes based on Income owner in order to add it to stock_buildings

In [13]:
share_income_owner = (data_income_owner_.unstack('Income owner').T / data_income_owner_.unstack('Income owner').sum(axis=1)).T
print(share_income_owner.head())

share_income_owner.to_csv(os.path.join(folder_output, 'share_income_owner.csv'))

Income owner                                       D1       D10        D2  \
Housing type Occupancy status Income tenant                                 
Multi-family Privately rented D1             0.049976  0.316608  0.042466   
                              D10            0.034957  0.560692  0.017549   
                              D2             0.051833  0.326489  0.041156   
                              D3             0.040265  0.339562  0.031219   
                              D4             0.038916  0.356705  0.031216   

Income owner                                       D3        D4        D5  \
Housing type Occupancy status Income tenant                                 
Multi-family Privately rented D1             0.045133  0.044297  0.055800   
                              D10            0.013937  0.017916  0.028036   
                              D2             0.041229  0.033680  0.054267   
                              D3             0.037654  0.041916  0.047862  

In [14]:

# multiplication will remove other value than Landlords (need to be added back later)
stock_buildings_landlords = (stock_buildings * reindex_mi(share_income_owner, stock_buildings.index).T).T
stock_buildings_landlords = stock_buildings_landlords.stack()

stock_buildings_wolandlords = pd.concat([stock_buildings], keys=[float('nan')], names=['Income owner'])
stock_buildings_wolandlords = stock_buildings_wolandlords.loc[stock_buildings_wolandlords.index.get_level_values('Occupancy status') != 'Privately rented']
stock_buildings_wolandlords = stock_buildings_wolandlords.reorder_levels(stock_buildings_landlords.index.names)
stock_buildings = pd.concat((stock_buildings_wolandlords, stock_buildings_landlords), axis=0)
print(stock_buildings)
print('\n Total number of housing at this point {:,.0f}'.format(stock_buildings.sum()))

Housing type   Occupancy status  Income tenant  Heating energy  Energy performance  Income owner
Multi-family   Social-housing    D1             Electricity     A                   NaN             4162.321113
Single-family  Social-housing    D1             Electricity     A                   NaN             1618.652411
Multi-family   Owner-occupied    D1             Electricity     A                   NaN             1154.718085
Single-family  Owner-occupied    D1             Electricity     A                   NaN             5032.777538
Multi-family   Social-housing    D2             Electricity     A                   NaN             4298.192125
                                                                                                       ...     
Single-family  Privately rented  D10            Wood fuel       G                   D5                20.603479
                                                                                    D6                25.586547
       

#### Handle Owner-occupied and social-housing

1. Setting income owner = income occupant when occupancy status = 'Owner-occupied'
2. Setting income owner = D10 when occupancy status = 'Social-housing'

In [15]:
stock_buildings = stock_buildings.reset_index()

# Owner-occupied
stock_buildings.loc[stock_buildings.loc[:, 'Occupancy status'] == 'Owner-occupied', 'Income owner'] = stock_buildings.loc[stock_buildings.loc[:, 'Occupancy status'] == 'Owner-occupied', 'Income tenant']

# social-housing
temp = stock_buildings.loc[stock_buildings.loc[:, 'Occupancy status'] == 'Social-housing', 'Income owner']
stock_buildings.loc[stock_buildings.loc[:, 'Occupancy status'] == 'Social-housing', 'Income owner'] = ['D10'] * len(temp)

index_names = ['Housing type', 'Energy performance', 'Heating energy', 'Occupancy status', 'Income owner', 'Income tenant']
stock_buildings = stock_buildings.set_index(index_names).iloc[:, 0].rename('Stock buildings')

# stock_buildings.to_pickle(os.path.join(folder_output, 'building_stock_sdes2018.pkl'), protocol=4)
stock_buildings.to_csv(os.path.join(folder_output, 'building_stock_sdes2018_aggregated.csv'))

print(stock_buildings.to_frame())
print('\n Total number of housings {:,.0f}'.format(stock_buildings.sum()))

                                                                                             Stock buildings
Housing type  Energy performance Heating energy Occupancy status Income owner Income tenant                 
Multi-family  A                  Electricity    Social-housing   D10          D1                 4162.321113
Single-family A                  Electricity    Social-housing   D10          D1                 1618.652411
Multi-family  A                  Electricity    Owner-occupied   D1           D1                 1154.718085
Single-family A                  Electricity    Owner-occupied   D1           D1                 5032.777538
Multi-family  A                  Electricity    Social-housing   D10          D2                 4298.192125
...                                                                                                      ...
Single-family G                  Wood fuel      Privately rented D5           D10                  20.603479
                   

In [16]:
d = stock_buildings.xs('Multi-family', level='Housing type').groupby('Heating energy').sum()
print(d)

d = stock_buildings.xs('Single-family', level='Housing type').groupby('Heating energy').sum()
print(d)

Heating energy
Electricity    4.964876e+06
Natural gas    6.274891e+06
Oil fuel       5.065432e+05
Wood fuel      4.434032e+04
Name: Stock buildings, dtype: float64
Heating energy
Electricity    5.402671e+06
Natural gas    5.343517e+06
Oil fuel       2.935760e+06
Wood fuel      1.226092e+06
Name: Stock buildings, dtype: float64
