In [135]:
from project.utils import get_pandas
import pandas as pd
import os

# Merging the SDES database with Profeel to get household information
The method is to match each SDES archetype to a Profeel archetype that shares the same values for the three index levels: Housing type, Energy performance and Heating energy.
Then, within each group of matched lines, we compute the weight of every SDES archetype as its share of the group's total SDES stock estimate. Finally, these weights are multiplied by the Profeel stock of the group, which gives the size of each line in terms of Profeel stock.

The merging rule preserves, from the source database (SDES), the proportions of each household characteristic within a group of buildings that share the same basic building characteristics.

What can be changed:
- We can choose another profeel database, with no ventilation or with other energy performance estimates for example
- We can put the output in another place than analysis/preprocessing/building stock/output

In [136]:
# Load the two building-stock tables through the project helper `get_pandas`.
# Each reader callback receives the resolved file and parses it with pandas.
# NOTE(review): column 1 of the Profeel file is presumably 'Housing type' (it is used
# as a merge key later on) — confirm against the CSV header.
profeel = get_pandas(
    'project/input/stock/building_stock_profeel_vent.csv',
    lambda path: pd.read_csv(path, index_col=[1]).squeeze(),
)
sdes = get_pandas(
    'project/input/stock/building_stock_sdes2018_aggregated.csv',
    lambda path: pd.read_csv(path, index_col=[0, 1, 2]).squeeze(),
)

# Align Profeel with the SDES keys:
#   - 'calculated_epc' is exposed under the name 'Energy performance';
#   - 'Heating energy' is the part of 'Heating system' located before the first '-';
#   - both are appended to the existing index so they can be used as index levels.
profeel = (
    profeel
    .rename(columns={'calculated_epc': 'Energy performance'})
    .assign(**{'Heating energy': lambda frame: frame['Heating system'].str.split('-').str[0]})
    .set_index(['Heating energy', 'Energy performance'], append=True)
)

In [137]:
# Inner join of Profeel and SDES on the three shared archetype keys.
# Both tables carry a 'Stock buildings' column, so pandas suffixes them:
# '_x' is the Profeel stock, '_y' is the SDES stock.
merge_keys = ['Housing type', 'Energy performance', 'Heating energy']
merged = profeel.merge(sdes, how='inner', on=merge_keys)

# Append two index levels at once: a running row number (makes every line unique)
# and the Profeel class label (used below as the grouping key).
r_position = pd.Series(range(merged.shape[0]), name='r_position')
merged = merged.set_index([r_position, 'Class'], append=True)

# Turn the SDES stock (y) into a weight: the share of each line in the total
# SDES stock of its Profeel class.
sdes_size = merged['Stock buildings_y']
class_total = sdes_size.groupby(level='Class').transform('sum')
merged['Stock buildings_y'] = sdes_size / class_total

# Spread the Profeel stock (x) of each class over its lines using those weights.
merged['Stock buildings'] = merged['Stock buildings_y'] * merged['Stock buildings_x']

# Keep only the resulting stock; the two intermediate columns stay available in `merged`.
cleaned = merged.drop(columns=['Stock buildings_x', 'Stock buildings_y'])
cleaned

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Heating system,Wall,Floor,Roof,Windows,Ventilation,Efficiency,energy_primary,Occupancy status,Income owner,Income tenant,Stock buildings
Housing type,Energy performance,Heating energy,r_position,Class,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Single-family,F,Wood fuel,0,MI-1,Wood fuel-Standard boiler,2.50,2.00,2.5,2.4,0.4,0.75,423.726894,Social-housing,D10,D1,957.121031
Single-family,F,Wood fuel,1,MI-1,Wood fuel-Standard boiler,2.50,2.00,2.5,2.4,0.4,0.75,423.726894,Owner-occupied,D1,D1,35153.671754
Single-family,F,Wood fuel,2,MI-1,Wood fuel-Standard boiler,2.50,2.00,2.5,2.4,0.4,0.75,423.726894,Social-housing,D10,D2,1189.894764
Single-family,F,Wood fuel,3,MI-1,Wood fuel-Standard boiler,2.50,2.00,2.5,2.4,0.4,0.75,423.726894,Owner-occupied,D2,D2,45195.093973
Single-family,F,Wood fuel,4,MI-1,Wood fuel-Standard boiler,2.50,2.00,2.5,2.4,0.4,0.75,423.726894,Social-housing,D10,D3,1012.741570
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Multi-family,B,Electricity,4675,LC24,Electricity-Performance boiler,0.34,0.25,0.2,1.6,0.2,0.95,88.607812,Privately rented,D5,D10,2483.498169
Multi-family,B,Electricity,4676,LC24,Electricity-Performance boiler,0.34,0.25,0.2,1.6,0.2,0.95,88.607812,Privately rented,D6,D10,3367.205202
Multi-family,B,Electricity,4677,LC24,Electricity-Performance boiler,0.34,0.25,0.2,1.6,0.2,0.95,88.607812,Privately rented,D7,D10,4713.874646
Multi-family,B,Electricity,4678,LC24,Electricity-Performance boiler,0.34,0.25,0.2,1.6,0.2,0.95,88.607812,Privately rented,D8,D10,7331.216560


In [138]:
# Checks: the Profeel total must be conserved by the merge and the re-weighting.
profeel_stock = profeel['Stock buildings'].sum()
output_stock = cleaned['Stock buildings'].sum()

# Profeel stock that survived the inner merge. Every merged line repeats the stock of
# its Profeel archetype ('Stock buildings_x'), so that stock is counted once per class.
# Dividing the grand total by the average number of lines per archetype would only be
# correct if every archetype matched the same number of SDES lines and none was dropped.
# Assumes each 'Class' label identifies exactly one Profeel archetype.
merged_stock = float(merged.groupby(level='Class')['Stock buildings_x'].first().sum())

print("profeel building stock: {}".format(profeel_stock))
print("sdes building stock: {}".format(sdes['Stock buildings'].sum()))
# Average over all Profeel archetypes (archetypes without any SDES match count as zero lines).
print("The number of new lines for each profeel archetypes: {}".format(merged.shape[0]/profeel.shape[0]))
print("merged building stock: {}".format(merged_stock))
print("output building stock: {}".format(output_stock))
# A non-zero loss means some Profeel archetypes found no SDES counterpart in the inner merge.
print("building stock loss: {}".format(profeel_stock - output_stock))

profeel building stock: 35172600
sdes building stock: 26698690.691502385
The number of new lines for each profeel archetypes: 120.0
merged building stock: 35172600.0
output building stock: 35172600.0
building stock loss: 0.0


In [139]:
# Write the merged stock next to the other preprocessing outputs.
# The folder is resolved from the current working directory, so the notebook has to be
# started from the directory that contains 'preprocessing/'.
path_output = os.path.join(os.getcwd(), 'preprocessing/building_stock/output')
# Create the folder on first run; `to_csv` does not create missing directories.
os.makedirs(path_output, exist_ok=True)
cleaned.to_csv(os.path.join(path_output, 'building_stock_profeel_hhdata.csv'))