In [None]:
import os

def scale_input_data(scale_factor, file_bases=None):
  """Write a row-scaled copy of each input CSV next to the original.

  For every path prefix ``base`` the file ``base + '.csv'`` is read and
  ``base + '.scaled.csv'`` is written:

  * ``scale_factor == 1.0``: the file is copied byte-for-byte.
  * ``scale_factor < 1.0``: only the first ``int(scale_factor * n_rows)``
    rows are kept.
  * ``scale_factor > 1.0``: the rows are repeated cyclically until the frame
    has ``int(scale_factor * n_rows)`` rows.

  Args:
    scale_factor: Non-negative multiplier applied to the row count.
    file_bases: Optional iterable of path prefixes (without the ``.csv``
      suffix). Defaults to the two input files this notebook uses.

  Raises:
    ValueError: If ``scale_factor`` is negative. A negative value would
      otherwise be interpreted by ``iloc`` as "drop rows from the end" and
      silently produce a wrongly sized file.
  """
  # Imported locally (once, not per file) because this function runs before
  # the notebook's own import cell has executed.
  import shutil
  import pandas as pd

  if scale_factor < 0:
    raise ValueError('scale_factor must be non-negative, got %r' % (scale_factor,))

  if file_bases is None:
    file_bases = ['./input/singapore_building_gfa_energy', './input/energy_performance_data_2016']

  for file_base in file_bases:
    source_path = file_base + '.csv'
    scaled_path = file_base + '.scaled.csv'

    if scale_factor == 1.0:
      # Nothing to resample: keep the original bytes exactly.
      shutil.copyfile(source_path, scaled_path)
      continue

    df_scaled = pd.read_csv(source_path)
    new_num_rows = int(scale_factor * len(df_scaled))
    if scale_factor <= 1.0:
      df_scaled = df_scaled.iloc[:new_num_rows]
    else:
      # Grow by appending a prefix of the current frame (at most doubling
      # per pass); an empty input gives new_num_rows == 0 and skips the loop.
      while len(df_scaled) < new_num_rows:
        missing = new_num_rows - len(df_scaled)
        df_scaled = pd.concat([df_scaled, df_scaled[:min(missing, len(df_scaled))]])
    df_scaled.to_csv(scaled_path, index=False)

# Rescale the inputs only when INPUT_SCALE_FACTOR is set in the environment.
# NOTE(review): when it is unset no '.scaled.csv' files are written here, yet
# a later cell reads them -- presumably they already exist on disk; confirm.
if os.environ.get('INPUT_SCALE_FACTOR') is not None:
  scale_input_data(float(os.environ['INPUT_SCALE_FACTOR']))

In [1]:
import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# NOTE(review): this executes whatever Python source the IREWR_IMPORTS
# environment variable holds, and raises KeyError if the variable is unset.
# Later cells use `pd`, so the string presumably binds `pd` to pandas or a
# compatible library -- confirm. Because exec() runs arbitrary code, only run
# this notebook in an environment where that variable is trusted.
exec(os.environ['IREWR_IMPORTS'])

In [2]:
#load the data
# ALEX: remove IPython commands
# !cp -r ../input/sgp-buildings/* ./

In [3]:
# Input tables keyed by a short name; each entry gains a 'tbl' DataFrame below.
tables = {
    'total': {'csv_name': './input/singapore_building_gfa_energy.scaled.csv'},
    'commercial': {'csv_name': './input/energy_performance_data_2016.scaled.csv'},
}
for t, entry in tables.items():
    entry['tbl'] = pd.read_csv(entry['csv_name'])

# Raw CSV headers -> the column labels used throughout the rest of the notebook.
column_key = {
    'buildingtype': 'property type',
    'grossfloorarea': 'GFA m2',
    '2016energyusintensity': 'kWh/m2/yr',
}
fields = ['property type', 'GFA m2', 'kWh/m2/yr']

# The stored commercial table is renamed in place; `comm` is an independent copy
# restricted to the three columns of interest.
commercial_tbl = tables['commercial']['tbl']
commercial_tbl.rename(columns=column_key, inplace=True)
comm = commercial_tbl.loc[:, fields].copy()

# Both numeric columns are handled as strings with ',' separators, so strip
# the commas before converting to float.
for f in ('GFA m2', 'kWh/m2/yr'):
    without_commas = comm[f].str.replace(',', '')
    comm[f] = without_commas.astype(float)

# Annual energy per building = floor area x energy use intensity.
comm['kWh/yr'] = comm['GFA m2'].mul(comm['kWh/m2/yr'])

In [4]:
# List the distinct property types present in the commercial table.
comm['property type'].unique()

array(['Office', 'Hotel', 'Retail', 'Mixed Development'], dtype=object)

In [5]:
# Share of rows with no missing value in any column. Despite the "pct" name
# this is a fraction between 0 and 1, not a percentage.
rows_complete = comm.dropna().shape[0]
pct_OK = rows_complete / comm.shape[0]
pct_OK

0.7626582278481012

In [6]:
# Keep only the rows that have every field populated.
comm = comm.dropna()

In [7]:
# Preview the first five rows of the cleaned commercial table.
comm.head()

Unnamed: 0,property type,GFA m2,kWh/m2/yr,kWh/yr
0,Office,62469.0,188.0,11744172.0
1,Office,169727.0,245.0,41583115.0
2,Office,84229.0,140.0,11792060.0
3,Office,43006.0,202.0,8687212.0
4,Office,16519.0,201.0,3320319.0


In [8]:
# Total floor area and annual energy per property type ...
pvt = comm.pivot_table(
    index='property type',
    values=['GFA m2', 'kWh/yr'],
    aggfunc='sum',
)
# ... and the area-weighted energy intensity derived from those totals.
pvt['kWh/m2/yr'] = pvt['kWh/yr'].div(pvt['GFA m2'])

In [9]:
# Work on a private copy of the sector-level table with a normalised header.
total = tables['total']['tbl'].copy()
total = total.rename(columns={'kWh/yr/m2': 'kWh/m2/yr'})
# Floor area in million m2 times kWh/m2/yr gives annual energy in GWh.
total['GWh/yr'] = total['mil m2'].mul(total['kWh/m2/yr'])

# First row labelled 'commercial'; raises IndexError if there is none.
is_commercial = total['property type'] == 'commercial'
comm_totals = total.loc[is_commercial].iloc[0]

# Rescale the sampled buildings so that each column adds up to the
# sector-wide commercial total, keeping the per-type proportions.
pvt_scaled = pvt.assign(**{
    'mil m2': pvt['GFA m2'] * comm_totals['mil m2'] / pvt['GFA m2'].sum(),
    'GWh/yr': pvt['kWh/yr'] * comm_totals['GWh/yr'] / pvt['kWh/yr'].sum(),
})

pvt_scaled

Unnamed: 0_level_0,GFA m2,kWh/yr,kWh/m2/yr,mil m2,GWh/yr
property type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Hotel,1598437.0,445428100.0,278.664756,2.597038,707.4459
Mixed Development,2358109.0,697655900.0,295.853966,3.831304,1108.043825
Office,7314578.0,1817305000.0,248.449752,11.884257,2886.313555
Retail,2885024.0,1040837000.0,360.772285,4.687402,1653.096719


In [10]:
fields = ['mil m2', 'kWh/m2/yr', 'GWh/yr']

# Swap the single aggregate 'commercial' row for the per-type breakdown.
# Original index labels are retained, so the combined index is not unique.
ex_com = total.loc[total['property type'] != 'commercial']
commercial_breakdown = pvt_scaled[fields].reset_index()
total = pd.concat([ex_com, commercial_breakdown], axis=0)

total

Unnamed: 0,property type,mil m2,kWh/m2/yr,GWh/yr
0,residential,112.0,50.0,5600.0
2,healthcare,2.2,267.7,588.94
3,education,4.2,224.8,944.16
4,civil / community,1.4,115.7,161.98
5,sports rec,1.1,201.8,221.98
6,transport,1.4,265.7,371.98
0,Hotel,2.597038,278.664756,707.4459
1,Mixed Development,3.831304,295.853966,1108.043825
2,Office,11.884257,248.449752,2886.313555
3,Retail,4.687402,360.772285,1653.096719


In [11]:
pop_mil = 5.6  # presumably the population in millions -- confirm the source

# Per-capita view: dividing "million m2" and "GWh/yr" by the population in
# millions yields m2 and kWh per person; the absolute columns are then dropped.
pca = (
    total
    .assign(**{
        'm2/ca': total['mil m2'] / pop_mil,
        'kWh/ca/yr': total['GWh/yr'] / pop_mil,
    })
    .drop(columns=['mil m2', 'GWh/yr'])
)

In [12]:
# Rank property types by per-capita annual energy, largest first. This only
# displays a sorted copy; `pca` itself keeps its original row order.
pca.sort_values('kWh/ca/yr',ascending=False)

Unnamed: 0,property type,kWh/m2/yr,m2/ca,kWh/ca/yr
0,residential,50.0,20.0,1000.0
2,Office,248.449752,2.122189,515.413135
3,Retail,360.772285,0.837036,295.195843
1,Mixed Development,295.853966,0.684161,197.864969
3,education,224.8,0.75,168.6
0,Hotel,278.664756,0.463757,126.329625
2,healthcare,267.7,0.392857,105.167857
6,transport,265.7,0.25,66.425
5,sports rec,201.8,0.196429,39.639286
4,civil / community,115.7,0.25,28.925


In [13]:
# ALEX: remove plotting
# import seaborn as sb
# sb.barplot(x='kWh/ca/yr',y='property type',data=pca.sort_values('kWh/ca/yr',ascending=False))
# The sort that fed the removed plot is still executed; its result is bound to
# the throwaway name `_` and is not read by any of the visible cells.
_ = pca.sort_values('kWh/ca/yr',ascending=False)

In [14]:
# Total per-capita annual energy summed over every property type.
pca['kWh/ca/yr'].sum()

2543.5607142857148

For context, average electricity consumption is about 9,000 kWh/ca/yr. The building energy presented here (~2,500 kWh/ca/yr) is only ~28% of that, and about 500 kWh/ca/yr is from transport, so the balance of roughly 6,000 kWh/ca/yr is mostly from manufacturing. The 9,000 kWh/ca/yr figure covers electricity only: it does not include energy for transport by personal vehicles, taxis and public buses, nor does it include holiday travel flights. As a benchmark from the HDI vs. energy usage comparison, typical energy usage for a country with HDI > 0.8 is in the range 2,000 - 4,000 kWh/ca/yr, inclusive of industrial and transport energy.

A quick scan of the building energy use highlights that home energy is the highest contributor at 1,000 kWh/ca/yr, so for buildings this is a reasonable starting point for efficiency improvement.   

Another question to consider is where a typical resident's energy usage comes from: is it while they are at home, at work, running errands or at leisure? Answering it requires some simplifying assumptions about how much time they spend at each of these locations.

In [15]:
weekly_hrs = 168

# Per-capita floor area looked up by property type.
m2_factors = dict(zip(pca['property type'].tolist(), pca['m2/ca'].tolist()))

# Assumed fraction of a week a person spends in each kind of building.
usage_factors = {
    'residential': (12*7+4*2)/weekly_hrs,
    'Office': 0.35*40/weekly_hrs,
    'Retail': 0.15*40/weekly_hrs,
    'Mixed Development': 0.05*40/weekly_hrs,
    'education': 0.25*40*9/12/weekly_hrs,
    'Hotel': 0.05*40/weekly_hrs,
    'healthcare': 0.05*40/weekly_hrs,
    'transport': 0.02*40/weekly_hrs,
    'sports rec': 0,
    'civil / community': 0,
}

out_of_home_hrs = 16/weekly_hrs  # excludes time in transit

# ALEX: only property types that survive data scaling (i.e. still appear in
# `pca`) are kept, so the lookups below cannot fail on a missing key.
out_of_home = [
    name
    for name in ['Retail', 'Mixed Development', 'healthcare', 'transport',
                 'sports rec', 'civil / community']
    if name in m2_factors
]
out_of_home_m2 = {name: m2_factors[name] for name in out_of_home}
total_ooh_m2 = sum(out_of_home_m2.values())

# Distribute the extra out-of-home hours across these categories in proportion
# to their per-capita floor area.
for f, floor_area in out_of_home_m2.items():
    usage_factors[f] += out_of_home_hrs*floor_area/total_ooh_m2
usage_factors

{'residential': 0.5476190476190477,
 'Office': 0.08333333333333333,
 'Retail': 0.06625181869405972,
 'Mixed Development': 0.0368649810770814,
 'education': 0.044642857142857144,
 'Hotel': 0.011904761904761904,
 'healthcare': 0.026237344657090392,
 'transport': 0.013882639240659253,
 'sports rec': 0.007166291376164244,
 'civil / community': 0.009120734478754491}

In [16]:
hours_per_year = 24*365

# One-column frame of time fractions, indexed by property type.
usg = pd.DataFrame({'hrs/ca': usage_factors}).rename_axis('property type')
df = pca.merge(usg.reset_index(), on='property type')

# Average power drawn while a person is actually in the building:
# kWh per year / hours spent there per year, converted from kW to W.
occupied_hours = df['hrs/ca']*hours_per_year
df['W/ca'] = df['kWh/ca/yr']/occupied_hours*1000
df.sort_values('W/ca', ascending=False)

Unnamed: 0,property type,kWh/m2/yr,m2/ca,kWh/ca/yr,hrs/ca,W/ca
6,Hotel,278.664756,0.463757,126.329625,0.011905,1211.379967
8,Office,248.449752,2.122189,515.413135,0.083333,706.04539
4,sports rec,201.8,0.196429,39.639286,0.007166,631.432966
7,Mixed Development,295.853966,0.684161,197.864969,0.036865,612.704104
5,transport,265.7,0.25,66.425,0.013883,546.204682
9,Retail,360.772285,0.837036,295.195843,0.066252,508.637442
1,healthcare,267.7,0.392857,105.167857,0.026237,457.571576
2,education,224.8,0.75,168.6,0.044643,431.123288
3,civil / community,115.7,0.25,28.925,0.009121,362.025739
0,residential,50.0,20.0,1000.0,0.547619,208.457415


In [17]:
# ALEX: remove plotting
# sb.barplot(x='W/ca',y='property type',
#            data=df.drop(df[df['property type']=='Hotel'].index).sort_values('W/ca',ascending=False))
# Same data preparation the removed plot used: drop the 'Hotel' rows, then sort
# by W/ca descending. The result is bound to the throwaway name `_`.
_ = df.drop(df[df['property type']=='Hotel'].index).sort_values('W/ca',ascending=False)

Based on the usage weights provided, and after adjusting for the fact that more time is spent in the home, the out-of-home energy uses appear more intense on a time-weighted basis. Establishing the actual distribution between these sectors would require a more detailed validation of the usage weights.