In [150]:
import pandas as pd
import importlib as il
import sys, os

src = os.path.abspath(os.path.join('..', '..', 'src'))
sys.path.append(src)

# project modules
import ingest.ingest as ing
import modules.aggregate as agg
from helpers import show_all

In [2]:
il.reload(ing)
data_dir = '../ingest/data_in/'
schema_file = '../ingest/schema.json'
ingest = ing.Ingest(schema_file)
parsed_datasets = ingest.read_and_parse(data_dir)

Dataset: areas_countries_data
iso3_code     string
m2_area      float64
dtype: object
(256, 2)
***

Dataset: areas_zones_data
iso3_code        string
climate_zone      int64
m2_area         float64
dtype: object
(366, 3)
***

Dataset: building_statistics_fin
observation_year      int64
gross_floor_area    float64
dtype: object
(6, 2)
***

Dataset: climatetrace_countries
country_name    string
iso3_code       string
dtype: object
(252, 2)
***

Dataset: countries_data
iso3_code           string
pop_density        float64
emission_factor    float64
dtype: object
(256, 3)
***

Dataset: heating_demand_data
climate_zone               int64
heating_demand_factor    float64
dtype: object
(10, 2)
***

Dataset: manual_continents_data
iso3_code         string
continent_name    string
dtype: object
(27, 2)
***

Dataset: ne_countries_continents
continent_name    string
iso3_code         string
dtype: object
(255, 2)
***

Dataset: on_site_heat_data
continent_name     string
oheg_factor       float64

In [152]:
print('Imported datasets:\n', ingest.ds_names)

Imported datasets:
 ['areas_countries_data', 'areas_zones_data', 'building_statistics_fin', 'climatetrace_countries', 'countries_data', 'heating_demand_data', 'manual_continents_data', 'ne_countries_continents', 'on_site_heat_data']


In [None]:
areas_countries_data = parsed_datasets['areas_countries_data']
areas_zones_data = parsed_datasets['areas_zones_data']
building_statistics_fin = parsed_datasets['building_statistics_fin']
climatetrace_countries = parsed_datasets['climatetrace_countries']
countries_data = parsed_datasets['countries_data']
heating_demand_data = parsed_datasets['heating_demand_data']
manual_continents_data = parsed_datasets['manual_continents_data']
ne_countries_continents = parsed_datasets['ne_countries_continents']
on_site_heat_data = parsed_datasets['on_site_heat_data']

In [153]:
# combines continents is a full list of countries codes and continents
combined_continents = agg.Aggregate.combine_continents(ne_countries_continents, manual_continents_data, columns=['iso3_code'])

In [266]:
# based on the continent information, heated floor area factors are added
print('Heated floor area factors columns:\n',list(on_site_heat_data.columns))
# we are interested in the 'ohfa_factor', that is "On-site heated floor area factor,
# the percentage of non-district heated floor area, product of oheg and hfa"

df = combined_continents
other = on_site_heat_data
on = 'continent_name'
cols = 'ohfa_factor'
ohfa_factor_countries = agg.Aggregate.add_cols(df, other, cols=cols)

# check that all countries have ohfa factors:
print('Number of NA values in new column:\n', np.sum(ohfa_factor_countries[col].isna()))

Heated floor area factors columns:
 ['continent_name', 'oheg_factor', 'hfa_factor', 'ohfa_factor']


0

In [267]:
ohfa_factor_countries

Unnamed: 0,continent_name,iso3_code,ohfa_factor
0,Asia,IDN,0.56
1,Asia,MYS,0.56
2,South America,CHL,0.56
3,South America,BOL,0.56
4,South America,PER,0.56
...,...,...,...
272,Northern Europe,DNK,0.27
273,Northern Europe,FIN,0.27
274,Northern Europe,ISL,0.27
275,Northern Europe,NOR,0.27


In [None]:
# based on the continent information, heated floor area factors are added
print('Heated floor area factors columns:\n',list(on_site_heat_data.columns))
# we are interested in the 'ohfa_factor', that is "On-site heated floor area factor,
# the percentage of non-district heated floor area, product of oheg and hfa"

df = ohfa_factor_countries
other = on_site_heat_data
on = 'continent_name'
cols = 'ohfa_factor'
ohfa_factor_countries = agg.Aggregate.add_cols(df, other, cols=cols)

# check that all countries have ohfa factors:
print('Number of NA values in new column:\n', np.sum(ohfa_factor_countries[col].isna()))

In [4]:
areas_countries_data

Unnamed: 0,iso3_code,m2_area
0,LBY,1.616064e+12
1,CRI,5.117676e+10
2,LBN,1.023806e+10
3,VIR,3.621598e+08
4,LBR,9.591809e+10
...,...,...
251,RWA,2.527768e+10
252,PAN,7.510388e+10
253,TZA,9.405763e+11
254,ZMB,7.504916e+11


In [5]:
areas_zones_data

Unnamed: 0,iso3_code,climate_zone,m2_area
0,LBY,3,8.928505e+11
1,LBY,1,2.291252e+10
2,LBY,2,7.003007e+11
3,CRI,1,5.113669e+10
4,LBN,3,1.023806e+10
...,...,...,...
361,PAN,1,7.505592e+10
362,TZA,1,9.323747e+11
363,ZMB,1,7.504916e+11
364,AGO,1,1.237890e+12


In [6]:
building_statistics_fin

Unnamed: 0,observation_year,gross_floor_area
0,2015,466737473.0
1,2016,474052712.0
2,2017,482457803.0
3,2018,488370838.0
4,2019,494625956.0
5,2020,505285969.0


In [7]:
climatetrace_countries['country_name'].unique()

<StringArray>
[                       'Aruba',                  'Afghanistan',
                       'Angola',                     'Anguilla',
                'Åland Islands',                      'Albania',
                      'Andorra',         'Netherlands Antilles',
         'United Arab Emirates',                    'Argentina',
 ...
       'British Virgin Islands', 'United States Virgin Islands',
                     'Viet Nam',                      'Vanuatu',
    'Wallis and Futuna Islands',                        'Samoa',
                        'Yemen',                 'South Africa',
                       'Zambia',                     'Zimbabwe']
Length: 252, dtype: string

In [8]:
countries_data

Unnamed: 0,iso3_code,pop_density,emission_factor
0,ABW,0.000624,199.4
1,AFG,0.000049,199.4
2,AGO,0.000023,199.4
3,AIA,0.000140,199.4
4,ALA,0.000019,199.4
...,...,...,...
251,XSP,0.000000,199.4
252,YEM,0.000064,199.4
253,ZAF,0.000048,199.4
254,ZMB,0.000022,199.4


In [9]:
heating_demand_data

Unnamed: 0,climate_zone,heating_demand_factor
0,1,2e-05
1,2,2.3e-05
2,3,5.2e-05
3,4,0.000133
4,5,0.00016
5,6,0.000241
6,7,0.000159
7,8,0.00023
8,9,0.000385
9,10,0.000245


In [12]:
on_site_heat_data

Unnamed: 0,continent_name,oheg_factor,hfa_factor,ohfa_factor
0,Africa,0.9,0.5,0.45
1,Antarctica,0.0,0.0,0.0
2,Asia,0.8,0.7,0.56
3,Europe,0.6,0.9,0.54
4,,0.0,0.0,0.0
5,North America,0.7,0.9,0.63
6,Northern Europe,0.3,0.9,0.27
7,Oceania,0.8,0.5,0.4
8,Seven seas (open ocean),0.0,0.0,0.0
9,South America,0.8,0.7,0.56


In [13]:
countries_data.join(manual_continentsset_index.set_index('iso3_code')['continent_name'], on='iso3_code')

NameError: name 'manual_continentsset_index' is not defined

In [None]:
df = areas_zones_data.join(climatetrace_countries.set_index('iso3_code'), on='iso3_code')
a = set(countries_data['iso3_code']).difference(df['iso3_code'])
b = set(climatetrace_countries['iso3_code']).difference(df['iso3_code'])
#a = set([1,2,3])
#b = set([2,3,4])
a.difference(b)
set(climatetrace_countries['iso3_code']).difference(countries_data['iso3_code'])
#set(df['country_name']).difference(climatetrace_countries['country_name'])

In [167]:
cols = ['country_name']
df.set_index('iso3_code')[cols]


Unnamed: 0_level_0,country_name
iso3_code,Unnamed: 1_level_1
ABW,Aruba
AFG,Afghanistan
AFG,Afghanistan
AFG,Afghanistan
AGO,Angola
...,...
ZAF,South Africa
ZAF,South Africa
ZAF,South Africa
ZMB,Zambia


In [32]:
 with pd.option_context(None, None):
    display(climatetrace_countries.join(areas_zones_data.set_index('iso3_code'), on='iso3_code'))

TypeError: first argument must be string or compiled pattern

In [44]:
ct_areas_zones = climatetrace_countries.join(areas_zones_data.set_index('iso3_code'), on='iso3_code')
set(climatetrace_countries['iso3_code']).difference(ct_areas_zones['iso3_code'])

set()

In [40]:
import helpers as h
il.reload(h)
h.show_all(df)


Unnamed: 0,country_name,iso3_code,climate_zone,m2_area
0,Aruba,ABW,1.0,123830500.0
1,Afghanistan,AFG,3.0,622801200000.0
1,Afghanistan,AFG,2.0,19482730000.0
1,Afghanistan,AFG,5.0,1573999000.0
2,Angola,AGO,1.0,1237890000000.0
2,Angola,AGO,2.0,9410578000.0
3,Anguilla,AIA,1.0,83303310.0
4,Åland Islands,ALA,4.0,1074806000.0
4,Åland Islands,ALA,5.0,1379091.0
5,Albania,ALB,3.0,19043690000.0
