In [1]:
from pandas import DataFrame

def get_population_electricity_consumption_table(target_year):
    """Placeholder that returns an empty table.

    The ``target_year`` argument is accepted but not used yet.
    """
    placeholder_table = DataFrame()
    return placeholder_table

# Year for which population and electricity consumption are estimated
target_year = 2020
population_electricity_consumption_table = (
    get_population_electricity_consumption_table(target_year))
# Preview the first two rows
population_electricity_consumption_table.head(2)

In [2]:
from os.path import expanduser, join
from pandas import read_csv
# Folder holding the downloaded source datasets
source_folder = expanduser(
    '~/Projects/infrastructure-planning/datasets')
# One path per source table
country_name_variation_table_path = join(source_folder, 'world-country-name-variation.csv')
country_region_income_table_path = join(source_folder, 'world-country-region-income.csv')
electricity_consumption_per_capita_by_year_table_path = join(
    source_folder,
    'world-electricity-consumption-per-capita-by-year.csv')
population_by_year_by_country_table_path = join(
    source_folder,
    'world-population-by-year-by-country.csv')

In [3]:
# Load the table pairing World Bank and United Nations country spellings
country_name_variation_table = read_csv(
    country_name_variation_table_path)
country_name_variation_table.head(2)

Unnamed: 0,World Bank,United Nations
0,"Bahamas, The",Bahamas
1,Bolivia,Bolivia (Plurinational State of)


In [4]:
from StringIO import StringIO
# The file starts with a UTF-8 byte order mark; let pandas decode it with
# 'utf-8-sig' so the mark is stripped, instead of reading and decoding the
# whole file by hand through a file handle that was never closed
country_region_income_table = read_csv(
    country_region_income_table_path, encoding='utf-8-sig')
country_region_income_table[:2]

Unnamed: 0,Country Name,Country Code,Region,IncomeGroup,SpecialNotes,Unnamed: 5
0,Aruba,ABW,Latin America & Caribbean,High income: nonOECD,SNA data for 2000-2011 are updated from offici...,
1,Afghanistan,AFG,South Asia,Low income,Fiscal year end: March 20; reporting period fo...,


In [5]:
# Load population values by country, year and projection variant
population_by_year_by_country_table = read_csv(
    population_by_year_by_country_table_path)
population_by_year_by_country_table.head(2)

Unnamed: 0,Country or Area,Year(s),Variant,Value
0,Afghanistan,2100,Const. mortality (Medium),40190.28
1,Afghanistan,2100,Constant-fertility scenario,845843.367


In [6]:
# Skip the first three lines of the file so the header row is read as columns
electricity_consumption_per_capita_by_year_table = read_csv(
    electricity_consumption_per_capita_by_year_table_path,
    skiprows=3)
# Preview two rows for a handful of years
electricity_consumption_per_capita_by_year_table.iloc[232:234][
    ['Country Name', '2000', '2005', '2010']]

Unnamed: 0,Country Name,2000,2005,2010
232,Uruguay,2030.264582,1999.634353,2803.153377
233,United States,13671.052024,13704.577048,13394.014607


In [7]:
# List output table columns
# Country, Region, Income Group, Year, Population, Electricity Consumption Per Capita, Electricity Consumption

In [55]:
# Distinct country spellings used by each data source
united_nations_country_names = (
    population_by_year_by_country_table['Country or Area'].unique())
world_bank_country_names = (
    country_region_income_table['Country Name'].unique())

def get_united_nations_country_name(world_bank_country_name):
    """Translate a World Bank country name into the United Nations spelling.

    The name is first looked up in the 'World Bank' column of
    ``country_name_variation_table``; the 'United Nations' value of the first
    matching row is returned. If there is no such row, the name is returned
    unchanged provided it appears in ``united_nations_country_names``.

    Raises:
        ValueError: if the name has no listed variation and is not a
            United Nations country name.
    """
    t = country_name_variation_table
    matches = t.loc[t['World Bank'] == world_bank_country_name, 'United Nations']
    if len(matches):
        return matches.values[0]
    if world_bank_country_name not in united_nations_country_names:
        # Name the offending country so the failure can be diagnosed
        raise ValueError(
            'no United Nations country name for %r' % (world_bank_country_name,))
    return world_bank_country_name

def get_world_bank_country_name(united_nations_country_name):
    """Translate a United Nations country name into the World Bank spelling.

    The name is first looked up in the 'United Nations' column of
    ``country_name_variation_table``; the 'World Bank' value of the first
    matching row is returned. If there is no such row, the name is returned
    unchanged provided it appears in ``world_bank_country_names``.

    Raises:
        ValueError: if the name has no listed variation and is not a
            World Bank country name.
    """
    t = country_name_variation_table
    matches = t.loc[t['United Nations'] == united_nations_country_name, 'World Bank']
    if len(matches):
        return matches.values[0]
    if united_nations_country_name not in world_bank_country_names:
        # Name the offending country so the failure can be diagnosed
        raise ValueError(
            'no World Bank country name for %r' % (united_nations_country_name,))
    return united_nations_country_name

# Spot-check the name translation in both directions; the parenthesised form
# prints the same text on Python 2 and is also valid on Python 3
print(get_world_bank_country_name('Bahamas'))
print(get_world_bank_country_name('Angola'))
print(get_united_nations_country_name('Bahamas, The'))

Bahamas, The
Angola
Bahamas


In [74]:
# Pick one United Nations country to prototype with and resolve its World Bank name
united_nations_country_name = united_nations_country_names[111]
world_bank_country_name = get_world_bank_country_name(united_nations_country_name)
# Parenthesised print works on both Python 2 and Python 3
print(united_nations_country_name)
print(world_bank_country_name)

Ireland
Ireland


In [75]:
# Keep only the population rows (actual and estimated) of the selected country
t = population_by_year_by_country_table
country_t = t.loc[t['Country or Area'] == united_nations_country_name]
country_t.head(2)

Unnamed: 0,Country or Area,Year(s),Variant,Value
18071,Ireland,2100,Const. mortality (Medium),5738.911
18072,Ireland,2100,Constant-fertility scenario,6732.225


In [76]:
# The first year carrying a 'Low variant' row marks where United Nations
# estimates begin for this country
earliest_estimated_year = min(
    country_t.loc[country_t['Variant'] == 'Low variant', 'Year(s)'])
earliest_estimated_year

2010

In [77]:
# Collect (year, population) pairs from the years before the estimates begin
year_packs = country_t.loc[
    country_t['Year(s)'] < earliest_estimated_year,
    ['Year(s)', 'Value'],
].values
year_packs

array([[ 2005.   ,  4158.002],
       [ 2000.   ,  3803.741],
       [ 1995.   ,  3610.918],
       [ 1990.   ,  3531.185],
       [ 1985.   ,  3534.064],
       [ 1980.   ,  3417.754],
       [ 1975.   ,  3183.722],
       [ 1970.   ,  2963.004],
       [ 1965.   ,  2874.705],
       [ 1960.   ,  2828.866],
       [ 1955.   ,  2901.138],
       [ 1950.   ,  2913.093]])

In [78]:
# Estimate population for the given year
# Build an estimator from the historical (year, population) pairs.
# NOTE(review): judging only by the helper's name, it interpolates with a spline
# inside the known years and extrapolates linearly outside them -- confirm
# against the infrastructure_planning source
from infrastructure_planning.growth.interpolated import get_interpolated_spline_extrapolated_linear_function
estimate_population = get_interpolated_spline_extrapolated_linear_function(year_packs)
population = estimate_population(target_year)
population

array(4499.1355874125875)

In [79]:
# Keep only the electricity consumption per capita row(s) of the selected country
t = electricity_consumption_per_capita_by_year_table
country_t = t.loc[t['Country Name'] == world_bank_country_name]
country_t

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,Unnamed: 60
101,Ireland,IRL,Electric power consumption (kWh per capita),EG.USE.ELEC.KH.PC,695.043484,752.018128,838.13755,886.193539,1041.670301,1134.39683,...,6218.768058,6340.733045,5947.909489,5910.983289,5661.168058,5664.831802,,,,


In [80]:
# Get year_packs
import numpy as np
# Collect (year, value) pairs from the columns whose name parses as an integer
# year, leaving out years with no reported value
year_packs = []
for column_name in country_t.columns:
    try:
        year = int(column_name)
    except ValueError:
        # Not a year column (e.g. 'Country Name')
        continue
    value = country_t[column_name].values[0]
    if not np.isnan(value):
        year_packs.append((year, value))
year_packs[:3]

[(1960, 695.04348440924809),
 (1961, 752.01812774394602),
 (1962, 838.13755046631809)]

In [81]:
# Estimate electricity consumption per capita for the given year
from infrastructure_planning.exceptions import EmptyDataset

try:
    estimate_electricity_consumption_per_capita = get_interpolated_spline_extrapolated_linear_function(
        year_packs)
except EmptyDataset:
    # Nothing is handled here yet; the exception is propagated unchanged.
    # NOTE(review): presumably raised when year_packs is empty -- confirm
    raise

electricity_consumption_per_capita = estimate_electricity_consumption_per_capita(target_year)
electricity_consumption_per_capita

array(6596.209336832399)

In [84]:
# Multiply population by electricity consumption per capita to get electricity consumption
# NOTE(review): the population values displayed earlier look like thousands of
# people (e.g. Ireland 4158.002 for 2005) and the indicator is kWh per capita,
# so this product is presumably in thousands of kWh -- confirm the units
# before saving the table
electricity_consumption = population * electricity_consumption_per_capita
electricity_consumption

29677240.169365827

In [85]:
# Save table

In [None]:
def get_population_electricity_consumption_table(target_year):
    """Return the population and electricity consumption table for a year.

    This is still a stub: it returns an empty table, matching the placeholder
    defined at the top of the notebook, so that callers slicing the result
    (``table[:2]``) keep working instead of failing on ``None``.

    TODO: assemble the planned columns listed in the notebook notes (Country,
    Region, Income Group, Year, Population, Electricity Consumption Per
    Capita, Electricity Consumption) from the steps prototyped in the cells.
    """
    # Imported locally so the definition does not depend on an earlier cell
    from pandas import DataFrame
    return DataFrame()