In [146]:
import pandas as pd
import re

Re: Building a master TAM table:

All of our scraped/flatttened data is here: https://drive.google.com/file/d/1OXLfNI4MPoK9r4FTe2bOy0U-acvp2BsE/view?usp=sharing
Essentially, we want a table containing all of the electricity generation info, with these columns: 

Source, 
Year generated, 
Measurement year, 
value, 
unit, 
geography, 
category (electricity), 
sector, 
tech_type, 
scenario, 
impact level (Low, Medium, High, Very High)

(and we discussed maybe multiple columns for geography to capture varying levels of granularity)
Things we may need to standardize: geography names, tech types, converting units so they're all the same (TWh)), 

interpolating the data so they are from 2014-2060 (using raw values where they exist)


## green peace

Year generated, 
use table number for this - unit, 
category (electricity) vs group, 
sector, 
impact level (Low, Medium, High, Very High)

In [147]:
table1 = pd.read_csv("interim/greenpeace_190501.csv", index_col = None)

In [148]:
table1.drop(columns = ["Unnamed: 0", "NumTable"], inplace = True)

In [149]:
table1["Value"] = table1["Value"].str.replace('[,%]', '').astype(float)

In [150]:
table1 = table1.dropna()

In [151]:
table1 = table1[table1["TableName"] == "electricity generation"]

In [152]:
table1.drop(columns = ["TableName"], inplace = True)

In [153]:
table1["source"] = "greenpeace"
table1["year_generated"] = 2015
table1["category"] = "electricity"
table1["unit"] = 'twh'
table1.unit[table1["Technology"] == "total res "] = "%"
table1.unit[table1["Technology"] == "res share "] = "%"

table1["impact"] = None

table1.impact[table1["Scenario"] == "reference"] = "medium"
table1.impact[table1["Scenario"] == "energy revolution"] = "high"
table1.impact[table1["Scenario"] == "advanced energy revolution"] = "very high"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pyda

In [154]:
table1.columns

Index(['Geography', 'Sector', 'Scenario', 'Technology', 'Year', 'Value',
       'source', 'year_generated', 'category', 'unit', 'impact'],
      dtype='object')

In [155]:
table1.columns = ["geography", "sector", "scenario", "tech_type", "measurement_year", "value", "source", "year_generated",  "category", 'unit', "impact"]

In [156]:
table1.geography.unique()

array(['global', 'oecd north america', 'latin america', 'oecd europe',
       'africa', 'middle east', 'eastern europe/eurasia', 'india',
       'other asia', 'china', 'oecd asia oceania'], dtype=object)

In [157]:
table1.geography.replace({'global': 'world'}, inplace = True)

In [158]:
table1.tech_type.unique()

array(['power plants', 'hard coal (& non-renewable waste)', 'lignite',
       'gas', 'of which from h2', 'oil', 'diesel', 'nuclear',
       'biomass (& renewable waste)', 'hydro', 'wind',
       'of which wind offshore', 'pv', 'geothermal',
       'solar thermal power plants', 'ocean energy',
       'combined heat and power plants', 'hydrogen',
       'main activity producers', 'autoproducers', 'total generation',
       'fossil', 'of which renewable h2',
       'renewables (w/o renewable hydrogen)', 'distribution losses',
       'own consumption electricity',
       'electricity for hydrogen production',
       'electricity for synfuel production',
       'final energy consumption (electricity)',
       'fluctuating res (pv, wind, ocean)', 'share of fluctuating res',
       'res share', 'power plants ', 'hard coal (& non-renewable waste) ',
       'lignite ', 'gas ', 'oil ', 'diesel ', 'nuclear ',
       'biomass (& renewable waste) ', 'hydro ', 'wind ',
       'of which wind offshore

## IEEJ

In [159]:
table2 = pd.read_csv("interim/final_IEEJ.csv", index_col = None)

In [160]:
table2.drop(columns = ["Unnamed: 0"], inplace = True)

In [161]:
table2 = table2[table2.group == 'Electricity Generation']
table2 = table2[(table2.scenario != 'No New Coal Plants (Natural Gas Substitution)')]
table2 = table2[(table2.scenario != 'No New Coal Plants (Renewables Substitution)')]

In [162]:
table2.scenario.unique()

array(['Reference', 'Advanced Technologies'], dtype=object)

In [163]:
table2["source"] = "IEEJ Outlook"
table2["year_generated"] = 2019
table2["category"] = 'electricity'
table2['impact'] = None

table2.impact[table2["scenario"] == "Reference"] = "low"
table2.impact[table2["scenario"] == "Advanced Technologies"] = "high"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [164]:
table2 = table2.applymap(lambda s:s.lower() if type(s) == str else s)

In [165]:
table2.geography.unique()

array(['world', 'asia', 'china', 'india', 'japan', 'korea',
       'chinese taipei', 'asean', 'indonesia', 'malaysia', 'myanmar',
       'phillipines', 'thailand', 'vietnam', 'north america',
       'united states', 'latin america', 'oecd europe', 'non-oecd europe',
       'european union', 'africa', 'middle east', 'oceania', 'oecd',
       'non-oecd'], dtype=object)

In [166]:
table2.geography.replace({'oecd': 'oecd2019'}, inplace = True)

In [167]:
table2.columns = ["tech_type", "geography", "sector", "scenario", "unit", "measurement_year", "value", "source", "year_generated", "category", "impact"]

In [168]:
table2

Unnamed: 0,tech_type,geography,sector,scenario,unit,measurement_year,value,source,year_generated,category,impact
21,total,world,electricity generation,reference,twh,1980,8283,ieej outlook,2019,electricity,low
22,coal,world,electricity generation,reference,twh,1980,3137,ieej outlook,2019,electricity,low
23,oil,world,electricity generation,reference,twh,1980,1659,ieej outlook,2019,electricity,low
24,natural gas,world,electricity generation,reference,twh,1980,999,ieej outlook,2019,electricity,low
25,nuclear,world,electricity generation,reference,twh,1980,713,ieej outlook,2019,electricity,low
26,hydro,world,electricity generation,reference,twh,1980,1717,ieej outlook,2019,electricity,low
27,geothermal,world,electricity generation,reference,twh,1980,14,ieej outlook,2019,electricity,low
28,solar pv,world,electricity generation,reference,twh,1980,-,ieej outlook,2019,electricity,low
29,wind,world,electricity generation,reference,twh,1980,-,ieej outlook,2019,electricity,low
30,csp and marine,world,electricity generation,reference,twh,1980,0.5,ieej outlook,2019,electricity,low


## shell_sky_2018

In [169]:
table3 = pd.read_csv("interim/shell_sky_2018.csv", index_col = None)

In [170]:
table3 = table3[(table3["group"] == "Electricity Total Final Consumption - By Source") | (table3["group"] == "Electricity Total Final Consumption - By Sector - Total Electricity")]

In [171]:
table3["group"][table3["group"] == "Electricity Total Final Consumption - By Sector - Total Electricity"] = table3["tech_type"][table3["group"] == "Electricity Total Final Consumption - By Sector - Total Electricity"]

In [172]:
table3.head(5)

Unnamed: 0,tech_type,measurement_year,value,group,unit,scenario,geography,year_generated,added_datetime,updated_datetime
4550,Oil,1980,4.902708,Electricity Total Final Consumption - By Source,EJ / year,sky,World,2018,2019-05-08 00:20:42.450203,2019-05-08 00:20:42.450833
4551,Biofuels,1980,0.0,Electricity Total Final Consumption - By Source,EJ / year,sky,World,2018,2019-05-08 00:20:42.450203,2019-05-08 00:20:42.450833
4552,Natural Gas,1980,2.91907,Electricity Total Final Consumption - By Source,EJ / year,sky,World,2018,2019-05-08 00:20:42.450203,2019-05-08 00:20:42.450833
4553,Biomass Gasified,1980,0.000114,Electricity Total Final Consumption - By Source,EJ / year,sky,World,2018,2019-05-08 00:20:42.450203,2019-05-08 00:20:42.450833
4554,Coal,1980,9.257011,Electricity Total Final Consumption - By Source,EJ / year,sky,World,2018,2019-05-08 00:20:42.450203,2019-05-08 00:20:42.450833


In [173]:
table3["source"] = "Shell"
table3["category"] = "electricity"
table3["unit"] = "twh"
table3["impact"] = "high"

In [174]:
cf_ej_to_twh = 277.778
table3.value = cf_ej_to_twh * table3.value

In [175]:
table3 = table3.applymap(lambda s:s.lower() if type(s) == str else s)

In [176]:
table3.geography.unique()

array(['world', 'north america', 'europe', 'eurasia',
       'developed asia pacific', 'china', 'india',
       'developing asia pacific', 'latin america', 'middle east',
       'africa', 'international marine bunkers'], dtype=object)

In [177]:
table3.columns = ['tech_type', 'measurement_year', 'value', 'sector', 'unit', 'scenario',
       'geography', 'year_generated', 'added_datetime', 'updated_datetime',
       'source', 'impact', 'category']

## EWG-LUT

In [334]:
table4 = pd.read_csv("interim/ewg-lut_100re_2019.csv", index_col = None)

In [335]:
table4 = table4[table4.group == 'Generation']

In [336]:
table4.drop(columns= ['group'], inplace = True)

In [337]:
table4.sector.unique()

array(['integrated', 'power&heat', 'transport', 'desalination'],
      dtype=object)

In [338]:
table4["source"] = "EWG-LUT"
table4["category"] = "electricity"
table4["impact"] = "very high"

In [339]:
table4 = table4.applymap(lambda s:s.lower() if type(s) == str else s)

In [323]:
table4.geography.unique()

array(['global', 'europe', 'eurasia', 'mena', 'ssa', 'saarc', 'ne-asia',
       'se-asia', 'n-am', 's-am'], dtype=object)

In [340]:
table4.geography.replace({'global': 'world', 'ssa':'sub-saharan africa', 'mena': 'middle east and north africa', 'ne-asia': 'northeast asia', 'se-asia':'southeast asia', 'n-am':'north america', 's-am':'south america'}, inplace = True)

In [187]:
table4.columns

Index(['tech_type', 'unit', 'measurement_year', 'value', 'sector', 'scenario',
       'geography', 'year_generated', 'added_datetime', 'updated_datetime',
       'source', 'category', 'impact'],
      dtype='object')

In [341]:
table4.unit.unique()

array(['[twh_el]', '[twh_th]', '[twh]', '[mtco2]'], dtype=object)

In [342]:
table4 = table4[table4.unit != "[mtco2]"]

In [326]:
table4.unit = "twh"

In [343]:
x = table4[table4.geography == 'europe']
x = x[x.sector == "desalination"]
len(x.tech_type.unique()), len(x.tech_type)
x

Unnamed: 0,tech_type,unit,measurement_year,value,sector,scenario,geography,year_generated,added_datetime,updated_datetime,source,category,impact
7856,pv fixed tilted,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7857,pv single-axis,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7858,pv prosumers,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7859,wind onshore,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7860,wind offshore,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7861,hydro run-of-river,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7862,hydro reservoir (dam),[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7863,biomass solid,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7864,biomass chp,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high
7865,waste-to-energy chp,[twh_el],2015,0.0,desalination,100re,europe,2019,2019-05-08 21:54:45.055532,2019-05-08 21:54:45.056158,ewg-lut,electricity,very high


## Equinor

In [191]:
table5 = pd.read_csv("interim/equinor_energy-perspectives_2018.csv", index_col = None)

In [192]:
table5 = table5[table5.group == "Power & heat generation"]

In [193]:
table5["sector"] = table5.group
table5.drop(columns= ['group'], inplace = True)

In [194]:
table5["source"] = "Equinor"
table5["category"] = "electricity"
table5["impact"] = None

table5.impact[table5["scenario"] == "Rivalry"] = "low"
table5.impact[table5["scenario"] == "Renewal"] = "medium"
table5.impact[table5["scenario"] == "Reform"] = "high"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [195]:
table5.value = table5.value*1000
table5.unit = 'twh'

In [196]:
table5 = table5.applymap(lambda s:s.lower() if type(s) == str else s)

In [197]:
table5.geography.unique()

array(['world'], dtype=object)

In [198]:
table5.columns

Index(['tech_type', 'value', 'measurement_year', 'scenario', 'unit',
       'geography', 'year_generated', 'added_datetime', 'updated_datetime',
       'sector', 'source', 'category', 'impact'],
      dtype='object')

## IEA_WEO

In [467]:
table6 = pd.read_csv("interim/iea_weo_2018.csv", index_col = None)

In [468]:
table6 = table6[table6.group == 'Electricity generation (TWh)']

In [469]:
table6["sector"] = table6["group"]
table6.drop(columns= ['group'], inplace = True)

In [470]:
table6.scenario.unique()

array(['New Policies Scenario', 'Current Policies Scenario',
       'Sustainable Development Scenario'], dtype=object)

In [471]:
table6["source"] = "IEA WEO"
table6["category"] = 'electricity'
table6['unit'] = 'twh'
table6['impact'] = None

table6.impact[table6["scenario"] == "Current Policies Scenario"] = "low"
table6.impact[table6["scenario"] == "New Policies Scenario"] = "medium"
table6.impact[table6["scenario"] == "Sustainable Development Scenario"] = "high"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [472]:
table6 = table6.applymap(lambda s:s.lower() if type(s) == str else s)

In [473]:
table6.geography.unique()

array(['world', 'nam', 'us', 'csam', 'brazil', 'eur', 'eu', 'africa',
       'safr', 'me', 'eurasia', 'rus', 'asiapac', 'india', 'jpn', 'asean',
       'oecd', 'nonoecd', 'developingeco', 'advancedeco'], dtype=object)

In [474]:
table6.geography.replace({'global': 'world', 'nam':'north america', 'us': 'united states', 'csam': 'central south america', 'nonoecd': 'non-oecd', 'eu':'european union', 'eur':'europe',  'safr':'south africa', 'me':'middle east', 'rus':'russia', 'jpn':'japan', 'oecd':'oecd2019', 'asiapac':'asia pacific'}, inplace = True)

In [475]:
table6.tech_type.unique()

array(['total generation', 'coal', 'oil', 'gas', 'nuclear', 'renewables',
       'hydro', 'bioenergy', 'wind', 'geothermal', 'solar pv', 'csp',
       'marine'], dtype=object)

## IEA_ETP 2017

In [280]:
table7 = pd.read_csv("interim/iea_etp_2017.csv", index_col = None)

In [281]:
table7 = table7[table7.sector == 'scenario']
table7 = table7[table7.group == 'electricity_generation']

In [282]:
table7["sector"] = 'electricity generation'

table7.drop(columns= ['group'], inplace = True)

In [283]:
#convert pj to twh
cf_pj_to_twh = 0.2778
table7['value'] = cf_pj_to_twh * table7['value']

In [483]:
x = table7[table7['geography'] == "non-oecd"]
x = x[x.tech_type == "coal"]
x = x[x.scenario == "b2ds"]

In [285]:
table7["source"] = "IEA ETP 2017"
table7["category"] = "electricity"
table7["unit"] = 'twh'
table7["impact"] = None

table7.impact[table7["scenario"] == "rts"] = "low"
table7.impact[table7["scenario"] == "2ds"] = "medium"
table7.impact[table7["scenario"] == "b2ds"] = "high"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [286]:
table7 = table7.applymap(lambda s:s.lower() if type(s) == str else s)

In [287]:
table7.geography.unique()

array(['world', 'oecd', 'nonoecd', 'asean', 'brazil', 'china',
       'european union', 'india', 'mexico', 'russia', 'south africa',
       'united states'], dtype=object)

In [288]:
table7.geography.replace({'nonoecd': 'non-oecd', 'oecd':'oecd2017'}, inplace = True)

## IEA_ETP 2016

In [289]:
table8 = pd.read_csv("interim/iea_etp_2016.csv", index_col = None)

In [290]:
table8 = table8[table8.sector == 'scenario']
table8 = table8[table8.group == 'electricity_generation']

In [291]:
table8["sector"] = 'electricity generation'
table8.drop(columns= ['group'], inplace = True)

In [292]:
table8

Unnamed: 0,tech_type,measurement_year,value,added_datetime,updated_datetime,geography,scenario,sector,year_generated
57648,Oil,2013,1039.008000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57649,Coal,2013,9621.549000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57650,Coal with CCS,2013,0.000000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57651,Natural gas,2013,5066.215000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57652,Natural gas with CCS,2013,0.000000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57653,Nuclear,2013,2478.216000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57654,Biomass and waste,2013,461.777000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57655,Biomass with CCS,2013,0.000000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57656,Hydro (excl. pumped storage),2013,3790.281000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016
57657,Geothermal,2013,71.615000,2019-04-02 18:11:36.724793,2019-04-02 18:11:36.725565,WORLD,6ds,electricity generation,2016


In [293]:
#convert pj to twh
cf_pj_to_twh = 0.2778
table8['value'] = cf_pj_to_twh * table8['value']

In [294]:
table8["source"] = "IEA ETP 2016"
table8["category"] = "electricity"
table8["unit"] = 'twh'
table8["impact"] = None

table8.impact[table8["scenario"] == "6ds"] = "low"
table8.impact[table8["scenario"] == "4ds"] = "medium"
table8.impact[table8["scenario"] == "2ds"] = "high"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [295]:
table8 = table8.applymap(lambda s:s.lower() if type(s) == str else s)

In [296]:
table8.geography.unique()

array(['world', 'world urban', 'world non-urban', 'oecd', 'oecd urban',
       'oecd non-urban', 'nonoecd', 'nonoecd urban', 'nonoecd non-urban',
       'asean', 'brazil', 'china', 'european union', 'india', 'mexico',
       'russia', 'south africa', 'united states'], dtype=object)

In [224]:
table8.sector.unique()

array(['electricity generation'], dtype=object)

In [297]:
table8.geography.replace({'nonoecd': 'non-oecd', 'oecd':'oecd2016'}, inplace = True)

In [226]:
table8

Unnamed: 0,tech_type,measurement_year,value,added_datetime,updated_datetime,geography,scenario,sector,year_generated,source,category,unit,impact
55848,oil,2013,49084.936240,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55849,coal,2013,45389.993007,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55850,natural gas,2013,33752.235612,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55851,nuclear,2013,7519.360297,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55852,biomass and waste,2013,16020.873749,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55853,hydro,2013,3791.266528,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55854,other,2013,1939.049852,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55855,total,2013,157497.715284,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55856,oil,2020,51859.248701,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low
55857,coal,2020,49073.222807,2019-04-02 18:11:36.469095,2019-04-02 18:11:36.469982,world,6ds,electricity generation,2016,iea etp 2016,electricity,twh,low


## Ampere

In [227]:
table9 = pd.read_csv("interim/ampere_2014.csv", index_col = None)

In [228]:
table9 = table9[(table9.model == 'GEM-E3') | (table9.model == 'MESSAGE') | (table9.model == 'IMAGE')]

In [229]:
table9["geography"] = table9["region"]
table9.drop(columns= ['region'], inplace = True)
table9.geography.unique()

array(['EU', 'ASIA', 'China', 'India', 'LAM', 'MAF', 'OECD90', 'REF',
       'USA', 'World', 'Brazil', 'EU12', 'EU15', 'Japan', 'Russia'],
      dtype=object)

In [230]:
table9["source"] = table9["model"]
table9.drop(columns= ['model'], inplace = True)
table9.source.unique()

array(['GEM-E3', 'IMAGE', 'MESSAGE'], dtype=object)

In [231]:
table9 = table9[table9.variable.str.match('(Secondary Energy\|Electricity)')]

In [232]:
table9

Unnamed: 0,scenario,variable,unit,measurement_year,value,added_datetime,updated_datetime,year_generated,geography,source
39,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity,EJ/yr,2005,12.09191,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
40,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Biomass,EJ/yr,2005,0.31619,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
41,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Biomass|w/o CCS,EJ/yr,2005,0.31619,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
42,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Coal,EJ/yr,2005,3.77758,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
43,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Coal|w/ CCS,EJ/yr,2005,0.00000,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
44,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Coal|w/o CCS,EJ/yr,2005,3.77758,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
45,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Gas,EJ/yr,2005,2.48107,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
46,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Gas|w/ CCS,EJ/yr,2005,0.00000,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
47,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Gas|w/o CCS,EJ/yr,2005,2.48107,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3
48,AMPERE5-Decarb-AllOptions,Secondary Energy|Electricity|Hydro,EJ/yr,2005,1.11338,2019-05-14 18:33:42.122111,2019-05-14 18:33:42.123371,2014,EU,GEM-E3


In [233]:
# remove front words
table9.variable = table9.variable.str.replace('Secondary Energy\|Electricity\|', '')
# replace with total for sector overviews for each scenario
table9.variable = table9.variable.str.replace("Secondary Energy\|Electricity", 'Total')

In [234]:
table9["tech_type"] = table9.variable
table9.drop(columns= ['variable'], inplace = True)
table9.tech_type.unique()

array(['Total', 'Biomass', 'Biomass|w/o CCS', 'Coal', 'Coal|w/ CCS',
       'Coal|w/o CCS', 'Gas', 'Gas|w/ CCS', 'Gas|w/o CCS', 'Hydro',
       'Non-Biomass Renewables', 'Nuclear', 'Oil', 'Oil|w/o CCS', 'Solar',
       'Wind', 'Other', 'Biomass|w/ CCS', 'Geothermal', 'Oil|w/ CCS'],
      dtype=object)

In [235]:
table9 = table9[(table9.scenario == 'AMPERE3-Base') | (table9.scenario == 'AMPERE3-550') | (table9.scenario == 'AMPERE3-450')]

In [236]:
#check = table9[table9.source == 'GEM-E3']
#check.scenario.unique()

In [237]:
table9["sector"] = "Secondary Energy|Electricity"
table9["category"] = "electricity"
table9["impact"] = None


table9.impact[table9["scenario"] == "AMPERE3-Base"] = "low"
table9.impact[table9["scenario"] == "AMPERE3-450"] = "medium"
table9.impact[table9["scenario"] == "AMPERE3-550"] = "high"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.o

In [238]:
cf_ej_to_twh = 277.778
table9.value = cf_ej_to_twh * table9.value

In [239]:
table9.unit = 'twh'

In [240]:
table9 = table9.applymap(lambda s:s.lower() if type(s) == str else s)

'EU', 'ASIA', 'China', 'India', 'LAM', 'MAF', 'OECD90', 'REF',
       'USA', 'World', 'Brazil', 'EU12', 'EU15', 'Japan', 'Russia'

In [241]:
table9.geography.replace({'eu': 'european union', 'lam': 'latin america', 'maf': 'middle east and africa', 'usa':'united states'}, inplace = True)

In [242]:
table9.geography.unique()

array(['asia', 'china', 'european union', 'eu12', 'eu15', 'india',
       'latin america', 'middle east and africa', 'oecd90', 'ref',
       'united states', 'world', 'brazil', 'japan', 'russia'],
      dtype=object)

In [243]:
table9.head(100)

Unnamed: 0,scenario,unit,measurement_year,value,added_datetime,updated_datetime,year_generated,geography,source,tech_type,sector,category,impact
428461,ampere3-450,twh,2005,4482.825808,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,total,secondary energy|electricity,electricity,medium
428462,ampere3-450,twh,2005,4.366670,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,biomass,secondary energy|electricity,electricity,medium
428463,ampere3-450,twh,2005,0.000000,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,biomass|w/ ccs,secondary energy|electricity,electricity,medium
428464,ampere3-450,twh,2005,4.366670,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,biomass|w/o ccs,secondary energy|electricity,electricity,medium
428465,ampere3-450,twh,2005,2866.460627,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,coal,secondary energy|electricity,electricity,medium
428466,ampere3-450,twh,2005,0.000000,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,coal|w/ ccs,secondary energy|electricity,electricity,medium
428467,ampere3-450,twh,2005,2866.460627,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,coal|w/o ccs,secondary energy|electricity,electricity,medium
428468,ampere3-450,twh,2005,489.536503,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,gas,secondary energy|electricity,electricity,medium
428469,ampere3-450,twh,2005,0.000000,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,gas|w/ ccs,secondary energy|electricity,electricity,medium
428470,ampere3-450,twh,2005,489.536503,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,gas|w/o ccs,secondary energy|electricity,electricity,medium


In [244]:
table9

Unnamed: 0,scenario,unit,measurement_year,value,added_datetime,updated_datetime,year_generated,geography,source,tech_type,sector,category,impact
428461,ampere3-450,twh,2005,4482.825808,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,total,secondary energy|electricity,electricity,medium
428462,ampere3-450,twh,2005,4.366670,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,biomass,secondary energy|electricity,electricity,medium
428463,ampere3-450,twh,2005,0.000000,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,biomass|w/ ccs,secondary energy|electricity,electricity,medium
428464,ampere3-450,twh,2005,4.366670,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,biomass|w/o ccs,secondary energy|electricity,electricity,medium
428465,ampere3-450,twh,2005,2866.460627,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,coal,secondary energy|electricity,electricity,medium
428466,ampere3-450,twh,2005,0.000000,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,coal|w/ ccs,secondary energy|electricity,electricity,medium
428467,ampere3-450,twh,2005,2866.460627,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,coal|w/o ccs,secondary energy|electricity,electricity,medium
428468,ampere3-450,twh,2005,489.536503,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,gas,secondary energy|electricity,electricity,medium
428469,ampere3-450,twh,2005,0.000000,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,gas|w/ ccs,secondary energy|electricity,electricity,medium
428470,ampere3-450,twh,2005,489.536503,2019-05-14 18:33:59.455750,2019-05-14 18:33:59.467968,2014,asia,message,gas|w/o ccs,secondary energy|electricity,electricity,medium


## Compile

In [570]:
combined = pd.concat([table1, table2, table3, table4, table5, table6, table7, table8, table9], axis = 0)
combined.drop(columns = ["added_datetime", "updated_datetime"], inplace = True)
combined["value"] = combined["value"].astype(str).str.replace(",","")
combined["value"] = combined["value"].astype(str).str.replace("-","0")
combined["value"] = combined["value"].astype(str).str.replace("n.a.","0")
combined["value"] = combined["value"].astype(float) + .01

combined.to_csv("compiled.csv", index = False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [477]:
combined.geography.unique()

array(['world', 'oecd north america', 'latin america', 'oecd europe',
       'africa', 'middle east', 'eastern europe/eurasia', 'india',
       'other asia', 'china', 'oecd asia oceania', 'asia', 'japan',
       'korea', 'chinese taipei', 'asean', 'indonesia', 'malaysia',
       'myanmar', 'phillipines', 'thailand', 'vietnam', 'north america',
       'united states', 'non-oecd europe', 'european union', 'oceania',
       'oecd2019', 'non-oecd', 'europe', 'eurasia',
       'developed asia pacific', 'developing asia pacific',
       'international marine bunkers', 'middle east and north africa',
       'sub-saharan africa', 'saarc', 'northeast asia', 'southeast asia',
       'south america', 'central south america', 'brazil', 'south africa',
       'russia', 'asia pacific', 'developingeco', 'advancedeco',
       'oecd2017', 'mexico', 'world urban', 'world non-urban', 'oecd2016',
       'oecd urban', 'oecd non-urban', 'nonoecd urban',
       'nonoecd non-urban', 'eu12', 'eu15', 'middle ea

In [478]:
combined.source.unique()

array(['greenpeace', 'ieej outlook', 'shell', 'ewg-lut', 'equinor',
       'iea weo', 'iea etp 2017', 'iea etp 2016', 'message', 'image',
       'gem-e3'], dtype=object)

In [463]:
combined.columns

Index(['category', 'geography', 'impact', 'measurement_year', 'scenario',
       'sector', 'source', 'tech_type', 'unit', 'value', 'year_generated'],
      dtype='object')

## Interpolation

In [579]:
from data_interpolator import interpolate
combined = pd.read_csv('compiled.csv')

prints source if years are non-unique

In [580]:
years = [2012, 2020, 2025, 2030, 2040, 2050]
values = [22604, 27586, 31297, 36867, 51939, 67535]
test = interpolate(years, values, include_raw=False)
#print(test)

What if we did it based on length?
if unique len = 1, then all the values are the same and we can try to calculate using linear


In [588]:
final = pd.DataFrame()
cols = ['category', 'geography', 'impact', 'scenario', 'sector', 'source', 'tech_type', 'unit', 'year_generated']

for source in combined.source.unique():
    tmp0 = combined[combined.source == source]
    for geography in tmp0.geography.unique():
        tmp1 = tmp0[tmp0.geography == geography]
        for scenario in tmp1.scenario.unique():
            tmp2 = tmp1[tmp1.scenario == scenario]
            for tech in tmp2.tech_type.unique():
                tmp3 = tmp2[tmp2.tech_type == tech] 
                for sector in tmp3.sector.unique():
                    tmp4 = tmp3[tmp3.sector == sector] 
                    for impact in tmp4.impact.unique():
                        tmp5 = tmp4[tmp4.sector == sector] 
                        for unit in tmp5.unit.unique():
                            tmp6 = tmp5[tmp5.unit == unit] 
                            if len(tmp6.value.unique()) == 1:
                                test= interpolate(tmp6.measurement_year, tmp6.value, include_raw = True, method = "linear")
                            else: 
                                test= interpolate(tmp6.measurement_year, tmp6.value, include_raw = True, method = "polynomial 2nd order")
                            test = test.reset_index()
                            for col in cols:
                                test[col]=x[col].iloc[0]
                            final = pd.concat([final,test] , axis = 0)   

  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den
  r = r_num / r_den


In [589]:
final

Unnamed: 0,index,0,category,geography,impact,scenario,sector,source,tech_type,unit,year_generated
0,2005,13472.248797,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
1,2006,14254.000025,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
2,2007,15033.067535,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
3,2008,15809.451327,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
4,2009,16583.151401,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
5,2010,17354.167757,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
6,2011,18122.500395,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
7,2012,19125.010000,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
8,2013,19651.114517,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017
9,2014,20411.396001,electricity,non-oecd,high,b2ds,electricity generation,iea etp 2017,coal,twh,2017


In [548]:
final.to_csv("final_compiled.csv")