# Investment Opportunities and Alignment

- what investments consistent with the long-term target of the Paris Agreement look like
- the investments of today determine to a large extent the emissions of tomorrow

In [84]:
using CSV
using Statistics
using JSON
using DataFrames

## Helper functions

### Average years
This function returns the average of a row's values over a given collection of years. We use this later on.

In [85]:
# Return the mean of the values stored in `row` for the given `years`, rounded to 3 digits.
# `row` is indexed as `row[1, Symbol(year)]`: only its first row is read, and the
# year columns are addressed by Symbols, so every year is converted with `Symbol(year)`.
function averageYears(row, years)
    # Read the value of a single year and make sure it is numeric.
    function getYear(year)
        value = row[1, Symbol(year)]
        # Some values arrive from the csv as text with a decimal comma (e.g. "1,5").
        # We test against AbstractString instead of String so that other string
        # types (e.g. SubString) are parsed as well instead of reaching `mean` as text.
        if value isa AbstractString
            return parse(Float64, replace(value, "," => "."))
        end
        return value
    end
    return round(mean(getYear, years), digits=3)
end

averageYears (generic function with 1 method)

## Mappings
Some variables have different names in the historic data set. Also, some regions might have a different name.

In [133]:
# Name used in this notebook => name of the variable in the historic data set.
HISTORIC_VARIABLE_MAPPING = Dict{String,String}(
    "Energy Efficiency"             => "Investment|Energy Efficiency",
    "Transmission and Distribution" => "Investment|Energy Supply|Electricity|Transmission and Distribution",
    "Nuclear"                       => "Investment|Energy Supply|Electricity|Nuclear",
    "Bioenergy"                     => "Investment|Energy Supply|Extraction|Biomass",
    "Solar"                         => "Investment|Energy Supply|Electricity|Solar",
    "Wind"                          => "Investment|Energy Supply|Electricity|Wind",
    "Fossil Fuels"                  => "Investment|Energy Supply|Electricity|Fossil",
    "other renewables"              => "Investment|Energy Supply|Electricity|Other renewables",
    "Coal"                          => "Investment|Energy Supply|Extraction|Coal",
)

# Region name used in this notebook => region name in the historic data set.
HISTORIC_REGION_MAPPING = Dict{String,String}(
    "EU" => "EUR",
)

# Name used in this notebook => name of the variable in the scenario data set.
# NOTE(review): "other renewables" points to a historic-style name
# ("Investment|Energy Supply|..."); confirm that this variable exists in the scenario data.
SCENARIO_VARIABLE_MAPPING = Dict{String,String}(
    "Energy Efficiency"             => "Energy Efficiency",
    "Transmission and Distribution" => "Electricity - T&D and Storage",
    "Nuclear"                       => "Extraction and Conversion - Nuclear",
    "Bioenergy"                     => "Extraction and Conversion - Bioenergy",
    "Solar"                         => "Energy Supply|Electricity|Solar",
    "Wind"                          => "Energy Supply|Electricity|Wind",
    "Fossil Fuels"                  => "Electricity - Fossil Fuels w/o CCS",
    "other renewables"              => "Investment|Energy Supply|Electricity|Other renewables",
    "Hydrogen - Fossil"             => "Hydrogen - Fossil",
    "Hydrogen - Non-fossil"         => "Hydrogen - Non-fossil",
    "CCS"                           => "CCS",
)


Dict{String,String} with 11 entries:
  "other renewables"              => "Investment|Energy Supply|Electricity|Other renewables"
  "Bioenergy"                     => "Extraction and Conversion - Bioenergy"
  "Solar"                         => "Energy Supply|Electricity|Solar"
  "Fossil Fuels"                  => "Electricity - Fossil Fuels w/o CCS"
  "Transmission and Distribution" => "Electricity - T&D and Storage"
  "Hydrogen - Fossil"             => "Hydrogen - Fossil"
  "Wind"                          => "Energy Supply|Electricity|Wind"
  "Nuclear"                       => "Extraction and Conversion - Nuclear"
  "Hydrogen - Non-fossil"         => "Hydrogen - Non-fossil"
  "CCS"                           => "CCS"
  "Energy Efficiency"             => "Energy Efficiency"

## Getting the historic data

In [87]:
# historicData = CSV.read("source/WEI2020-DataTables_supplytimeseries_BNEF.csv", decimal='.')

In [88]:
# Load the historic data set; the file uses ";" as column delimiter.
# We read through `CSV.File` and materialise the result with `DataFrame(...)`:
# calling `CSV.read` without a sink argument is rejected by current CSV.jl
# versions, while this form is accepted by old and new versions alike.
historicData = DataFrame(CSV.File("source/report_SENSES.csv"; delim=";"))

# List the regions that are available in the historic data.
unique(historicData[!,:Region])

12-element Array{String,1}:
 "R5LAM"      
 "R5ASIA"     
 "R5MAF"      
 "R5OECD90+EU"
 "R5REF"      
 "USA"        
 "CHN"        
 "IND"        
 "RUS"        
 "JPN"        
 "EUR"        
 "World"      

### Relevant historic years
We extract an average of three historic years from the data.

In [89]:
# The three consecutive years that are averaged to form the historic reference value.
HISTORIC_YEARS = Tuple(2017:2019)

(2017, 2018, 2019)

### Find historic data
We search for historic data by variable and region.

In [90]:
# Look up `variable` for `region` in `historicData` and return its average over
# HISTORIC_YEARS. If no matching row exists, a notice is printed and 0 is returned.
function findHistoricData(variable, region)
    matches = filter(historicData) do entry
        entry[:Variable] == variable && entry[:Region] == region
    end
    if nrow(matches) > 0
        return averageYears(matches, HISTORIC_YEARS)
    end
    # Nothing found: tell the user and fall back to 0.
    println("$variable in $region was not found in historic data. Maybe variable name or region is spelled differently in the data set. Will use 0 as value.")
    return 0
end

findHistoricData (generic function with 1 method)

In [91]:
# Test function: look up the historic average of "Investment|Energy Efficiency" for region "CHN"
findHistoricData("Investment|Energy Efficiency", "CHN")

48.822

### Get historic data
We need an additional function to get the correct historic data, as some variables need some extra processing.

In [92]:
# Build the historic reference entry for `variable` in `region`.
# Returns a Dict with the keys "Reference", "average" and "max", which all hold the
# same value (rounded to 3 digits), since there are no models for the historic data.
function getHistoricData(variable, region)
    # Use the mapped region name if the historic data set names the region differently.
    historicRegion = get(HISTORIC_REGION_MAPPING, region, region)

    reference = if haskey(HISTORIC_VARIABLE_MAPPING, variable)
        # The variable has a different name in the historic data set.
        findHistoricData(HISTORIC_VARIABLE_MAPPING[variable], historicRegion)
    elseif variable === "Oil and Gas"
        # "Oil and Gas" is not a single historic variable: add up oil and gas extraction.
        findHistoricData("Investment|Energy Supply|Extraction|Oil", historicRegion) +
            findHistoricData("Investment|Energy Supply|Extraction|Gas", historicRegion)
    else
        # The variable has the same name in the historic data set.
        findHistoricData(variable, historicRegion)
    end

    rounded = round(convert(Float64, reference), digits=3)
    return Dict{String, Float64}(
        "Reference" => rounded,
        "average" => rounded, # no models for historic data, so the average is the reference
        "max" => rounded,     # the same applies for the maximum value
    )
end

getHistoricData (generic function with 1 method)

## Scenario data
We now load the scenario data set.

In [93]:
# Load the scenario data set; numbers in this file use a decimal comma.
# We read through `CSV.File` and materialise the result with `DataFrame(...)`:
# calling `CSV.read` without a sink argument is rejected by current CSV.jl
# versions, while this form is accepted by old and new versions alike.
scenarioRuns = DataFrame(CSV.File("source/41560_2018_179_MOESM2_ESM-1.csv"; decimal=','))

Unnamed: 0_level_0,Model,Region,Scenario,Variable
Unnamed: 0_level_1,String,String,String,String
1,AIM/CGE,World,CPol,Total energy investment
2,AIM/CGE,World,CPol,Total energy investment (supply side)
3,AIM/CGE,World,CPol,Low carbon investment
4,AIM/CGE,World,CPol,Low carbon investment (supply side)
5,AIM/CGE,World,CPol,Total_inv/GDP
6,AIM/CGE,World,CPol,LC_inv/GDP
7,AIM/CGE,World,CPol,Extraction and Conversion - Fossil Fuels
8,AIM/CGE,World,CPol,Electricity - Fossil Fuels w/o CCS
9,AIM/CGE,World,CPol,Hydrogen - Fossil
10,AIM/CGE,World,CPol,Electricity - Non-bio Renewables


In [94]:
# List every distinct variable name that occurs in the scenario data.
unique(scenarioRuns.Variable)

31-element Array{String,1}:
 "Total energy investment"                                
 "Total energy investment (supply side)"                  
 "Low carbon investment"                                  
 "Low carbon investment (supply side)"                    
 "Total_inv/GDP"                                          
 "LC_inv/GDP"                                             
 "Extraction and Conversion - Fossil Fuels"               
 "Electricity - Fossil Fuels w/o CCS"                     
 "Hydrogen - Fossil"                                      
 "Electricity - Non-bio Renewables"                       
 "Hydrogen - Non-fossil"                                  
 "Extraction and Conversion - Bioenergy"                  
 "Extraction and Conversion - Nuclear"                    
 ⋮                                                        
 "Energy Supply|Electricity|Gas|w/o CCS"                  
 "Energy Supply|Electricity|Oil|w/ CCS"                   
 "Energy Supply|Electricity|

## Defining valid values

In [95]:
# Variables that end up in the final data set.
VARIABLES = (
    "Oil and Gas",
    "Coal",
    "Fossil Fuels",
    "Hydrogen - Fossil",
    "Nuclear",
    "Bioenergy",
    "Hydrogen - Non-fossil",
    "Solar",
    "Wind",
    "other renewables",
    "Transmission and Distribution",
    "Energy Efficiency",
    "CCS",
)

# Models whose runs are taken into account.
MODELS = (
    "AIM/CGE",
    "IMAGE",
    "MESSAGEix-GLOBIOM",
    "POLES",
    "REMIND-MAgPIE",
)

# Regions that are taken into account. Only "World" is active at the moment;
# the other regions are kept here so they can be switched on again.
# Julia thing: a tuple with a single item needs the trailing comma.
REGIONS = (
    "World",
    # "CHN",
    # "IND",
    # "USA",
    # "R5OECD90+EU",
    # "R5REF",
    # "R5ASIA",
    # "R5MAF",
    # "R5LAM",
    # "EU",
)

# Scenarios that are taken into account ("CPol" is currently switched off).
SCENARIOS = (
    "1.5C",
    "NDC",
    "2C",
    "historic",
    # "CPol",
)

("1.5C", "NDC", "2C", "historic")

## Filtering runs

After defining the valid values, we filter the runs by checking whether each attribute is present in the corresponding tuple of valid values.

The `in` function checks whether its first argument is contained in its second argument.

In [96]:
# A run is kept only if its model, region and scenario are all among the valid values.
included(d) = d[:Model] in MODELS && d[:Region] in REGIONS && d[:Scenario] in SCENARIOS

# All scenario runs that pass the check above.
datum = filter(included, scenarioRuns)

Unnamed: 0_level_0,Model,Region,Scenario,Variable
Unnamed: 0_level_1,String,String,String,String
1,AIM/CGE,World,NDC,Total energy investment
2,AIM/CGE,World,NDC,Total energy investment (supply side)
3,AIM/CGE,World,NDC,Low carbon investment
4,AIM/CGE,World,NDC,Low carbon investment (supply side)
5,AIM/CGE,World,NDC,Total_inv/GDP
6,AIM/CGE,World,NDC,LC_inv/GDP
7,AIM/CGE,World,NDC,Extraction and Conversion - Fossil Fuels
8,AIM/CGE,World,NDC,Electricity - Fossil Fuels w/o CCS
9,AIM/CGE,World,NDC,Hydrogen - Fossil
10,AIM/CGE,World,NDC,Electricity - Non-bio Renewables


In [97]:
# List every distinct variable name that is left after filtering.
unique(datum.Variable)

31-element Array{String,1}:
 "Total energy investment"                                
 "Total energy investment (supply side)"                  
 "Low carbon investment"                                  
 "Low carbon investment (supply side)"                    
 "Total_inv/GDP"                                          
 "LC_inv/GDP"                                             
 "Extraction and Conversion - Fossil Fuels"               
 "Electricity - Fossil Fuels w/o CCS"                     
 "Hydrogen - Fossil"                                      
 "Electricity - Non-bio Renewables"                       
 "Hydrogen - Non-fossil"                                  
 "Extraction and Conversion - Bioenergy"                  
 "Extraction and Conversion - Nuclear"                    
 ⋮                                                        
 "Energy Supply|Electricity|Gas|w/o CCS"                  
 "Energy Supply|Electricity|Oil|w/ CCS"                   
 "Energy Supply|Electricity|

## Prepare functions

### Summing up values

Each year has its own column in the source data, and we want to condense these columns into one value (the average over the selected years).
Additionally, we need to parse the value and convert it from German notation (decimal comma) to English notation (decimal point).

In [98]:
# The scenario data is used in 5-year steps.
NEAR_TERM_YEARS = Tuple(2020:5:2030)
MEDIUM_TERM_YEARS = Tuple(2025:5:2050)

(2025, 2030, 2035, 2040, 2045, 2050)

To get a value in a DataFrame we use `[x, y]`, where `x` is the row number and `y` the column id. This would usually look something like `[1, :Region]`, but in this special case the id of the column is a `Symbol` with the name of the year. So we use `Symbol(year)`.

### GetValues
This function finds the rows in the DataFrame, gets every value and calculates the average.

First, we create an empty Dictionary with `Strings` as keys and `Float64` numbers as values.
We loop over the `MODELS` and search for each row with the current model.
If we find one, we average its values over the years and add the result to the Dictionary.

After the loop, we get the values of the dictionary and calculate the mean value.

In [134]:
# Collect the value of `variable` for every model in MODELS for the given `scenario`
# and `region`. Returns a Dict with one entry per model plus the keys "average"
# (mean over the models, rounded to 3 digits) and "max" (largest model value).
function getValues(scenario, variable, region)
    dict = Dict{String, Float64}()

    # Decide once which scenario variable to look up and which share of it to use.
    # This does not depend on the model, so it is done before the loop.
    lookup, share = if haskey(SCENARIO_VARIABLE_MAPPING, variable)
        # Some variables have different names in the scenario data, so we map them.
        (SCENARIO_VARIABLE_MAPPING[variable], 1.0)
    elseif variable === "Oil and Gas"
        # Taken as 90 % of the fossil fuel extraction and conversion investments.
        ("Extraction and Conversion - Fossil Fuels", 0.9)
    elseif variable === "Coal"
        # Taken as the remaining 10 % of the same variable.
        ("Extraction and Conversion - Fossil Fuels", 0.1)
    else
        # The variable has the same name in the scenario data set.
        (variable, 1.0)
    end

    for model in MODELS
        dict[model] = findScenarioData(scenario, lookup, region, model) * share
    end

    # Take a snapshot of the model values BEFORE the aggregate keys are added.
    # Otherwise the rounded average would itself take part in the maximum.
    modelValues = collect(values(dict))
    dict["average"] = round(mean(modelValues), digits=3)
    dict["max"] = maximum(modelValues)
    return dict
end

getValues (generic function with 1 method)

In [131]:
# Example call: values of all models for one scenario, variable and region
getValues("NDC", "Energy Supply|Electricity|Oil|w/ CCS", "World")

Dict{String,Float64} with 7 entries:
  "average"           => 0.248
  "POLES"             => 0.0
  "max"               => 1.241
  "REMIND-MAgPIE"     => 0.0
  "AIM/CGE"           => 1.241
  "IMAGE"             => 0.001
  "MESSAGEix-GLOBIOM" => 0.0

In [124]:
# Set the COLUMNS environment variable.
# NOTE(review): presumably this widens the output so that the wide table below is not truncated — confirm
ENV["COLUMNS"] = 1000

1000

In [125]:
# Inspect the raw rows of all models for one scenario, region and variable.
row = filter(datum) do entry
    entry[:Scenario] == "NDC" &&
        entry[:Region] == "World" &&
        entry[:Variable] == "Energy Supply|Electricity|Oil|w/ CCS"
end

Unnamed: 0_level_0,Model,Region,Scenario,Variable,Unit,2015,2020,2025,2030,2035,2040,2045,2050,2055,2060,2065,2070,2075,2080,2085,2090,2095,2100
Unnamed: 0_level_1,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String,String
1,AIM/CGE,World,NDC,Energy Supply|Electricity|Oil|w/ CCS,Billion US$2015/yr,0,0,1557671,13005955.0,33749176.0,13566847,8330768,4249083,3459921,1348967,585893,52176,52176,520673,527195,547848,440235,1157655
2,IMAGE,World,NDC,Energy Supply|Electricity|Oil|w/ CCS,Billion US$2015/yr,2757331,14806188,3778691,0.183294,0.183294,0,0,0,0,0,0,0,0,0,0,0,0,0
3,MESSAGEix-GLOBIOM,World,NDC,Energy Supply|Electricity|Oil|w/ CCS,Billion US$2015/yr,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,POLES,World,NDC,Energy Supply|Electricity|Oil|w/ CCS,Billion US$2015/yr,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,REMIND-MAgPIE,World,NDC,Energy Supply|Electricity|Oil|w/ CCS,Billion US$2015/yr,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Filter function
The filter function takes two parameters: a function returning true or false for each item; and a list of items.

In [100]:
# Look up the run of `model` for `scenario`, `variable` and `region` in `datum` and
# return its average over MEDIUM_TERM_YEARS. Returns 0 if there is no such run.
function findScenarioData(scenario, variable, region, model)
    matches = filter(datum) do entry
        entry[:Scenario] == scenario &&
            entry[:Region] == region &&
            entry[:Variable] == variable &&
            entry[:Model] == model
    end
    return size(matches, 1) > 0 ? averageYears(matches, MEDIUM_TERM_YEARS) : 0
end

findScenarioData (generic function with 1 method)

In [118]:
# Example call: medium-term average of a single model run
findScenarioData("NDC", "Energy Supply|Electricity|Oil|w/ CCS", "World", "AIM/CGE")

1.241

### Calculating the change

Function to calculate the change between two values.
We calculate the magnitude of the relative change and whether it is positive or negative. This makes it easier to process later.

In [137]:
# Calculate the relative change from `reference` to `value`.
# Returns a tuple `(change, isPositive)`:
#   - `change` is |ref - value| / ref, rounded to 3 digits and capped at 200
#   - `isPositive` is true if `value` is not smaller than `reference`
function calcChange(reference, value)
    if reference == value
        return (0.0, true) # No change at all; Float64 like the result of the other branch
    end
    # In order to prevent the value from being Infinity, we divide by 1 if the reference is 0.
    # `iszero` also catches an integer 0, which a comparison with `=== 0.0` would miss.
    ref = iszero(reference) ? one(reference) : reference
    change = round(abs(ref - value) / ref, digits=3)
    # The direction is determined from the real reference, not from the substituted
    # divisor. Otherwise a rise from 0 to e.g. 0.5 would be reported as a decrease.
    isPositive = reference <= value

    # NOTE(review): `change` is a ratio (1.0 equals 100 percent), so this cap of 200
    # corresponds to 20000 percent rather than 200 percent — confirm which is intended.
    return (min(200.0, change), isPositive)
end

calcChange (generic function with 1 method)

### Calculating the changes

This function iterates over all values, compares each of them with the reference value and saves the change in the dictionary.

In [103]:
# Compare every entry of `vals` with the "average" entry of `refs`.
# Returns a Dict with the same keys as `vals`, holding the result of `calcChange` for each.
function calcChanges(vals, refs)
    baseline = refs["average"]
    return Dict{String, Any}(name => calcChange(baseline, amount) for (name, amount) in vals)
end

calcChanges (generic function with 1 method)

## Build the final data set

Finally, we loop over all scenarios, variables and regions and calculate everything.

In [138]:
runs = []

# One entry per combination of scenario, variable and region.
for scenario in SCENARIOS, variable in VARIABLES, region in REGIONS
    if scenario == "historic"
        # The historic data is its own reference.
        vals = refs = getHistoricData(variable, region)
    else
        vals = getValues(scenario, variable, region)
        refs = getHistoricData(variable, region)
    end
    changes = calcChanges(vals, refs)
    push!(runs, (scenario = scenario, variable = variable, region = region, values = vals, reference = refs, changes = changes))
end

Hydrogen - Fossil in World was not found in historic data. Maybe variable name or region is spelled differently in the data set. Will use 0 as value.
Hydrogen - Non-fossil in World was not found in historic data. Maybe variable name or region is spelled differently in the data set. Will use 0 as value.
CCS in World was not found in historic data. Maybe variable name or region is spelled differently in the data set. Will use 0 as value.
Hydrogen - Fossil in World was not found in historic data. Maybe variable name or region is spelled differently in the data set. Will use 0 as value.
Hydrogen - Non-fossil in World was not found in historic data. Maybe variable name or region is spelled differently in the data set. Will use 0 as value.
CCS in World was not found in historic data. Maybe variable name or region is spelled differently in the data set. Will use 0 as value.
Hydrogen - Fossil in World was not found in historic data. Maybe variable name or region is spelled differently in the d

## Export the dataset

`JSON.json` takes two arguments: the data to serialise (here the `runs` array) and the indentation width.

In [139]:
# Serialise `runs` with an indentation of 2 and write it to the JSON file.
write("../static/data/investments.json", JSON.json(runs, 2));