## Demonstration for updating GDP and Population dynamically using `gcamwrapper`

### Goal: Update GDP and Population data using alternative data starting in 2020 for each 10-year timestep through 2100

In [1]:
import os

import pandas as pd

import gcamwrapper as gw



### Set the paths to the required GCAM files

In [2]:
# GCAM XML configuration file that drives this run
config_xml_file = 'configuration_reduced.xml'

# exe directory of the gcam-core installation
core_exe_path = 'gcam-core/exe'


### Instantiate GCAM

In [3]:
%%time

# Create the GCAM instance from the configuration file and exe directory set above.
# The %%time cell magic (which must stay on the first line of the cell) reports how long this takes.
gcam = gw.Gcam(config_xml_file, core_exe_path)


Running GCAM model code base version 5.3 revision gcam-v5.3

Configuration file:  configuration_reduced.xml
Parsing input files...
Parsing ../input/gcamdata/xml/no_climate_model.xml scenario component.
Parsing ../input/gcamdata/xml/socioeconomics_gSSP2.xml scenario component.
Parsing ../input/gcamdata/xml/resources.xml scenario component.
Parsing ../input/gcamdata/xml/en_supply.xml scenario component.
Parsing ../input/gcamdata/xml/en_transformation.xml scenario component.
Parsing ../input/gcamdata/xml/electricity.xml scenario component.
Parsing ../input/gcamdata/xml/heat.xml scenario component.
Parsing ../input/gcamdata/xml/hydrogen.xml scenario component.
Parsing ../input/gcamdata/xml/en_distribution.xml scenario component.
Parsing ../input/gcamdata/xml/industry.xml scenario component.
Parsing ../input/gcamdata/xml/industry_incelas_gssp2.xml scenario component.
Parsing ../input/gcamdata/xml/cement.xml scenario component.
Parsing ../input/gcamdata/xml/cement_incelas_gssp2.xml scenario 

### Convert a calendar year into the corresponding GCAM model period index

In [4]:
# year whose GCAM period index we want to run through
target_year = 2020

# translate that year into GCAM's integer period index
final_cal_period = gcam.convert_year_to_period(target_year)


### Run GCAM through the desired year, in this case 2020

In [5]:
%%time

# Run the model up to the period computed above (the period index for 2020).
# %%time reports the cost of the solve.
gcam.run_to_period(final_cal_period)


Starting a model run. Running period 5
Model run beginning.
Period 0: 1975
Model solved with last period's prices.

Period 1: 1990
Model solved normally. Iterations period 1: 1. Total iterations: 2

Period 2: 2005
Model solved normally. Iterations period 2: 1. Total iterations: 3

Period 3: 2010
Model solved normally. Iterations period 3: 1. Total iterations: 4

Period 4: 2015
Model solved normally. Iterations period 4: 1. Total iterations: 5

Period 5: 2020
Model solved normally. Iterations period 5: 938. Total iterations: 943

All model periods solved correctly.
Model run completed.
CPU times: user 3min 39s, sys: 386 ms, total: 3min 39s
Wall time: 3min 38s


### Generate the query string for GDP

In [14]:
# look up the packaged query string for labor productivity in the 'socioeconomic' group
gdp_query = gw.get_query('socioeconomic', 'labor_productivity')

# display the query string
gdp_query


'world/region{region@name}/GDP/laborproductivity{year@year}'

### Generate the query string for population

In [11]:
# look up the packaged query string for population in the 'socioeconomic' group
pop_query = gw.get_query('socioeconomic', 'population')

# display the query string
pop_query


'world/region{region@name}/demographic/population{year@year}/totalPop'

### Prepare the query parameters to select every year up to and including the current GCAM year

All regions ('*') for years less than or equal to the current model year (2020 at this point in the run)

In [12]:
# every region ('*'), and every year up to and including the year GCAM is currently on
gdp_get_query_params = dict(region=['*'], year=['<=', gcam.get_current_year()])

# same filter for the population query
pop_get_query_params = dict(region=['*'], year=['<=', gcam.get_current_year()])


### Get the data using the queries in the form of a Pandas DataFrame

In [15]:
# create an output data frame containing the labor productivity data returned by the GDP query
gdp_df = gcam.get_data(gdp_query, gdp_get_query_params)

# preview the first seven rows
gdp_df.head(7)


Unnamed: 0,region,year,laborproductivity
0,Africa_Eastern,1975,0.00154
1,Africa_Eastern,1990,0.00154
2,Africa_Eastern,2005,0.01216
3,Africa_Eastern,2010,0.03926
4,Africa_Eastern,2015,0.02085
5,Africa_Eastern,2020,0.01952
6,Africa_Northern,1975,0.0285


In [16]:
# create an output data frame containing the population data returned by the population query
pop_df = gcam.get_data(pop_query, pop_get_query_params)

# preview the first seven rows
pop_df.head(7)


Unnamed: 0,region,year,totalPop
0,Africa_Eastern,1975,91890.0
1,Africa_Eastern,1990,145593.0
2,Africa_Eastern,2005,222829.0
3,Africa_Eastern,2010,255333.0
4,Africa_Eastern,2015,292560.0
5,Africa_Eastern,2020,327652.0
6,Africa_Northern,1975,81330.0


### Use Ag prices as an indicator of change: first check the 2020 Corn price for the USA before applying any updates

In [17]:
# query string for agricultural prices
ag_get_query_string = gw.get_query('ag', 'prices')

# Corn only, all regions, restricted to the year GCAM is currently on
ag_get_query_params = dict(
    region=['*'],
    sector=['=', 'Corn'],
    year=['=', gcam.get_current_year()],
)

# baseline Corn producer prices, captured before any data is modified
prod_prices_old = gcam.get_data(ag_get_query_string, ag_get_query_params)

# show just the USA row
prod_prices_old[prod_prices_old['region'].eq('USA')]


Unnamed: 0,region,sector,year,price
31,USA,Corn,2020,0.061234


### Modify our population and GDP data for testing

In [18]:
# build a modified copy of the population data (the queried frame is left as-is):
# grow every value by 15% and round to zero decimal places to test
x_pop_df = pop_df.assign(
    totalPop=lambda frame: (frame['totalPop'] * 0.15 + frame['totalPop']).round(0)
)

x_pop_df.head(7)


Unnamed: 0,region,year,totalPop
0,Africa_Eastern,1975,105674.0
1,Africa_Eastern,1990,167432.0
2,Africa_Eastern,2005,256253.0
3,Africa_Eastern,2010,293633.0
4,Africa_Eastern,2015,336444.0
5,Africa_Eastern,2020,376800.0
6,Africa_Northern,1975,93530.0


In [19]:
# build a modified copy of the data returned by the GDP query (the queried frame is left as-is):
# shrink every laborproductivity value by 5% to test
x_gdp_df = gdp_df.assign(
    laborproductivity=lambda frame: frame['laborproductivity'] - frame['laborproductivity'] * 0.05
)

x_gdp_df.head(7)


Unnamed: 0,region,year,laborproductivity
0,Africa_Eastern,1975,0.001463
1,Africa_Eastern,1990,0.001463
2,Africa_Eastern,2005,0.011552
3,Africa_Eastern,2010,0.037297
4,Africa_Eastern,2015,0.019807
5,Africa_Eastern,2020,0.018544
6,Africa_Northern,1975,0.027075


### Set new values using our modified data

In [21]:
# The query params for set_data differ slightly from those used with get_data:
#   each filter carries a '+' to state explicitly which fields are matched, and
#   no comparison value is given because the values to match come from the DataFrame
gdp_set_query_params = dict(region=['+', '='], year=['+', '='])

# write the modified labor productivity values back into GCAM
gcam.set_data(x_gdp_df, gdp_query, gdp_set_query_params)


In [22]:
# match rows on region and year; the values to match are taken from the DataFrame
pop_set_query_params = dict(region=['+', '='], year=['+', '='])

# write the modified population values back into GCAM
gcam.set_data(x_pop_df, pop_query, pop_set_query_params)


### Re-run current year (2020) with the updated data

In [23]:
# re-run the period GCAM is currently on (2020 here) now that the modified data has been set
gcam.run_to_period(gcam.get_current_period())


Starting a model run. Running period 5
Model run beginning.
Period 5: 2020
Model solved normally. Iterations period 5: 1372. Total iterations: 2314

All model periods solved correctly.
Model run completed.


### Investigate updates

In [24]:
# query population again with the same parameters to confirm the modified values are in GCAM
updated_pop_df = gcam.get_data(pop_query, pop_get_query_params)

# preview the first seven rows for comparison with x_pop_df
updated_pop_df.head(7)


Unnamed: 0,region,year,totalPop
0,Africa_Eastern,1975,105674.0
1,Africa_Eastern,1990,167432.0
2,Africa_Eastern,2005,256253.0
3,Africa_Eastern,2010,293633.0
4,Africa_Eastern,2015,336444.0
5,Africa_Eastern,2020,376800.0
6,Africa_Northern,1975,93530.0


In [25]:
# query labor productivity again with the same parameters to confirm the modified values are in GCAM
updated_gdp_df = gcam.get_data(gdp_query, gdp_get_query_params)

# preview the first seven rows for comparison with x_gdp_df
updated_gdp_df.head(7)

Unnamed: 0,region,year,laborproductivity
0,Africa_Eastern,1975,0.001463
1,Africa_Eastern,1990,0.001463
2,Africa_Eastern,2005,0.011552
3,Africa_Eastern,2010,0.037297
4,Africa_Eastern,2015,0.019807
5,Africa_Eastern,2020,0.018544
6,Africa_Northern,1975,0.027075


### See if Ag prices changed

In [36]:
def _usa_price(prices):
    """Return the first USA entry of the 'price' column, rounded to 4 decimal places."""
    return prices.loc[prices['region'] == 'USA', 'price'].values[0].round(4)


# re-query Corn producer prices after the re-run with modified data
prod_prices_new = gcam.get_data(ag_get_query_string, ag_get_query_params)

# USA price before and after the updates
old_price = _usa_price(prod_prices_old)
new_price = _usa_price(prod_prices_new)

print(f"Corn price in the USA changed from {old_price} to {new_price} after the updates to population and GDP were applied.")


Corn price in the USA changed from 0.0612 to 0.0607 after the updates to population and GDP were applied.


# !REVISIT ONCE NEW DATA HAS BEEN PROVIDED!
# Let's try with the sample data you provided

### Loading the sample data you provided

In [41]:
# Directory holding the sample CSV files. The original absolute path is kept as the
# default so existing setups behave the same; set the GCAM_DEMETER_DATA_DIR environment
# variable to point at the data on another machine instead of editing this cell.
sample_data_dir = os.environ.get(
    'GCAM_DEMETER_DATA_DIR',
    '/home/msdadmin/repos/gcam_demeter_clinic/gcam_demeter_clinic/data',
)

# full paths to the sample GDP and population files
new_gdp_file = os.path.join(sample_data_dir, 'test_gdp_1.csv')
new_pop_file = os.path.join(sample_data_dir, 'test_population_1.csv')

# load both samples as DataFrames
new_gdp_df = pd.read_csv(new_gdp_file)
new_pop_df = pd.read_csv(new_pop_file)


#### Population

In [42]:
# keep only the gcam_region column and the column named after the current GCAM year
pop_columns = ['gcam_region', str(gcam.get_current_year())]
new_pop_df = new_pop_df.loc[:, pop_columns].copy()

new_pop_df.head()


Unnamed: 0,gcam_region,2020
0,1,11.146
1,2,243.8
2,3,49.692
3,4,303.28
4,5,572.65


#### GDP

In [40]:
# keep only the gcam_region column and the column named after the current GCAM year
gdp_columns = ['gcam_region', str(gcam.get_current_year())]
new_gdp_df = new_gdp_df.loc[:, gdp_columns].copy()

new_gdp_df.head()


Unnamed: 0,gcam_region,2020
0,1,12301700
1,2,187115
2,3,515864
3,4,172719
4,5,565196
