In [1]:
import pandas as pd
import pyam
import aneris
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Load data

In [2]:
aim_path = '../../../data/SOD/model_results/raw/Consolidated_AIM_GEO7_240621.xlsx'
aim_trade_path = '../../../data/SOD/model_results/raw/240513_GEO7_AIM.xlsx'
image_path = '../../../data/SOD/model_results/raw/Consolidated_IMAGE_GEO7_R3_results_12022025_0956.xlsx'

In [3]:
variables_to_load = [
    'Agricultural Demand',
    'Agricultural Demand|Energy',
    'Agricultural Demand|Energy|Crops',
    'Agricultural Demand|Energy|Residues',
    'Agricultural Demand|Non-Energy|Crops',
    'Agricultural Demand|Non-Energy|Crops|Feed',
    'Agricultural Demand|Non-Energy|Crops|Food',
    'Agricultural Demand|Non-Energy|Crops|Other',
    'Agricultural Demand|Non-Energy|Livestock',
    'Agricultural Production',
    'Agricultural Production|Energy|Crops',
    'Agricultural Production|Non-Energy|Crops', 
    'Agricultural Production|Non-Energy|Livestock',
    'Agricultural Production|Crops|Non-Energy|Maize',
    'Agricultural Production|Crops|Non-Energy|Rice',
    'Agricultural Production|Crops|Non-Energy|Soybeans',
    'Agricultural Production|Crops|Non-Energy|Wheat',
    'Emissions|CH4',
    'Emissions|CH4|AFOLU',
    'Emissions|CH4|Energy',
    'Emissions|CO2',
    'Emissions|CO2|AFOLU',
    'Emissions|CO2|Energy',
    'Emissions|N2O',
    'Emissions|N2O|AFOLU',
    'Emissions|N2O|Energy',
    'Emissions|Kyoto Gases',
    'Emissions|BC', 
    'Emissions|CO', 
    'Emissions|NOx', 
    'Emissions|NH3', 
    'Emissions|OC', 
    'Emissions|Sulfur', 
    'Emissions|VOC',
    'Land Cover|Built-up Area', 
    'Land Cover|Cropland',
    'Land Cover|Forest',
    'Land Cover|Pasture',
    'Land Cover|Other Natural Land',
    'Water Withdrawal',
    'Water Consumption',
    'Water Consumption|Electricity',
    'Water Consumption|Industrial Water',
    'Water Consumption|Irrigation',
    'Water Consumption|Municipal Water',
    'Water Withdrawal|Electricity',
    'Water Withdrawal|Industrial Water',
    'Water Withdrawal|Irrigation',
    'Water Withdrawal|Municipal Water',
    'Primary Energy',
    'Primary Energy|Biomass',
    'Primary Energy|Biomass|w/o CCS',
    'Primary Energy|Coal',
    'Primary Energy|Coal|w/o CCS',
    'Primary Energy|Gas',
    'Primary Energy|Gas|w/o CCS',
    'Primary Energy|Oil',
    'Primary Energy|Oil|w/o CCS',
    'Primary Energy|Hydro',
    'Primary Energy|Non-Biomass Renewables',
    'Primary Energy|Nuclear',
    'Primary Energy|Solar',
    'Primary Energy|Wind',
    'Primary Energy|Biomass|Traditional',
    'Food Demand*',
    'Final Energy',
    'Final Energy|Electricity',
    'Final Energy|Industry',
    'Final Energy|Other Sector',
    'Final Energy|Residential and Commercial',
    'Final Energy|Transportation',
    'Final Energy|Hydrogen',
    'Final Energy|Heat',
    'Final Energy|Gases',
    'Final Energy|Liquids',
    'Final Energy|Solids',
    'Secondary Energy|Electricity',
    'Secondary Energy|Electricity|Biomass',
    'Secondary Energy|Electricity|Coal',
    'Secondary Energy|Electricity|Gas',
    'Secondary Energy|Electricity|Non-Biomass Renewables',
    'Secondary Energy|Electricity|Nuclear',
    'Secondary Energy|Electricity|Oil',
    'Secondary Energy|Electricity|Hydro',
    'Secondary Energy|Electricity|Solar',
    'Secondary Energy|Electricity|Wind',
    "Temperature|Global Mean",
    'Yield|*'
]

# Load Data
df_aim = pyam.IamDataFrame(aim_path)
# Add population here as an exception, needed for aggregation, remove later because we only want to share the image population
df_aim = df_aim.filter(
                     variable=variables_to_load + ['Population'],
                     scenario=['REF-v2'],
                    )

df_image_all = pyam.IamDataFrame(image_path)
df_image = df_image_all.filter(
                     variable=variables_to_load,
                     scenario=['TECH-TP-v2', 'LIFE-TP-v2'],
                    )

# IMAGE has uploaded results for 'Agricultural Demand|Non-Energy|Livestock', 
# assumed to be the same as "Agricultural Demand|Non-Energy|Livestock|Food"
df_image.aggregate("Agricultural Demand|Non-Energy|Livestock|Food", 
                       components=['Agricultural Demand|Non-Energy|Livestock'], 
                        append=True)

# Add population here as an exception, needed for aggregation, remove later
df_image_hist = df_image_all.filter(
                     variable=variables_to_load + ['Population'],
                     scenario='REF-v2',
                    )

# On world level water consumption gets reported as 0 by IMAGE, easier to aggregate it here
df_image = df_image.filter(variable='Water Consumption', region='World', keep=False)
df_image.aggregate_region(variable='Water Consumption', append=True)

df_image_hist = df_image_hist.filter(variable='Water Consumption', region='World', keep=False)
df_image_hist.aggregate_region(variable='Water Consumption', append=True)

# On world level water withdrawal misses some sectors, easier to aggregate it here
df_image = df_image.filter(variable='Water Withdrawal', region='World', keep=False)
df_image.aggregate_region(variable='Water Withdrawal', append=True)

df_image_hist = df_image_hist.filter(variable='Water Withdrawal', region='World', keep=False)
df_image_hist.aggregate_region(variable='Water Withdrawal', append=True)

# Read in GDP and Population from IMAGE for REF as well, vassilis mentioned consistency
# Read in w/ CCS primary energy categories as well, not uploaded for REF by AIM
df_gdp = df_image_all.filter(
                     variable=[
                        'GDP|PPP', 
                        'Population', 
                        'GDP|MER',
                        'Primary Energy|Biomass|w/ CCS',
                        'Primary Energy|Coal|w/ CCS',
                        'Primary Energy|Gas|w/ CCS',
                        'Primary Energy|Oil|w/ CCS',
                        ],
                     scenario=['REF-v2','TECH-TP-v2', 'LIFE-TP-v2'],
                    )

df_gdp.divide('GDP|PPP', 'Population', 'GDP|PPP|Per Capita', ignore_units='thousand USD_2010/yr', append=True)

[INFO] 10:39:08 - pyam.core: Reading file ..\..\..\data\SOD\model_results\raw\Consolidated_AIM_GEO7_240621.xlsx
[INFO] 10:39:16 - pyam.core: Reading file ..\..\..\data\SOD\model_results\raw\Consolidated_IMAGE_GEO7_R3_results_12022025_0956.xlsx


In [4]:
# Trade variables were not uploaded with the consolidated values for AIM
df_not_on_db_aim = pyam.IamDataFrame(aim_trade_path)
df_not_on_db_aim = df_not_on_db_aim.filter(
    variable=[
    'Trade|Primary Energy|Biomass|Volume',
    'Trade|Primary Energy|Coal|Volume',
    'Trade|Primary Energy|Gas|Volume',
    'Trade|Primary Energy|Oil|Volume',
    'Trade|Secondary Energy|Liquids|Biomass|Volume'
       ],
    scenario=['REF-v2'],)

df_not_on_db_aim.aggregate('Trade|Energy|Biomass & Biofuels',
                           components=['Trade|Primary Energy|Biomass|Volume',
                            'Trade|Secondary Energy|Liquids|Biomass|Volume'
                            ],
                            append=True
                           )

df_not_on_db_aim.rename(variable={
    'Trade|Primary Energy|Coal|Volume':'Trade|Energy|Coal',
    'Trade|Primary Energy|Gas|Volume':'Trade|Energy|Gas',
    'Trade|Primary Energy|Oil|Volume':'Trade|Energy|Oil',
   },
   inplace=True
)

df_not_on_db_aim = df_not_on_db_aim.filter(variable=['Trade|Energy|Biomass & Biofuels',
                                  'Trade|Energy|Coal',
                                  'Trade|Energy|Gas',
                                  'Trade|Energy|Oil',
                                  ])

df_not_on_db_aim.aggregate('Trade|Energy',
                            append=True
                           )

df_aim = pyam.concat([df_aim, df_not_on_db_aim])

[INFO] 10:39:28 - pyam.core: Reading file ..\..\..\data\SOD\model_results\raw\240513_GEO7_AIM.xlsx


In [5]:
# Read in IMAGE data that is not yet configured for the IIASA DB
# Or that IMAGE is used for REF as well
df_not_on_db_image = df_image_all.filter(variable=[
    'Carbon Sequestration|CCS', 
    'Carbon Sequestration|CCS|Biomass', 
    'Carbon Sequestration|CCS|Fossil',
    'Population|Urban',        
    'Carbon stocks',
    'Carbon stocks|Soil',        
    'Carbon stocks|Vegetation',
    'Agricultural Production|Crops|Non-Energy',
    "Population|Clean Cooking Access",
    "Population|Electricity Access",
    "Population|Relying on Solid Fuels",
    'Food Waste',
    'Food Waste|Consumption',
    'Food Waste|Production',   
    'Trade|Biomass & Biofuels',
    'Trade|Coal',
    'Trade|Energy|Volume',
    'Trade|Gas',
    'Trade|Oil',
    'Food Waste|Per capita',
])

# Capatalize and rename variables
df_not_on_db_image.rename(variable={
    'Carbon stocks':'Carbon Stocks',
    'Carbon stocks|Soil':'Carbon Stocks|Soil',        
    'Carbon stocks|Vegetation':'Carbon Stocks|Vegetation'
}, inplace=True)

df_not_on_db_image.rename(variable={'Agricultural Production|Crops|Non-Energy':'Agricultural Production|Non-Energy|Crops',
                                        'Trade|Biomass & Biofuels':'Trade|Energy|Biomass & Biofuels',
                                        'Trade|Coal':'Trade|Energy|Coal',
                                        'Trade|Energy|Volume':'Trade|Energy',
                                        'Trade|Gas':'Trade|Energy|Gas',
                                        'Trade|Oil':'Trade|Energy|Oil'}, inplace=True)

# Read in trade values for IMAGE hist and rename the variables
df_not_on_db_image_hist = df_image_all.filter(variable=['Agricultural Production|Crops|Non-Energy',
                                                  'Trade|Biomass & Biofuels',
                                                  'Trade|Coal',
                                                  'Trade|Energy|Volume',
                                                  'Trade|Gas',
                                                  'Trade|Oil'], scenario='REF-v2')

df_not_on_db_image_hist.rename(variable={'Agricultural Production|Crops|Non-Energy':'Agricultural Production|Non-Energy|Crops',
                                    'Trade|Biomass & Biofuels':'Trade|Energy|Biomass & Biofuels',
                                    'Trade|Coal':'Trade|Energy|Coal',
                                    'Trade|Energy|Volume':'Trade|Energy',
                                    'Trade|Gas':'Trade|Energy|Gas',
                                    'Trade|Oil':'Trade|Energy|Oil'}, inplace=True)


df_image = pyam.concat([df_image, df_not_on_db_image, df_gdp])
df_image_hist = pyam.concat([df_image_hist, df_not_on_db_image_hist])

In [6]:
weighted_variables = [
    'Food Demand',
    'Food Demand|Crops',
    'Food Demand|Livestock',
    'Yield|Cereal',
    'Yield|Oilcrops',
    'Yield|Sugarcrops',
    'Food Waste|Per Capita'
]
abs_variables = [value for value in df_image.variable if value not in weighted_variables]

In [7]:
# Some of the variables are not configured on the iiasa database yet, aggregate here manually (Only a summation)
def compute_UN_groups(df, variables, weight=None):
    df_africa = df.aggregate_region(variables,
                              region='Africa (UN-R5)',
                              subregions=[
                                    'NAF',
                                    'WAF',
                                    'EAF',
                                    'SAF',
                                    'RSAF',
                              ],
                              weight=weight
                              )

    df_asia = df.aggregate_region(variables,
                              region='Asia and the Pacific (UN-R5)',
                              subregions=[
                                'CHN',
                                'INDIA',
                                'INDO',
                                'JAP',
                                'KOR',
                                'ME',
                                'RSAS',
                                'SEAS',
                                'STAN',
                              ],
                              weight=weight
                              )

    df_ee = df.aggregate_region(variables,
                              region='Eastern Europe (UN-R5)',
                              subregions=[
                                    'CEU',
                                    'RUS',
                                    'UKR',
                              ],
                              weight=weight
                              )

    df_latin = df.aggregate_region(variables,
                              region='Latin America and Caribbean (UN-R5)',
                              subregions=[
                                    'BRA',
                                    'MEX',
                                    'RCAM',
                                    'RSAM',
                              ],
                              weight=weight
                              )

    df_we = df.aggregate_region(variables,
                              region='Western Europe and Other States (UN-R5)',
                              subregions=[
                                        'CAN',
                                        'OCE',
                                        'TUR',
                                        'USA',
                                        'WEU',
                              ],
                              weight=weight
                              )
    
    df_world = df.filter(region='World', variable=variables)

    df_un_groups = pyam.concat([df_africa, df_asia, df_ee, df_latin, df_we, df_world])
    return df_un_groups

# Some of the variables are not configured on the iiasa database yet, aggregate here manually (Only a summation)
def compute_UN_groups_aim(df, variables, weight=None):
    
    df_africa = df.aggregate_region(variables,
                              region='Africa (UN-R5)',
                              subregions=[
                                    'XAF',
                                    'XNF',
                              ],
                              weight=weight
                              )

    df_asia = df.aggregate_region(variables,
                              region='Asia and the Pacific (UN-R5)',
                              subregions=[
                                    'CHN',
                                    'IND',
                                    'JPN',
                                    'XSE',
                                    'XSA',
                                    'XME',
                              ],
                              weight=weight
                              )

    df_ee = df.aggregate_region(variables,
                              region='Eastern Europe (UN-R5)',
                              subregions=[
                                    'CIS',
                                    'XER',
                              ],
                              weight=weight
                              )

    df_latin = df.aggregate_region(variables,
                              region='Latin America and Caribbean (UN-R5)',
                              subregions=[
                                    'BRA',
                                    'XLM',
                              ],
                              weight=weight
                              )

    df_we = df.aggregate_region(variables,
                              region='Western Europe and Other States (UN-R5)',
                              subregions=[
                                    'CAN',
                                    'XOC',
                                    'TUR',
                                    'USA',
                                    'XE25', 
                              ],
                              weight=weight
                              )
    
    df_world = df.filter(region='World', variable=variables)

    df_un_groups = pyam.concat([df_africa, df_asia, df_ee, df_latin, df_we, df_world])
    return df_un_groups

In [8]:
df_image_abs = compute_UN_groups(df_image, abs_variables)
# There are some variables in the IMAGE df that we also need for REF, but not for the variables below, so remove any REF values
df_image_food = compute_UN_groups(df_image.filter(scenario='REF-v2', keep=False), ['Food Demand','Food Demand|Crops','Food Demand|Livestock','Food Waste|Per Capita'], 'Population')
df_image_yield = compute_UN_groups(df_image.filter(scenario='REF-v2', keep=False), ['Yield|Cereal', 'Yield|Oilcrops', 'Yield|Sugarcrops',], 'Agricultural Production|Non-Energy|Crops')
df_image = pyam.concat([df_image_abs, df_image_food, df_image_yield])

df_image_hist_abs = compute_UN_groups(df_image_hist, abs_variables)
df_image_hist_food = compute_UN_groups(df_image_hist, ['Food Demand','Food Demand|Crops','Food Demand|Livestock','Food Waste|Per Capita'], 'Population')
df_image_hist_yield = compute_UN_groups(df_image_hist, ['Yield|Cereal', 'Yield|Oilcrops', 'Yield|Sugarcrops',], 'Agricultural Production|Non-Energy|Crops')
df_image_hist = pyam.concat([df_image_hist_abs, df_image_hist_food, df_image_hist_yield])
df_image_hist = df_image_hist.filter(variable='Population', keep=False)

df_aim_abs = compute_UN_groups_aim(df_aim, abs_variables)
df_aim_food = compute_UN_groups_aim(df_aim, ['Food Demand','Food Demand|Crops','Food Demand|Livestock', 'Food Waste|Per Capita'], 'Population')
df_aim_yield = compute_UN_groups_aim(df_aim, ['Yield|Cereal', 'Yield|Oilcrops', 'Yield|Sugarcrops',], 'Agricultural Production|Non-Energy|Crops')
df_aim = pyam.concat([df_aim_abs, df_aim_food, df_aim_yield])
df_aim = df_aim.filter(variable='Population', keep=False)

In [9]:
# Copy Temperature to all regions
for region in [
        'Latin America and Caribbean (UN-R5)',
        'Western Europe and Other States (UN-R5)',
        'Africa (UN-R5)',
        'Asia and the Pacific (UN-R5)',
        'Eastern Europe (UN-R5)',
        ]:
        df_region_image = df_image.filter(variable="Temperature|Global Mean", region='World').rename(region={'World':region})
        df_image = pyam.concat([df_image, df_region_image])
        df_region_image_hist = df_image_hist.filter(variable="Temperature|Global Mean", region='World').rename(region={'World':region})
        df_image_hist = pyam.concat([df_image_hist, df_region_image_hist])
        df_region_aim = df_aim.filter(variable="Temperature|Global Mean", region='World').rename(region={'World':region})
        df_aim = pyam.concat([df_aim, df_region_aim])

# Aggregate variables

In [10]:
def aggregate_variables(pyam_df):
    # Create new variables
    pyam_df = pyam_df.rename(mapping={'variable': {'Land Cover|Built-up Area': 'Land Cover|Other',
                                                   }},
                                                   )
    
    pyam_df.multiply('Primary Energy|Nuclear', 1, 'Primary Energy|Other', ignore_units='EJ/yr', append=True)

    # Land Cover|Forest and Other Natural Land
    pyam_df.aggregate("Land Cover|Forest and Other Natural Land", 
                        components=['Land Cover|Forest',
                                    'Land Cover|Other Natural Land'], 
                            append=True)

    # Priamry Energy|Fossil
    pyam_df.aggregate("Primary Energy|Fossil", 
                        components=['Primary Energy|Coal', 
                                'Primary Energy|Oil', 
                                'Primary Energy|Gas',], 
                            append=True)
    
    # Priamry Energy|Fossil|w/ CCS"
    pyam_df.aggregate("Primary Energy|Fossil|w/ CCS", 
                        components=['Primary Energy|Coal|w/ CCS', 
                                'Primary Energy|Oil|w/ CCS', 
                                'Primary Energy|Gas|w/ CCS',], 
                            append=True)
    
    # Priamry Energy|Fossil|w/ CCS"
    pyam_df.aggregate("Primary Energy|Fossil|w/o CCS", 
                        components=['Primary Energy|Coal|w/o CCS', 
                                'Primary Energy|Oil|w/o CCS', 
                                'Primary Energy|Gas|w/o CCS',], 
                            append=True)

    # Share of Non-Biomass Renewables
    pyam_df.divide("Primary Energy|Non-Biomass Renewables", 
                "Primary Energy", 
                "Primary Energy|Non-Biomass Renewables (Fraction)", 
                ignore_units=True, 
                append=True)
    pyam_df.multiply("Primary Energy|Non-Biomass Renewables (Fraction)", 
                100, 
                "Primary Energy|Non-Biomass Renewables (Share)", 
                ignore_units='%', 
                append=True)

    # Agricultural Demand|Crops
    pyam_df.aggregate("Agricultural Demand|Crops", 
                        components=['Agricultural Demand|Energy|Crops', 
                                    'Agricultural Demand|Non-Energy|Crops',
                                    ], 
                            append=True)

    pyam_df.divide("Food Demand|Crops",
                "Food Demand", 
                "Food Demand|Crops (Fraction)", 
                ignore_units=True, 
                append=True)
    pyam_df.multiply("Food Demand|Crops (Fraction)", 
                100, 
                "Food Demand|Crops (Share)", 
                ignore_units='%', 
                append=True)
    
    # Agricultural Production|Crops
    pyam_df.aggregate("Agricultural Production|Crops",
                    components=['Agricultural Production|Non-Energy|Crops',
                                'Agricultural Production|Energy|Crops',],
                    append=True)

    pyam_df.subtract(
        f'Agricultural Production|Crops',
        f'Agricultural Demand|Crops',
        f'Trade|Agriculture|Crops',
        ignore_units='million t DM/yr',
        append=True
    )

    # Remove the regional emissions precursors 
    pyam_df = pyam_df.filter(
        variable=[
        'Emissions|BC', 
        'Emissions|CO', 
        'Emissions|NOx', 
        'Emissions|NH3', 
        'Emissions|OC', 
        'Emissions|Sulfur', 
        'Emissions|VOC',],
        region=[
        'Latin America and Caribbean (UN-R5)',
        'Western Europe and Other States (UN-R5)',
        'Africa (UN-R5)',
        'Asia and the Pacific (UN-R5)',
        'Eastern Europe (UN-R5)',
        ], keep=False)
    
    return pyam_df

# pyam.utils: Formatted data is empty. is because in AIM REF there is no CCS
df_aim = aggregate_variables(df_aim)
df_image = aggregate_variables(df_image)
df_image_hist = aggregate_variables(df_image_hist)
# df_aim_transpaths = aggregate_variables(df_aim_transpaths)



# Harmonization of AIM to IMAGE 2020

In [11]:
# Find the variables that are uploaded by AIM, these should also be harmonized
# The others can be copy pasted to AIM results
common_variables = [value for value in df_image.filter(scenario='REF-v2').variable if value in df_aim.variable]

# Reset index for a specific level
# Add variables to df_image_hist_extended df and df_aim and remove from df_not_on_db
# Remove any common data from df_image_hist and concat it with REF data from IMAGE that is present in AIM
df_image_hist_extended = pyam.concat([df_image_hist.filter(variable=common_variables, keep=False), 
                                      df_image.filter(variable=common_variables, scenario='REF-v2')], 
                                     ignore_meta_conflict=True)
df_image = df_image.filter(variable=common_variables, scenario='REF-v2', keep=False)

# Create a dataframe that contains the method of harmonization per variable
# Only create methods for variables present in AIM
methods = df_image_hist_extended.filter(variable=df_aim.variable).timeseries().reset_index(level='model', drop=True)
# The offset methods ensure internal regional consistency better than the ratio methods
methods['method'] = 'reduce_offset_2080' # 'reduce_ratio_2080' 'constant_ratio' 'constant_offset' 'reduce_offset_2080'
methods_series = methods['method']

# These variables need a different method, manually checked for best performance
agricultural_vars = ['Agricultural Demand',
                    'Agricultural Demand|Energy|Crops',
                    'Agricultural Demand|Crops',
                    'Agricultural Demand|Non-Energy|Livestock',
                    'Agricultural Demand|Non-Energy|Crops',
                    'Agricultural Demand|Non-Energy|Crops|Feed',
                    'Agricultural Demand|Non-Energy|Crops|Food',
                    'Agricultural Demand|Non-Energy|Crops|Other',
                    'Agricultural Production',
                    'Agricultural Production|Energy|Crops',
                    'Agricultural Production|Crops',
                    'Agricultural Production|Non-Energy|Livestock',
                    'Agricultural Production|Non-Energy|Crops'
                    ]

land_cover_vars = ['Land Cover|Other',
                    'Land Cover|Cropland',
                    'Land Cover|Forest and Other Natural Land',
                    'Land Cover|Pasture',
                    ]

yield_vars = ['Yield|Cereal', 'Yield|Oilcrops', 'Yield|Sugarcrops']

trade_vars = [
    'Trade|Energy',
    'Trade|Energy|Biomass & Biofuels',
    'Trade|Energy|Coal',
    'Trade|Energy|Gas',
    'Trade|Energy|Oil'
]

methods_series.loc[:,:,agricultural_vars + land_cover_vars + yield_vars  + trade_vars,:] = 'constant_offset' # + emission_vars
methods_series

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  methods_series.loc[:,:,agricultural_vars + land_cover_vars + yield_vars  + trade_vars,:] = 'constant_offset' # + emission_vars


scenario  region          variable                              unit           
REF-v2    Africa (UN-R5)  Agricultural Demand                   million t DM/yr       constant_offset
                          Agricultural Demand|Crops             million t DM/yr       constant_offset
                          Agricultural Demand|Energy            million t DM/yr    reduce_offset_2080
                          Agricultural Demand|Energy|Crops      million t DM/yr       constant_offset
                          Agricultural Demand|Non-Energy|Crops  million t DM/yr       constant_offset
                                                                                          ...        
          World           Water Withdrawal|Irrigation           km3/yr             reduce_offset_2080
                          Water Withdrawal|Municipal Water      km3/yr             reduce_offset_2080
                          Yield|Cereal                          t DM/ha/yr            constant_offset
  

In [12]:
# AIM will be harmonized to IMAGE REF historic values
hist_to_harmonize = df_image_hist_extended.filter(variable=df_aim.variable).timeseries() # .filter(variable=variable)
df_aim_to_harmonize = df_aim.timeseries() # .filter(variable=variable)

# Remove the model level from index and column names should be strings
df_aim_to_harmonize = df_aim_to_harmonize.reset_index(level='model', drop=True)
df_aim_to_harmonize.columns = df_aim_to_harmonize.columns.astype(str)
hist_to_harmonize = hist_to_harmonize.reset_index(level='model', drop=True)
hist_to_harmonize.columns = hist_to_harmonize.columns.astype(str)

# Harmonize 
h = aneris.harmonize.Harmonizer(df_aim_to_harmonize, hist_to_harmonize, config={'harmonize_year': '2020'})
harmonized = h.harmonize(methods_series) # methods_copy

# Add all necessary index columns
new_level = 'AIM_Harmonized'
df_index = harmonized.index.to_frame()  # Convert the MultiIndex to a DataFrame
df_index['model'] = new_level  # Add the new level
df_index = df_index[['model', 'scenario', 'region', 'variable', 'unit']]
new_index = pd.MultiIndex.from_frame(df_index)  # Create a new MultiIndex with the added level
harmonized.index = new_index

df_harmonized = pyam.IamDataFrame(harmonized)

# Final Energy|Heat is missing from AIM, but the total on world level is still correct
# Probably no Final Energy|Heat in Africa, so adding 0s is alright
# ALWAYS CHECK IF ADDING NEW VARIABLES!!! 

REF-v2    Africa (UN-R5)  Final Energy|Heat  EJ/yr    (REF-v2, Africa (UN-R5), Final Energy|Heat, EJ...
dtype: object
  return np.abs(np.std(x) / np.mean(x))
INFO:root:Harmonizing with constant_offset
INFO:root:Harmonizing with reduce_offset_2080


# Concat AIM Harmonized REF to IMAGE

In [13]:
df = pyam.concat([df_harmonized, df_image])
df.divide('Population|Urban', ['Population'], 'Population|Urban (Share)', append=True)
df.convert_unit('', '%', inplace=True)
df = df.filter(year=[2000, 2010, 2020, 2030, 2040, 2050])
df = df.rename(scenario={
    'REF-v2':'REF', 
    'TECH-TP-v2':'TECH-TP', 
    'LIFE-TP-v2':'LIFE-TP'})

# To see what variables for the REF scenario are now from IMAGE
df.filter(model='IMAGE 3.3', scenario='REF').variable

['Carbon Sequestration|CCS',
 'Carbon Sequestration|CCS|Biomass',
 'Carbon Sequestration|CCS|Fossil',
 'Carbon Stocks',
 'Carbon Stocks|Soil',
 'Carbon Stocks|Vegetation',
 'Food Waste',
 'Food Waste|Consumption',
 'Food Waste|Per capita',
 'Food Waste|Production',
 'GDP|MER',
 'GDP|PPP',
 'GDP|PPP|Per Capita',
 'Population',
 'Population|Clean Cooking Access',
 'Population|Electricity Access',
 'Population|Relying on Solid Fuels',
 'Population|Urban',
 'Population|Urban (Share)',
 'Primary Energy|Biomass|w/ CCS',
 'Primary Energy|Coal|w/ CCS',
 'Primary Energy|Fossil|w/ CCS',
 'Primary Energy|Gas|w/ CCS',
 'Primary Energy|Oil|w/ CCS']

# Check Regional Consistency

In [14]:
np_isclose_args = {
    "equal_nan": True,
    "rtol": 0.001,
    "atol": 1e-05,
}

harmonized_inconsistency = df.filter(model='AIM_Harmonized').check_internal_consistency(**np_isclose_args)
harmonized_inconsistency = harmonized_inconsistency.rename(columns={'region':'region_total', 'subregions':'subregions_total'})[['region_total', 'subregions_total']]
harmonized_inconsistency = harmonized_inconsistency.dropna()
df_unstacked = harmonized_inconsistency.unstack(level='year')

df_stacked = df_unstacked.stack(level=0)
# Adjust the column names for better readability
df_stacked.columns = [f'{year}' for year in df_stacked.columns]
df_stacked = df_stacked.rename_axis(index={None: 'totals'})
df_stacked = df_stacked.reset_index()
# The aggregated fraction and shares variables will not be aggregates of the subregions
df_stacked = df_stacked[~df_stacked['unit'].isin(['%', 'unknown', 'kcal/cap/day', '°C', 't DM/ha/yr'])]
df_stacked

[INFO] 10:39:41 - pyam.core: `Agricultural Demand` - 24 of 24 rows are not aggregates of components
INFO:pyam.core:`Agricultural Demand` - 24 of 24 rows are not aggregates of components
[INFO] 10:39:41 - pyam.aggregation: Cannot aggregate variable 'Agricultural Demand|Crops' because it has no components.
INFO:pyam.aggregation:Cannot aggregate variable 'Agricultural Demand|Crops' because it has no components.
[INFO] 10:39:41 - pyam.core: `Agricultural Demand|Energy` - 18 of 24 rows are not aggregates of components
INFO:pyam.core:`Agricultural Demand|Energy` - 18 of 24 rows are not aggregates of components
[INFO] 10:39:41 - pyam.aggregation: Cannot aggregate variable 'Agricultural Demand|Energy|Crops' because it has no components.
INFO:pyam.aggregation:Cannot aggregate variable 'Agricultural Demand|Energy|Crops' because it has no components.
[INFO] 10:39:41 - pyam.aggregation: Cannot aggregate variable 'Agricultural Demand|Non-Energy|Crops|Feed' because it has no components.
INFO:pyam.ag

Unnamed: 0,model,scenario,region,variable,unit,totals,2020,2030,2040,2050
14,AIM_Harmonized,REF,World,Secondary Energy|Electricity|Nuclear,EJ/yr,region_total,,7.049134,4.582987,2.40364
15,AIM_Harmonized,REF,World,Secondary Energy|Electricity|Nuclear,EJ/yr,subregions_total,,7.038118,4.574654,2.398691
18,AIM_Harmonized,REF,World,Trade|Agriculture|Crops,million t DM/yr,region_total,0.0,-0.0162,-0.0907,-0.1193
19,AIM_Harmonized,REF,World,Trade|Agriculture|Crops,million t DM/yr,subregions_total,-0.000759,-0.016732,-0.090906,-0.119479
20,AIM_Harmonized,REF,World,Trade|Energy,EJ/yr,region_total,,0.007,0.0092,0.0119
21,AIM_Harmonized,REF,World,Trade|Energy,EJ/yr,subregions_total,,0.007102,0.009602,0.012002
22,AIM_Harmonized,REF,World,Trade|Energy|Biomass & Biofuels,EJ/yr,region_total,,0.0,,0.0
23,AIM_Harmonized,REF,World,Trade|Energy|Biomass & Biofuels,EJ/yr,subregions_total,,0.0002,,0.0001
24,AIM_Harmonized,REF,World,Trade|Energy|Coal,EJ/yr,region_total,,0.007,0.0092,0.0119
25,AIM_Harmonized,REF,World,Trade|Energy|Coal,EJ/yr,subregions_total,,0.006801,0.009501,0.011801


In [15]:
# Variables that will be shared and what topic they belong to
to_share_vars_topics = {
    'Agricultural Demand': 'Land',
    'Agricultural Demand|Crops': 'Land',
    'Agricultural Demand|Energy': 'Land',
    'Agricultural Demand|Energy|Crops': 'Land',
    'Agricultural Demand|Non-Energy|Crops|Feed': 'Land',
    'Agricultural Demand|Non-Energy|Crops|Food': 'Land',
    'Agricultural Demand|Non-Energy|Crops|Other': 'Land',
    'Agricultural Demand|Non-Energy|Livestock': 'Land',
    'Agricultural Production': 'Land',
    'Agricultural Production|Crops': 'Land',
    'Agricultural Production|Energy|Crops': 'Land',
    'Agricultural Production|Non-Energy|Crops': 'Land',
    'Agricultural Production|Non-Energy|Livestock': 'Land',
    'Carbon Sequestration|CCS': 'Emissions',
    'Carbon Sequestration|CCS|Biomass': 'Emissions',
    'Carbon Sequestration|CCS|Fossil': 'Emissions',
    'Carbon Stocks': 'Land',
    'Carbon Stocks|Soil': 'Land',
    'Carbon Stocks|Vegetation': 'Land',
    'Emissions|CH4': 'Emissions',
    'Emissions|CH4|AFOLU': 'Emissions',
    'Emissions|CH4|Energy': 'Emissions',
    'Emissions|CO2': 'Emissions',
    'Emissions|CO2|AFOLU': 'Emissions',
    'Emissions|CO2|Energy': 'Emissions',
    'Emissions|Kyoto Gases': 'Emissions',
    'Emissions|N2O': 'Emissions',
    'Emissions|N2O|AFOLU': 'Emissions',
    'Emissions|N2O|Energy': 'Emissions',
    'Emissions|F-Gases':'Emissions',
    'Emissions|BC':'Emissions', 
    'Emissions|CO':'Emissions', 
    'Emissions|NOx':'Emissions', 
    'Emissions|NH3':'Emissions', 
    'Emissions|OC':'Emissions', 
    'Emissions|Sulfur':'Emissions', 
    'Emissions|VOC':'Emissions',
    'Food Demand|Crops (Share)': 'Food',
    'GDP|PPP': 'Macro_Economy',
    'GDP|PPP|Per Capita': 'Macro_Economy',
    'Land Cover|Cropland': 'Land',
    'Land Cover|Forest and Other Natural Land': 'Land',
    'Land Cover|Other': 'Land',
    'Land Cover|Pasture': 'Land',
    'Population': 'Macro_Economy',
    'Population|Urban (Share)': 'Macro_Economy',
    'Primary Energy': 'Energy',
    'Primary Energy|Biomass': 'Energy',
    'Primary Energy|Biomass|w/ CCS': 'Energy',
    'Primary Energy|Biomass|w/o CCS': 'Energy',
    'Primary Energy|Coal': 'Energy',
    'Primary Energy|Coal|w/ CCS': 'Energy',
    'Primary Energy|Coal|w/o CCS': 'Energy',
    'Primary Energy|Fossil': 'Energy',
    'Primary Energy|Fossil|w/ CCS': 'Energy',
    'Primary Energy|Fossil|w/o CCS': 'Energy',
    'Primary Energy|Gas': 'Energy',
    'Primary Energy|Gas|w/ CCS': 'Energy',
    'Primary Energy|Gas|w/o CCS': 'Energy',
    'Primary Energy|Non-Biomass Renewables': 'Energy',
    'Primary Energy|Non-Biomass Renewables (Share)': 'Energy',
    'Primary Energy|Oil': 'Energy',
    'Primary Energy|Oil|w/ CCS': 'Energy',
    'Primary Energy|Oil|w/o CCS': 'Energy',
    'Primary Energy|Other': 'Energy',
    'Water Withdrawal': 'Water',
    'Agricultural Demand|Non-Energy|Crops': 'Land',
    'Agricultural Production|Non-Energy|Crops': 'Land',
    'Final Energy': 'Energy',
    'Final Energy|Electricity': 'Energy',
    'Final Energy|Industry': 'Energy',
    'Final Energy|Other Sector': 'Energy',
    'Final Energy|Residential and Commercial': 'Energy',
    'Final Energy|Transportation': 'Energy',
    'Primary Energy|Solar': 'Energy',
    'Primary Energy|Wind': 'Energy',
    'Primary Energy|Hydro': 'Energy',
    'Primary Energy|Nuclear': 'Energy',
    'Water Consumption': 'Water',
    'Population|Clean Cooking Access': 'Socio_Economics',
    'Population|Electricity Access': 'Socio_Economics',
    'Population|Relying on Solid Fuels': 'Socio_Economics',
    'Temperature|Global Mean': 'Climate',
    'Primary Energy|Biomass|Traditional': 'Energy',
    'Carbon stocks': 'Land',
    'Carbon stocks|Soil': 'Land',
    'Carbon stocks|Vegetation': 'Land',
    'Food Waste': 'Food',
    'Food Waste|Consumption': 'Food',
    'Food Waste|Per capita': 'Food',
    'Food Waste|Production': 'Food',
    'Food Demand': 'Food',
    'Food Demand|Crops': 'Food',
    'Food Demand|Livestock': 'Food',
    'Yield|Cereal':'Land',
    'Trade|Agriculture|Crops':'Land',
    'Trade|Energy':'Energy',
    'Trade|Energy|Biomass & Biofuels':'Energy',
    'Trade|Energy|Coal':'Energy',
    'Trade|Energy|Gas':'Energy',
    'Trade|Energy|Oil':'Energy',
    'Secondary Energy|Electricity':'Energy',
    'Secondary Energy|Electricity|Biomass':'Energy',
    'Secondary Energy|Electricity|Coal':'Energy',
    'Secondary Energy|Electricity|Gas':'Energy',
    'Secondary Energy|Electricity|Non-Biomass Renewables':'Energy',
    'Secondary Energy|Electricity|Nuclear':'Energy',
    'Secondary Energy|Electricity|Oil':'Energy',
    'Secondary Energy|Electricity|Hydro':'Energy',
    'Secondary Energy|Electricity|Other':'Energy',
    'Secondary Energy|Electricity|Solar':'Energy',
    'Secondary Energy|Electricity|Wind':'Energy',
    'Final Energy|Hydrogen':'Energy',
    'Final Energy|Heat':'Energy',
    'Final Energy|Gases':'Energy',
    'Final Energy|Liquids':'Energy',
    'Final Energy|Solids':'Energy',}


In [16]:
def round_to_0_1_percent(value):
    if value == 0:
        return 0
    # Determine the number of significant digits to round to
    magnitude = np.floor(np.log10(abs(value))) - 2
    rounding_factor = 10 ** magnitude
    return np.round(value / rounding_factor) * rounding_factor

df_to_share = df.filter(variable=to_share_vars_topics.keys()).timeseries().reset_index()
ratio_mask = df_to_share['unit'] == '%'
# Apply rounding to the 'value' column for rows where the 'unit' column is '%'
df_to_share.loc[ratio_mask, df_to_share.columns[5:]] = round(df_to_share.loc[ratio_mask, df_to_share.columns[5:]])
# Apply rounding to 0.1 percent for the other variables
df_to_share.loc[:, df_to_share.columns[5:]] = df_to_share.loc[:, df_to_share.columns[5:]].applymap(round_to_0_1_percent)
df_to_share['model'] = df_to_share['variable'].map(to_share_vars_topics).fillna(df_to_share['model'])
df_to_share

  df_to_share.loc[:, df_to_share.columns[5:]] = df_to_share.loc[:, df_to_share.columns[5:]].applymap(round_to_0_1_percent)


Unnamed: 0,model,scenario,region,variable,unit,2010,2020,2030,2040,2050
0,Land,REF,Africa (UN-R5),Agricultural Demand,million t DM/yr,,561.00,764.000,1020.00,1320.00
1,Land,REF,Africa (UN-R5),Agricultural Demand|Crops,million t DM/yr,,542.00,735.000,977.00,1270.00
2,Land,REF,Africa (UN-R5),Agricultural Demand|Energy,million t DM/yr,,8.75,7.990,7.71,11.00
3,Land,REF,Africa (UN-R5),Agricultural Demand|Energy|Crops,million t DM/yr,,8.75,8.950,9.64,13.90
4,Land,REF,Africa (UN-R5),Agricultural Demand|Non-Energy|Crops,million t DM/yr,,533.00,726.000,968.00,1250.00
...,...,...,...,...,...,...,...,...,...,...
1888,Energy,TECH-TP,World,Primary Energy|Biomass|w/ CCS,EJ/yr,0.0,0.00,13.000,39.00,53.50
1889,Energy,TECH-TP,World,Primary Energy|Coal|w/ CCS,EJ/yr,0.0,0.00,2.350,13.30,17.60
1890,Energy,TECH-TP,World,Primary Energy|Fossil|w/ CCS,EJ/yr,0.0,0.00,9.630,87.20,129.00
1891,Energy,TECH-TP,World,Primary Energy|Gas|w/ CCS,EJ/yr,0.0,0.00,6.430,71.10,108.00


In [17]:
df_to_share.to_excel('../../../data/SOD/model_results/to_share/IMAGE_and_AIM_to_share.xlsx', index=False)