In [1]:
import numpy as np
import pandas as pd
import pymrio

In [2]:
# Load pymrio's test IO system and run calc_all() on it. calc_all() returns
# the IOSystem object, which is why its repr is shown as the cell output.
io = pymrio.load_test()
io.calc_all()

<pymrio.core.mriosystem.IOSystem at 0x7f78561c98e0>

In [3]:
# Original (pre-aggregation) classification of the test system.
original_sector_names = io.get_sectors().tolist()
original_region_names = io.get_regions().tolist()
print(
    "Sectors: {sec},\nRegions: {reg}".format(
        sec=original_sector_names,
        reg=original_region_names,
    )
)

Sectors: ['food', 'mining', 'manufactoring', 'electricity', 'construction', 'trade', 'transport', 'other'],
Regions: ['reg1', 'reg2', 'reg3', 'reg4', 'reg5', 'reg6']


# **Aggregation using a dataframe concordance**

We create a DataFrame `df` that maps every old sector and region name to its new (aggregated) name.
The 8 old sectors ('food', 'mining', 'manufactoring', 'electricity', 'construction', 'trade', 'transport', 'other') are aggregated into 3 new sectors: sec0, sec1 and sec2.

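The 6 old regions ('reg1', 'reg2', 'reg3', 'reg4', 'reg5', 'reg6') are aggregated into 2 new regions: reg0 and reg1. Note that the new name reg1 reuses the name of an old region; after aggregation it stands for the old regions reg4, reg5 and reg6.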

In [4]:
# Concordance: original sector name -> aggregated sector name.
# ('manufactoring' is spelled exactly as in the test system's sector list.)
sectors = {
    'food': 'sec0',
    'mining': 'sec1',
    'manufactoring': 'sec1',
    'electricity': 'sec1',
    'construction': 'sec1',
    'trade': 'sec2',
    'transport': 'sec2',
    'other': 'sec2',
}

# Concordance: original region name -> aggregated region name.
regions = {
    'reg1': 'reg0',
    'reg2': 'reg0',
    'reg3': 'reg0',
    'reg4': 'reg1',
    'reg5': 'reg1',
    'reg6': 'reg1',
}

# One column per classification. The row index is the union of all old
# labels, so every row is filled in one column and NaN in the other.
d = dict(Sector=sectors, Regions=regions)
df = pd.DataFrame(data=d)

In [5]:
# Display the concordance: rows are old labels; a cell is empty (NaN) in the
# column of the classification the label does not belong to.
df

Unnamed: 0,Sector,Regions
food,sec0,
mining,sec1,
manufactoring,sec1,
electricity,sec1,
construction,sec1,
trade,sec2,
transport,sec2,
other,sec2,
reg1,,reg0
reg2,,reg0


From this DataFrame we want to build two aggregation matrices: `sec_agg_matrix` and `reg_agg_matrix`. These are numpy arrays that can then be used in the procedure **aggregation using a numerical concordance matrix**.

To do so, we use the function **transformation(df,io)** from the module **from_df_to_array.py**; its source is reproduced in the next cell:

In [8]:
import numpy as np
import re

def _concordance_to_matrix(concordance, old_labels):
    """Turn one concordance column into a 0/1 aggregation matrix.

    Parameters
    ----------
    concordance : pandas.Series
        Indexed by original label, valued by new (aggregated) label.
        NaN entries (rows belonging to the other classification) are ignored.
    old_labels : iterable
        Original labels, in the order used by the IO system.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (number of new labels, number of old labels).
        Entry [i, j] is 1 when old label j is mapped to new label i, else 0.

    Raises
    ------
    ValueError
        If the concordance maps a label that is not among ``old_labels``.
    """
    concordance = concordance.dropna()
    old_labels = list(old_labels)

    # Rows follow the order in which new labels first appear in the
    # concordance; columns follow the order of the original classification.
    new_position = {
        label: row for row, label in enumerate(concordance.unique())
    }
    old_position = {label: col for col, label in enumerate(old_labels)}

    matrix = np.zeros((len(new_position), len(old_position)), dtype=int)
    for old_label, new_label in concordance.items():
        if old_label not in old_position:
            raise ValueError(
                "Label {!r} of the concordance is not part of the original "
                "classification {}".format(old_label, old_labels)
            )
        matrix[new_position[new_label], old_position[old_label]] = 1
    return matrix


def transformation(df, io):
    """Build sector and region aggregation matrices from a concordance dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Concordance indexed by the original sector and region names, with a
        column 'Sector' (new sector name for each original sector, NaN
        elsewhere) and a column 'Regions' (new region name for each original
        region, NaN elsewhere).
    io : object providing ``get_index()``
        The system to aggregate. ``io.get_index()`` must return a two-level
        index whose level 0 holds the regions and level 1 the sectors.

    Returns
    -------
    tuple of numpy.ndarray
        ``(sec_agg_matrix, reg_agg_matrix)``. Each matrix has one row per new
        label and one column per original label and contains 1 where the
        original label is part of the new one, 0 otherwise. Original labels
        that are absent from ``df`` yield an all-zero column.

    Raises
    ------
    ValueError
        If ``df`` maps a sector or region that does not exist in ``io``.
    """
    # The matrices are built directly instead of being stored under generated
    # names through ``locals()``: writing to ``locals()`` inside a function is
    # not guaranteed to persist between calls.
    original_index = io.get_index()
    old_regions = original_index.get_level_values(0).unique()
    old_sectors = original_index.get_level_values(1).unique()

    sec_agg_matrix = _concordance_to_matrix(df['Sector'], old_sectors)
    reg_agg_matrix = _concordance_to_matrix(df['Regions'], old_regions)

    return sec_agg_matrix, reg_agg_matrix

In [9]:
# Derive the two 0/1 aggregation matrices (rows: new labels, columns: old
# labels) from the concordance dataframe.
sec_agg_matrix,reg_agg_matrix = transformation(df,io)

0
1
2
0
1


In [10]:
# Aggregate using the numerical concordance matrices built above. The object
# shown as output has the same address as `io` in the earlier cells, i.e. `io`
# itself now holds the aggregated system.
# NOTE(review): because `io` is changed, re-running this cell presumably
# requires reloading the test system first - confirm.
io.aggregate(region_agg=reg_agg_matrix, sector_agg=sec_agg_matrix)

<pymrio.core.mriosystem.IOSystem at 0x7f78561c98e0>

In [11]:
# Classification of the system after aggregation.
aggregated_sector_names = io.get_sectors().tolist()
aggregated_region_names = io.get_regions().tolist()
print(
    "Sectors: {sec},\nRegions: {reg}".format(
        sec=aggregated_sector_names,
        reg=aggregated_region_names,
    )
)

Sectors: ['sec0', 'sec1', 'sec2'],
Regions: ['reg0', 'reg1']


In [12]:
# Run calc_all() again, this time on the aggregated system.
io.calc_all()

<pymrio.core.mriosystem.IOSystem at 0x7f78561c98e0>

In [13]:
# Show the D_cba table of the emissions extension: one row per
# (stressor, compartment) and one column per aggregated (region, sector).
io.emissions.D_cba

Unnamed: 0_level_0,region,reg0,reg0,reg0,reg1,reg1,reg1
Unnamed: 0_level_1,sector,sec0,sec1,sec2,sec0,sec1,sec2
stressor,compartment,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
emission_type1,air,9041149.0,301879100.0,152323600.0,24694650.0,346874200.0,245411700.0
emission_type2,water,2123543.0,48845090.0,98897570.0,6000239.0,45945300.0,189273100.0
