# Summary table

This notebook tabulates key aggregates for each country-sector and year, namely:

- `x`: total output
- `zuse`: intermediate use
- `va`: value added
- `zsales`: intermediate sales
- `y`: final sales
- `ez`: exports of intermediates
- `ey`: exports of final goods
- `e`: total exports

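It loads the MRIO tables stored in `data/mrio/` and the sector classification in `data/raw/sectors.xlsx`. Results are saved in `data/` as `<output>.parquet` (or `<output>_<version>.parquet` when a version is set), for example `summary.parquet`.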

In [1]:
import numpy as np
import pandas as pd
import duckdb
from functions import zeroout

## Set up

In [2]:
# Dataset selection. Exactly one option group below should be active; the
# others are kept commented out as ready-made alternatives.
#   input   : parquet file name, read from ../data/mrio/ by the compile cell
#             (NOTE: this name shadows Python's built-in `input()`)
#   years   : values of `t` to process, one query per year
#   output  : base name of the parquet file written to ../data/
#   version : optional suffix; when not None the file is `{output}_{version}.parquet`

# input = 'adb-mrio.parquet'
# years = np.arange(2017, 2022+1)
# output = 'summary'
# version = None

# input = 'adb-mrio62.parquet'
# years = np.append(2000, np.arange(2007, 2021+1))
# output = 'summary62'
# version = None

input = 'adb-mrio62-const.parquet'
years = np.arange(2007, 2022+1)
output = 'summary62-const'
version = None

# input = 'adb-mrio_jun2023.parquet'
# years = np.arange(2020, 2022+1)
# output = 'summary'
# version = 'jun2023'

# Sector lookup table, reduced to one row per `ind` (first occurrence kept).
# The compile cell tiles its `ind`, `ind5` and `ind15` columns G times next to
# a country id column of length G*N, so it must have exactly N rows here.
sectors = pd.read_excel('../data/raw/sectors.xlsx')
sectors = sectors.drop_duplicates(subset='ind', ignore_index=True)

# Table dimensions; these must match the layout of the selected `input` file.
# NOTE(review): G = 73 presumably pairs with the non-"62" tables above — confirm.
# G = 73      # Number of countries + ROW
G = 63      # presumably 62 countries + ROW for the "62" tables — TODO confirm
N = 35      # Number of sectors
f = 5       # Number of final demand components

# Silence NumPy's divide-by-zero and invalid-operation warnings. `np.seterr`
# returns the previous settings, which is what this cell echoes as its output.
# NOTE(review): no division is visible in this notebook; presumably this is
# needed by imported helper code — confirm.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

## Compile table

In [3]:
# Build the summary table: one row per (year, country, sector).
#
# Each yearly table is read as a plain array and sliced by position:
#   - rows/columns [0, G*N)        : intermediate transactions Z
#   - columns [G*N, last column)   : final demand, f components per country
#   - last row                     : total output x
#   - the six rows above the last  : summed into value added
# NOTE(review): the meaning of those six rows is not visible here — confirm
# against the MRIO file layout.

GN = G * N

# Loop-invariant aggregators: right-multiplying by these sums the columns
# belonging to each country, giving one column per country.
sum_fd_by_country = np.kron(np.eye(G), np.ones((f, 1)))        # (G*f, G)
sum_sectors_by_country = np.kron(np.eye(G), np.ones((N, 1)))   # (G*N, G)

# Identifier columns are identical for every year, so build them once.
s_ids = np.arange(1, G+1).repeat(N)
i_ids = np.tile(sectors['ind'], G)
i5_ids = np.tile(sectors['ind5'], G)
i15_ids = np.tile(sectors['ind15'], G)

# Collect the yearly frames and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
frames = []

for year in years:

    mrio = duckdb.sql(f"SELECT * EXCLUDE(t, si) FROM read_parquet('../data/mrio/{input}') WHERE t={year}").df()
    mrio = mrio.values

    # Fail early with a readable message instead of an opaque indexing or
    # matrix-shape error further down.
    if mrio.shape[0] == 0:
        raise ValueError(f'No rows found for t={year} in {input}')
    if mrio.shape[1] != GN + G*f + 1:
        raise ValueError(
            f'{input} (t={year}) has {mrio.shape[1]} columns; '
            f'expected {GN + G*f + 1} for G={G}, N={N}, f={f}'
        )

    x = mrio[-1, :GN]
    Z = mrio[:GN, :GN]
    zuse = np.sum(Z, axis=0)        # column sums: inputs used by each country-sector
    zsales = np.sum(Z, axis=1)      # row sums: intermediate sales of each country-sector
    va = np.sum(mrio[-7:-1, :GN], axis=0)
    Y_big = mrio[:GN, GN:-1]
    Y = Y_big @ sum_fd_by_country   # final demand collapsed to one column per country

    # `zeroout(..., inverse=True)` is a project helper; presumably it zeroes the
    # own-country entries so that only cross-border flows remain — confirm.
    Zd = zeroout(Z @ sum_sectors_by_country, inverse=True)
    Yd = zeroout(Y, inverse=True)
    y = np.sum(Y, axis=1)
    ez = np.sum(Zd, axis=1)
    ey = np.sum(Yd, axis=1)

    df_t = pd.DataFrame({
        't': year,
        's': s_ids,
        'i': i_ids,
        'i5': i5_ids,
        'i15': i15_ids,
        'x': x,
        'zuse': zuse,
        'va': va,
        'zsales': zsales,
        'y': y,
        'e': ez + ey,
        'ez': ez,
        'ey': ey
    })
    frames.append(df_t)

    print(f'{year} done')

# An empty `years` yields an empty table, since pd.concat rejects an empty list.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

if version is None:
    outputname = f'{output}.parquet'
else:
    outputname = f'{output}_{version}.parquet'

df.to_parquet(f'../data/{outputname}', index=False)

2007 done
2008 done
2009 done
2010 done
2011 done
2012 done
2013 done
2014 done
2015 done
2016 done
2017 done
2018 done
2019 done
2020 done
2021 done
2022 done


In [4]:
# Show the compiled table as a visual check of the columns and row count.
df

Unnamed: 0,t,s,i,i5,i15,x,zuse,va,zsales,y,e,ez,ey
0,2007,1,1,1,1,54648.110587,32242.086925,22406.023662,43350.556971,11297.553616,46999.346708,37033.418336,9965.928372
1,2007,1,2,1,2,150331.391823,53956.174041,96375.217783,138334.517541,11996.874283,70234.382761,59303.233422,10931.149339
2,2007,1,3,2,3,76420.977590,56118.016682,20302.960908,36993.237903,39427.739687,59339.179828,32572.677063,26766.502765
3,2007,1,4,2,3,6916.714730,4519.841273,2396.873457,5388.573717,1528.141013,5248.697953,4115.359733,1133.338219
4,2007,1,5,2,3,1435.920082,1052.686569,383.233513,1253.637563,182.282519,907.754565,811.907659,95.846906
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35275,2022,63,31,5,13,896419.887183,402503.377734,493916.509448,103023.031609,793396.855573,880580.317397,91849.328909,788730.988488
35276,2022,63,32,5,14,606242.991976,264462.309757,341780.682219,50779.917469,555463.074507,594501.062573,45195.505607,549305.556966
35277,2022,63,33,5,14,568282.131680,321334.621551,246947.510129,88993.175308,479288.956372,557491.012028,82686.335717,474804.676311
35278,2022,63,34,5,15,432878.035263,253143.825317,179734.209947,204032.450814,228845.584449,418356.858477,194683.864522,223672.993956
