# Summary table

This notebook tabulates key aggregates for each country and country-sector, namely: 

- `x`: total output
- `zuse`: intermediate use
- `va`: value added
- `zsales`: intermediate sales
- `y`: final sales
- `ez`: exports of intermediates
- `ey`: exports of final goods
- `e`: total exports

It loads the MRIO table selected under "Select MRIO version" from `data/mrio/`. Results are saved in `data/` under the matching output name (`summary.parquet` for the default version).

## Set up

In [1]:
import numpy as np
import pandas as pd
import duckdb
from functions import zeroout

### Select MRIO version

In [None]:
# `input` is the MRIO file read from `../data/mrio/`; `output` names the
# summary file written to `../data/`. To switch version, uncomment one of
# the alternative pairs below (the last assignment executed wins).
# NOTE: `input` shadows the Python builtin of the same name; the name is
# kept because the other cells refer to it.
input = 'adb-mrio.parquet'
output = 'summary.parquet'
# input, output = 'adb-mrio62.parquet', 'summary62.parquet'
# input, output = 'adb-mrio62-const.parquet', 'summary62-const.parquet'

### Parameters

In [None]:
# Sector list, keeping the first row for each distinct `ind` code.
sectors = (
    pd.read_excel('../data/raw/sectors.xlsx')
    .drop_duplicates(subset='ind', ignore_index=True)
)

# Distinct years present in the selected MRIO file (ascending) and the
# file's total row count across all years.
years = duckdb.sql(
    f"SELECT DISTINCT t FROM read_parquet('../data/mrio/{input}') ORDER BY t"
).df()['t']
rows = duckdb.sql(
    f"SELECT COUNT(*) FROM read_parquet('../data/mrio/{input}')"
).df()

N = 35      # Number of sectors
f = 5       # Number of final demand components

# Rows per year = G*N country-sector rows plus 7 trailing rows. The compile
# cell reads those trailing rows as six rows summed into value added
# followed by one row used as total output.
rows_per_year = rows.iloc[0, 0] / len(years)
G = int((rows_per_year - 7) / N)    # Number of countries + 1

# Silence numpy warnings for division by zero and invalid operations.
np.seterr(divide='ignore', invalid='ignore')

## Compile table

In [3]:
# Compile the summary table: one row per (year, country, sector).
#
# For every year the MRIO table is loaded as a plain array and sliced into
# its blocks; per-year frames are collected in a list and concatenated once
# at the end (concatenating inside the loop re-copies all earlier years on
# every iteration).

GN = G * N  # number of country-sector rows/columns in the intermediates block

# Loop-invariant aggregation matrices. Right-multiplying by them sums each
# country's group of columns into a single column.
sum_components = np.kron(np.eye(G), np.ones((f, 1)))   # shape (G*f, G)
sum_sectors = np.kron(np.eye(G), np.ones((N, 1)))      # shape (G*N, G)

frames = []

for year in years:

    mrio = duckdb.sql(f"SELECT * EXCLUDE(t, si) FROM read_parquet('../data/mrio/{input}') WHERE t={year}").df()
    mrio = mrio.values

    x = mrio[-1, :GN]                                   # last row: total output
    Z = mrio[:GN, :GN]                                  # intermediate transactions
    zuse, zsales = np.sum(Z, axis=0), np.sum(Z, axis=1) # column sums = use, row sums = sales
    va = np.sum(mrio[-7:-1, :GN], axis=0)               # six rows above the last row
    Y_big = mrio[:GN, GN:-1]                            # final demand, all components
    Y = Y_big @ sum_components                          # final demand by destination country

    # `zeroout` is imported from the local `functions` module. With
    # inverse=True it presumably zeroes the own-country entries so that only
    # cross-border flows remain -- TODO confirm against its definition.
    Zd = zeroout(Z @ sum_sectors, inverse=True)
    Yd = zeroout(Y, inverse=True)
    y, ez, ey = np.sum(Y, axis=1), np.sum(Zd, axis=1), np.sum(Yd, axis=1)

    frames.append(pd.DataFrame({
        't': year,
        's': np.arange(1, G+1).repeat(N),
        'i': np.tile(sectors['ind'], G),
        'i5': np.tile(sectors['ind5'], G),
        'i15': np.tile(sectors['ind15'], G),
        'x': x,
        'zuse': zuse,
        'va': va,
        'zsales': zsales,
        'y': y,
        'e': ez + ey,
        'ez': ez,
        'ey': ey
    }))

    print(f'{year} done')

# Keep the original result for an input with no years: an empty frame.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# `output` already includes the `.parquet` extension, so it is used as the
# file name directly. The previous code appended a second `.parquet` and
# branched on a `version` variable that is not defined in this notebook,
# which raises NameError on a fresh kernel.
df.to_parquet(f'../data/{output}', index=False)

2000 done
2007 done
2008 done
2009 done
2010 done
2011 done
2012 done
2013 done
2014 done
2015 done
2016 done
2017 done
2018 done
2019 done
2020 done
2021 done
2022 done


In [4]:
# Preview the compiled table; the notebook shows only the first and last rows.
df

Unnamed: 0,t,s,i,i5,i15,x,zuse,va,zsales,y,e,ez,ey
0,2000,1,1,1,1,3.193114e+04,17552.069348,14379.073025,26201.419947,5729.722426,2.484790e+04,20321.508799,4526.392603
1,2000,1,2,1,2,3.401253e+04,13149.310624,20863.216969,32445.331035,1567.196559,1.437000e+04,13038.688683,1331.306334
2,2000,1,3,2,3,3.420367e+04,24735.352284,9468.317206,17106.492506,17097.176984,2.495872e+04,14063.061725,10895.662622
3,2000,1,4,2,3,3.055290e+03,1888.371583,1166.918433,1891.897568,1163.392448,1.210870e+03,603.869736,607.000059
4,2000,1,5,2,3,6.788278e+02,486.097892,192.729927,500.416684,178.411134,1.837121e+02,104.557562,79.154574
...,...,...,...,...,...,...,...,...,...,...,...,...,...
37480,2022,63,31,5,13,1.095568e+06,509164.565212,586403.798443,125944.719657,969623.643998,1.076339e+06,112288.704333,964050.297289
37481,2022,63,32,5,14,7.409258e+05,334967.915470,405957.907566,62097.664870,678828.158166,7.267739e+05,55269.325496,671504.556710
37482,2022,63,33,5,14,6.945316e+05,407613.305425,286918.279607,108782.670610,585748.914422,6.814517e+05,101076.543901,580375.183511
37483,2022,63,34,5,15,5.290461e+05,315981.680621,213064.458958,249727.360082,279318.779497,5.114178e+05,238297.375401,273120.439283
