In [1]:
import pandas as pd 
import numpy as np
import os
import re
import copy
import time

### Concordance Tables
For more details on product concordance and country concordance, check the notebook on trade data processing

In [2]:
### concordance for country names
baci_country = pd.read_csv("data/BACI_HS92_V202401b/country_codes_V202401b.csv", encoding = "latin-1")
ifs_country = pd.read_excel("concordance/ccode_Comtrade_BACI_WITS_20241113.xlsx")
ifs_country = ifs_country[["BACI","Country"]].dropna()
baci_ifs = dict(zip(ifs_country.BACI, ifs_country.Country))
baci_code_to_name = dict(zip(baci_country.country_code, baci_country.country_name))
### mapping from hs combined to h0 to GTAP to IFs
h0_gtap = pd.read_csv("concordance/hs92gtap.csv", encoding="latin-1")
h0_gtap = h0_gtap[[" Product Code", "GTAP Product Code"]]
h0_gtap.columns = ["H0", "GTAP"]
h0_gtap = dict(zip(h0_gtap.H0, h0_gtap.GTAP))
gtap_ifs = pd.read_excel("concordance/GTAP 9_10_11 to IFs.xlsx")
gtap_ifs = gtap_ifs[["10number", "GTAP10toIFs"]]
gtap_ifs.columns = ["GTAP", "IFs"]
gtap_ifs = dict(zip(gtap_ifs.GTAP, gtap_ifs.IFs))

In [3]:
gdp_curr = pd.read_csv("concordance/WDI GDP currentUSD 20241119.csv", skiprows=3)
gdp_curr_bl = gdp_curr[gdp_curr["Country Name"].isin(["Belgium","Luxembourg"])]\
    [[*["Country Name"], *[str(y) for y in range(1995,2001)]]].copy(True)
gdp_curr_sacu = gdp_curr[gdp_curr["Country Name"].isin(["Botswana","Lesotho","Eswatini","Namibia", "South Africa"])]\
    [[*["Country Name"], *[str(y) for y in range(1995,2001)]]].copy(True)
###
totals = gdp_curr_bl.iloc[:, 1:].sum()
prop_bl = gdp_curr_bl.iloc[:, 1:].div(totals)
prop_bl.insert(0, 'Country Name', gdp_curr_bl['Country Name'])
prop_bl = prop_bl.melt(id_vars="Country Name", var_name="Year", value_name="Ratio")
prop_bl.Year = prop_bl.Year.astype(int)
totals = gdp_curr_sacu.iloc[:, 1:].sum()
prop_sacu = gdp_curr_sacu.iloc[:, 1:].div(totals)
prop_sacu.insert(0, 'Country Name', gdp_curr_sacu['Country Name'])
prop_sacu = prop_sacu.melt(id_vars="Country Name", var_name="Year", value_name="Ratio")
prop_sacu.Year = prop_sacu.Year.astype(int)

### TCI Calculation
- There is no need for dollar conversion when calculating the TCI
- Be aware of the memory use and time spent, some previous records below </br>
    &emsp; Year 2018 done, 89.47776087125142 mins</br>
    &emsp; Year 2019 done, 87.68518654505412 mins</br>
    &emsp; Year 2020 done, 87.02021634976069 mins</br>
    &emsp; Year 2021 done, 87.31895196437836 mins</br>
- For SACU & Bel-Lux prior to 2000, data still need to be disaggregated using GDP
    - https://data.worldbank.org/indicator/NY.GDP.MKTP.CD

In [4]:
def trade_split_gdp(dt_baci, prop, year, actor_column ="Exporter", dict_country={"Southern African Customs Union (...1999)": ["Botswana","Lesotho","Eswatini","Namibia", "South Africa"]}):
    '''Given the formatted BACI data, split trade for certain country groups based on GDP'''
    ### if no country group to be splitted, return the original data
    dt_baci_group = dt_baci[dt_baci[actor_column].isin(dict_country.keys())]
    if dt_baci_group.empty:
        return dt_baci
    ### expand rows in the dataframe, and use the country group dictionary to add those country rows
    dt_baci_group_rest = dt_baci[~dt_baci[actor_column].isin(dict_country.keys())]
    expanded_rows =  dt_baci_group[actor_column].map(dict_country).explode()
    dt_baci_group_df = dt_baci_group.loc[dt_baci_group.index.repeat(expanded_rows.groupby(level=0).size())]
    dt_baci_group_df[actor_column] = expanded_rows.values
    ### use GDP to split trade values
    gdp_curr_group = prop[prop.Year==year].drop(columns=["Year"])
    gdp_curr_group.columns = [actor_column, "Ratio"]
    dt_baci_group_df = pd.merge(left= dt_baci_group_df, right=gdp_curr_group, on=[actor_column], how = "left")
    dt_baci_group_df.Trade = dt_baci_group_df.Trade * dt_baci_group_df.Ratio
    dt_baci_group_df = dt_baci_group_df.drop(columns=["Ratio"])
    return pd.concat([dt_baci_group_rest, dt_baci_group_df])
# ### test example
# trade = trade_split_gdp(dt_baci=trade, prop= prop_sacu, year = 1997, actor_column= "Exporter", 
#                         dict_country={"Southern African Customs Union (...1999)": ["Botswana","Lesotho","Eswatini","Namibia", "South Africa"]})
# trade = trade_split_gdp(dt_baci=trade, prop= prop_sacu, year = 1997, actor_column= "Importer", 
#                         dict_country={"Southern African Customs Union (...1999)": ["Botswana","Lesotho","Eswatini","Namibia", "South Africa"]})
# trade = trade_split_gdp(dt_baci=trade, prop= prop_bl, year = 1997, actor_column= "Exporter", 
#                         dict_country={"Belgium-Luxembourg (...1998)": ["Belgium","Luxembourg"]})
# trade = trade_split_gdp(dt_baci=trade, prop= prop_bl, year = 1997, actor_column= "Importer", 
#                         dict_country={"Belgium-Luxembourg (...1998)": ["Belgium","Luxembourg"]})
# trade

In [5]:
Sectors=["Agri","Manu","Mate","ICT","Ener"]
for year in range(1995,2023):
    # output the calculation time for each year
    start = time.time()
    trade=pd.read_csv(f"data/BACI_HS92_V202401b/BACI_HS92_Y{year}_V202401b.csv", encoding = "latin-1")
    trade.drop(columns=["q"],inplace=True)
    trade.rename(columns={"t":"Year","i":"Exporter","j":"Importer","k":"Commodity","v":"Trade"},inplace=True)
    assert trade.Year[0]==year
    trade.drop(columns=["Year"],inplace=True)
    # to IFs countries
    trade.Exporter = trade.Exporter.map(baci_code_to_name)
    trade.Importer = trade.Importer.map(baci_code_to_name)
    trade.Exporter = trade.Exporter.map(baci_ifs)
    trade.Importer = trade.Importer.map(baci_ifs)
    trade.dropna(subset=["Exporter","Importer"],inplace=True)
    ### split trade for two country groups, when exists
    trade = trade_split_gdp(dt_baci=trade, prop= prop_sacu, year = year, actor_column= "Exporter", 
                            dict_country={"Southern African Customs Union (...1999)": ["Botswana","Lesotho","Eswatini","Namibia", "South Africa"]})
    trade = trade_split_gdp(dt_baci=trade, prop= prop_sacu, year = year, actor_column= "Importer", 
                            dict_country={"Southern African Customs Union (...1999)": ["Botswana","Lesotho","Eswatini","Namibia", "South Africa"]})
    trade = trade_split_gdp(dt_baci=trade, prop= prop_bl, year = year, actor_column= "Exporter", 
                            dict_country={"Belgium-Luxembourg (...1998)": ["Belgium","Luxembourg"]})
    trade = trade_split_gdp(dt_baci=trade, prop= prop_bl, year = year, actor_column= "Importer", 
                            dict_country={"Belgium-Luxembourg (...1998)": ["Belgium","Luxembourg"]})
    ### sort and add IFs sub-sectors, and save concorded data
    trade.sort_values(by=["Exporter","Importer","Commodity"],inplace=True)
    trade.reset_index(drop=True,inplace=True)
    trade["GTAP"] = trade["Commodity"].map(h0_gtap)
    trade["IFsSector"] = trade["GTAP"].map(gtap_ifs)
    assert trade.IFsSector.isna().sum()==0
    trade.to_csv(f"output/trade_ifscountry/{year}.csv",index=False,encoding="latin-1")
    trade.drop(columns=["GTAP"], inplace=True) ## we don't have to drop this if we want to calculate TCI at GTAP sector levels
    ### calculate TCI 
    # trade values at sub levels
    # Commodity level trade
    ex_c=trade.groupby(["Commodity","Exporter"])["Trade"].agg("sum").reset_index()
    ex_c.columns=["Commodity","Exporter","ExportAc"]
    im_c=trade.groupby(["Commodity","Importer"])["Trade"].agg("sum").reset_index()
    im_c.columns=["Commodity","Importer","ImportBc"]
    # Sector level trade
    ex_s=trade.groupby(["IFsSector","Exporter"])["Trade"].agg("sum").reset_index()
    ex_s.columns=["IFsSector","Exporter","ExportAs"]
    im_s=trade.groupby(["IFsSector","Importer"])["Trade"].agg("sum").reset_index()
    im_s.columns=["IFsSector","Importer","ImportBs"]
    # World level trade
    ex_w=trade.groupby(["Exporter"])["Trade"].agg("sum").reset_index()
    ex_w.columns=["Exporter","ExportAw"]
    im_w=trade.groupby(["Importer"])["Trade"].agg("sum").reset_index()
    im_w.columns=["Importer","ImportBw"]
    # TCI calculation by exporter
    c_trade = list(set(trade.Exporter) | set(trade.Importer))
    c_trade.sort()
    h0_trade = trade.Commodity.unique()
    n_h0 = len(h0_trade)
    del trade # memory control
    start = time.time()
    for e in c_trade:
        TCI_list_e=[]
        for i in c_trade:
            if e==i:
                continue
            dt_outer=pd.DataFrame({"Exporter":[e]*n_h0,"Importer":[i]*n_h0})
            dt_outer["Commodity"]=h0_trade
            dt_outer["GTAP"]=dt_outer["Commodity"].map(h0_gtap)
            dt_outer["IFsSector"]=dt_outer["GTAP"].map(gtap_ifs)
            dt_outer.drop(columns=["GTAP"],inplace=True)
            #
            dt_outer=pd.merge(left=dt_outer,right=ex_c,on=["Commodity","Exporter"],how="left")
            dt_outer=pd.merge(left=dt_outer,right=im_c,on=["Commodity","Importer"],how="left")
            dt_outer=pd.merge(left=dt_outer,right=ex_s,on=["IFsSector","Exporter"],how="left")
            dt_outer=pd.merge(left=dt_outer,right=im_s,on=["IFsSector","Importer"],how="left")
            dt_outer=pd.merge(left=dt_outer,right=ex_w,on=["Exporter"],how="left")
            dt_outer=pd.merge(left=dt_outer,right=im_w,on=["Importer"],how="left")
            #
            dt_outer["ExportAPctS"]=dt_outer["ExportAc"]/dt_outer["ExportAs"]
            dt_outer["ExportAPctW"]=dt_outer["ExportAc"]/dt_outer["ExportAw"]
            dt_outer["ImportBPctS"]=dt_outer["ImportBc"]/dt_outer["ImportBs"]
            dt_outer["ImportBPctW"]=dt_outer["ImportBc"]/dt_outer["ImportBw"]
            dt_outer.drop(columns=["ExportAc","ImportBc","ExportAs","ImportBs","ExportAw","ImportBw"],inplace=True)
            #
            if not dt_outer.dropna(subset=["ExportAPctW","ImportBPctW"],how="all").empty:
                dt_outer.fillna(0,inplace=True)
                dt_outer["AbsDifExAImBS"]=abs(dt_outer["ExportAPctS"]-dt_outer["ImportBPctS"])
                dt_outer["AbsDifExAImBW"]=abs(dt_outer["ExportAPctW"]-dt_outer["ImportBPctW"])
                # 
                TCI_sector=dt_outer.groupby(["Exporter","Importer","IFsSector"])["AbsDifExAImBS"].agg("sum").reset_index()
                TCI_sector.columns=["Exporter","Importer","IFsSector","SumAbsDifExAImBS"]
                TCI_sector["TCIExAImBPctS"]=(1-(TCI_sector["SumAbsDifExAImBS"]/2))*100
                # 
                TCI_country=dt_outer.groupby(["Exporter","Importer"])["AbsDifExAImBW"].agg("sum").reset_index()
                TCI_country.columns=["Exporter","Importer","SumAbsDifExAImBW"]
                TCI_country["TCIExAImBPctW"]=(1-(TCI_country["SumAbsDifExAImBW"]/2))*100
                # 
                TCI_country_sector=pd.merge(left=TCI_sector,right=TCI_country,how="left",on=["Exporter","Importer"])
                TCI_country_sector.drop(columns=["SumAbsDifExAImBS","SumAbsDifExAImBW"],inplace=True)
                #   
                for s in Sectors:
                    if dt_outer[dt_outer.IFsSector.isin([s])].ExportAPctS.sum()==dt_outer[dt_outer.IFsSector.isin([s])].ImportBPctS.sum()==0:
                        TCI_country_sector=TCI_country_sector[~TCI_country_sector.IFsSector.isin([s])]
                TCI_list_e.append(TCI_country_sector)
        pd.concat(TCI_list_e).to_csv(f"output/tci_ifscountry/{year}_{e}.csv",index=False,encoding="latin-1")
    end = time.time()
    t = (end - start)/60
    print(f"year {year} done, {t} mins")
del TCI_list_e

year 1995 done, 82.68244405984879 mins
year 1996 done, 86.86610796451569 mins
year 1997 done, 84.17946394681931 mins
year 1998 done, 84.82406907081604 mins
year 1999 done, 83.32073316971461 mins
year 2000 done, 86.47021865447363 mins
year 2001 done, 86.07931794722874 mins
year 2002 done, 86.2250910838445 mins
year 2003 done, 86.83517854213714 mins
year 2004 done, 87.44034706354141 mins
year 2005 done, 88.11348487138748 mins
year 2006 done, 89.78537087043127 mins
year 2007 done, 89.93182307481766 mins
year 2008 done, 89.32038046916325 mins
year 2009 done, 88.819222398599 mins
year 2010 done, 89.02202553749085 mins
year 2011 done, 90.82366143465042 mins
year 2012 done, 97.70642408529918 mins
year 2013 done, 94.07156061728796 mins
year 2014 done, 97.64962896505992 mins
year 2015 done, 95.46058386166891 mins
year 2016 done, 90.77985587914785 mins
year 2017 done, 90.93249181906383 mins
year 2018 done, 91.1276346762975 mins
year 2019 done, 90.66832416852316 mins
year 2020 done, 89.6310946623