In [64]:
import pandas as pd
import numpy as np 
import sqlite3
import time
from itertools import combinations
pd.set_option('display.max_columns', None)

### Processing
__Columns needed__ <br>
'refYear', 'flowCode'<br>
'reporterCode', 'reporterDesc', 'partnerCode', 'partnerDesc' <br>
'classificationSearchCode', 'cmdCode', 'cmdDesc', 'aggrLevel', 'isLeaf' <br>
'cifvalue', 'fobvalue', 'primaryValue' <br>

__Notes__ <br>
"CIF value" refers to the "Cost, Insurance, and Freight" price of a good: the cost of the product itself, plus the cost of shipping it to the destination port and the insurance cost for the journey. "Primary value" generally refers to the inherent worth of a product based on its intrinsic qualities and intended use, without considering additional costs like shipping or insurance; essentially, it is the base price of the good itself. In total service trade, it seems that cifvalue = primaryValue. <br>
For both goods and services, we use reported exports as the baseline, because exports are normally recorded at "FOB" value, although it is the "CIF" value of imports that generates tariff revenue.<br>
Values are in current USD and need to be converted to constant 2011 or 2017 USD.


In [65]:
# Load the country concordance table (UN Comtrade / BACI / WITS names alongside IFs names).
c_map = pd.read_excel("concordance/ccode_Comtrade_BACI_WITS_20241113.xlsx")
c_ifs = c_map["Country"].unique()[:188]
# The commented steps below only need to be run to check the raw data for changes in
# country names: they print names found on one side (UN Comtrade / IFs) but not the other.
# c_uncomtrade = list(set(c_reporter.reporterDesc.str.strip()) | set(c_partner.PartnerDesc.str.strip()))
# c_uncomtrade.sort()
# for c in c_uncomtrade:
#     if c not in c_ifs:
#         print(c.__repr__())
# for c in c_ifs:
#     if c not in c_uncomtrade:
#         print(c.__repr__())

# Dictionary that matches UN Comtrade (services) entity names with IFs entity names.
c_comtrade_ifs_serv = c_map[["UNComtrade_Service", "Country"]].dropna()
dict_comtrade_ifs_serv = dict(
    zip(c_comtrade_ifs_serv["UNComtrade_Service"], c_comtrade_ifs_serv["Country"])
)

# Total services trade: read one CSV per year, keeping only the columns used downstream.
service_columns = [
    "refYear", "reporterCode", "reporterDesc", "partnerCode", "partnerDesc", "flowCode",
    "classificationSearchCode", "cmdCode", "cmdDesc", "aggrLevel", "isLeaf",
    "cifvalue", "fobvalue", "primaryValue",
]
yearly_frames = []
for year in range(2000, 2024):
    yearly = pd.read_csv(
        f"data/UNComtrade_TotalService_20241112/Service_EBOPS_{year}.csv",
        low_memory=False,
        usecols=service_columns,
    )
    # Names are stripped of surrounding whitespace before being matched against the dictionary keys.
    yearly["reporterDesc"] = yearly["reporterDesc"].str.strip()
    yearly["partnerDesc"] = yearly["partnerDesc"].str.strip()
    # Keep a row only when both reporter and partner have an IFs counterpart.
    known_reporter = yearly["reporterDesc"].isin(dict_comtrade_ifs_serv)
    known_partner = yearly["partnerDesc"].isin(dict_comtrade_ifs_serv)
    # yearly[known_reporter & known_partner & (yearly.cmdDesc.str.contains("otal"))]
    yearly_frames.append(yearly[known_reporter & known_partner])
df = pd.concat(yearly_frames)
del yearly_frames
# The next line checks the differences between CIF value and primary value:
# (df.cifvalue/df.primaryValue).describe()

In [66]:
# Convert UN Comtrade entity names to IFs country names. The loading cell already
# restricted `df` to rows whose reporter and partner names are keys of the dictionary,
# so a direct dictionary lookup (`map`) yields the same names as `replace`.
df["reporter"] = df["reporterDesc"].map(dict_comtrade_ifs_serv)
df["partner"] = df["partnerDesc"].map(dict_comtrade_ifs_serv)

# The purpose of the rest of this section is to fill in trade values that are only
# reported by one side.
group_keys = ["refYear", "reporter", "partner", "cmdCode"]
pair_keys = ["Year", "Actor", "Partner"]

# Author's notes on the raw data:
#   - no code duplicates between cmdCodes 200 & S
#   - no country duplicates except for 2006 Belarus to Serbia
# Rows sharing year, reporter, partner and cmdCode are summed; min_count=1 keeps a group
# whose values are all missing as NaN instead of turning it into 0.
df_m = (
    df.loc[df["flowCode"] == "M", group_keys + ["primaryValue"]]
    .groupby(group_keys)["primaryValue"]
    .sum(min_count=1)
    .reset_index()
    .drop(columns="cmdCode")
    .rename(columns={"refYear": "Year", "reporter": "Actor", "partner": "Partner",
                     "primaryValue": "Imports"})
)
df_x = (
    df.loc[df["flowCode"] == "X", group_keys + ["primaryValue"]]
    .groupby(group_keys)["primaryValue"]
    .sum(min_count=1)
    .reset_index()
    .drop(columns="cmdCode")
    .rename(columns={"refYear": "Year", "reporter": "Actor", "partner": "Partner",
                     "primaryValue": "Exports"})
)

# cmdCode was part of the grouping but is dropped afterwards, so a pair reported under
# more than one cmdCode would leave duplicate (Year, Actor, Partner) keys. Those would
# multiply rows in the outer merges below and make the later pivot fail, so stop here
# with an explicit message instead.
for flow_name, flow_table in (("imports", df_m), ("exports", df_x)):
    assert not flow_table.duplicated(subset=pair_keys).any(), (
        f"duplicate (Year, Actor, Partner) rows in reported {flow_name}: "
        "a pair appears under more than one cmdCode"
    )

# Use exports as the baseline.
# If A is the reporter in A's exports to B, then it becomes B's imports from A (x_mirror),
# which overwrites B's imports from A (when B is the reporting importer).
# If A is the reporter in A's imports from B, then it becomes B's exports to A (m_mirror),
# which can fill in the nulls in B's exports to A (when B is the reporting exporter).
df_x_mirror = df_x.rename(
    columns={"Actor": "Partner", "Partner": "Actor", "Exports": "Imports_mirror"}
)
df_m_mirror = df_m.rename(
    columns={"Actor": "Partner", "Partner": "Actor", "Imports": "Exports_mirror"}
)
df_x = pd.merge(left=df_x, right=df_m_mirror, on=pair_keys, how="outer")
df_m = pd.merge(left=df_m, right=df_x_mirror, on=pair_keys, how="outer")
# Exports: own report first, partner's mirrored import report as fallback.
df_x["Exports"] = df_x["Exports"].fillna(df_x["Exports_mirror"])
# Imports: partner's mirrored export report first, own report as fallback.
df_m["Imports_mirror"] = df_m["Imports_mirror"].fillna(df_m["Imports"])
df_mx = pd.merge(left=df_x, right=df_m, on=pair_keys, how="outer")
df_mx = df_mx[pair_keys + ["Exports", "Imports_mirror"]].rename(
    columns={"Imports_mirror": "Imports"}
)

In [67]:
# Convert trade values from current USD to constant 2017 USD, using the United States
# row of the WDI GDP deflator table.
# NOTE: this cell overwrites df_mx under the same column names, so running it again
# without re-running the previous cell applies the deflator a second time.
deflator = pd.read_csv("concordance/WDI GDP Deflator 20241119.csv", skiprows=3)
deflator_years = [str(y) for y in range(1995, 2024)]
is_us = deflator["Country Name"] == "United States"
deflator = deflator.loc[is_us, ["Country Name", *deflator_years]].copy(True)

# Wide (one column per year) -> long (one row per year), with integer years so the
# table can be joined to df_mx on "Year".
deflator_long = deflator.melt(id_vars="Country Name", var_name="Year", value_name="Deflator")
deflator_long["Year"] = deflator_long["Year"].astype(int)

# DRatio rescales a year's value to the 2017 price level: deflator(2017) / deflator(year).
base_year_deflator = deflator_long.loc[deflator_long["Year"] == 2017, "Deflator"].values[0]
deflator_long["DRatio"] = base_year_deflator / deflator_long["Deflator"]

df_mx = pd.merge(left=df_mx, right=deflator_long, on="Year", how="left")
df_mx = (
    df_mx.assign(
        Exports_deflate=df_mx["Exports"] * df_mx["DRatio"],
        Imports_deflate=df_mx["Imports"] * df_mx["DRatio"],
    )
    .loc[:, ["Year", "Actor", "Partner", "Exports_deflate", "Imports_deflate"]]
    .rename(columns={"Exports_deflate": "Exports", "Imports_deflate": "Imports"})
)
df_mx

Unnamed: 0,Year,Actor,Partner,Exports,Imports
0,2000,Afghanistan,Belarus,1.375095e+03,2.062643e+04
1,2000,Albania,Austria,3.838578e+06,8.956681e+06
2,2000,Albania,Croatia,5.357375e+06,2.176474e+06
3,2000,Albania,Czech Republic,7.037393e+04,2.670371e+06
4,2000,Albania,Hungary,1.279526e+05,1.919289e+04
...,...,...,...,...,...
78475,2023,United States of America,Spain,,4.594855e+09
78476,2023,United States of America,Sweden,1.208749e+10,1.128693e+10
78477,2023,United States of America,Switzerland,2.490808e+10,1.709001e+10
78478,2023,United States of America,Thailand,3.239702e+09,4.783615e+09


### IFs Format
Convert the data into the IFs format, including adding the `Earliest` and `MostRecent` columns.

In [68]:
# Build the IFs scaffold: one row per ordered (Actor, Partner) pair of distinct IFs
# countries, together with the FIPS code of each side.
ifs_country = (
    pd.read_excel("concordance/ccode_Comtrade_BACI_WITS_20241113.xlsx")[["Country", "FIPS_CODE"]]
    .drop_duplicates()
    .dropna()
)
ifs_fips = dict(zip(ifs_country["Country"], ifs_country["FIPS_CODE"]))

# combinations() yields every unordered pair once; labelling the same pairs with the
# column names swapped supplies the opposite direction of each pair.
unordered_pairs = list(combinations(ifs_country["Country"].unique(), 2))
forward = pd.DataFrame(unordered_pairs, columns=["Actor", "Partner"])
backward = pd.DataFrame(unordered_pairs, columns=["Partner", "Actor"])
df = pd.concat([forward, backward], sort=False)
del forward, backward, unordered_pairs

df = df.assign(
    Actor_FIPS=df["Actor"].map(ifs_fips),
    Partner_FIPS=df["Partner"].map(ifs_fips),
)
df = df[["Actor", "Actor_FIPS", "Partner", "Partner_FIPS"]]
# 188 countries -> 188 * 187 ordered pairs.
assert len(df) == 188 * 187

In [69]:
# Write the services series in IFs layout: the (Actor, Partner) scaffold held in `df`,
# one column per year, then Earliest / MostRecent (the first / last non-missing yearly
# value of each row, NaN when the row has no data at all).
export_serv = df_mx.pivot(index=["Actor", "Partner"], columns="Year", values="Exports").reset_index()
import_serv = df_mx.pivot(index=["Actor", "Partner"], columns="Year", values="Imports").reset_index()

for flow_wide, file_stem in ((export_serv, "SeriesExportsServ"), (import_serv, "SeriesImportsServ")):
    # Left merge keeps every scaffold pair, including pairs with no reported trade.
    dt = pd.merge(left=df, right=flow_wide, on=["Actor", "Partner"], how="left")
    # The scaffold contributes the first four columns; everything after is a year column.
    yearly = dt.iloc[:, 4:]
    # Back-filling along the row moves the first non-missing value into the first column;
    # forward-filling moves the last non-missing value into the last column. This replaces
    # the former per-row Python loop, which also relied on np.NaN (removed in NumPy 2.0).
    dt["Earliest"] = yearly.bfill(axis=1).iloc[:, 0]
    dt["MostRecent"] = yearly.ffill(axis=1).iloc[:, -1]
    dt.to_csv(f"output/IFs Format/{file_stem}.csv", index=False, encoding="latin-1")

In [70]:
### total 2000-2022
# Total trade = goods + services, over the year columns that both series files share.


def combine_goods_and_services(goods, serv):
    """Add a goods table and a services table (both in IFs layout) into a total table.

    Parameters
    ----------
    goods, serv : pandas.DataFrame
        Tables whose first four columns identify the pair (they must include "Actor"
        and "Partner"), followed by year columns and optionally "Earliest" /
        "MostRecent".

    Returns
    -------
    pandas.DataFrame
        The first four columns of `goods`, the summed values for every year column
        present in both inputs (in the column order of `serv`), then "Earliest" and
        "MostRecent": the first and last non-missing yearly total of each row.

    Raises
    ------
    ValueError
        If the two tables have no year column in common.
    """
    pair_keys = ["Actor", "Partner"]
    id_cols = list(goods.columns[:4])
    non_year = set(id_cols) | {"Earliest", "MostRecent"}
    year_cols = [c for c in serv.columns if c in goods.columns and c not in non_year]
    if not year_cols:
        raise ValueError("goods and services tables share no year columns")

    # Align services to goods on (Actor, Partner) rather than on row position, so the
    # sum stays correct even if the two files are not in the same row order.
    goods_values = goods.set_index(pair_keys)[year_cols]
    serv_values = serv.set_index(pair_keys)[year_cols].reindex(goods_values.index)
    # fill_value=0: a value missing on one side counts as 0; missing on both stays NaN.
    total = goods_values.add(serv_values, fill_value=0).reset_index(drop=True)

    combined = pd.concat([goods[id_cols].reset_index(drop=True), total], axis=1)
    # Back-/forward-filling along the row brings the first / last non-missing value to
    # the first / last year column (NaN when the whole row is empty).
    combined["Earliest"] = total.bfill(axis=1).iloc[:, 0]
    combined["MostRecent"] = total.ffill(axis=1).iloc[:, -1]
    return combined


# exports
x_goods = pd.read_csv("output/IFs Format/SeriesExportsGoods.csv", encoding="latin-1")
x_serv = pd.read_csv("output/IFs Format/SeriesExportsServ.csv", encoding="latin-1")
dt = combine_goods_and_services(x_goods, x_serv)
dt.to_csv("output/IFs Format/SeriesExportsTotal.csv", index=False, encoding="latin-1")
# imports
m_goods = pd.read_csv("output/IFs Format/SeriesImportsGoods.csv", encoding="latin-1")
m_serv = pd.read_csv("output/IFs Format/SeriesImportsServ.csv", encoding="latin-1")
dt = combine_goods_and_services(m_goods, m_serv)
dt.to_csv("output/IFs Format/SeriesImportsTotal.csv", index=False, encoding="latin-1")