In [1]:
import pandas as pd
import numpy as np
import sqlite3
from collections import namedtuple
import copy
import time
import os
from itertools import combinations

## IFs
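- Actors are exporters, except in the `SeriesImports*` tables, where actors are importers.
- The DataDict abbreviates sector suffixes inconsistently across series (e.g. `Ener` vs. `En`, `Manu` vs. `Man`), so table names are matched on several suffix lengths.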

In [2]:
# Read the IFs data dictionary and collect the names of the sector-level
# dyadic series tables that the rest of the notebook regenerates.
conn = sqlite3.connect('D:/IFsDyadSeries/DyadicHistSeriesAndDataDict 20250204/DataDictDyadic.db')
try:
    dd = pd.read_sql_query("SELECT * from DataDict", conn)
finally:
    # Release the database file even when the query raises.
    conn.close()
Sectors = ["Agri", "Manu", "Mate", "ICT", "Ener", "Goods"]
# Table names end with the full sector name or with its 3- or 2-letter
# prefix, so all three suffix forms are accepted. A table is listed once per
# sector it matches; the length assertion below catches accidental double
# matches.
tb_list = sorted(
    tb
    for s in Sectors
    for tb in dd.Table
    if tb.endswith((s, s[:3], s[:2]))
)
assert len(tb_list) == 5*6, (
    f"expected 30 sector tables (5 series x 6 sectors), found {len(tb_list)}"
)

### Data Frame

In [3]:
# Load the country concordance and keep one row per distinct
# (Country, FIPS_CODE) pair, discarding rows where either value is missing.
ifs_country = (
    pd.read_excel("concordance/ccode_Comtrade_BACI_WITS_20241113.xlsx")
    .loc[:, ["Country", "FIPS_CODE"]]
    .drop_duplicates()
    .dropna()
)
# Lookup from country name to FIPS code (a later row wins on a repeated name).
ifs_fips = {
    country: fips
    for country, fips in zip(ifs_country.Country, ifs_country.FIPS_CODE)
}

In [4]:
# Build the dyad skeleton: every ordered (Actor, Partner) pair of distinct
# countries, together with each side's FIPS code.
country_names = ifs_country.Country.unique()
pairs_fwd = pd.DataFrame(list(combinations(country_names, 2)))
pairs_fwd.columns = ["Actor", "Partner"]
# The same unordered pairs with the two roles swapped.
pairs_rev = pairs_fwd.rename(columns={"Actor": "Partner", "Partner": "Actor"})
df = pd.concat([pairs_fwd, pairs_rev], sort=False)
del pairs_fwd, pairs_rev
df = df.assign(
    Actor_FIPS=df["Actor"].map(ifs_fips),
    Partner_FIPS=df["Partner"].map(ifs_fips),
)
df = df.loc[:, ["Actor", "Actor_FIPS", "Partner", "Partner_FIPS"]]
# 188 countries -> 188 * 187 ordered pairs.
assert df.shape[0] == 188*187

### TCI

In [5]:
y_start = 1995
y_end = 2022
# Collect the TCI files. A file is loaded once for every year in
# [y_start, y_end] whose digits occur anywhere in its file name, and the
# matching year is stamped onto the loaded rows.
tci_list = []
for f in os.listdir("output/tci_ifscountry"):
    matched_years = [y for y in range(y_start, y_end + 1) if str(y) in f]
    for y in matched_years:
        tci_list.append(
            pd.read_csv(f"output/tci_ifscountry/{f}", encoding="latin-1").assign(Year=y)
        )
tci = pd.concat(tci_list)
del tci_list

# Per-sector values come from the `TCIExAImBPctS` column.
tci_s = (
    tci.loc[:, ["Year", 'Exporter', 'Importer', 'IFsSector', 'TCIExAImBPctS']]
    .rename(columns={"TCIExAImBPctS": "TCI"})
)
# The `TCIExAImBPctW` column is de-duplicated per (Year, Exporter, Importer,
# value) and labelled as the "Goods" sector.
tci_w = (
    tci.loc[:, ["Year", 'Exporter', 'Importer', 'TCIExAImBPctW']]
    .drop_duplicates()
    .assign(IFsSector="Goods")
    .rename(columns={"TCIExAImBPctW": "TCI"})
)
# Stack both and switch to the Actor/Partner naming used by the dyad skeleton.
tci = (
    pd.concat([tci_s, tci_w], sort=False)
    .rename(columns={"Exporter": "Actor", "Importer": "Partner"})
)
del tci_s, tci_w

In [33]:
# Write one IFs-format CSV per TCI table: the dyad skeleton `df`, one column
# per year, plus the earliest and most recent non-missing value of each row.
for tb in tb_list:
    if "TCI" not in tb:
        continue
    sec = tb.replace("SeriesTCI", "")
    tci_s = tci[tci.IFsSector==sec].pivot(index=["Actor","Partner"], columns = "Year", values="TCI").reset_index()
    dt = pd.merge(left=df, right=tci_s, on=["Actor","Partner"], how="left")
    # Columns from position 4 onward are the year columns (ascending order
    # from the pivot). Back-/forward-filling along each row moves the first /
    # last non-missing value into the first / last column, replacing the slow
    # per-row Python loop.
    year_values = dt.iloc[:, 4:]
    if year_values.shape[1]:
        earliest = year_values.bfill(axis=1).iloc[:, 0]
        most_recent = year_values.ffill(axis=1).iloc[:, -1]
    else:
        # No data at all for this sector: both summary columns are missing.
        earliest = most_recent = np.nan
    dt["Earliest"] = earliest
    dt["MostRecent"] = most_recent
    dt.to_csv(f"output/IFs Format/{tb}.csv", index=False, encoding="latin-1")
del tci, tci_s

### Rev, WtTariff, Trade

In [34]:
# Load the yearly trade / weighted-tariff / tariff-revenue files, stamp each
# with its year, and stack them into a single long table.
trw = pd.concat(
    [
        pd.read_csv(
            f"output/trade_wt_rev_2017usd_ifscountry/{y}.csv", encoding="latin-1"
        ).assign(Year=y)
        for y in range(y_start, y_end + 1)
    ]
)
# Use the same sector label ("Goods") and Actor/Partner naming as the other
# series in this notebook.
trw["IFsSector"] = trw["IFsSector"].replace({"AllGoods": "Goods"})
trw = trw.rename(columns={"Exporter": "Actor", "Importer": "Partner"})

In [35]:
# Rev
# Write one IFs-format CSV per TariffRev table: the dyad skeleton `df`, one
# column per year, plus the earliest and most recent non-missing value.
for tb in tb_list:
    if "TariffRev" not in tb:
        continue
    sec = tb.replace("SeriesTariffRev", "")
    rev_s = trw[trw.IFsSector==sec].pivot(index=["Actor","Partner"], columns = "Year", values="Rev").reset_index()
    dt = pd.merge(left=df, right=rev_s, on=["Actor","Partner"], how="left")
    # Columns from position 4 onward are the year columns (ascending order
    # from the pivot). Back-/forward-filling along each row moves the first /
    # last non-missing value into the first / last column, replacing the slow
    # per-row Python loop.
    year_values = dt.iloc[:, 4:]
    if year_values.shape[1]:
        earliest = year_values.bfill(axis=1).iloc[:, 0]
        most_recent = year_values.ffill(axis=1).iloc[:, -1]
    else:
        # No data at all for this sector: both summary columns are missing.
        earliest = most_recent = np.nan
    dt["Earliest"] = earliest
    dt["MostRecent"] = most_recent
    dt.to_csv(f"output/IFs Format/{tb}.csv", index=False, encoding="latin-1")
del rev_s

In [36]:
# WtTariff
# Write one IFs-format CSV per WtTariff table: the dyad skeleton `df`, one
# column per year, plus the earliest and most recent non-missing value.
for tb in tb_list:
    if "WtTariff" not in tb:
        continue
    sec = tb.replace("SeriesWtTariff", "")
    wt_s = trw[trw.IFsSector==sec].pivot(index=["Actor","Partner"], columns = "Year", values="WtTariff").reset_index()
    dt = pd.merge(left=df, right=wt_s, on=["Actor","Partner"], how="left")
    # Columns from position 4 onward are the year columns (ascending order
    # from the pivot). Back-/forward-filling along each row moves the first /
    # last non-missing value into the first / last column, replacing the slow
    # per-row Python loop.
    year_values = dt.iloc[:, 4:]
    if year_values.shape[1]:
        earliest = year_values.bfill(axis=1).iloc[:, 0]
        most_recent = year_values.ffill(axis=1).iloc[:, -1]
    else:
        # No data at all for this sector: both summary columns are missing.
        earliest = most_recent = np.nan
    dt["Earliest"] = earliest
    dt["MostRecent"] = most_recent
    dt.to_csv(f"output/IFs Format/{tb}.csv", index=False, encoding="latin-1")
del wt_s

In [37]:
# trade
# Write one IFs-format CSV per Exports / Imports table from the "Trade"
# column of `trw`. Table-name suffixes are abbreviated, so `sec_dict` maps
# them back to the sector labels used in `trw.IFsSector`.
sec_dict = {"En" : "Ener", "Ag":"Agri", "Man": "Manu", "Mat":"Mate", "ICT": "ICT", "Goods":"Goods"}
for tb in tb_list:
    if "Exports" in tb:
        # In Export tables, actors are exporters
        series_prefix, actor_is_importer = "SeriesExports", False
    elif "Imports" in tb:
        # In Import tables, actors are importers
        series_prefix, actor_is_importer = "SeriesImports", True
    else:
        continue
    sec = sec_dict[tb.replace(series_prefix, "")]
    trade_s = trw[trw.IFsSector==sec].pivot(index=["Actor","Partner"], columns = "Year", values="Trade").reset_index()
    if actor_is_importer:
        # `trw` is keyed exporter -> importer; swap the roles so that the
        # Actor column holds the importer.
        trade_s = trade_s.rename(columns={"Actor":"Partner", "Partner":"Actor"})
    dt = pd.merge(left=df, right=trade_s, on=["Actor","Partner"], how="left")
    # Columns from position 4 onward are the year columns (ascending order
    # from the pivot). Back-/forward-filling along each row moves the first /
    # last non-missing value into the first / last column, replacing the slow
    # per-row Python loop.
    year_values = dt.iloc[:, 4:]
    if year_values.shape[1]:
        earliest = year_values.bfill(axis=1).iloc[:, 0]
        most_recent = year_values.ffill(axis=1).iloc[:, -1]
    else:
        # No data at all for this sector: both summary columns are missing.
        earliest = most_recent = np.nan
    dt["Earliest"] = earliest
    dt["MostRecent"] = most_recent
    dt.to_csv(f"output/IFs Format/{tb}.csv", index=False, encoding="latin-1")
del trade_s, trw

### To DB file
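- Run this cell once all of the IFs-formatted CSV files in `output/IFs Format/` are ready. It reuses `tb_list` from the DataDict cell near the top of the notebook.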

In [5]:
# Tables to (re)load: the sector tables from `tb_list` plus four tables whose
# CSVs are not written by this notebook's cells and must already exist in
# output/IFs Format/.
tb_list_all = [*tb_list, *["SeriesExportsTotal", 'SeriesImportsTotal', "SeriesExportsServ", 'SeriesImportsServ']]
# took less than 2mins
conn = sqlite3.connect('D:/IFsDyadSeries/DyadicHistSeriesAndDataDict 20250204/IFsHistSeriesDyadic.db')
try:
    cursor = conn.cursor()
    for tb in tb_list_all:
        dt = pd.read_csv(f"output/IFs Format/{tb}.csv", encoding="latin-1")
        # Four fixed identifier columns followed by one DOUBLE column for every
        # remaining CSV column. Joining the definitions keeps the statement
        # valid even when the CSV has no data columns.
        column_defs = [
            "Actor VARCHAR (255)",
            "Actor_FIPS VARCHAR (255)",
            "Partner VARCHAR (255)",
            "Partner_FIPS VARCHAR (255)",
            *(f"[{c}] DOUBLE(53)" for c in dt.columns[4:]),
        ]
        sql_drop_table = f"DROP TABLE IF EXISTS [{tb}];"
        sql_create_table = f"CREATE TABLE [{tb}] ({', '.join(column_defs)});"
        # Recreate the table from scratch, then bulk-insert the CSV rows.
        cursor.execute(sql_drop_table)
        cursor.execute(sql_create_table)
        dt.to_sql(name=f'{tb}', con=conn, if_exists="append", index=False)
        conn.commit()
finally:
    # Release the database file even if a CSV is missing or an insert fails.
    conn.close()
#
# Refresh the data dictionary: remove the rows describing the reloaded tables
# and append the contents of the Excel workbook.
# NOTE(review): every workbook row is appended, but only rows for tables in
# tb_list_all are deleted first. If the workbook also describes other tables,
# those rows will be duplicated in DataDict -- confirm the workbook's scope.
dd = pd.read_excel("DataDict Trade Dyadic 20250204.xlsx")
conn = sqlite3.connect('D:/IFsDyadSeries/DyadicHistSeriesAndDataDict 20250204/DataDictDyadic.db')
try:
    cursor = conn.cursor()
    sql_del_row_dd = 'DELETE FROM DataDict WHERE "Table" = ?;'
    for tb in tb_list_all:
        cursor.execute(sql_del_row_dd, (tb,))
    dd.to_sql(name='DataDict', con=conn, if_exists="append", index=False)
    conn.commit()
finally:
    # Closing without a commit discards a half-finished delete/append.
    conn.close()