In [1]:
# This program requires the following Python packages:
# pip install pandas 
# pip install wbgapi
# pip install openpyxl
import pandas as pd
import wbgapi as wb
import openpyxl

In [2]:
# This program also relies on a number of functions in a separate file called DataFunctions.py.
# That file contains the following functions:
#     GetDatWB(WBcode, years) - extracts data from the World Bank using an indicator code and year/s. This is used to get data like GDP, remittances etc.
#     GetDatIMFex(filename, indicator, BOPType, years) - extracts receipts and payments from IMF Balance of Payments data downloaded into an Excel file (e.g., workers' compensation).
#         The IMF does have an API, but I could not get it to work.
#     DatFill(df, mappingfile, fill) - fills a dataframe (df) using another data series, fill (e.g., fill remittances using GDP).
#     DatFillEq(Paid, Rec, mappingfile, fill) - fills two dataframes and ensures they are equal. For instance, remittances
#         paid and received by country are filled using a data series named fill and are also scaled to ensure payments equal receipts.
#         Note this function has 3 outputs - updated payments, receipts and the scale. The closer the scale is to 1, the less scaling was done to make sure payments equal receipts.
#     DatAgg(df, mappingfile) - aggregates a dataframe to GTAP using a mapping file (used below; parameter names inferred from its calls - confirm in DataFunctions.py).
import DataFunctions as DF

In [3]:
# Reference year for the data; it is passed as [year] to every data-extraction call below.
year = 2019
# TODO: ideally the year would be supplied by the user instead of being hard-coded, e.g.:
# year = int(input("Please enter the year: "))

In [4]:
# Population for the chosen year, pulled from the World Bank API
# (indicator SP.POP.TOTL). Used further down as the fill series for GDP.
POP = DF.GetDatWB('SP.POP.TOTL', [year])

<class 'pandas.DataFrame'>
Index: 265 entries, ABW to ZWE
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2019    265 non-null    float64
dtypes: float64(1)
memory usage: 4.1+ KB


In [5]:
# GDP for the chosen year, pulled from the World Bank API
# (indicator NY.GDP.MKTP.CD).
GDP = DF.GetDatWB('NY.GDP.MKTP.CD', [year])

<class 'pandas.DataFrame'>
Index: 262 entries, ABW to ZWE
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2019    262 non-null    float64
dtypes: float64(1)
memory usage: 4.1+ KB


In [6]:
# Fill GDP, using population (POP) as the fill series, so that tiny countries
# are not left without a value.
# NOTE(review): this call uses 'GTAPMap.xlsx' as the mapping file, whereas the
# DatFillEq calls below use 'Mappings.xlsx' - confirm this is intended.
# The name GDP is rebound here, so re-running this cell on its own passes the
# already-filled GDP back into DatFill.
GDP = DF.DatFill(
    GDP,
    'GTAPMap.xlsx',
    POP,
)

In [7]:
# Workers' compensation payments (BOPType 'DB_T') read from the IMF Balance of
# Payments Excel download.
WCompPaid = DF.GetDatIMFex(
    filename='dataset_2026-02-11WComp.xlsx',
    indicator='Compensation of employees',
    BOPType='DB_T',
    years=[year],
)

<class 'pandas.DataFrame'>
Index: 181 entries, USA to DEU
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2019    178 non-null    float64
dtypes: float64(1)
memory usage: 2.8+ KB


In [8]:
# Workers' compensation receipts (BOPType 'CD_T') read from the same IMF
# Balance of Payments Excel download as the payments series.
WCompRec = DF.GetDatIMFex(
    filename='dataset_2026-02-11WComp.xlsx',
    indicator='Compensation of employees',
    BOPType='CD_T',
    years=[year],
)

<class 'pandas.DataFrame'>
Index: 183 entries, DNK to NGA
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2019    181 non-null    float64
dtypes: float64(1)
memory usage: 2.9+ KB


In [9]:
# Fill the workers' compensation series using GDP as the fill series and scale
# them so that receipts equal payments. DatFillEq returns the updated payments,
# the updated receipts and the scale that was applied.
# The input names are rebound, so re-running this cell on its own feeds the
# already-filled series back into DatFillEq.
WCompPaid, WCompRec, scale = DF.DatFillEq(
    WCompPaid,
    WCompRec,
    'Mappings.xlsx',
    GDP,
)
# Report the scale: the closer it is to 1, the less adjustment was needed.
print("This is how much scaling was required (1 means no scaling):", scale)

This is how much scaling was required (1 means no scaling): 0.9764899166511319


In [10]:
# Aggregate both workers' compensation series to GTAP with the same mapping file.
# The names are rebound, so re-running this cell on its own passes the
# already-aggregated frames back into DatAgg.
WCompPaid, WCompRec = (
    DF.DatAgg(frame, 'GTAPMap.xlsx') for frame in (WCompPaid, WCompRec)
)

In [11]:
# Primary income payments for the chosen year, pulled from the World Bank API
# (indicator BM.GSR.FCTY.CD).
PIPaid = DF.GetDatWB('BM.GSR.FCTY.CD', [year])

<class 'pandas.DataFrame'>
Index: 202 entries, ABW to ZWE
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2019    202 non-null    float64
dtypes: float64(1)
memory usage: 3.2+ KB


In [12]:
# Primary income receipts for the chosen year, pulled from the World Bank API
# (indicator BX.GSR.FCTY.CD).
PIRec = DF.GetDatWB('BX.GSR.FCTY.CD', [year])

<class 'pandas.DataFrame'>
Index: 202 entries, ABW to ZWE
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2019    202 non-null    float64
dtypes: float64(1)
memory usage: 3.2+ KB


In [13]:
# Fill the primary income series using GDP as the fill series and scale them so
# that receipts equal payments. DatFillEq returns the updated payments, the
# updated receipts and the scale that was applied.
# Note: `scale` is the same name used by the other fill cells, so it always
# holds the value from whichever fill cell ran last.
PIPaid, PIRec, scale = DF.DatFillEq(
    PIPaid,
    PIRec,
    'Mappings.xlsx',
    GDP,
)
# Report the scale: the closer it is to 1, the less adjustment was needed.
print("This is how much scaling was required (1 means no scaling):", scale)

This is how much scaling was required (1 means no scaling): 1.005409619011683


In [14]:
# Aggregate both primary income series to GTAP with the same mapping file.
# The names are rebound, so re-running this cell on its own passes the
# already-aggregated frames back into DatAgg.
PIPaid, PIRec = (
    DF.DatAgg(frame, 'GTAPMap.xlsx') for frame in (PIPaid, PIRec)
)

In [15]:
# Remittances received for the chosen year, pulled from the World Bank API
# (indicator BX.TRF.PWKR.CD.DT).
RemRec = DF.GetDatWB('BX.TRF.PWKR.CD.DT', [year])

<class 'pandas.DataFrame'>
Index: 245 entries, ABW to ZWE
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2019    245 non-null    float64
dtypes: float64(1)
memory usage: 3.8+ KB


In [None]:
# Remittances paid for the chosen year, pulled from the World Bank API
# (indicator BM.TRF.PWKR.CD.DT).
RemPaid = DF.GetDatWB('BM.TRF.PWKR.CD.DT', [year])

In [None]:
# Fill the remittances series using GDP as the fill series and scale them so
# that receipts equal payments. DatFillEq returns the updated payments, the
# updated receipts and the scale that was applied.
# Note: `scale` is the same name used by the other fill cells, so it always
# holds the value from whichever fill cell ran last.
RemPaid, RemRec, scale = DF.DatFillEq(
    RemPaid,
    RemRec,
    'Mappings.xlsx',
    GDP,
)
# Report the scale: the closer it is to 1, the less adjustment was needed.
print("This is how much scaling was required (1 means no scaling):", scale)

In [None]:
# Aggregate both remittances series to GTAP with the same mapping file.
# The names are rebound, so re-running this cell on its own passes the
# already-aggregated frames back into DatAgg.
RemPaid, RemRec = (
    DF.DatAgg(frame, 'GTAPMap.xlsx') for frame in (RemPaid, RemRec)
)

In [None]:
# Aid received for the chosen year, pulled from the World Bank API
# (indicator DT.ODA.ODAT.CD).
AidRec = DF.GetDatWB('DT.ODA.ODAT.CD', [year])

In [None]:
# Aid paid (provided) for the chosen year, pulled from the World Bank API
# (indicator DC.ODA.TOTL.CD).
AidPaid = DF.GetDatWB('DC.ODA.TOTL.CD', [year])

In [None]:
# Fill the aid series using GDP as the fill series and scale them so that
# receipts equal payments. DatFillEq returns the updated payments, the updated
# receipts and the scale that was applied.
# Note: `scale` is the same name used by the other fill cells, so it always
# holds the value from whichever fill cell ran last.
AidPaid, AidRec, scale = DF.DatFillEq(
    AidPaid,
    AidRec,
    'Mappings.xlsx',
    GDP,
)
# Report the scale: the closer it is to 1, the less adjustment was needed.
print("This is how much scaling was required (1 means no scaling):", scale)

In [None]:
# Aggregate both aid series to GTAP with the same mapping file.
# The names are rebound, so re-running this cell on its own passes the
# already-aggregated frames back into DatAgg.
AidPaid, AidRec = (
    DF.DatAgg(frame, 'GTAPMap.xlsx') for frame in (AidPaid, AidRec)
)

In [None]:
# Primary income includes workers' compensation, so workers' compensation is
# subtracted from primary income here; the remainder is stored as InvPaid / InvRec
# (presumably the investment/capital-related part of foreign income - confirm).
# pandas aligns the operands on their labels, so a label present in only one
# of the two frames yields NaN in the result.
InvPaid = PIPaid.sub(WCompPaid)
InvRec = PIRec.sub(WCompRec)

In [None]:
# Export the results to MyGTAPoutput.xlsx, one worksheet per series.
# The mapping keeps the sheet order: remittances, investment income, aid.
sheets = {
    "RemPaid": RemPaid,
    "RemRec": RemRec,
    "InvPaid": InvPaid,
    "InvRec": InvRec,
    "AidPaid": AidPaid,
    "AidRec": AidRec,
}
with pd.ExcelWriter("MyGTAPoutput.xlsx", engine="openpyxl") as writer:
    for sheet_name, frame in sheets.items():
        # index=True keeps the row labels in the first column of each sheet
        frame.to_excel(writer, sheet_name=sheet_name, index=True)

# TODO: look into HARPY to see whether this output can also be written to a GEMPACK .har file
# Example conceptual code (refer to HARPY documentation for exact function calls)
# 1. Define your data (e.g., a simple 2D array)
#data_array = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
# 2. Define dimensions and variable names
# GEMPACK uses short (up to 4 chars) names for variables (e.g., 'WELF')
#variable_name = 'WELF'
#dimensions = ['REG', 'COMM'] 
# 3. Use HARPY to write the data to a file
# The library handles generating the necessary GEMPACK headers
#try:
    # Example function call (check HARPY docs for exact usage)
#    harpy.write_har(filename="output.har", data={variable_name: data_array}, dimensions={variable_name: dimensions})
#    print("Successfully wrote data to output.har")
#except Exception as e:
#    print(f"An error occurred: {e}")