In [2]:
import pandas as pd
import requests
import time
import os
import json

In [3]:
# Index the files that are already on disk. Each name is cut at ".csv", split on "_"
# and the leading prefix field is dropped, so "imports_<year>_<commodity>_<country>.csv"
# becomes the tuple (year, commodity, country).
list_files = {
    tuple(file_name.split(".csv")[0].split("_")[1:])
    for file_name in os.listdir("../../data/imports")
}
len(list_files)

117922

In [5]:
df_exports = pd.read_csv("../../data/hhi_data/df_hhi_exports.csv")
# One {"reporterCode": ..., "reporterDesc": ...} dict per distinct reporter. The frame is
# round-tripped through JSON (orient='records') so the result is a plain list of dicts.
countries_list = json.loads(
    df_exports.loc[:, ["reporterCode", "reporterDesc"]]
    .drop_duplicates()
    .to_json(orient='records')
)

# Commodity name -> commodity code used in the Comtrade requests. Keys are stored upper-cased.
# NOTE(review): Magnesite and Magnesium share code 251910 - confirm this is intended.
classification_codes = {
    name.upper(): code
    for name, code in (
        ("Antimony", 261710),
        ("Asbestos", 2524),
        ("Barytes", 2511),
        ("Bismuth", 8106),
        ("Cadmium", 8107),
        ("Chromium", 2610),
        ("Coal", 2701),
        ("Cobalt", 810520),
        ("Copper", 2603),
        ("Gold", 7108),
        ("Graphite", 2504),
        ("Iron", 2601),
        ("Lead", 2607),
        ("Lithium", 283691),
        ("Magnesite", 251910),
        ("Magnesium", 251910),
        ("Manganese", 2602),
        ("Mercury", 280540),
        ("Molybdenum", 2613),
        ("Natural gas", 271111),
        ("Nickel", 2604),
        ("Petroleum", 2709),
        ("Rare earth", 2846),
        ("Silver", 261610),
        ("Tin", 2609),
        ("Titanium", 2614),
        ("Tungsten", 2611),
        ("Uranium", 261210),
        ("Zinc", 2608),
        ("Zirconium", 261510),
        ("Lithium-ion batteries", 850760),  # same as Lithium?
    )
}

# Reverse lookup: code -> upper-cased commodity name. Codes that appear more than once
# keep only the last name, so 251910 resolves to "MAGNESIUM" and "MAGNESITE" is lost.
classification_codes_inverted = {
    code: name for name, code in classification_codes.items()
}

# Years covered by the download: 2000 through 2020 inclusive.
years = range(2000, 2021)

In [6]:
# Enumerate every (year, commodity, country) triple that is expected to exist on disk.
# The year is stored as a string so the triples compare equal to the ones parsed
# from file names.
possible_list_files = set()

for year in years:
    for commodity, commodity_code in classification_codes.items():
        for country in countries_list:
            possible_list_files.add((str(year), commodity, country['reporterDesc']))

len(possible_list_files)


124992

In [7]:
# Expected triples that have no file on disk yet are the ones still to be fetched.
files_to_download = [*possible_list_files.difference(list_files)]
len(files_to_download)

7070

In [14]:
# Split the pending downloads into chunks of len // 4 items and pickle each chunk to
# its own file. When the total is not a multiple of 4, the remainder is written as an
# extra, shorter chunk (index 4).
import pickle

# open() below fails with FileNotFoundError when the target folder does not exist.
os.makedirs("list_files", exist_ok=True)

indx = 0
# With fewer than 4 pending files len // 4 is 0 and range() would raise
# "ValueError: range() arg 3 must not be zero"; fall back to one item per chunk.
size_list = max(1, len(files_to_download) // 4)
for i in range(0, len(files_to_download), size_list):
    with open(f"list_files/files_to_download_{indx}.txt", "wb") as fp:
        pickle.dump(files_to_download[i:i + size_list], fp)
    indx += 1

### Get distinct countries/codes in the export file

In [2]:
import pickle

# Read the n-th pickled sub-list of (year, commodity, country) triples.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
indx = 4
with open(f"list_files/files_to_download_{indx}.txt", "rb") as fp:
    sublist_files_to_download = pickle.load(fp)

# Report the index that was actually loaded (the label used to be hard-coded to 0).
print(f"Len of list {indx}: ", len(sublist_files_to_download))


# indx = 3
# with open(f"list_files/files_to_download_{indx}.txt", "rb") as fp:
#     sublist_files_to_download += pickle.load(fp)

# print("Len of list [0] + [1]: ", len(sublist_files_to_download))


Len of list 0:  3


In [8]:
# classification_codes["INDIA"]
def get_country_code(country, countries=None):
    """Return the reporter code for a country name.

    Args:
        country: Country name; compared case-insensitively (via ``upper()``)
            against each entry's ``reporterDesc``.
        countries: Optional iterable of dicts with ``reporterCode`` and
            ``reporterDesc`` keys. When omitted, the notebook-level
            ``countries_list`` is used, which is the original behavior.

    Returns:
        The ``reporterCode`` of the first matching entry, or -1 when no
        entry matches.
    """
    if countries is None:
        countries = countries_list

    # Upper-case the query once instead of on every comparison.
    wanted = country.upper()
    return next(
        (
            country_info["reporterCode"]
            for country_info in countries
            if country_info["reporterDesc"].upper() == wanted
        ),
        -1,
    )

In [9]:
# Only for last step: fetch every file that is still missing instead of a single
# pickled sub-list.
sublist_files_to_download = files_to_download

for year, commodity, country in sublist_files_to_download:
    # Resolve both codes first. commodity_code is assigned here on every iteration so
    # the messages below report the commodity being processed, not a value left behind
    # by a loop in another cell.
    commodity_code = classification_codes.get(commodity)
    country_code = get_country_code(country)
    if commodity_code is None or country_code == -1:
        # Nothing sensible can be requested without both codes; log and move on.
        print(f"[ERROR][TOP] {year}, {commodity}, {commodity_code}, {country}: [unknown commodity or country]")
        continue

    url = (
        "https://comtradeplus.un.org/api/Trade/getTrade?"
        "selectedProductOptionsModified=C&"
        "selectedFrequencyOptionsModified=A&"
        "selectedClassificationOptionsModified=HS&"
        f"selectValuePeriodsModified={year}&"
        f"selectValueReportersModified={country_code}&"
        "selectValuePartnersModified=all&"
        "selectValueTradeflowsModified=m&"
        f"selectValueCommodityCodesModified={commodity_code}&"
        "selectValueCustomsCodesModified=c00&"
        "selectValueTransportCodesModified=0&"
        "selectValueSecondPartnersModified=0&"
        "selectValueAggregateByModified=none&"
        "selectValueBreakdownModeModified=plus&"
        "selectValueincludeDescModified=True&"
        "selectValuecountOnlyModified=False"
    )

    payload = {}
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Referer': f'https://comtradeplus.un.org/TradeFlow?Frequency=A&Flows=X&CommodityCodes={commodity_code}&Partners=0&Reporters=all&period={year}&AggregateBy=none&BreakdownMode=plus'
    }

    try:
        # The timeout keeps one stalled connection from blocking the whole run.
        response = requests.get(url=url, headers=headers, data=payload, timeout=60)
        # Surface HTTP errors explicitly instead of a bare KeyError on "data".
        response.raise_for_status()
        # Not named "json": that would rebind the imported json module for every
        # cell executed afterwards.
        response_data = response.json()
        pd.json_normalize(response_data["data"]).to_csv(f"../../data/imports/imports_{year}_{commodity}_{country}.csv", index=False)
    except Exception as e:
        # Best effort: log the failure and continue with the next file.
        print(f"[ERROR][{country}][{year}][{commodity_code}] Request: {e}")

    # time.sleep(3)  # uncomment to throttle the requests
        

[ERROR][United Kingdom][2014][850760] Request: 'data'
[ERROR][Jamaica][2011][850760] Request: 'data'
[ERROR][Malta][2019][850760] Request: 'data'
[ERROR][Mauritius][2009][850760] Request: 'data'
[ERROR][Jordan][2004][850760] Request: 'data'
[ERROR][Greece][2013][850760] Request: 'data'
[ERROR][India][2016][850760] Request: 'data'
[ERROR][Mauritania][2000][850760] Request: 'data'
[ERROR][Spain][2011][850760] Request: 'data'
[ERROR][Eswatini][2002][850760] Request: 'data'


In [4]:
# List of items of interest: request the import flows for every country, year and
# commodity, and store each response as its own CSV file.
for country in countries_list:
    for year in years:
        for commodity, commodity_code in classification_codes.items():
            url = (
                "https://comtradeplus.un.org/api/Trade/getTrade?"
                "selectedProductOptionsModified=C&"
                "selectedFrequencyOptionsModified=A&"
                "selectedClassificationOptionsModified=HS&"
                f"selectValuePeriodsModified={year}&"
                f"selectValueReportersModified={country['reporterCode']}&"
                "selectValuePartnersModified=all&"
                "selectValueTradeflowsModified=m&"
                f"selectValueCommodityCodesModified={commodity_code}&"
                "selectValueCustomsCodesModified=c00&"
                "selectValueTransportCodesModified=0&"
                "selectValueSecondPartnersModified=0&"
                "selectValueAggregateByModified=none&"
                "selectValueBreakdownModeModified=plus&"
                "selectValueincludeDescModified=True&"
                "selectValuecountOnlyModified=False"
            )

            payload = {}
            headers = {
                'Accept': 'application/json, text/plain, */*',
                'Referer': f'https://comtradeplus.un.org/TradeFlow?Frequency=A&Flows=X&CommodityCodes={commodity_code}&Partners=0&Reporters=all&period={year}&AggregateBy=none&BreakdownMode=plus'
            }

            try:
                # The timeout keeps one stalled connection from blocking the whole run.
                response = requests.get(url=url, headers=headers, data=payload, timeout=60)
                # Surface HTTP errors explicitly instead of a bare KeyError on "data".
                response.raise_for_status()
                # Not named "json": that would rebind the imported json module for
                # every cell executed afterwards.
                response_data = response.json()
                pd.json_normalize(response_data["data"]).to_csv(f"../../data/imports/imports_{year}_{commodity}_{country['reporterDesc']}.csv", index=False)
            except Exception as e:
                # Include the country so a failed request can be identified and retried.
                print(f"[ERROR][{country['reporterDesc']}][{year}][{commodity_code}] Request: {e}")

            # Pause between requests.
            time.sleep(5)
            

[ERROR][2003][261710] Request: 'data'
[ERROR][2010][2604] Request: 'data'
[ERROR][2010][2604] Request: 'data'
[ERROR][2015][2611] Request: HTTPSConnectionPool(host='comtradeplus.un.org', port=443): Max retries exceeded with url: /api/Trade/getTrade?selectedProductOptionsModified=C&selectedFrequencyOptionsModified=A&selectedClassificationOptionsModified=HS&selectValuePeriodsModified=2015&selectValueReportersModified=251&selectValuePartnersModified=all&selectValueTradeflowsModified=m&selectValueCommodityCodesModified=2611&selectValueCustomsCodesModified=c00&selectValueTransportCodesModified=0&selectValueSecondPartnersModified=0&selectValueAggregateByModified=none&selectValueBreakdownModeModified=plus&selectValueincludeDescModified=True&selectValuecountOnlyModified=False (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000261797EAFC8>: Failed to establish a new connection: [WinError 10060] Se produjo un error durante el intento de conexión ya que la parte c

## Let's merge them into a single file

In [48]:
# Consolidate the individual export CSVs into one file.
exports_dir = "../../data/exports"
consolidated_name = "exports_consolidated_file.csv"

list_files = os.listdir(exports_dir)
# Leave out the output of a previous run so it is never used as the reference frame
# or appended to itself, and sort so the row order does not depend on os.listdir().
source_files = sorted(name for name in list_files if name != consolidated_name)

# Collect the frames and concatenate once at the end; concatenating inside the loop
# copies the accumulated frame on every iteration.
frames = []
for file in source_files:
    try:
        df_tmp = pd.read_csv(f"{exports_dir}/{file}")
        # Create a product column from the cmdCode of the first row.
        df_tmp["Product"] = classification_codes_inverted[df_tmp.iloc[0]["cmdCode"]]
        frames.append(df_tmp)
    except Exception as e:
        print("=" * 100)
        print(f"[ERROR][UNION] An issue happened for file: {file}")
        # Show why the file was skipped (for example an empty file or an unknown code).
        print(f"[ERROR][UNION] Reason: {type(e).__name__}: {e}")

if frames:
    df_exports = pd.concat(frames, ignore_index=True)
    # Save consolidated dataframe into a CSV
    df_exports.to_csv(f"{exports_dir}/{consolidated_name}", index=False)
else:
    print("[ERROR][UNION] No export file could be read; nothing was written")

[ERROR][UNION] An issue happened for file: exports_2000_COBALT.csv
[ERROR][UNION] An issue happened for file: exports_2000_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2001_COBALT.csv
[ERROR][UNION] An issue happened for file: exports_2001_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2002_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2003_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2004_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2005_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2006_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2007_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2008_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exports_2009_LITHIUM-ION BATTERIES.csv
[ERROR][UNION] An issue happened for file: exp