In [5]:
import pandas as pd
import requests
import time
import os

In [6]:
# Lista de elementos de interés
classification_codes = {
    "Aluminium": 2606,
    
    "Antimony": 261710,
    "Asbestos": 2524,
    "Barytes": 2511,
    "Bismuth": 8106,
    "Cadmium": 8107,
    "Chromium": 2610,
    "Coal": 2701,
    "Cobalt": 810520,
    "Copper": 2603,
    "Gold": 7108,
    "Graphite": 2504,
    "Iron": 2601,
    "Lead": 2607,
    "Lithium": 283691,
    "Magnesite": 251910,

    "Magnesium": 810411,

    "Manganese": 2602,
    "Mercury": 280540,
    "Molybdenum": 2613,
    "Natural gas": 271111,
    "Nickel": 2604,
    "Petroleum": 2709,
    "Rare earth": 2846,
    "Silver": 261610,
    "Tin": 2609,
    "Titanium": 2614,
    "Tungsten": 2611,
    "Uranium": 261210,
    "Zinc": 2608,
    "Zirconium": 261510,
    "Lithium-ion batteries": 850760 # igual que Litio?
}
classification_codes = {key.upper(): val for key, val in classification_codes.items()}
classification_codes_inverted = {val: key.upper() for key, val in classification_codes.items()}

years = range(2000, 2021)


        

In [None]:
for year in years:
    for commodity, commodity_code in classification_codes.items():
        url = f"https://comtradeplus.un.org/api/Trade/getTrade?" \
            f"selectedProductOptionsModified=C&" \
            f"selectedFrequencyOptionsModified=A&" \
            f"selectedClassificationOptionsModified=HS&" \
            f"selectValuePeriodsModified={year}&" \
            f"selectValueReportersModified=all&" \
            f"selectValuePartnersModified=0&" \
            f"selectValueTradeflowsModified=x&" \
            f"selectValueCommodityCodesModified={commodity_code}&" \
            f"selectValueCustomsCodesModified=c00&" \
            f"selectValueTransportCodesModified=0&" \
            f"selectValueSecondPartnersModified=0&" \
            f"selectValueAggregateByModified=none&" \
            f"selectValueBreakdownModeModified=plus&" \
            f"selectValueincludeDescModified=True&" \
            f"selectValuecountOnlyModified=False"

        payload={}
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Referer': f'https://comtradeplus.un.org/TradeFlow?Frequency=A&Flows=X&CommodityCodes={commodity_code}&Partners=0&Reporters=all&period={year}&AggregateBy=none&BreakdownMode=plus'
        }

        try:
            response = requests.get(url=url, headers=headers, data=payload)
            json = response.json()
            pd.json_normalize(json["data"]).to_csv(f"../../data/exports/exports_{year}_{commodity}.csv", index=False)
        except Exception as e:
            print(f"[ERROR][{year}][{commodity_code}] Request: {e}")

        time.sleep(5)

In [14]:
pd.set_option('display.max_columns', None)


In [57]:
# Reference
# https://unstats.un.org/wiki/display/comtrade/New+Comtrade+User+Guide#NewComtradeUserGuide-ParameterCodesList/ReferenceTable
# https://comtradeapi.un.org/files/v1/app/reference/QuantityUnits.json

units = [
    {
      "qtyCode": -1,
      "qtyAbbr": "N/A",
      "qtyDescription": "Not available or not specified or no quantity."
    },
    {
      "qtyCode": 2,
      "qtyAbbr": "m²",
      "qtyDescription": "Area in square meters"
    },
    {
      "qtyCode": 3,
      "qtyAbbr": "1000 kWh",
      "qtyDescription": "Electrical energy in thousands of kilowatt-hours"
    },
    {
      "qtyCode": 4,
      "qtyAbbr": "m",
      "qtyDescription": "Length in meters"
    },
    {
      "qtyCode": 5,
      "qtyAbbr": "u",
      "qtyDescription": "Number of items"
    },
    {
      "qtyCode": 6,
      "qtyAbbr": "2u",
      "qtyDescription": "Number of pairs"
    },
    {
      "qtyCode": 7,
      "qtyAbbr": "l",
      "qtyDescription": "Volume in liters"
    },
    {
      "qtyCode": 8,
      "qtyAbbr": "kg",
      "qtyDescription": "Weight in kilograms"
    },
    {
      "qtyCode": 9,
      "qtyAbbr": "1000u",
      "qtyDescription": "Thousand of items"
    },
    {
      "qtyCode": 10,
      "qtyAbbr": "U (jeu/pack)",
      "qtyDescription": "Number of packages"
    },
    {
      "qtyCode": 11,
      "qtyAbbr": "12u",
      "qtyDescription": "Dozen of items"
    },
    {
      "qtyCode": 12,
      "qtyAbbr": "m³",
      "qtyDescription": "Volume in cubic meters"
    },
    {
      "qtyCode": 13,
      "qtyAbbr": "carat",
      "qtyDescription": "Weight in carats"
    },
    {
      "qtyCode": 14,
      "qtyAbbr": "km",
      "qtyDescription": "Length in Kilometers"
    },
    {
      "qtyCode": 15,
      "qtyAbbr": "g",
      "qtyDescription": "Weight in grams"
    },
    {
      "qtyCode": 16,
      "qtyAbbr": "hive",
      "qtyDescription": "Beehive"
    },
    {
      "qtyCode": 17,
      "qtyAbbr": "1000 m³",
      "qtyDescription": "Volume in thousand cubic meters"
    },
    {
      "qtyCode": 18,
      "qtyAbbr": "TJ",
      "qtyDescription": "Terajoule (gross calorific value)"
    },
    {
      "qtyCode": 19,
      "qtyAbbr": "BBL",
      "qtyDescription": "Barrels"
    },
    {
      "qtyCode": 20,
      "qtyAbbr": "1000 L",
      "qtyDescription": "Volume in thousands of liters"
    },
    {
      "qtyCode": 21,
      "qtyAbbr": "1000 KG",
      "qtyDescription": "Weight in thousand of kilograms"
    },
    {
      "qtyCode": 22,
      "qtyAbbr": "kWH",
      "qtyDescription": "Electrical energy in kilowatt-hours"
    },
    {
      "qtyCode": 23,
      "qtyAbbr": "l alc 100%",
      "qtyDescription": "Litre pure (100 %) alcohol - l alc. 100%"
    },
    {
      "qtyCode": 24,
      "qtyAbbr": "head",
      "qtyDescription": "Head"
    },
    {
      "qtyCode": 25,
      "qtyAbbr": "kg/net eda",
      "qtyDescription": "Kilogram drained net weight"
    },
    {
      "qtyCode": 26,
      "qtyAbbr": "kg C5H14ClNO",
      "qtyDescription": "Kilogram of choline chloride"
    },
    {
      "qtyCode": 27,
      "qtyAbbr": "kg P2O5",
      "qtyDescription": "Kilogram of diphosphorus pentaoxide"
    },
    {
      "qtyCode": 28,
      "qtyAbbr": "kg H2O2",
      "qtyDescription": "Kilogram of hydrogen peroxide"
    },
    {
      "qtyCode": 29,
      "qtyAbbr": "kg met.am.",
      "qtyDescription": "Kilogram of methylamines"
    },
    {
      "qtyCode": 30,
      "qtyAbbr": "kg N",
      "qtyDescription": "Kilogram of nitrogen"
    },
    {
      "qtyCode": 31,
      "qtyAbbr": "kg KOH",
      "qtyDescription": "Kilogram of potassium hydroxide (caustic potash)"
    },
    {
      "qtyCode": 32,
      "qtyAbbr": "kg K2O",
      "qtyDescription": "Kilogram of potassium oxide"
    },
    {
      "qtyCode": 33,
      "qtyAbbr": "kg NaOH",
      "qtyDescription": "Kilogram of sodium hydroxide (caustic soda)"
    },
    {
      "qtyCode": 34,
      "qtyAbbr": "kg 90% sdt",
      "qtyDescription": "Kilogram of substance 90 % dry"
    },
    {
      "qtyCode": 35,
      "qtyAbbr": "kg U",
      "qtyDescription": "Kilogram of uranium"
    },
    {
      "qtyCode": 36,
      "qtyAbbr": "ct/l",
      "qtyDescription": "Carrying capacity in tonnes"
    },
    {
      "qtyCode": 37,
      "qtyAbbr": "Bq",
      "qtyDescription": "Becquerels"
    },
    {
      "qtyCode": 38,
      "qtyAbbr": "gi F/S",
      "qtyDescription": "Gram of fissile isotopes"
    },
    {
      "qtyCode": 39,
      "qtyAbbr": "GRT",
      "qtyDescription": "Gross register ton"
    },
    {
      "qtyCode": 40,
      "qtyAbbr": "GT",
      "qtyDescription": "Gross tonnage"
    },
    {
      "qtyCode": 41,
      "qtyAbbr": "ce/el",
      "qtyDescription": "Number of cells/elements"
    }
  ]

units_df = pd.DataFrame(units)
units_df

Unnamed: 0,qtyCode,qtyAbbr,qtyDescription
0,-1,,Not available or not specified or no quantity.
1,2,m²,Area in square meters
2,3,1000 kWh,Electrical energy in thousands of kilowatt-hours
3,4,m,Length in meters
4,5,u,Number of items
5,6,2u,Number of pairs
6,7,l,Volume in liters
7,8,kg,Weight in kilograms
8,9,1000u,Thousand of items
9,10,U (jeu/pack),Number of packages


## Let's union them into a only one file

In [74]:
# dd = pd.read_csv(f"../../data/exports/exports_2000_ANTIMONY.csv")
# # # dd[~dd["qtyUnitAbbr"].isna()]
# # # Remove NaNs entries in unit column but consider "world" as well
# # valid_rows = dd["qtyUnitAbbr"].str.lower().str.contains("kg") == True
# # dd[valid_rows]
# # # Check that world units are kilograms, if not convert it
# # # dd["qtyUnitAbbr"].isna()

# # Worry about NANs entries in "qtyUnitAbbr" column. Remember to add unit to the rest of data for join process
# valid_rows = ~dd["qtyUnitAbbr"].isna()
# dd = dd[valid_rows]
# # then, worry about find factors in the units like "2u" that means "two units".
# # Multiply the netweigth for that factor
# rows_wFactor = dd["qtyUnitAbbr"].str.contains("1|2|3|4|5|6|7|8|9|0").sum()
# rows_wFactor.sum()


18

In [75]:
list_files = os.listdir("../../data/exports")
# Take 1st CSV as reference and then attach the rest of files
df_exports = pd.read_csv(f"../../data/exports/{list_files[0]}")
df_exports["Product"] = classification_codes_inverted[df_exports.iloc[0]["cmdCode"]] # Create a product column

for file in list_files[1:]:
    if file == "exports_consolidated_file.csv":
        continue
    try:
        df_tmp = pd.read_csv(f"../../data/exports/{file}")
        # Worry about NANs entries in "qtyUnitAbbr" column. Remember to add unit to the rest of data for join process
        valid_rows = ~df_tmp["qtyUnitAbbr"].isna()
        df_tmp = df_tmp[valid_rows]
        # then, worry about find factors in the units like "2u" that means "two units".
        # Multiply the netweigth for that factor
        rows_wFactor = df_tmp["qtyUnitAbbr"].str.contains("1|2|3|4|5|6|7|8|9|0")
        if rows_wFactor.sum() > 0:
            raise ValueError("Check this file. It has factors in its units")
        
        df_tmp["Product"] = classification_codes_inverted[df_tmp.iloc[0]["cmdCode"]] # Create a product column
        df_exports = pd.concat([df_exports, df_tmp], ignore_index=True)        
    except Exception as e:
        print("=" * 100)
        print(f"[ERROR][UNION] An issue happened for file: {file} ::: {e}")

# Save consolidated dataframe into a CSV
df_exports.to_csv("../../data/exports/exports_consolidated_file.csv", index=False)

[ERROR][UNION] An issue happened for file: exports_2000_COBALT.csv ::: No columns to parse from file
[ERROR][UNION] An issue happened for file: exports_2000_LITHIUM-ION BATTERIES.csv ::: No columns to parse from file
[ERROR][UNION] An issue happened for file: exports_2001_COBALT.csv ::: No columns to parse from file
[ERROR][UNION] An issue happened for file: exports_2001_LITHIUM-ION BATTERIES.csv ::: No columns to parse from file
[ERROR][UNION] An issue happened for file: exports_2002_LITHIUM-ION BATTERIES.csv ::: No columns to parse from file
[ERROR][UNION] An issue happened for file: exports_2003_LITHIUM-ION BATTERIES.csv ::: No columns to parse from file
[ERROR][UNION] An issue happened for file: exports_2004_LITHIUM-ION BATTERIES.csv ::: No columns to parse from file
[ERROR][UNION] An issue happened for file: exports_2005_LITHIUM-ION BATTERIES.csv ::: No columns to parse from file
[ERROR][UNION] An issue happened for file: exports_2006_LITHIUM-ION BATTERIES.csv ::: No columns to pa