This is an auxiliary script to the main scripts that calculate the consumption intensity of the products in each of the different Baskets of Products (BoPs) within the project on the consumption footprint of the average European citizen by country.

It contains five functions that perform a number of operations (e.g. filtering names, checking for repeated product codes, converting country codes from one system to another, etc.) that take place before actually downloading the data from EUROSTAT (for FAO data a different set of functions is used). These functions are therefore never meant to be run on their own, but from within other functions.

### Loading packages

In addition to the Python 3.3 built-in packages, some general-purpose packages, such as *pandas*, and some specific packages, such as [*eurostat*](https://pypi.org/project/eurostat/), have to be loaded too.

In [3]:
import os
import sys
import pandas as pd
import eurostat as estat
from collections import Counter



In [1]:
def ds_conversion(DS, col_name):

    """
    Provide the configuration needed to download and process EUROSTAT and FAO data: the correspondence
    between the naming systems of FAO and of the EUROSTAT data sets PRODCOM (DS-066341) and COMEXT
    (DS-016890, plus its "Comext_Bulk" variant), the download filters, the columns to drop, aggregate
    or combine, the units conversion factors, etc.

    :param DS : data set identifier: "DS-066341", "DS-016890", "Comext_Bulk" or "FAO". Not every table
    has an entry for every identifier ("FAO_BS" only exists under "UNIT", and "Comext_Bulk" has no
    "FILTERS" entry).
    :type DS : str.
    :param col_name : reference to the information of interest. Either the name of one of the tables
    ("FILTERS", "COUNTRY", "COUNTRY_CHANGE", "DROP_COLS", "AGGREGATE", "COMBINE", "UNIT", "COLNAME"),
    or a standard (PRODCOM) column name such as "DECL" or "PRCCODE", in which case the name of the
    equivalent column in the data set DS is returned.
    :type col_name : str.
    :return : the information required.
    :rtype : list/dict/str.
    :raises KeyError : if DS has no entry in the requested table, or if col_name is neither a table
    name nor a column name known for DS.

    """
    # The country dictionaries are obtained through the eurostat package, so they are only requested
    # when the caller actually asks for them. Every other look-up below is served from literals.

    if col_name == "COUNTRY":

        if DS == "DS-066341":

            return estat.get_sdmx_dic("DS-066341", "DECL")

        if DS in ("DS-016890", "Comext_Bulk"):

            # Both COMEXT variants share the title-cased REPORTER dictionary of DS-016890.
            return {k: str.title(v) for k, v in estat.get_sdmx_dic("DS-016890", "REPORTER").items()}

        if DS == "FAO":

            return {}

        raise KeyError(DS)

    # The correspondence between PRODCOM columns names and COMEXT and FAO columns names.

    col_dict = {"DS-066341" : {"DECL" : "DECL", "PRCCODE" : "PRCCODE"},
                "DS-016890" : {"DECL" : "REPORTER", "PRCCODE" : "PRODUCT"},
                "Comext_Bulk" : {"DECL" : "DECLARANT_ISO", "PRCCODE" : "PRODUCT_NC", "PARTNER" : "PARTNER_ISO"},
                "FAO" : {"DECL" : "Area", "PRCCODE" : "Item Code", "INDICATORS" : "Element",
                         "EXPQNT" : "Export Quantity", "IMPQNT" : "Import Quantity", "EXPVAL" : "Export Value",
                         "IMPVAL" : "Import Value", "CONSQNT" : "Food supply quantity (tonnes)", "PRODQNT" : "Production"}
               }

    # Columns to be dropped.

    cols_to_drop = {"DS-066341" : ["FREQ"],
                    "DS-016890" : ["FREQ"],
                    "Comext_Bulk" : ["SUPP_UNIT"],
                    "FAO" : ["Area Code", "Element Code", "Year Code", "Flag", "Unit", "Item"]}

    # Filters to be used when downloading data from EUROSTAT data sets. Each of the filters entries will be
    # filled out with the corresponding information.

    filters_dict = {"DS-066341" : {"FREQ" : ['A',], "PRCCODE" : [], "DECL" : [], "INDICATORS" : ["EXPQNT", "EXPVAL", "IMPQNT", "IMPVAL", "PRODQNT", "PRODVAL"]},
                    "DS-016890" : {"FREQ" : ['A',], "PRODUCT" : [], "PARTNER" : [], "INDICATORS" : ["QUANTITY_IN_100KG", "VALUE_IN_EUROS"], "FLOW" : ["1", "2"], "REPORTER" : []},
                    "FAO" : {}}

    # Correspondence between FAO, COMEXT and PRODCOM countries naming system.

    country_change = {"DS-066341" : {},
                      "DS-016890" : {"Belgium" : 'Belgium (And Luxbg -> 1998)', "Germany" : 'Germany (Incl Dd From 1991)',
                                     "Czechia" : 'Czechia (Cs->1992)', "Luxemburg" : 'Luxembourg', "EUROPEAN UNION (28)" : "Eu28_Intra"},
                      "Comext_Bulk" : {"Belgium" : 'Belgium (And Luxbg -> 1998)', "Germany" : 'Germany (Incl Dd From 1991)',
                                       "Czechia" : 'Czechia (Cs->1992)', "Luxemburg" : 'Luxembourg', "EUROPEAN UNION (28)" : "Eu28_Intra"},
                      "FAO" : {"Luxemburg" : "Luxembourg"}
                     }

    # List of the columns to be aggregated in each data set. I.e. if two rows share the values in the given columns
    # the figures in the remaining ones shall be added up.

    agg_dict = {"DS-066341" : [],
                "DS-016890" : ["DECL", "FLOW", "INDICATORS", "PRCCODE", "Year"],
                "Comext_Bulk" : ["DECL", "FLOW", "INDICATORS", "PRCCODE", "Year"],
                "FAO" : []}

    # In some cases, the information that in a data set is displayed in one column is divided into two columns
    # in other data set. In those cases, this dictionary provides the information to create the required
    # columns to match those in PRODCOM data frames.

    combine_dict = {"DS-066341" : [],
                    "DS-016890" : [{"STD_COL" : "INDICATORS", "DS_COL" : ["FLOW", "INDICATORS"],
                                    "RPL" : {"1_QUANTITY_IN_100KG" : "IMPQNT",
                                             "1_VALUE_IN_EUROS" : "IMPVAL",
                                             "2_QUANTITY_IN_100KG" : "EXPQNT",
                                             "2_VALUE_IN_EUROS" : "EXPVAL"}}],
                    "Comext_Bulk" : [{"STD_COL" : "INDICATORS", "DS_COL" : ["FLOW", "INDICATORS"],
                                      "RPL" : {"1_QUANTITY_IN_KG" : "IMPQNT",
                                               "1_VALUE_IN_EUROS" : "IMPVAL",
                                               "2_QUANTITY_IN_KG" : "EXPQNT",
                                               "2_VALUE_IN_EUROS" : "EXPVAL"}}],
                    "FAO" : []}

    # Units conversion factors.

    units_dict = {"DS-066341" : {},
                  "DS-016890" : {"EXPQNT" : 100, "EXPVAL" : 1, "IMPQNT" : 100, "IMPVAL" : 1, "PRODQNT" : 100, "PRODVAL" : 1},
                  "Comext_Bulk" : {"EXPQNT" : 1, "EXPVAL" : 1, "IMPQNT" : 1, "IMPVAL" : 1, "PRODQNT" : 1, "PRODVAL" : 1},
                  "FAO" : {"CONSQNT" : 1000, "PRODQNT" : 1000, "IMPQNT" : 1000, "EXPQNT" : 1000, "EXPVAL" : 1, "IMPVAL" : 1},
                  "FAO_BS" : {"CONSQNT" : 1000000, "PRODQNT" : 1000000, "IMPQNT" : 1000000, "EXPQNT" : 1000000, "EXPVAL" : 1, "IMPVAL" : 1}}

    # Correspondence between the table names accepted as col_name and the dictionaries above
    # ("COUNTRY" has already been handled at the top of the function).

    dict_names = {"FILTERS" : filters_dict, "COUNTRY_CHANGE" : country_change, "DROP_COLS" : cols_to_drop,
                  "AGGREGATE" : agg_dict, "COMBINE" : combine_dict, "UNIT" : units_dict, "COLNAME" : col_dict}

    if col_name in dict_names:

        return dict_names[col_name][DS]

    # Anything else is taken as a standard column name to be translated for the data set DS.
    return col_dict[DS][col_name]

In [11]:
def check_repeated(x):

    """Find the elements that appear more than once in a list.

    :param x : the list to be inspected.
    :type x : list
    :return : the repeated elements, each of them listed only once (empty if there are none).
    :rtype : list

    When repeated elements are found, a message listing them is printed.

    """

    duplicates = []

    for value, occurrences in Counter(x).items():

        if occurrences != 1:

            duplicates.append(value)

    if duplicates:

        print("These elements are repeated: ",duplicates)

    return duplicates
    

In [12]:
def check_belong(look_for_list, look_in_list):

    """Check whether the elements of a list are present in another list.

    :param look_for_list : the list of elements to be checked.
    :type look_for_list : list
    :param look_in_list : the list of elements where the elements of "look_for_list" will be looked for.
    :type look_in_list : list
    :return : a list with the elements present in "look_for_list" but not in "look_in_list", in the
    order (and with the repetitions) they have in "look_for_list".
    :rtype : list

    When missing elements are found, a message listing them is printed.

    """

    # A single pass against look_in_list is enough; there is no need to build the list of the
    # elements that were found first.
    elm_not_in_list = [elm for elm in look_for_list if elm not in look_in_list]

    if elm_not_in_list:

        print("These elements do not belong to the DS: ",elm_not_in_list)

    return elm_not_in_list

In [13]:
def check_dataset(x, y):

    """Check whether the elements of a list are repeated and whether they are present in another list.

    :param x : elements to be checked.
    :type x : list.
    :param y : elements where the elements of x will be looked for.
    :type y : list
    :return : a dictionary with the repeated elements of x under the key "Repeated" and the elements
    of x not present in y under the key "NotInY". Each list is empty when its check finds nothing.
    :rtype : dict

    The function only reports: it neither exits nor raises when problems are found, so it is up to
    the caller to decide what to do with a non-empty result.

    """

    ch_rep = check_repeated(x)

    ch_bel = check_belong(x, y)

    # Each finding is reported on its own. Requiring both checks to fail before filling in the
    # result would hide the case in which only one of them does.
    check_result = {"Repeated" : ch_rep, "NotInY" : ch_bel}

    return check_result

In [7]:
def ccode(ccode_list, DS):

    """Translate country codes into country names, or country names into country codes.

    :param ccode_list : list of countries or country codes.
    :type ccode_list : list.
    :param DS : data set whose country dictionary (as given by ds_conversion(DS, "COUNTRY")) is used.
    :type DS : str.
    :return : a list of countries or country codes, depending on the input. Elements that cannot be
    translated are returned as None.
    :rtype : list.

    The direction of the translation is decided for the whole list: as soon as one element is a key
    of the country dictionary, every element is looked up in that dictionary; otherwise every
    element is looked up in the inverted dictionary.

    """

    forward = ds_conversion(DS, "COUNTRY")

    backward = {value: key for key, value in forward.items()}

    # Look for the first element that is a key of the country dictionary.
    found_key = False

    for entry in ccode_list:

        if entry in forward:

            found_key = True

            break

    lookup = forward if found_key else backward

    return [lookup.get(entry) for entry in ccode_list]