In [None]:
from nbdev import *

%nbdev_default_export retrieval

Cells will be exported to istatapi.retrieval,
unless a different module is specified after an export flag: `%nbdev_export special.module`


In [None]:
%nbdev_hide
%load_ext autoreload
%autoreload 2

In [None]:
%nbdev_export
from istatapi.discovery import DataSet
from istatapi.base import ISTAT
import pandas as pd
import io

# Retrieval

> Functions used to retrieve data from ISTAT datasets.

In [None]:
# First segment of the request path assembled in get_data().
RESOURCE = "data"
# Hand-written sample filter set; no function visible in this cell reads it.
# NOTE(review): every value after the first already carries a leading "."
# separator, whereas make_url_key() inserts that separator itself, so this
# dict does not look usable as the `filters` argument of get_data() -- confirm
# whether it is still needed.
FILTERS = {
    "FREQ": "A",
    "CITTADINANZA": ".TOTAL",
    "DURATA_DISOCCUPAZ": ".TOTAL",
    "CLASSE_ETA": ".Y15-24+Y25-34+Y35-44+Y45-54+Y55-64",
    "ITTER107": ".IT",
    "SESSO": ".9",
    "TIPO_DATO": ".",
    "TITOLO_STUDIO": ".99",
}
# TODO: get_data() only requests "text/csv"; accept a JSON response as well (?)


def get_data(dataset: DataSet, filters: dict, **kwargs):
    """Download the observations of ``dataset`` selected by ``filters``.

    The request path is ``<RESOURCE>/<dataflow id>/<key>``, where the key is
    produced by ``make_url_key(filters)``. The response is requested as CSV
    and parsed into a DataFrame.

    Args:
        dataset: dataset to query; only ``dataset.identifiers["df_id"]`` is
            read here.
        filters: mapping of dimension name to selection, in the order the
            key positions must appear (see ``make_url_key``).
        **kwargs: currently unused; accepted and ignored.

    Returns:
        pandas.DataFrame built from the CSV body of the response.
    """
    # TODO: make filters easier
    flow_ref = dataset.identifiers["df_id"]
    key = make_url_key(filters)
    path = "/".join([RESOURCE, flow_ref, key])

    # Ask explicitly for CSV so the body can be handed to pandas as text.
    response = ISTAT()._request(path, headers={"Accept": "text/csv"})
    return pd.read_csv(io.StringIO(response.text))


def set_filters(dataset: DataSet, **kwargs):
    """Build the filter mapping for ``dataset``, one entry per dimension.

    Every dimension listed in ``dataset.dimensions`` starts as ``"."``
    (no restriction, i.e. all values -- not recommended for large datasets).
    ``set_default_values`` is then given one chance to refine the mapping,
    and finally the keyword arguments override individual dimensions.

    Args:
        dataset: dataset whose ``dimensions`` define the keys and their order.
        **kwargs: ``dimension_name=value`` overrides. Names are upper-cased
            before lookup; a value may be a single code or a list of codes.

    Returns:
        dict mapping each dimension name to its selection, in dimension order.

    Raises:
        ValueError: if a keyword does not match any dimension of the dataset.
            Accepting it would append an extra position to the URL key built
            from this mapping.
    """
    # TODO: set values to 'TOTAL' where possible
    filters = {dimension: "." for dimension in dataset.dimensions}

    # Called once on the complete mapping rather than once per dimension.
    filters = set_default_values(dataset, filters)

    for arg, arg_value in kwargs.items():
        dimension = arg.upper()
        if dimension not in filters:
            raise ValueError(
                f"Unknown dimension {dimension!r}; "
                f"available dimensions: {list(filters)}"
            )
        filters[dimension] = arg_value

    return filters


def make_url_key(filters: dict):
    """Turn a filter mapping into the dot-separated key of a data request.

    Each entry of ``filters`` contributes one position to the key, in
    iteration order, and positions are separated by ``"."``:

    * ``"."``, ``None`` or an empty selection leaves the position empty;
    * a list or tuple of codes is joined with ``"+"``;
    * any other value is used as-is (converted with ``str``).

    An unrestricted position is always emitted as an empty string, including
    the first one, so the key has exactly ``len(filters) - 1`` separators.

    Args:
        filters: mapping of dimension name to selection; only the values and
            their order are used.

    Returns:
        The key string, e.g. ``"A.TOTAL..99"``; ``""`` for an empty mapping.
    """
    segments = []
    for filter_value in filters.values():
        if isinstance(filter_value, (list, tuple)):
            filter_value = "+".join(str(code) for code in filter_value)
        # "." is the caller-facing marker for "no restriction"; in the key
        # itself that is represented by an empty position.
        if filter_value is None or filter_value == ".":
            segments.append("")
        else:
            segments.append(str(filter_value))

    return ".".join(segments)

def set_default_values(dataset, filters):
    """Hook for replacing unrestricted filters with dataset-specific defaults.

    Placeholder: no default values are applied yet, so ``filters`` is
    returned unchanged.

    Args:
        dataset: dataset the filters belong to (currently unused).
        filters: mapping of dimension name to selection.

    Returns:
        The same ``filters`` mapping that was passed in.
    """
    # TODO: set values to 'TOTAL' where possible
    return filters

In [None]:
# Example query against dataflow "151_914": one selection per dimension.
# Dimensions that are not passed to set_filters() keep its "." default.
freq = "A"
cittadinanza = "TOTAL"
durata_disoccupaz = "TOTAL"
# A list of codes is joined with "+" by make_url_key().
classe_eta = ["Y15-24", "Y25-34", "Y35-44", "Y45-54", "Y55-64"]
itter107 = "IT"
sesso = "9"
titolo_studio = "99"

dataset = DataSet(dataflow_identifier="151_914")
# Keyword names are upper-cased by set_filters() to form the dimension keys.
filters = set_filters(
    dataset,
    freq=freq,
    cittadinanza=cittadinanza,
    durata_disoccupaz=durata_disoccupaz,
    classe_eta=classe_eta,
    itter107=itter107,
    sesso=sesso,
    titolo_studio=titolo_studio,
)
# get_data() performs the request and parses the CSV body into a DataFrame.
unempl_df = get_data(dataset, filters=filters)
# Last expression of the cell: shown as the cell output.
unempl_df.columns

{'FREQ': 'A', 'CITTADINANZA': 'TOTAL', 'DURATA_DISOCCUPAZ': 'TOTAL', 'CLASSE_ETA': ['Y15-24', 'Y25-34', 'Y35-44', 'Y45-54', 'Y55-64'], 'ITTER107': 'IT', 'SESSO': '9', 'TIPO_DATO': '.', 'TITOLO_STUDIO': '99'}
.TOTAL
.TOTAL
.Y15-24+Y25-34+Y35-44+Y45-54+Y55-64
.IT
.9
.99
A.TOTAL.TOTAL.Y15-24+Y25-34+Y35-44+Y45-54+Y55-64.IT.9..99


Index(['DATAFLOW', 'FREQ', 'CITTADINANZA', 'DURATA_DISOCCUPAZ', 'CLASSE_ETA',
       'ITTER107', 'SESSO', 'TIPO_DATO', 'TITOLO_STUDIO', 'TIME_PERIOD',
       'OBS_VALUE', 'BREAK', 'CONF_STATUS', 'OBS_PRE_BREAK', 'OBS_STATUS',
       'BASE_PER', 'UNIT_MEAS', 'UNIT_MULT', 'METADATA_EN', 'METADATA_IT'],
      dtype='object')