# BOExplorer
### Summary fetch and diary entries extraction

#### 1. Imports

In [1]:
import inspect
import functools
import datetime

from helpers import boe
from helpers import helpers
from helpers import boe_diary_processing as diary

#### 2. Helper functions

In [2]:
# Connection settings injected into every function decorated with
# ``with_db_settings``.
# NOTE(review): the credentials are hard-coded; consider loading them from the
# environment or a config file before sharing this notebook.
db_host = 'localhost'
db_database = 'boe'
db_user = 'root'
db_password = 'pass'

def with_db_settings(f):
    """Decorate ``f`` so it always receives the database connection settings.

    The returned wrapper forwards all positional arguments untouched and adds
    the keyword arguments ``host``, ``database``, ``user`` and ``password``,
    taken from the module-level ``db_*`` variables at call time. These values
    take precedence over same-named keyword arguments supplied by the caller.

    Args:
        f: The callable to wrap.

    Returns:
        A wrapper that calls ``f`` with the merged keyword arguments and
        returns whatever ``f`` returns.
    """
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # Build a fresh dict so the caller's kwargs are never mutated; the
        # settings come last so they override caller-supplied values.
        keyword_args = {
            **kwargs,
            'host': db_host,
            'database': db_database,
            'user': db_user,
            'password': db_password,
        }
        # The wrapper needs its own name: reusing ``f`` would shadow the
        # decorated function and make this call recurse into the wrapper.
        return f(*args, **keyword_args)
    return wrapper

def get_entries_for_date(date:datetime.datetime):
    """Return a tuple with the diary entries found in the summary for ``date``.

    ``date`` is handed to ``helpers.pipe`` together with the processing stages
    listed below, in order; the last stage materialises the result as a tuple.
    """
    stages = (
        boe.summary_url_for_date,          # date -> summary URL
        helpers.fetch_page,                # URL -> response
        helpers.tree_from_response,        # response -> tree
        diary.get_sections,
        diary.get_departments_per_section,
        diary.get_items_per_department,
        diary.get_details_per_item,
        tuple,
    )

    return helpers.pipe(date, *stages)

#### 3. Imported processing functions

! All processing functions in the `helpers/boe_diary_processing.py` module have names that start with `get_`.

In [3]:
# Print the source code of every attribute of the ``diary`` module whose name
# starts with 'get_', separated by newlines.
# NOTE(review): assumes ``helpers.pipe`` calls each stage with the previous
# result as its single positional argument — confirm in helpers.
helpers.pipe(
    dir(diary),
    lambda names: filter(lambda name: name.startswith('get_'), names),
    helpers.partialmap(lambda name: vars(diary)[name]),
    helpers.partialmap(inspect.getsource),
    '\n'.join,
    print
)

def get_departments_per_section(sections) -> Generator:
    """Lazily produce one triple per department found in each section.

    ``sections`` is an iterable of ``(section_number, section)`` pairs. Every
    section is searched with ``boe.SummaryXpath.department`` and each match
    yields ``(section_number, department_name, department)``, where the name
    is read from the ``boe.SummaryAttribute.department_name`` attribute of
    the matched department.
    """
    return (
        (number, dept.get(boe.SummaryAttribute.department_name), dept)
        for number, section_node in sections
        for dept in helpers.use_tree_for_search(section_node)(boe.SummaryXpath.department)
    )

def get_details_per_item(items) -> Generator:
    """Lazily map every entry of ``items`` to its details.

    Each entry is unpacked into the positional arguments of
    ``get_item_details``; nothing is computed until the result is iterated.
    """
    return (get_item_details(*entry) for entry in items)

def get_item_details(section_number:str, department_name:str, node) -> Dict:
    """Look up the detail nodes of a single summary item.

    Searches ``node`` for its title node and its PDF, XML and HTM URL nodes,
    and checks whether the parent of ``node`` is an ``epigrafe`` element.

    NOTE(review): this listing appears to be cut off — nothing is returned
    despite the ``Dict`` annotation and the final assignment looks garbled.
    Confirm against ``helpers/boe_diary_processing.py``.
    """
    # Search callable bound to this item's node.
    search_details = helpers.use_tree_for_search(node)
    # Keep only the first result of each search.
    title_node = search_details(boe.SummaryXpath.item_title)[0]
    pdf_url_node = search_details(boe.SummaryXpath.item_pdf_url)[0]
    xml_url_node = search_details(boe.SummaryXpath.item_xml_url)[0]
    htm_url_node = search_details(boe.SummaryXpath.item_htm_url)[0]
    
    parent = node.getparent()
    # Case-insensitive check of the parent's tag name.
    is_epigraph = parent.tag.lower() == 'epigrafe'
    # NOTE(review): as written this only rebinds ``parent`` to itself and
    # aliases it as ``epigraph``; presumably part of a longer expression.
    epigraph = parent = parent

#### 4. Execution

In [4]:
# Fetch the entries for 30 January 2021 and show the first one.
date = datetime.datetime(year=2021, month=1, day=30)
entries = get_entries_for_date(date)
entries[0]

{'id': 'BOE-A-2021-1345',
 'epigraph': 'Tabaco. Precios',
 'section': '1',
 'department': 'MINISTERIO DE HACIENDA',
 'title': 'Resolución de 29 de enero de 2021, de la Presidencia del Comisionado para el Mercado de Tabacos, por la que se publican los precios de venta al público de determinadas labores de tabaco en Expendedurías de Tabaco y Timbre del área de Península y Baleares.',
 'pdf_url': '/boe/dias/2021/01/30/pdfs/BOE-A-2021-1345.pdf',
 'xml_url': '/diario_boe/xml.php?id=BOE-A-2021-1345',
 'htm_url': '/diario_boe/txt.php?id=BOE-A-2021-1345'}