Skip to content

Commit

Permalink
Merge pull request #10 from PabloCastellano/config
Browse files Browse the repository at this point in the history
Config
  • Loading branch information
PabloCastellano committed Mar 11, 2017
2 parents 1212777 + 9f6aed3 commit 0585850
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 32 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ Version 0.3.0 [unreleased]
- Parser thread-safe (issue #7)
- Nombres de actos repetidos en el mismo anuncio (issue #3)
- Usar requests en lugar de urllib
- Archivo de configuración ~/.bormecfg
- Mejoras en el parser
- Añadidos 4 nuevos actos y 41 cargos directivos
- Borme.to_json ahora permite especificar un path (archivo o directorio) en lugar de solo archivo
- Borme._set_url evita conexión a Internet si existe BORME-XML
- Sociedades y registros tienen su propio módulo
- Funciones de limpieza de datos en bormeparser.clean
- Incluye nombre del R.M. en BORME-JSON
Expand Down
1 change: 1 addition & 0 deletions bormeparser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .acto import ACTO
from .borme import Borme, BormeXML
from .cargo import CARGO
from .config import CONFIG
from .download import download_xml, download_pdfs, download_pdf
from .download import get_url_pdf, get_url_pdfs, get_url_xml
from .emisor import EMISOR
Expand Down
12 changes: 9 additions & 3 deletions bormeparser/borme.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,19 @@
from .acto import ACTO
#from .download import download_pdf
from .download import get_url_pdf, URL_BASE, get_url_xml, download_url, download_urls_multi, USE_HTTPS
from .download import download_urls_multi_names
from .download import download_urls_multi_names, get_url_pdf_from_xml
#from .exceptions import BormeInvalidActoException
from .exceptions import BormeAlreadyDownloadedException, BormeAnuncioNotFound, BormeDoesntExistException
from .regex import is_acto_cargo, is_acto_noarg
#from .parser import parse as parse_borme
from .seccion import SECCION
from .provincia import Provincia, PROVINCIA
from .utils import get_borme_xml_filepath

import datetime
import logging
import json
import os
import os.path
import re
import requests
import six
Expand Down Expand Up @@ -425,7 +427,11 @@ def _set_anuncios(self, anuncios):
self.anuncios_rango = (min(self.anuncios.keys()), max(self.anuncios.keys()))

def _set_url(self):
    """Resolve the URL of this BORME's PDF and store it in ``self._url``.

    If the BORME-XML file for ``self.date`` already exists at the path
    returned by ``get_borme_xml_filepath``, the URL is taken from that
    local file via ``get_url_pdf_from_xml``. Otherwise it is obtained
    with ``get_url_pdf``.
    """
    xml_path = get_borme_xml_filepath(self.date)
    if os.path.isfile(xml_path):
        # Prefer the local BORME-XML. The changelog states this path exists
        # to avoid an Internet connection, so get_url_pdf must not be
        # called unconditionally before this check.
        self._url = get_url_pdf_from_xml(self.date, self.seccion, self.provincia, xml_path)
    else:
        self._url = get_url_pdf(self.date, self.seccion, self.provincia)

@property
def url(self):
Expand Down
40 changes: 40 additions & 0 deletions bormeparser/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# bormeparser.config.py -
# Copyright (C) 2017 Pablo Castellano <pablo@anche.no>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os.path

try:
    # Python 3
    import configparser
except ImportError:
    # Python 2: the module has a different name but offers the same
    # read()/has_section()/items() calls used below.
    import ConfigParser as configparser

# Per-user configuration file and the values used when it is absent or
# does not define a given option.
CONFIG_FILE = os.path.expanduser("~/.bormecfg")
DEFAULTS = {
    'borme_root': os.path.expanduser("~/.bormes")
}


def load_config(path=CONFIG_FILE, parser=None):
    """Return the effective settings as a plain ``dict``.

    The result starts as a copy of ``DEFAULTS``. If *path* is an existing
    file and it contains a ``[general]`` section, the options of that
    section override the defaults.

    :param path: configuration file to read.
    :param parser: optional ``ConfigParser`` instance to read into; a fresh
        one is created when omitted so repeated calls do not accumulate
        sections from earlier files.
    :return: dict mapping option names to their (string) values.
    """
    if parser is None:
        parser = configparser.ConfigParser()

    # Copy so callers mutating the result never alter DEFAULTS.
    settings = dict(DEFAULTS)
    if os.path.isfile(path):
        parser.read(path)
        # items()/has_section() exist on both Python 2 and 3, unlike the
        # parser["section"] mapping access which is Python 3 only. A file
        # without a [general] section simply leaves the defaults in place.
        if parser.has_section("general"):
            settings.update(parser.items("general"))
    return settings


# Module-level parser kept for backward compatibility with code that
# imports ``config`` from this module.
config = configparser.ConfigParser()
CONFIG = load_config(CONFIG_FILE, config)

21 changes: 18 additions & 3 deletions bormeparser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


import os
import re
import datetime
import unicodedata

from .seccion import SECCION
from .config import CONFIG

FIRST_BORME = {2009: datetime.date(2009, 1, 2),
2010: datetime.date(2010, 1, 4),
Expand Down Expand Up @@ -52,4 +52,19 @@ def acto_to_attr(acto):
attr = remove_accents(acto).replace(' del ', ' ').replace(' por ', ' ').replace(' de ', ' ')
attr = attr.replace(' ', '_').replace('/', '_').replace('.', '_').lower()
attr = re.sub('[^A-Za-z_]+', '', attr)
return attr.rstrip('_')
return attr.rstrip('_')


def get_borme_xml_filepath(date, directory=CONFIG["borme_root"]):
    """Return the local path of the BORME-XML file for *date*.

    The path has the form
    ``<directory>/xml/<YYYY>/<MM>/BORME-S-<YYYYMMDD>.xml``, with ``~`` in
    *directory* expanded to the user's home directory.

    :param date: object exposing ``year``, ``month`` and ``day``.
    :param directory: root directory of the local BORME tree.
    """
    two_digits = '{:02d}'.format
    yyyy = str(date.year)
    mm = two_digits(date.month)
    dd = two_digits(date.day)
    xml_name = 'BORME-S-{}{}{}.xml'.format(yyyy, mm, dd)
    root = os.path.expanduser(directory)
    return os.path.join(root, 'xml', yyyy, mm, xml_name)


def get_borme_pdf_path(date, directory=CONFIG["borme_root"]):
    """Return the local directory holding the BORME PDFs for *date*.

    The path has the form ``<directory>/pdf/<YYYY>/<MM>/<DD>``, with ``~``
    in *directory* expanded to the user's home directory.

    :param date: object exposing ``year``, ``month`` and ``day``.
    :param directory: root directory of the local BORME tree.
    """
    two_digits = '{:02d}'.format
    subdirs = (str(date.year), two_digits(date.month), two_digits(date.day))
    return os.path.join(os.path.expanduser(directory), 'pdf', *subdirs)
5 changes: 3 additions & 2 deletions scripts/borme_json_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import bormeparser
import bormeparser.borme
from common import DEFAULT_BORME_ROOT, get_git_revision_short_hash
from common import get_git_revision_short_hash

from bormeparser.backends.defaults import OPTIONS
OPTIONS['SANITIZE_COMPANY_NAME'] = True
Expand All @@ -31,6 +31,7 @@
from threading import Thread
from queue import Queue

BORME_ROOT = bormeparser.CONFIG["borme_root"]
THREADS = 6


Expand Down Expand Up @@ -79,7 +80,7 @@ def walk_borme_root(bormes_root, json_root=None):

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Convert all BORME PDF files to JSON.')
parser.add_argument('-d', '--directory', default=DEFAULT_BORME_ROOT, help='Directory to download files (default is {})'.format(DEFAULT_BORME_ROOT))
parser.add_argument('-d', '--directory', default=BORME_ROOT, help='Directory to download files (default is {})'.format(BORME_ROOT))
args = parser.parse_args()

bormes_root = os.path.expanduser(args.directory)
Expand Down
7 changes: 4 additions & 3 deletions scripts/check_bormes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@
import bormeparser
from bormeparser.exceptions import BormeDoesntExistException
from bormeparser.borme import BormeXML
from bormeparser.utils import FIRST_BORME
from common import DEFAULT_BORME_ROOT, get_borme_xml_filepath, get_borme_pdf_path
from bormeparser.utils import FIRST_BORME, get_borme_xml_filepath, get_borme_pdf_path

import argparse
import datetime
import logging
import os

BORME_ROOT = bormeparser.CONFIG["borme_root"]

logger = logging.getLogger('check_bormes')
ch = logging.StreamHandler()
logger.addHandler(ch)
Expand Down Expand Up @@ -98,7 +99,7 @@ def check_range(begin, end, provincia, seccion, directory, download_xml):
parser = argparse.ArgumentParser(description='Check BORME files are present and not corrupt.')
parser.add_argument('-f', '--fromdate', default='init', help='ISO formatted date (ex. 2015-01-01). Default: init')
parser.add_argument('-t', '--to', default='today', help='ISO formatted date (ex. 2016-01-01). Default: today')
parser.add_argument('-d', '--directory', default=DEFAULT_BORME_ROOT, help='Directory to download files (default is {})'.format(DEFAULT_BORME_ROOT))
parser.add_argument('-d', '--directory', default=BORME_ROOT, help='Directory to download files (default is {})'.format(BORME_ROOT))
parser.add_argument('-s', '--seccion', default=bormeparser.SECCION.A, choices=['A', 'B', 'C'], help='BORME seccion')
parser.add_argument('-p', '--provincia', choices=bormeparser.provincia.ALL_PROVINCIAS, help='BORME provincia')
parser.add_argument('-x', '--download-xml', action='store_true', default=False, help='Download missing XML BORME files')
Expand Down
18 changes: 0 additions & 18 deletions scripts/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,6 @@
import subprocess


DEFAULT_BORME_ROOT = '~/.bormes'


def get_borme_xml_filepath(date, directory):
    """Return the path of the BORME-XML file for *date* under *directory*.

    Layout: ``<directory>/xml/<YYYY>/<MM>/BORME-S-<YYYYMMDD>.xml``; a leading
    ``~`` in *directory* is expanded to the user's home directory.
    """
    pad = '{:02d}'.format
    parts = (str(date.year), pad(date.month), pad(date.day))
    xml_name = 'BORME-S-{}{}{}.xml'.format(*parts)
    base = os.path.expanduser(directory)
    return os.path.join(base, 'xml', parts[0], parts[1], xml_name)


def get_borme_pdf_path(date, directory):
    """Return the directory holding the BORME PDFs for *date*.

    Layout: ``<directory>/pdf/<YYYY>/<MM>/<DD>``; a leading ``~`` in
    *directory* is expanded to the user's home directory.
    """
    pad = '{:02d}'.format
    dated = [str(date.year), pad(date.month), pad(date.day)]
    base = os.path.expanduser(directory)
    return os.path.join(base, 'pdf', *dated)


def get_git_revision_short_hash():
try:
version = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).strip()
Expand Down
7 changes: 4 additions & 3 deletions scripts/download_borme_pdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@
import bormeparser
from bormeparser.exceptions import BormeDoesntExistException
from bormeparser.borme import BormeXML
from bormeparser.utils import FIRST_BORME
from common import DEFAULT_BORME_ROOT, get_borme_xml_filepath, get_borme_pdf_path
from bormeparser.utils import FIRST_BORME, get_borme_xml_filepath, get_borme_pdf_path

import argparse
import datetime
import logging
import os

BORME_ROOT = bormeparser.CONFIG["borme_root"]

logger = logging.getLogger(__name__)
ch = logging.StreamHandler()
logger.addHandler(ch)
Expand Down Expand Up @@ -83,7 +84,7 @@ def download_range(begin, end, directory, seccion, provincia=None):
parser = argparse.ArgumentParser(description='Download BORME PDF files.')
parser.add_argument('-f', '--fromdate', default='today', help='ISO formatted date (ex. 2015-01-01) or "init". Default: today')
parser.add_argument('-t', '--to', default='today', help='ISO formatted date (ex. 2016-01-01). Default: today')
parser.add_argument('-d', '--directory', default=DEFAULT_BORME_ROOT, help='Directory to download files (default is {})'.format(DEFAULT_BORME_ROOT))
parser.add_argument('-d', '--directory', default=BORME_ROOT, help='Directory to download files (default is {})'.format(BORME_ROOT))
parser.add_argument('-s', '--seccion', default=bormeparser.SECCION.A, choices=['A', 'B', 'C'], help='BORME seccion')
parser.add_argument('-p', '--provincia', choices=bormeparser.provincia.ALL_PROVINCIAS, help='BORME provincia')
parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose mode')
Expand Down

0 comments on commit 0585850

Please sign in to comment.