Skip to content

Commit

Permalink
Parse /Codigo_verificacion (cve)
Browse files Browse the repository at this point in the history
  • Loading branch information
PabloCastellano committed Jun 29, 2015
1 parent 2c6b150 commit 81bcab0
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 5 deletions.
2 changes: 1 addition & 1 deletion bormeparser/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def parse(self):

fecha = regex_fecha(actos['borme_fecha'])
# FIXME: provincia, seccion objects
return Borme(fecha, actos['borme_seccion'], actos['borme_provincia'], actos['borme_num'], bormeactos)
return Borme(fecha, actos['borme_seccion'], actos['borme_provincia'], actos['borme_num'], actos['borme_cve'], bormeactos)

def _parse(self):
"""
Expand Down
16 changes: 13 additions & 3 deletions bormeparser/backends/pypdf2/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
import logging
from PyPDF2 import PdfFileReader

from bormeparser.regex import regex_cargos, REGEX_EMPRESA, REGEX_TEXT, REGEX_BORME_NUM
from bormeparser.regex import regex_cargos, REGEX_EMPRESA, REGEX_TEXT, REGEX_BORME_NUM, REGEX_BORME_CVE
from bormeparser.acto import ACTO

logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
logger.setLevel(logging.WARN)

DATA = {'borme_fecha': None, 'borme_num': None, 'borme_seccion': None, 'borme_provincia': None}
DATA = {'borme_fecha': None, 'borme_num': None, 'borme_seccion': None, 'borme_provincia': None, 'borme_cve': None}


def clean_data(data):
Expand All @@ -30,6 +30,7 @@ def parse_content(content):
numero = False
seccion = False
provincia = False
cve = False

# Python 3
if isinstance(content, bytes):
Expand Down Expand Up @@ -68,6 +69,11 @@ def parse_content(content):
provincia = True
continue

if line.startswith('/Codigo_verificacion'):
if not DATA['borme_cve']:
cve = True
continue

if line == 'BT':
# Begin text object
continue
Expand All @@ -87,7 +93,7 @@ def parse_content(content):
DATA[acto_id] = {'Empresa': empresa, 'Actos': actos}
continue

if not any([texto, cabecera, fecha, numero, seccion, provincia]):
if not any([texto, cabecera, fecha, numero, seccion, provincia, cve]):
continue

if line == '/F1 8 Tf':
Expand Down Expand Up @@ -131,6 +137,10 @@ def parse_content(content):
if provincia:
DATA['borme_provincia'] = m.group(1)
provincia = False
if cve:
text = m.group(1)
DATA['borme_cve'] = REGEX_BORME_CVE.match(text).group(1)
cve = False
logger.debug(m.group(1))
data += ' ' + m.group(1)

Expand Down
3 changes: 2 additions & 1 deletion bormeparser/borme.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,14 @@ class BormeXML(object):
# TODO: Create instance directly from filename
class Borme(object):

def __init__(self, date, seccion, provincia, num, actos=None, url=None, filename=None):
def __init__(self, date, seccion, provincia, num, cve, actos=None, url=None, filename=None):
if isinstance(date, tuple):
date = datetime.date(year=date[0], month=date[1], day=date[2])
self.date = date
self.seccion = seccion
self.provincia = provincia
self.num = num
self.cve = cve
self.url = url
self.filename = filename
self._parsed = False
Expand Down
2 changes: 2 additions & 0 deletions bormeparser/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
# Precompiled patterns used while scanning the PDF content stream.
# They are written as raw strings so regex escapes such as \( and \d are not
# interpreted as (invalid) Python string escape sequences.

# A parenthesised text operand followed by the "Tj" operator; group 1 is the text.
REGEX_TEXT = re.compile(r'^\((.*)\)Tj$')
# Issue number, e.g. "Núm. 102"; group 1 is the digits.
# The u'' prefix is kept (presumably for Python 2 support -- confirm); since
# ur'' is not valid in Python 3, the backslashes are doubled instead.
REGEX_BORME_NUM = re.compile(u'^Núm\\. (\\d+)', re.UNICODE)
# Date line, e.g. "Martes 2 de junio de 2015"; groups are (day, month name, year).
REGEX_BORME_FECHA = re.compile(r'^\w+ (\d+) de (\w+) de (\d+)')
# Verification code line, "cve: <code>"; group 1 is everything after "cve: ".
REGEX_BORME_CVE = re.compile(r'^cve: (.*)$')


MESES = {'enero': 1, 'febrero': 2, 'marzo': 3, 'abril': 4, 'mayo': 5, 'junio': 6, 'julio': 7,
'agosto': 8, 'septiembre': 9, 'octubre': 10, 'noviembre': 11, 'diciembre': 12}
Expand Down
1 change: 1 addition & 0 deletions docs/backends.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Para implementar un nuevo backend, es necesario crear un nuevo paquete en el dir
214017: {'Actos': {'Datos registrales': 'T 2226, L 1139, F 102, S 8, H MA 33737, I/A 6 (25.05.15).',
'Modificaciones estatutarias': '8. Administración y Representacion.-.'},
'Empresa': 'EMPRESA ALEATORIA SL.'},
'borme_cve': 'BORME-A-2015-102-29',
'borme_fecha': 'Martes 2 de junio de 2015',
'borme_num': 102,
'borme_provincia': 'MÁLAGA',
Expand Down
2 changes: 2 additions & 0 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ De la instancia BORME puedes obtener información básica como la fecha, la secc

.. code-block:: python
>>> borme.cve
'BORME-A-2015-102-29'
>>> borme.num
102
>>> borme.info
Expand Down

0 comments on commit 81bcab0

Please sign in to comment.