Skip to content

Commit

Permalink
Use regex instead of datetime.strftime
Browse files Browse the repository at this point in the history
  • Loading branch information
PabloCastellano committed Jun 28, 2015
1 parent 7a85ff5 commit 54af238
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 10 deletions.
10 changes: 2 additions & 8 deletions bormeparser/backends/base.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import datetime
import locale
import os

from bormeparser.borme import Borme, BormeActo

# TODO: What if the system hasn't generated this locale?
# locale -a
locale.setlocale(locale.LC_TIME, 'es_ES.utf8')
from bormeparser.regex import regex_fecha


class BormeParserBackend(object):
Expand All @@ -30,8 +25,7 @@ def parse(self):
a = BormeActo(id_acto, data['Empresa'], data['Actos'])
bormeactos.append(a)

fecha = datetime.datetime.strptime(actos['borme_fecha'], '%A %d de %B de %Y')
fecha = datetime.date(fecha.year, fecha.month, fecha.day)
fecha = regex_fecha(actos['borme_fecha'])
# FIXME: provincia, seccion objects
return Borme(fecha, actos['borme_seccion'], actos['borme_provincia'], actos['borme_num'], bormeactos)

Expand Down
16 changes: 16 additions & 0 deletions bormeparser/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
# Patterns are raw strings so that backslash escapes such as \d, \s and \w
# reach the regex engine untouched instead of relying on Python's handling of
# unknown string escapes.

# '<digits> - <rest of line>'
REGEX_EMPRESA = re.compile(r'^(\d+)\s+-\s+(.*)$')
# '(<text>)Tj'
REGEX_TEXT = re.compile(r'^\((.*)\)Tj$')
# 'Núm. <digits>'
REGEX_BORME_NUM = re.compile(r'^Núm\. (\d+)')
# '<word> <day> de <month> de <year>', e.g. 'Martes 2 de junio de 2015'
REGEX_BORME_FECHA = re.compile(r'^\w+ (\d+) de (\w+) de (\d+)')

# Lowercase Spanish month names mapped to their month number (1-12).
MESES = {
    'enero': 1,
    'febrero': 2,
    'marzo': 3,
    'abril': 4,
    'mayo': 5,
    'junio': 6,
    'julio': 7,
    'agosto': 8,
    'septiembre': 9,
    'octubre': 10,
    'noviembre': 11,
    'diciembre': 12,
}

def regex_cargos(data):
"""
Expand All @@ -57,3 +61,15 @@ def regex_cargos(data):
for cargo in re.findall(RE_CARGOS_MATCH, data, re.UNICODE):
cargos.append((cargo[0], set(cargo[1].split(';'))))
return cargos

# Parse the date with a regex instead of datetime.strptime, which requires the
# es_ES.utf8 locale to be generated on the system.
def regex_fecha(data):
    """
    Parse a BORME date line into a (year, month, day) tuple of ints.

    The leading weekday name is matched but otherwise ignored, and the month
    name is looked up case-insensitively in MESES.

    >>> regex_fecha('Martes 2 de junio de 2015')
    (2015, 6, 2)

    Raises ValueError if data does not start with
    '<weekday> <day> de <month> de <year>', and KeyError if the month is not
    one of the Spanish month names in MESES.
    """
    # Same pattern as REGEX_BORME_FECHA. re.UNICODE is passed, as regex_cargos
    # does, so that \w also matches accented letters in unicode input on
    # Python 2 (it is already the default for str patterns on Python 3).
    match = re.match(r'\w+ (\d+) de (\w+) de (\d+)', data, re.UNICODE)
    if match is None:
        # Fail with a readable message instead of AttributeError on None.
        raise ValueError('Unrecognized BORME date: %r' % (data,))

    day, month, year = match.groups()
    # MESES keys are lowercase; accept capitalized month names as well.
    return (int(year), MESES[month.lower()], int(day))
2 changes: 0 additions & 2 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,6 @@ Para analizar un acto mercantil en concreto, podemos obtenerlo de la instancia B
>>> acto = borme.get_acto(223988)
>>> acto.get_datos_registrales()
'T 5367, L 4274, F 64, S 8, H MA126720, I/A 2 (22.05.15).'
>>> acto.get_actos()
{'Ceses/Dimisiones': [('Adm. Unico', {'MARTINEZ MORALES IVAN KARIM'})], 'Nombramientos': [('Adm. Unico', {'NIKOLAEKO MARIA'})], 'Cambio de domicilio social': 'URB PUEBLO MARINERO DE RIBERA S/N 9C - EDF. DE LA (MARBELLA).'}
>>> import pprint
>>> actos = acto.get_actos()
>>> pprint.pprint(actos)
Expand Down

0 comments on commit 54af238

Please sign in to comment.