Skip to content

Commit

Permalink
Mejora en la expresión regular para capturar cargos
Browse files Browse the repository at this point in the history
Fallaba en BORME-A-2013-227-15, anuncio nº 505781, ya que falta el punto final.
  • Loading branch information
PabloCastellano committed Oct 9, 2016
1 parent 6848693 commit d626e0c
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions bormeparser/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@
# -- CARGOS --
# OR de las palabras clave
RE_CARGOS_KEYWORDS = '(%s)' % '|'.join(esc_cargos_keywords)
RE_CARGOS_KEYWORDS2 = '(?=%s|$)' % '|'.join(esc_cargos_keywords)
# RE para capturar el cargo y los nombres
RE_CARGOS_MATCH = RE_CARGOS_KEYWORDS + ":\s(.*?)(?:\.$|\. |\s*$)"
RE_CARGOS_MATCH = RE_CARGOS_KEYWORDS + ': (.*?)' + RE_CARGOS_KEYWORDS2

REGEX_NOARG = re.compile(RE_NOARG_KEYWORDS + '\.\s*(.*)', re.UNICODE)
REGEX_ARGCOLON = re.compile(RE_COLON_KEYWORDS + ': (.*?)(?:\.\s+)(.*)', re.UNICODE)
Expand Down Expand Up @@ -182,8 +183,7 @@ def regex_cargos(data, sanitize=True):
for cargo in re.findall(RE_CARGOS_MATCH, data, re.UNICODE):
entidades = set()
for e in cargo[1].split(';'):
e = e.rstrip('.')
e = e.strip()
e = e.strip(" .")
if sanitize:
e = clean_empresa(e)
entidades.add(e)
Expand Down

0 comments on commit d626e0c

Please sign in to comment.