Skip to content

Commit

Permalink
acto_to_attr + test utils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
PabloCastellano committed Apr 21, 2016
1 parent e94489c commit b149256
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 3 deletions.
1 change: 0 additions & 1 deletion bormeparser/acto.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

#from .utils import remove_accents
import six


Expand Down
2 changes: 1 addition & 1 deletion bormeparser/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ def parse_capital(amount):
date = datetime.datetime.strptime(date, '%d.%m.%y').date()
date = date.isoformat()
except ValueError:
print('ERROR CON DATE: {0}'.format(date))
print('ERROR CON Comienzo de operaciones: {0}'.format(date))

duration = re.search('Duración: (.*?){0}'.format(all_or_ng), data)
if duration:
Expand Down
47 changes: 47 additions & 0 deletions bormeparser/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# test_utils.py -
# Copyright (C) 2016 Pablo Castellano <pablo@anche.no>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import datetime
import unittest

from bormeparser.utils import get_borme_website
from bormeparser.utils import acto_to_attr
from bormeparser.seccion import SECCION


class BormeparserUtilsTestCase(unittest.TestCase):
    """Checks for the ``get_borme_website`` and ``acto_to_attr`` helpers."""

    def test_get_borme_website(self):
        # Same date, two sections: only the trailing ``s=`` query value
        # differs between the two expected URLs.
        day = datetime.date(2016, 4, 21)
        expected_by_section = (
            (SECCION.A, 'https://www.boe.es/borme/dias/2016/04/21/index.php?s=A'),
            (SECCION.C, 'https://www.boe.es/borme/dias/2016/04/21/index.php?s=C'),
        )
        for section, expected_url in expected_by_section:
            self.assertEqual(get_borme_website(day, section), expected_url)

    def test_acto_to_attr(self):
        # Covers a plain word, a '/' separator, and an accented phrase
        # containing the connecting word "por".
        samples = ('Nombramientos', 'Ceses/Dimisiones', 'Fusión por absorción')
        expected = ('nombramientos', 'ceses_dimisiones', 'fusion_absorcion')
        # Convert every sample before asserting anything.
        converted = [acto_to_attr(sample) for sample in samples]
        for got, want in zip(converted, expected):
            self.assertEqual(got, want)


# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
11 changes: 10 additions & 1 deletion bormeparser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unicodedata

import re
import datetime
import unicodedata

from .seccion import SECCION

Expand All @@ -44,3 +46,10 @@ def remove_accents(string):
return ''.join((c for c in unicodedata.normalize('NFKD', string) if unicodedata.category(c) != 'Mn'))
except TypeError:
return ''.join((c for c in unicodedata.normalize('NFKD', unicode(string, 'utf-8')) if unicodedata.category(c) != 'Mn'))


def acto_to_attr(acto):
    """Turn an acto name into a lowercase, underscore-separated string.

    Processing order:

    1. Accents are removed via ``remove_accents``.
    2. The connecting words " del ", " por " and " de " (matched with their
       surrounding spaces, case-sensitively) are each replaced by one space.
    3. Spaces, "/" and "." are replaced by underscores.
    4. The text is lowercased.
    5. Every character other than ASCII letters and "_" is dropped.
    6. Trailing underscores are stripped.

    :param acto: acto name to convert.
    :return: the converted string.
    """
    text = remove_accents(acto)

    # Drop connecting words first, while they are still delimited by spaces.
    for filler in (' del ', ' por ', ' de '):
        text = text.replace(filler, ' ')

    # Separators all map to the same underscore.
    for separator in (' ', '/', '.'):
        text = text.replace(separator, '_')

    cleaned = re.sub('[^A-Za-z_]+', '', text.lower())
    return cleaned.rstrip('_')

0 comments on commit b149256

Please sign in to comment.