Skip to content

Commit

Permalink
borme_json_all: crea los json en un directorio asociado a la revision…
Browse files Browse the repository at this point in the history
… git de bormeparser
  • Loading branch information
PabloCastellano committed Dec 15, 2016
1 parent 9a80ce2 commit 7046007
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
17 changes: 13 additions & 4 deletions scripts/borme_json_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@

import bormeparser
import bormeparser.borme
from common import DEFAULT_BORME_ROOT
from common import DEFAULT_BORME_ROOT, get_git_revision_short_hash

from bormeparser.backends.defaults import OPTIONS
OPTIONS['SANITIZE_COMPANY_NAME'] = True

import argparse
import os
import sys
import time

from threading import Thread
Expand All @@ -51,9 +52,10 @@ def run(self):
self.queue.task_done()


def walk_borme_root(bormes_root):
def walk_borme_root(bormes_root, json_root=None):
pdf_root = os.path.join(bormes_root, 'pdf')
json_root = os.path.join(bormes_root, 'json')
if json_root is None:
json_root = os.path.join(bormes_root, 'json')

_, year_dirs, _ = next(os.walk(pdf_root))
for year in year_dirs:
Expand All @@ -74,6 +76,7 @@ def walk_borme_root(bormes_root):
for filename in files:
yield day_dir, json_day_dir, filename


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Convert all BORME PDF files to JSON.')
parser.add_argument('-d', '--directory', default=DEFAULT_BORME_ROOT, help='Directory to download files (default is {})'.format(DEFAULT_BORME_ROOT))
Expand All @@ -88,7 +91,13 @@ def walk_borme_root(bormes_root):
t.setDaemon(True)
t.start()

for day_dir, json_day_dir, filename in walk_borme_root(bormes_root):
json_folder = 'json_' + get_git_revision_short_hash()
json_root = os.path.join(bormes_root, json_folder)
if os.path.exists(json_root):
print('{} already exists'.format(json_root))
sys.exit(1)

for day_dir, json_day_dir, filename in walk_borme_root(bormes_root, json_root):
if not filename.endswith('.pdf') or filename.endswith('-99.pdf'):
continue
pdf_path = os.path.join(day_dir, filename)
Expand Down
13 changes: 13 additions & 0 deletions scripts/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import subprocess


DEFAULT_BORME_ROOT = '~/.bormes'


def get_borme_xml_filepath(date, directory):
year = str(date.year)
month = '{:02d}'.format(date.month)
Expand All @@ -34,3 +37,13 @@ def get_borme_pdf_path(date, directory):
month = '{:02d}'.format(date.month)
day = '{:02d}'.format(date.day)
return os.path.join(os.path.expanduser(directory), 'pdf', year, month, day)


def get_git_revision_short_hash():
    """Return the abbreviated hash of the current git HEAD commit.

    Runs ``git rev-parse --short HEAD`` as a subprocess. No working
    directory is passed, so git inspects whatever repository contains the
    process's current directory.

    Returns:
        The short commit hash as text, or ``'Unknown'`` when git exits with a
        non-zero status or the git executable cannot be launched at all.
    """
    try:
        version = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).strip()
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git ran but returned a non-zero exit status.
        # OSError: git could not be started (e.g. it is not installed); fall
        # back to the same placeholder instead of crashing the caller.
        return 'Unknown'

    if isinstance(version, bytes):
        # Decode with a real text encoding; 'unicode_escape' is meant for
        # Python string-literal escapes, not for process output.
        version = version.decode('utf-8')
    return version

0 comments on commit 7046007

Please sign in to comment.