Skip to content

Commit

Permalink
create_scraper: update ingest/__init__.py (Recidiviz/recidiviz-data#889)
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 435b96fb230525cfa6930ff9ae1ec38854149b88
  • Loading branch information
jamwalla authored and Helper committed Apr 5, 2022
1 parent 87d337e commit 3596367
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 12 deletions.
53 changes: 45 additions & 8 deletions recidiviz/tools/create_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
from typing import Optional
import us

import recidiviz
import recidiviz.ingest
import recidiviz.ingest.scrape.regions
import recidiviz.tests.ingest.scrape.regions


def populate_file(template_path, target_path, subs):
Expand All @@ -48,7 +50,6 @@ def populate_file(template_path, target_path, subs):
with open(target_path, 'w') as target:
target.write(contents)


def create_scraper_files(subs, vendor: Optional[str]):
"""Creates __init__.py, region_name_scraper.py, and region_name.yaml files
in recidiviz/ingest/scrape/regions/region_name
Expand All @@ -62,8 +63,12 @@ def create_yaml(template):
target = os.path.join(target_dir, subs['region'] + '.yaml')
populate_file(template, target, subs)

regions_dir = os.path.join(os.path.dirname(recidiviz.__file__),
'ingest/scrape/regions')
ingest_init_file = recidiviz.ingest.__file__
region_import_statement = 'import {}.{}'.format(
recidiviz.ingest.scrape.regions.__name__, subs['region'])
_rewrite_init_file(ingest_init_file, region_import_statement)

regions_dir = os.path.dirname(recidiviz.ingest.scrape.regions.__file__)
if not os.path.exists(regions_dir):
raise OSError("Couldn't find directory "
"recidiviz/ingest/scrape/regions.")
Expand All @@ -86,17 +91,49 @@ def create_yaml(template):
yaml_template = os.path.join(template_dir, 'region.txt')
create_yaml(yaml_template)

def _rewrite_init_file(filename, import_statement):
    """Rewrites recidiviz/ingest/__init__.py to include the new import
    statement.

    The file is parsed into three ordered sections -- the '#'-prefixed
    license header, the module docstring, and the import lines -- and then
    rewritten with `import_statement` merged into the imports, which are
    emitted in sorted order. Blank lines are dropped during parsing and a
    single blank line is re-inserted between sections on rewrite.

    Args:
        filename: path of the __init__.py file to rewrite in place.
        import_statement: the full import line to add, without a trailing
            newline (e.g. 'import recidiviz.ingest.scrape.regions.us_xx').

    Raises:
        ValueError: if the file's lines do not follow the expected
            license -> docstring -> imports ordering. (Raised explicitly
            rather than via `assert`, which is stripped under `-O`.)
    """
    gpl = []
    docstring = []
    imports = [import_statement + '\n']
    with open(filename) as f:
        stage = 'LICENSE'
        for line in f:
            if line == '\n':
                continue
            if line.startswith('#'):
                if stage != 'LICENSE':
                    raise ValueError(
                        'Unexpected comment line after license header in '
                        '{}: {!r}'.format(filename, line))
                gpl.append(line)
            elif line.startswith('"""'):
                # The first '"""' line opens the module docstring; the next
                # one closes it, after which only imports are expected.
                if stage == 'LICENSE':
                    stage = 'DOCSTRING'
                elif stage == 'DOCSTRING':
                    stage = 'IMPORTS'
                docstring.append(line)
            elif line.startswith('import'):
                if stage != 'IMPORTS':
                    raise ValueError(
                        'Unexpected import line before end of docstring in '
                        '{}: {!r}'.format(filename, line))
                imports.append(line)
            else:
                # Any other line is only legal inside the docstring body.
                if stage != 'DOCSTRING':
                    raise ValueError(
                        'Unexpected line in {}: {!r}'.format(filename, line))
                docstring.append(line)

    with open(filename, 'w') as f:
        f.writelines(gpl)
        f.write('\n')
        f.writelines(docstring)
        f.write('\n')
        f.writelines(sorted(imports))


def create_test_files(subs, vendor: Optional[str]):
def create_test(template):
test_target_file_name = subs['region'] + '_scraper_test.py'
test_target = os.path.join(target_test_dir, test_target_file_name)
populate_file(template, test_target, subs)

ingest_dir = os.path.join(os.path.dirname(recidiviz.__file__),
'ingest/scrape/regions')
test_dir = os.path.join(os.path.dirname(recidiviz.__file__),
'tests/ingest/scrape/regions')
ingest_dir = os.path.dirname(recidiviz.ingest.scrape.regions.__file__)
test_dir = os.path.dirname(recidiviz.tests.ingest.scrape.regions.__file__)
if not os.path.exists(ingest_dir):
raise OSError('Couldn\'t find directory '
'recidiviz/tests/ingest/scrape/regions.')
Expand Down
4 changes: 0 additions & 4 deletions recidiviz/tools/scraper_template/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ It will also create a test file
In addition, the script will append the region to [queue.yaml](/queue.yaml),
[region_manifest.yaml](/region_manifest.yaml) and [cron.yaml](/cron.yaml).

You will need to manually edit the following file:
- In [`recidiviz/ingest/__init__.py`](/recidiviz/ingest/__init__.py), add an
`import` statement for your scraper.

Note: Calling `create_scraper` with the `--vendor` option will generate a slightly different setup according to the vendor type. Explore the generated files for pertinent instructions.

Writing the main scraper file
Expand Down

0 comments on commit 3596367

Please sign in to comment.