Skip to content

Commit

Permalink
Convert updated_date into timestamp when it is supplied as a date
Browse files Browse the repository at this point in the history
  • Loading branch information
jdddog committed Aug 10, 2023
1 parent c59e770 commit 8f2684f
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 9 deletions.
Git LFS file not shown
Git LFS file not shown
17 changes: 12 additions & 5 deletions academic_observatory_workflows/workflows/openalex_telescope.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,10 +331,10 @@ def __init__(
("concepts", True),
("institutions", True),
("works", True),
("authors", False),
("publishers", False),
("sources", False),
("funders", False),
("authors", True),
("publishers", True),
("sources", True),
("funders", True),
]

super().__init__(
Expand Down Expand Up @@ -1014,9 +1014,9 @@ def transform_object(obj: dict):
value = []
obj[field] = [x for x in value if x is not None]

# TODO: when re-ingesting the entire dataset, change the schema to the new version
field = "abstract_inverted_index"
if field in obj:

def parse_abstract(dict_: dict):
keys_ = list(dict_.keys())
values_ = [str(value_)[1:-1] for value_ in dict_.values()]
Expand All @@ -1039,3 +1039,10 @@ def parse_abstract(dict_: dict):
values = list(obj[field][nested_field].values())

obj[field][nested_field] = {"keys": keys, "values": values}

# Convert updated_date from a date string into an ISO 8601 datetime string
# TODO: when re-ingesting the entire dataset, change updated_date to a date instead
field = "updated_date"
if field in obj:
obj[field] = pendulum.parse(obj[field]).to_iso8601_string()

Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,11 @@ def test_dag_structure(self):
"aws_to_gcs_transfer": ["download_concepts"],
"download_concepts": ["download_institutions"],
"download_institutions": ["download_works"],
"download_works": ["transform"],
"download_works": ["download_authors"],
"download_authors": ["download_publishers"],
"download_publishers": ["download_sources"],
"download_sources": ["download_funders"],
"download_funders": ["transform"],
"transform": ["upload_upsert_files"],
"upload_upsert_files": ["bq_load_upsert_tables"],
"bq_load_upsert_tables": ["bq_upsert_records"],
Expand Down

0 comments on commit 8f2684f

Please sign in to comment.