Skip to content

Commit

Permalink
Made requested changes
Browse files Browse the repository at this point in the history
  • Loading branch information
alexmassen-hane committed Jul 27, 2023
1 parent 40180d0 commit 8777079
Showing 1 changed file with 12 additions and 23 deletions.
35 changes: 12 additions & 23 deletions academic_observatory_workflows/workflows/openalex_telescope.py
Original file line number Diff line number Diff line change
Expand Up @@ -1016,30 +1016,19 @@ def transform_object(obj: dict):

field = "abstract_inverted_index"
if field in obj:
if not isinstance(obj.get(field), (dict, str)):
return
else:
# If data is held in a string dump, load json string again.
if isinstance(obj.get(field), str):
obj_part = json.loads(obj[field])
field2 = "InvertedIndex"
if isinstance(obj_part.get(field2), dict):
keys = list(obj_part[field2].keys())
values = [str(value)[1:-1] for value in obj_part[field2].values()]

index_sum = sum(len(value.split(", ")) for value in values)
assert (
index_sum == obj_part["IndexLength"]
), f"Calculated IndexLength {index_sum} does not match value from file {obj_part['IndexLength']}."

obj[field] = {"keys": keys, "values": values}
else:
raise TypeError(f"obj_part['InvertedIndex'] is not a dictionary: {obj_part}")
else:
keys = list(obj[field].keys())
values = [str(value)[1:-1] for value in obj[field].values()]

obj[field] = {"keys": keys, "values": values}
def parse_abstract(dict_: dict):
    """Split a mapping into parallel ``keys`` and ``values`` lists.

    Every value is rendered with ``str()`` and then has its first and last
    character dropped; for a list value this strips the enclosing square
    brackets (e.g. ``[0, 2]`` becomes ``"0, 2"``).
    NOTE(review): presumably each value is a list of word positions from an
    inverted index -- confirm against the caller.

    :param dict_: the mapping to split.
    :return: a dict with "keys" (the mapping's keys, in iteration order) and
        "values" (the trimmed string form of each corresponding value).
    """
    words = []
    positions = []
    for word, index in dict_.items():
        words.append(word)
        # Trim the first/last character of the string form (the brackets of a list).
        positions.append(str(index)[1:-1])
    return {"keys": words, "values": positions}

if isinstance(obj.get(field), str):
data = json.loads(obj[field])
obj[field] = parse_abstract(data["InvertedIndex"])
elif isinstance(obj.get(field), dict):
obj[field] = parse_abstract(obj[field])
else:
return

field = "international"
if field in obj:
Expand Down

0 comments on commit 8777079

Please sign in to comment.