Skip to content

Commit

Permalink
Update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jdddog committed Jul 24, 2023
1 parent 904b096 commit e1eacfa
Show file tree
Hide file tree
Showing 13 changed files with 24 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ bronze.percent FLOAT NULLABLE
green_only RECORD NULLABLE
green_only.total_outputs INTEGER NULLABLE
green_only.percent FLOAT NULLABLE
breakdown RECORD REPEATED
black.total_outputs INTEGER NULLABLE
black.percent FLOAT NULLABLE
* The breakdown object array is described in the compute_conditional_citations schema above
#}
Expand Down Expand Up @@ -367,17 +368,6 @@ CREATE TEMP FUNCTION compute_access_types(
ROUND(SAFE_DIVIDE( (COUNTIF(black is True)) * 100 , COUNT(doi)), 2) as percent
) as black,

ARRAY_CONCAT(
compute_conditional_citations(ARRAY_AGG(STRUCT(citations, IFNULL(is_oa, false) as is_x)), "oa", "Open Access", "Not Open Access"),
compute_conditional_citations(ARRAY_AGG(STRUCT(citations, IFNULL(green, false) as is_x)), "green", "Green", "Not Green"),
compute_conditional_citations(ARRAY_AGG(STRUCT(citations, IFNULL(gold, false) as is_x)), "gold", "Gold", "Not Gold"),
compute_conditional_citations(ARRAY_AGG(STRUCT(citations, IFNULL(gold_just_doaj, false) as is_x)), "gold_just_doaj", "Gold just DOAJ", "Not Gold just DOAJ"),
compute_conditional_citations(ARRAY_AGG(STRUCT(citations, IFNULL(hybrid, false) as is_x)), "hybrid", "Hybrid", "Not Hybrid"),
compute_conditional_citations(ARRAY_AGG(STRUCT(citations, IFNULL(bronze, false) as is_x)), "bronze", "Bronze", "Not Bronze"),
compute_conditional_citations(ARRAY_AGG(STRUCT(citations, IFNULL(green_only, false) as is_x)), "green_only", "Green Only", "Not Green Only"),
compute_conditional_citations(ARRAY_AGG(STRUCT(citations, IFNULL(black, false) as is_x)), "black", "Black", "Not Black")
) as breakdown

FROM UNNEST(items))
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ repositories as (
GROUP BY doi
),

-- Sci-Hub DOIs, normalised with UPPER(TRIM(...)) and de-duplicated.
-- The source table has DOIs that become duplicates once they are upper-cased
-- and trimmed, so DISTINCT keeps a single row per normalised DOI. As a result,
-- the LEFT JOIN onto this CTE in base_oa_calcs matches at most one scihub row
-- for each Crossref DOI.
-- The doi column exposed here is already normalised.
scihub as (
SELECT DISTINCT UPPER(TRIM(doi)) as doi
FROM `{{ scihub.project_id }}.{{ scihub.dataset_id }}.{{ scihub.table_id }}`
),

-- The OA colour and license calculations
base_oa_calcs as (
SELECT
Expand Down Expand Up @@ -309,7 +315,7 @@ base_oa_calcs as (

FROM {{ crossref_metadata.project_id }}.{{ crossref_metadata.dataset_id }}.{{ crossref_metadata.table_id }} AS crossref
LEFT JOIN `{{ unpaywall.project_id }}.{{ unpaywall.dataset_id }}.{{ unpaywall.table_id }}` AS unpaywall ON UPPER(TRIM(unpaywall.doi)) = UPPER(TRIM(crossref.doi))
LEFT JOIN `{{ scihub.project_id }}.{{ scihub.dataset_id }}.{{ scihub.table_id }}` AS scihub ON UPPER(TRIM(scihub.doi)) = UPPER(TRIM(crossref.doi))
LEFT JOIN scihub AS scihub ON UPPER(TRIM(scihub.doi)) = UPPER(TRIM(crossref.doi))
LEFT JOIN issnl_index ON issnl_index.identifier = unpaywall.journal_issn_l
LEFT JOIN repositories AS repo ON repo.doi = unpaywall.doi
)
Expand Down
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown

This file was deleted.

This file was deleted.

This file was deleted.

Git LFS file not shown
Git LFS file not shown

This file was deleted.

9 changes: 3 additions & 6 deletions academic_observatory_workflows/workflows/doi_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def make_dataset_transforms(
Transform(
inputs={
"observatory_intermediate": Table(output_project_id, dataset_id_observatory_intermediate),
"unpaywall": Table(output_project_id, dataset_id_unpaywall, "unpaywall"),
"unpaywall": Table(input_project_id, dataset_id_unpaywall, "unpaywall"),
"crossref_metadata": Table(
input_project_id, dataset_id_crossref_metadata, "crossref_metadata", sharded=True
),
Expand Down Expand Up @@ -320,7 +320,6 @@ class DoiWorkflow(Workflow):
Aggregation(
"country",
"countries",
relate_to_members=True,
relate_to_journals=True,
relate_to_funders=True,
relate_to_publishers=True,
Expand All @@ -331,15 +330,13 @@ class DoiWorkflow(Workflow):
relate_to_institutions=True,
relate_to_countries=True,
relate_to_groups=True,
relate_to_members=True,
relate_to_funders=True,
relate_to_publishers=True,
),
Aggregation(
"group",
"groupings",
relate_to_institutions=True,
relate_to_members=True,
relate_to_journals=True,
relate_to_funders=True,
relate_to_publishers=True,
Expand Down Expand Up @@ -377,8 +374,8 @@ class DoiWorkflow(Workflow):
"publishers",
relate_to_institutions=True,
relate_to_countries=True,
relate_to_groups=True,
relate_to_funders=True,
relate_to_groups=False,
relate_to_funders=False,
),
Aggregation(
"region",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def setup_tables(
True,
dataset_id_all,
institution,
bq_find_schema(path=oa_web_schema_path, table_name="institution", release_date=snapshot_date),
bq_find_schema(path=oa_web_schema_path, table_name="institution"),
),
Table(
"country",
Expand Down

0 comments on commit e1eacfa

Please sign in to comment.