Skip to content

Commit

Permalink
Update query
Browse files Browse the repository at this point in the history
  • Loading branch information
jdddog committed Jun 19, 2023
1 parent 1261a4d commit 8958cbb
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
# limitations under the License.
# Author: Richard Hosking #}

{#These types will be excluded by the aggregation#}
{% set CROSSREF_TYPES_TO_EXCLUDE = '("dataset")' %}
{# Include: journal-article, proceedings-article, report, posted-content, edited-book, book, book-chapter, reference-book, monograph, other, book-section, book-part, reference-entry #}
{# Exclude: the types listed below, and records whose type is NULL #}
{% set CROSSREF_TYPES_TO_EXCLUDE = '("dataset","database","component","report-component","peer-review","grant","proceedings","journal-issue","report-series","book-track")' %}

# Helper Function: Counting Access Types
{#
Expand Down Expand Up @@ -744,6 +745,7 @@ WITH tmp_disciplines AS
UNNEST(dois.affiliations.{{ aggregation_field }}) as aggregrate
WHERE
aggregrate.identifier IS NOT NULL
AND dois.crossref.type IS NOT NULL
AND dois.crossref.type NOT IN {{ CROSSREF_TYPES_TO_EXCLUDE }}
GROUP BY
aggregrate.identifier,
Expand All @@ -768,6 +770,7 @@ tmp_access_types AS (
UNNEST(dois.affiliations.{{ aggregation_field }}) as aggregrate
WHERE
aggregrate.identifier IS NOT NULL
AND dois.crossref.type IS NOT NULL
AND dois.crossref.type NOT IN {{ CROSSREF_TYPES_TO_EXCLUDE }}
GROUP BY
aggregrate.identifier,
Expand Down Expand Up @@ -1022,6 +1025,7 @@ SELECT

FROM `{{ project_id }}.{{ dataset_id }}.doi{{ snapshot_date.strftime('%Y%m%d') }}` as dois, UNNEST(dois.affiliations.{{ aggregation_field }}) as aggregrate
WHERE aggregrate.identifier IS NOT NULL
AND dois.crossref.type IS NOT NULL
AND dois.crossref.type NOT IN {{ CROSSREF_TYPES_TO_EXCLUDE }}
GROUP BY aggregrate.identifier, crossref.{{ group_by_time_field }}
)
Expand Down
4 changes: 4 additions & 0 deletions academic_observatory_workflows/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ class Paper:
id: int
doi: str = None
title: str = None
type: str = None
published_date: pendulum.Date = None
output_type: str = None
authors: List[Author] = None
Expand Down Expand Up @@ -790,6 +791,7 @@ def make_papers(
# Make paper
paper = Paper(
i,
type="journal-article",
doi=doi_,
title=title_,
published_date=published_date_,
Expand Down Expand Up @@ -1099,6 +1101,7 @@ def make_crossref_metadata(dataset: ObservatoryDataset) -> List[Dict]:
# Add Crossref record
records.append(
{
"type": paper.type,
"title": [paper.title],
"DOI": paper.doi,
"is_referenced_by_count": len(paper.cited_by),
Expand Down Expand Up @@ -1483,6 +1486,7 @@ def make_doi_table(dataset: ObservatoryDataset) -> List[Dict]:
{
"doi": doi,
"crossref": {
"type": paper.type,
"title": paper.title,
"published_year": paper.published_date.year,
"published_month": paper.published_date.month,
Expand Down
10 changes: 5 additions & 5 deletions academic_observatory_workflows/workflows/oa_web_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
("outputs_public", "n_outputs_other_platform_open"),
("outputs_other_internet", "n_outputs_other_platform_open"),
]
INCLUSION_THRESHOLD = {"country": 15, "institution": 800}
INCLUSION_THRESHOLD = {"country": 15, "institution": 1000}
MAX_REPOSITORIES = 200
START_YEAR = 2000
END_YEAR = pendulum.now().year - 1
Expand Down Expand Up @@ -352,10 +352,10 @@ def __init__(
self.add_task(self.download_logos)
self.add_task(self.download_wiki_descriptions)
self.add_task(self.build_datasets)
self.add_task(self.publish_zenodo_version)
self.add_task(self.upload_dataset)
self.add_task(self.repository_dispatch)
self.add_task(self.cleanup)
# self.add_task(self.publish_zenodo_version)
# self.add_task(self.upload_dataset)
# self.add_task(self.repository_dispatch)
# self.add_task(self.cleanup)

######################################
# Airflow tasks
Expand Down

0 comments on commit 8958cbb

Please sign in to comment.