diff --git a/academic_observatory_workflows/database/sql/create_aggregate.sql.jinja2 b/academic_observatory_workflows/database/sql/create_aggregate.sql.jinja2 index 564b0b685..56f855e16 100644 --- a/academic_observatory_workflows/database/sql/create_aggregate.sql.jinja2 +++ b/academic_observatory_workflows/database/sql/create_aggregate.sql.jinja2 @@ -13,8 +13,9 @@ # limitations under the License. # Author: Richard Hosking #} -{#These types will be excluded by the aggregation#} -{% set CROSSREF_TYPES_TO_EXCLUDE = '("dataset")' %} +{#Include:journal-article,proceedings-article,report,posted-content,edited-book,book,book-chapter,reference-book,monograph,other,book-section,book-part,reference-entry#} +{#Exclude: the types below and NULL#} +{% set CROSSREF_TYPES_TO_EXCLUDE = '("dataset","database","component","report-component","peer-review","grant","proceedings","journal-issue","report-series","book-track")' %} # Helper Function: Counting Access Types {# @@ -744,6 +745,7 @@ WITH tmp_disciplines AS UNNEST(dois.affiliations.{{ aggregation_field }}) as aggregrate WHERE aggregrate.identifier IS NOT NULL + AND dois.crossref.type IS NOT NULL AND dois.crossref.type NOT IN {{ CROSSREF_TYPES_TO_EXCLUDE }} GROUP BY aggregrate.identifier, @@ -768,6 +770,7 @@ tmp_access_types AS ( UNNEST(dois.affiliations.{{ aggregation_field }}) as aggregrate WHERE aggregrate.identifier IS NOT NULL + AND dois.crossref.type IS NOT NULL AND dois.crossref.type NOT IN {{ CROSSREF_TYPES_TO_EXCLUDE }} GROUP BY aggregrate.identifier, @@ -1022,6 +1025,7 @@ SELECT FROM `{{ project_id }}.{{ dataset_id }}.doi{{ snapshot_date.strftime('%Y%m%d') }}` as dois, UNNEST(dois.affiliations.{{ aggregation_field }}) as aggregrate WHERE aggregrate.identifier IS NOT NULL +AND dois.crossref.type IS NOT NULL AND dois.crossref.type NOT IN {{ CROSSREF_TYPES_TO_EXCLUDE }} GROUP BY aggregrate.identifier, crossref.{{ group_by_time_field }} ) diff --git a/academic_observatory_workflows/model.py b/academic_observatory_workflows/model.py index 46c82bf51..a0e71ad44 100644 --- a/academic_observatory_workflows/model.py +++ b/academic_observatory_workflows/model.py @@ -206,6 +206,7 @@ class Paper: id: int doi: str = None title: str = None + type: str = None published_date: pendulum.Date = None output_type: str = None authors: List[Author] = None @@ -790,6 +791,7 @@ def make_papers( # Make paper paper = Paper( i, + type="journal-article", doi=doi_, title=title_, published_date=published_date_, @@ -1099,6 +1101,7 @@ def make_crossref_metadata(dataset: ObservatoryDataset) -> List[Dict]: # Add Crossref record records.append( { + "type": paper.type, "title": [paper.title], "DOI": paper.doi, "is_referenced_by_count": len(paper.cited_by), @@ -1483,6 +1486,7 @@ def make_doi_table(dataset: ObservatoryDataset) -> List[Dict]: { "doi": doi, "crossref": { + "type": paper.type, "title": paper.title, "published_year": paper.published_date.year, "published_month": paper.published_date.month, diff --git a/academic_observatory_workflows/workflows/oa_web_workflow.py b/academic_observatory_workflows/workflows/oa_web_workflow.py index fb1f486f2..4b75ca2f1 100644 --- a/academic_observatory_workflows/workflows/oa_web_workflow.py +++ b/academic_observatory_workflows/workflows/oa_web_workflow.py @@ -83,7 +83,7 @@ ("outputs_public", "n_outputs_other_platform_open"), ("outputs_other_internet", "n_outputs_other_platform_open"), ] -INCLUSION_THRESHOLD = {"country": 15, "institution": 800} +INCLUSION_THRESHOLD = {"country": 15, "institution": 1000} MAX_REPOSITORIES = 200 START_YEAR = 2000 END_YEAR = pendulum.now().year - 1 @@ -352,10 +352,10 @@ def __init__( self.add_task(self.download_logos) self.add_task(self.download_wiki_descriptions) self.add_task(self.build_datasets) - self.add_task(self.publish_zenodo_version) - self.add_task(self.upload_dataset) - self.add_task(self.repository_dispatch) - self.add_task(self.cleanup) + # self.add_task(self.publish_zenodo_version) + # self.add_task(self.upload_dataset) + # self.add_task(self.repository_dispatch) + # self.add_task(self.cleanup) ###################################### # Airflow tasks