diff --git a/academic_observatory_workflows/database/schema/crossref_events_2017-02-17.json b/academic_observatory_workflows/database/schema/crossref_events/crossref_events_2017-02-17.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_events_2017-02-17.json rename to academic_observatory_workflows/database/schema/crossref_events/crossref_events_2017-02-17.json diff --git a/academic_observatory_workflows/database/schema/crossref_fundref_2014-03-01.json b/academic_observatory_workflows/database/schema/crossref_fundref/crossref_fundref_2014-03-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_fundref_2014-03-01.json rename to academic_observatory_workflows/database/schema/crossref_fundref/crossref_fundref_2014-03-01.json diff --git a/academic_observatory_workflows/database/schema/crossref_metadata_2018-04-01.json b/academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2018-04-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_metadata_2018-04-01.json rename to academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2018-04-01.json diff --git a/academic_observatory_workflows/database/schema/crossref_metadata_2020-09-01.json b/academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2020-09-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_metadata_2020-09-01.json rename to academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2020-09-01.json diff --git a/academic_observatory_workflows/database/schema/crossref_metadata_2021-01-01.json b/academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2021-01-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_metadata_2021-01-01.json rename to 
academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2021-01-01.json diff --git a/academic_observatory_workflows/database/schema/crossref_metadata_2021-07-07.json b/academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2021-07-07.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_metadata_2021-07-07.json rename to academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2021-07-07.json diff --git a/academic_observatory_workflows/database/schema/crossref_metadata_2021-08-07.json b/academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2021-08-07.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_metadata_2021-08-07.json rename to academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2021-08-07.json diff --git a/academic_observatory_workflows/database/schema/crossref_metadata_2021-11-07.json b/academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2021-11-07.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_metadata_2021-11-07.json rename to academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2021-11-07.json diff --git a/academic_observatory_workflows/database/schema/crossref_metadata_2022-03-07.json b/academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2022-03-07.json similarity index 100% rename from academic_observatory_workflows/database/schema/crossref_metadata_2022-03-07.json rename to academic_observatory_workflows/database/schema/crossref_metadata/crossref_metadata_2022-03-07.json diff --git a/academic_observatory_workflows/database/schema/aggregate_2021-11-25.json b/academic_observatory_workflows/database/schema/doi/aggregate_2021-11-25.json similarity index 100% rename from 
academic_observatory_workflows/database/schema/aggregate_2021-11-25.json rename to academic_observatory_workflows/database/schema/doi/aggregate_2021-11-25.json diff --git a/academic_observatory_workflows/database/schema/book.json b/academic_observatory_workflows/database/schema/doi/book.json similarity index 100% rename from academic_observatory_workflows/database/schema/book.json rename to academic_observatory_workflows/database/schema/doi/book.json diff --git a/academic_observatory_workflows/database/schema/country.json b/academic_observatory_workflows/database/schema/doi/country.json similarity index 100% rename from academic_observatory_workflows/database/schema/country.json rename to academic_observatory_workflows/database/schema/doi/country.json diff --git a/academic_observatory_workflows/database/schema/doi_2021-11-25.json b/academic_observatory_workflows/database/schema/doi/doi_2021-11-25.json similarity index 100% rename from academic_observatory_workflows/database/schema/doi_2021-11-25.json rename to academic_observatory_workflows/database/schema/doi/doi_2021-11-25.json diff --git a/academic_observatory_workflows/database/schema/groupings.json b/academic_observatory_workflows/database/schema/doi/groupings.json similarity index 100% rename from academic_observatory_workflows/database/schema/groupings.json rename to academic_observatory_workflows/database/schema/doi/groupings.json diff --git a/academic_observatory_workflows/database/schema/mag_affiliation_override.json b/academic_observatory_workflows/database/schema/doi/mag_affiliation_override.json similarity index 100% rename from academic_observatory_workflows/database/schema/mag_affiliation_override.json rename to academic_observatory_workflows/database/schema/doi/mag_affiliation_override.json diff --git a/academic_observatory_workflows/database/schema/repository.json b/academic_observatory_workflows/database/schema/doi/repository.json similarity index 100% rename from 
academic_observatory_workflows/database/schema/repository.json rename to academic_observatory_workflows/database/schema/doi/repository.json diff --git a/academic_observatory_workflows/database/schema/ror_to_home_url.json b/academic_observatory_workflows/database/schema/doi/ror_to_home_url.json similarity index 100% rename from academic_observatory_workflows/database/schema/ror_to_home_url.json rename to academic_observatory_workflows/database/schema/doi/ror_to_home_url.json diff --git a/academic_observatory_workflows/database/schema/geonames_2020-06-01.json b/academic_observatory_workflows/database/schema/geonames/geonames_2020-06-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/geonames_2020-06-01.json rename to academic_observatory_workflows/database/schema/geonames/geonames_2020-06-01.json diff --git a/academic_observatory_workflows/database/schema/grid_2015-09-22.json b/academic_observatory_workflows/database/schema/grid/grid_2015-09-22.json similarity index 100% rename from academic_observatory_workflows/database/schema/grid_2015-09-22.json rename to academic_observatory_workflows/database/schema/grid/grid_2015-09-22.json diff --git a/academic_observatory_workflows/database/schema/grid_2016-04-28.json b/academic_observatory_workflows/database/schema/grid/grid_2016-04-28.json similarity index 100% rename from academic_observatory_workflows/database/schema/grid_2016-04-28.json rename to academic_observatory_workflows/database/schema/grid/grid_2016-04-28.json diff --git a/academic_observatory_workflows/database/schema/grid_2020-10-06.json b/academic_observatory_workflows/database/schema/grid/grid_2020-10-06.json similarity index 100% rename from academic_observatory_workflows/database/schema/grid_2020-10-06.json rename to academic_observatory_workflows/database/schema/grid/grid_2020-10-06.json diff --git a/academic_observatory_workflows/database/schema/grid_2021-03-25.json 
b/academic_observatory_workflows/database/schema/grid/grid_2021-03-25.json similarity index 100% rename from academic_observatory_workflows/database/schema/grid_2021-03-25.json rename to academic_observatory_workflows/database/schema/grid/grid_2021-03-25.json diff --git a/academic_observatory_workflows/database/schema/MagAffiliations_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagAffiliations_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagAffiliations_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagAffiliations_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagAffiliations_2020-09-01.json b/academic_observatory_workflows/database/schema/mag/MagAffiliations_2020-09-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagAffiliations_2020-09-01.json rename to academic_observatory_workflows/database/schema/mag/MagAffiliations_2020-09-01.json diff --git a/academic_observatory_workflows/database/schema/MagAuthorExtendedAttributes_2020-07-02.json b/academic_observatory_workflows/database/schema/mag/MagAuthorExtendedAttributes_2020-07-02.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagAuthorExtendedAttributes_2020-07-02.json rename to academic_observatory_workflows/database/schema/mag/MagAuthorExtendedAttributes_2020-07-02.json diff --git a/academic_observatory_workflows/database/schema/MagAuthors_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagAuthors_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagAuthors_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagAuthors_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagConferenceInstances_2020-05-21.json 
b/academic_observatory_workflows/database/schema/mag/MagConferenceInstances_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagConferenceInstances_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagConferenceInstances_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagConferenceSeries_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagConferenceSeries_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagConferenceSeries_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagConferenceSeries_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagEntityRelatedEntities_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagEntityRelatedEntities_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagEntityRelatedEntities_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagEntityRelatedEntities_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagFieldOfStudyChildren_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagFieldOfStudyChildren_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagFieldOfStudyChildren_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagFieldOfStudyChildren_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagFieldOfStudyExtendedAttributes_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagFieldOfStudyExtendedAttributes_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagFieldOfStudyExtendedAttributes_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagFieldOfStudyExtendedAttributes_2020-05-21.json 
diff --git a/academic_observatory_workflows/database/schema/MagFieldsOfStudy_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagFieldsOfStudy_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagFieldsOfStudy_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagFieldsOfStudy_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagJournals_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagJournals_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagJournals_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagJournals_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperAbstractsInvertedIndex_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperAbstractsInvertedIndex_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperAbstractsInvertedIndex_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperAbstractsInvertedIndex_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperAuthorAffiliations_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperAuthorAffiliations_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperAuthorAffiliations_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperAuthorAffiliations_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperCitationContexts_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperCitationContexts_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperCitationContexts_2020-05-21.json rename to 
academic_observatory_workflows/database/schema/mag/MagPaperCitationContexts_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperExtendedAttributes_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperExtendedAttributes_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperExtendedAttributes_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperExtendedAttributes_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperFieldsOfStudy_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperFieldsOfStudy_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperFieldsOfStudy_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperFieldsOfStudy_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperLanguages_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperLanguages_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperLanguages_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperLanguages_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperMeSH_2020-09-25.json b/academic_observatory_workflows/database/schema/mag/MagPaperMeSH_2020-09-25.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperMeSH_2020-09-25.json rename to academic_observatory_workflows/database/schema/mag/MagPaperMeSH_2020-09-25.json diff --git a/academic_observatory_workflows/database/schema/MagPaperRecommendations_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperRecommendations_2020-05-21.json similarity index 100% rename from 
academic_observatory_workflows/database/schema/MagPaperRecommendations_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperRecommendations_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperReferences_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperReferences_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperReferences_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperReferences_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperResources_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperResources_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperResources_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperResources_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPaperUrls_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPaperUrls_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPaperUrls_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPaperUrls_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPapers_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagPapers_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPapers_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagPapers_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/MagPapers_2020-06-05.json b/academic_observatory_workflows/database/schema/mag/MagPapers_2020-06-05.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPapers_2020-06-05.json rename to 
academic_observatory_workflows/database/schema/mag/MagPapers_2020-06-05.json diff --git a/academic_observatory_workflows/database/schema/MagPapers_2020-09-01.json b/academic_observatory_workflows/database/schema/mag/MagPapers_2020-09-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPapers_2020-09-01.json rename to academic_observatory_workflows/database/schema/mag/MagPapers_2020-09-01.json diff --git a/academic_observatory_workflows/database/schema/MagPapers_2021-05-24.json b/academic_observatory_workflows/database/schema/mag/MagPapers_2021-05-24.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagPapers_2021-05-24.json rename to academic_observatory_workflows/database/schema/mag/MagPapers_2021-05-24.json diff --git a/academic_observatory_workflows/database/schema/MagRelatedFieldOfStudy_2020-05-21.json b/academic_observatory_workflows/database/schema/mag/MagRelatedFieldOfStudy_2020-05-21.json similarity index 100% rename from academic_observatory_workflows/database/schema/MagRelatedFieldOfStudy_2020-05-21.json rename to academic_observatory_workflows/database/schema/mag/MagRelatedFieldOfStudy_2020-05-21.json diff --git a/academic_observatory_workflows/database/schema/open_citations_2018-07-05.json b/academic_observatory_workflows/database/schema/open_citations/open_citations_2018-07-05.json similarity index 100% rename from academic_observatory_workflows/database/schema/open_citations_2018-07-05.json rename to academic_observatory_workflows/database/schema/open_citations/open_citations_2018-07-05.json diff --git a/academic_observatory_workflows/database/schema/open_citations_2018-11-12.json b/academic_observatory_workflows/database/schema/open_citations/open_citations_2018-11-12.json similarity index 100% rename from academic_observatory_workflows/database/schema/open_citations_2018-11-12.json rename to 
academic_observatory_workflows/database/schema/open_citations/open_citations_2018-11-12.json diff --git a/academic_observatory_workflows/database/schema/openalex/concepts.json b/academic_observatory_workflows/database/schema/openalex/concepts.json index 0f7dd6dce..1f149b27a 100644 --- a/academic_observatory_workflows/database/schema/openalex/concepts.json +++ b/academic_observatory_workflows/database/schema/openalex/concepts.json @@ -147,7 +147,6 @@ "name": "international", "type": "RECORD", "mode": "NULLABLE", - "description": "This concept's display name in many languages, derived from article titles on each language's wikipedia. See the Wikidata entry for \"Java Bytecode\" for example source data.", "fields": [ { "name": "display_name", @@ -157,18 +156,38 @@ "name": "keys", "type": "STRING", "mode": "REPEATED", - "description": "Custom field created by COKI. Originally each language was a key and the display name in that language the corresponding value." + "description": "The language codes in wikidata language code format." }, { "name": "values", "type": "STRING", "mode": "REPEATED", - "description": "Custom field created by COKI. Originally each language was a key and the display name in that language the corresponding value." + "description": "The translated display_names in each language." } ], - "description": "This concept's display name in many languages." + "description": "This concept's display name in many languages, derived from article titles on each language's wikipedia." + }, + { + "name": "description", + "type": "RECORD", + "fields": [ + { + "name": "keys", + "type": "STRING", + "mode": "REPEATED", + "description": "The language codes in wikidata language code format." + }, + { + "name": "values", + "type": "STRING", + "mode": "REPEATED", + "description": "The translated descriptions in each language." + } + ], + "description": "This concept's description in many languages, derived from article titles on each language's wikipedia." 
} - ] + ], + "description": "Translation of the display_name and description into multiple languages." }, { "name": "level", diff --git a/academic_observatory_workflows/database/schema/openalex/institutions.json b/academic_observatory_workflows/database/schema/openalex/institutions.json index 03f34d58c..34e1e4f7c 100644 --- a/academic_observatory_workflows/database/schema/openalex/institutions.json +++ b/academic_observatory_workflows/database/schema/openalex/institutions.json @@ -230,28 +230,29 @@ { "name": "display_name", "type": "RECORD", - "mode": "NULLABLE", "fields": [ { "name": "keys", "type": "STRING", "mode": "REPEATED", - "description": "Custom field created by COKI. Originally each language was a key and the display name in that language the corresponding value." + "description": "The language codes in wikidata language code format." }, { "name": "values", "type": "STRING", "mode": "REPEATED", - "description": "Custom field created by COKI. Originally each language was a key and the display name in that language the corresponding value." + "description": "The translated display_names in each language." } - ] + ], + "description": "The institution's display name in different languages. Derived from the wikipedia page for the institution in the given language." } ], - "description": "The institution's display name in different languages. Derived from the wikipedia page for the institution in the given language." + "description": "Translation of the display_name and description into multiple languages." 
}, { "name": "repositories", "type": "RECORD", + "mode": "REPEATED", "description": "Repositories (Sources with type: repository) that have this institution as their host_organization", "fields": [ { diff --git a/academic_observatory_workflows/database/schema/openalex/works.json b/academic_observatory_workflows/database/schema/openalex/works.json index 0d263338a..e6a21c1ea 100644 --- a/academic_observatory_workflows/database/schema/openalex/works.json +++ b/academic_observatory_workflows/database/schema/openalex/works.json @@ -19,89 +19,6 @@ ], "description": "The abstract of the work, as an inverted index, which encodes information about the abstract's words and their positions within the text. Like Microsoft Academic Graph, OpenAlex doesn't include plaintext abstracts due to legal constraints." }, - { - "name": "alternate_host_venues", - "type": "RECORD", - "mode": "REPEATED", - "fields": [ - { - "name": "display_name", - "type": "STRING", - "mode": "NULLABLE", - "description": "The name of the venue." - }, - { - "name": "host_organization", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "host_organization_name", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "id", - "type": "STRING", - "mode": "NULLABLE", - "description": "The OpenAlex ID for this venue." - }, - { - "name": "is_oa", - "type": "BOOLEAN", - "mode": "NULLABLE", - "description": "Set to true if the work hosted here can be read for free, without registration." - }, - { - "name": "issn", - "type": "STRING", - "mode": "REPEATED", - "description": "The ISSNs used by this venue. Many publications have multiple ISSNs (see above), so ISSN-L should be used when possible." - }, - { - "name": "issn_l", - "type": "STRING", - "mode": "NULLABLE", - "description": "The ISSN-L identifying this venue. ISSN is a global and unique ID for serial publications. However, different media versions of a given publication (e.g., print and electronic) often have different ISSNs. 
This is why we can't have nice things. The ISSN-L or Linking ISSN solves the problem by designating a single canonical ISSN for all media versions of the title. It's usually the same as the print ISSN." - }, - { - "name": "license", - "type": "STRING", - "mode": "NULLABLE", - "description": "The license applied to this work at this host. Most toll-access works don't have an explicit license (they're under \"all rights reserved\" copyright), so this field generally has content only if is_oa is true." - }, - { - "name": "publisher", - "type": "STRING", - "mode": "NULLABLE", - "description": "The name of this venue's publisher. Publisher is a tricky category, as journals often change publishers, publishers merge, publishers have subsidiaries (\"imprints\"), and of course no one is consistent in their naming. In the future, we plan to roll out support for a more structured publisher field, but for now it's just a string." - }, - { - "name": "publisher_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "type", - "type": "STRING", - "mode": "NULLABLE", - "description": "" - }, - { - "name": "url", - "type": "STRING", - "mode": "NULLABLE", - "description": "The URL where you can access this work." - }, - { - "name": "version", - "type": "STRING", - "mode": "NULLABLE", - "description": "The version of the work, based on the DRIVER Guidelines versioning scheme. Possible values are: -publishedVersion: The document\u2019s version of record. This is the most authoritative version. -acceptedVersion: The document after having completed peer review and being officially accepted for publication. It will lack publisher formatting, but the content should be interchangeable with the that of the publishedVersion. -submittedVersion: the document as submitted to the publisher by the authors, but before peer-review. Its content may differ significantly from that of the accepted article." - } - ], - "description": "List of HostVenue objects describing places this work lives. 
This work's primary hosting venue isn't in this list; it's at host_venue. Some venues in this list are missing the id field! This should be fixed by February 2022." - }, { "name": "authorships", "type": "RECORD", @@ -455,74 +372,6 @@ } ] }, - { - "name": "host_venue", - "type": "RECORD", - "mode": "NULLABLE", - "fields": [ - { - "name": "display_name", - "type": "STRING", - "description": "The name of the venue.", - "mode": "NULLABLE" - }, - { - "name": "id", - "type": "STRING", - "description": "The OpenAlex ID for this venue.", - "mode": "NULLABLE" - }, - { - "name": "is_oa", - "type": "BOOLEAN", - "description": "Set to true if the work hosted here can be read for free, without registration.", - "mode": "NULLABLE" - }, - { - "name": "issn", - "type": "STRING", - "mode": "REPEATED", - "description": "The ISSNs used by this venue. Many publications have multiple ISSNs (see above), so ISSN-L should be used when possible." - }, - { - "name": "issn_l", - "type": "STRING", - "mode": "NULLABLE", - "description": "The ISSN-L identifying this venue. ISSN is a global and unique ID for serial publications. However, different media versions of a given publication (e.g., print and electronic) often have different ISSNs. This is why we can't have nice things. The ISSN-L or Linking ISSN solves the problem by designating a single canonical ISSN for all media versions of the title. It's usually the same as the print ISSN." - }, - { - "name": "license", - "type": "STRING", - "mode": "NULLABLE", - "description": "The license applied to this work at this host. Most toll-access works don't have an explicit license (they're under \"all rights reserved\" copyright), so this field generally has content only if is_oa is true." - }, - { - "name": "publisher", - "type": "STRING", - "mode": "NULLABLE", - "description": "The name of this venue's publisher. 
Publisher is a tricky category, as journals often change publishers, publishers merge, publishers have subsidiaries (\"imprints\"), and of course no one is consistent in their naming. In the future, we plan to roll out support for a more structured publisher field, but for now it's just a string." - }, - { - "name": "type", - "type": "STRING", - "mode": "NULLABLE", - "description": "" - }, - { - "name": "url", - "type": "STRING", - "mode": "NULLABLE", - "description": "The URL where you can access this work." - }, - { - "name": "version", - "type": "STRING", - "mode": "NULLABLE", - "description": "The version of the work, based on the DRIVER Guidelines versioning scheme. Possible values are: -publishedVersion: The document\u2019s version of record. This is the most authoritative version. -acceptedVersion: The document after having completed peer review and being officially accepted for publication. It will lack publisher formatting, but the content should be interchangeable with the that of the publishedVersion. -submittedVersion: the document as submitted to the publisher by the authors, but before peer-review. Its content may differ significantly from that of the accepted article." - } - ], - "description": "A HostVenue object describing how and where this work is being hosted online." - }, { "name": "id", "type": "STRING", @@ -587,22 +436,26 @@ { "name": "is_oa", "type": "BOOLEAN", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "True if this work is Open Access (OA)." }, { "name": "landing_page_url", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The landing page URL for this location." }, { "name": "license", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The location's publishing license. This can be a Creative Commons license such as cc0 or cc-by, a publisher-specific license, or null which means we are not able to determine a license for this location."
}, { "name": "pdf_url", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "A URL where you can find this location as a PDF." }, { "name": "source", @@ -612,56 +465,67 @@ { "name": "display_name", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The name of the source." }, { "name": "host_organization", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The host organization for this source as an OpenAlex ID. This will be an Institution.id if the source is a repository, and a Publisher.id if the source is a journal, conference, or eBook platform (based on the type field)." }, { "name": "host_organization_name", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The display_name from the host_organization, shown for convenience." }, { "name": "id", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The OpenAlex ID for this source." }, { "name": "issn", "type": "STRING", - "mode": "REPEATED" + "mode": "REPEATED", + "description": "The ISSNs used by this source. Many publications have multiple ISSNs (see above), so ISSN-L should be used when possible." }, { "name": "issn_l", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The ISSN-L identifying this source. This is the Canonical External ID for sources." }, { "name": "publisher", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The publisher name." }, { "name": "publisher_id", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The OpenAlex publisher ID." }, { "name": "type", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The type of source." } ] }, { "name": "version", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The version of the work, based on the DRIVER Guidelines versioning scheme. 
Possible values are: publishedVersion: The document’s version of record. This is the most authoritative version.\nacceptedVersion: The document after having completed peer review and being officially accepted for publication. It will lack publisher formatting, but the content should be interchangeable with that of the publishedVersion.\nsubmittedVersion: the document as submitted to the publisher by the authors, but before peer-review. Its content may differ significantly from that of the accepted article." } - ] + ], + "description": "A list of Location objects describing all unique places where this work lives." }, { "name": "mesh", @@ -730,22 +594,26 @@ { "name": "is_oa", "type": "BOOLEAN", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "True if this work is Open Access (OA)." }, { "name": "landing_page_url", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The landing page URL for this location." }, { "name": "license", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The location's publishing license. This can be a Creative Commons license such as cc0 or cc-by, a publisher-specific license, or null which means we are not able to determine a license for this location." }, { "name": "pdf_url", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "A URL where you can find this location as a PDF." }, { "name": "source", @@ -755,56 +623,67 @@ { "name": "display_name", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The name of the source." }, { "name": "host_organization", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The host organization for this source as an OpenAlex ID. This will be an Institution.id if the source is a repository, and a Publisher.id if the source is a journal, conference, or eBook platform (based on the type field)."
}, { "name": "host_organization_name", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The display_name from the host_organization, shown for convenience." }, { "name": "id", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The OpenAlex ID for this source." }, { "name": "issn", "type": "STRING", - "mode": "REPEATED" + "mode": "REPEATED", + "description": "The ISSNs used by this source. Many publications have multiple ISSNs (see above), so ISSN-L should be used when possible." }, { "name": "issn_l", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The ISSN-L identifying this source. This is the Canonical External ID for sources." }, { "name": "publisher", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The publisher name." }, { "name": "publisher_id", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The OpenAlex ID of the publisher." }, { "name": "type", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The type of source." } ] }, { "name": "version", "type": "STRING", - "mode": "NULLABLE" + "mode": "NULLABLE", + "description": "The version of the work, based on the DRIVER Guidelines versioning scheme. Possible values are:\npublishedVersion: The document’s version of record. This is the most authoritative version.\nacceptedVersion: The document after having completed peer review and being officially accepted for publication. It will lack publisher formatting, but the content should be interchangeable with that of the publishedVersion.\nsubmittedVersion: the document as submitted to the publisher by the authors, but before peer-review. Its content may differ significantly from that of the accepted article." } - ] + ], + "description": "A Location object with the primary location of this work."
}, { "name": "publication_date", diff --git a/academic_observatory_workflows/database/schema/orcid_2020-01-01.json b/academic_observatory_workflows/database/schema/orcid/orcid_2020-01-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/orcid_2020-01-01.json rename to academic_observatory_workflows/database/schema/orcid/orcid_2020-01-01.json diff --git a/academic_observatory_workflows/database/schema/ror_2021-09-01.json b/academic_observatory_workflows/database/schema/ror/ror_2021-09-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/ror_2021-09-01.json rename to academic_observatory_workflows/database/schema/ror/ror_2021-09-01.json diff --git a/academic_observatory_workflows/database/schema/scopus_2020-09-01.json b/academic_observatory_workflows/database/schema/scopus/scopus_2020-09-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/scopus_2020-09-01.json rename to academic_observatory_workflows/database/schema/scopus/scopus_2020-09-01.json diff --git a/academic_observatory_workflows/database/schema/unpaywall.json b/academic_observatory_workflows/database/schema/unpaywall/unpaywall.json similarity index 100% rename from academic_observatory_workflows/database/schema/unpaywall.json rename to academic_observatory_workflows/database/schema/unpaywall/unpaywall.json diff --git a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2018-09-24.json b/academic_observatory_workflows/database/schema/unpaywall_snapshot_2018-09-24.json deleted file mode 100644 index 13db018e8..000000000 --- a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2018-09-24.json +++ /dev/null @@ -1,339 +0,0 @@ -[ - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. 
Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. 
Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." - }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - } - ], - "mode": "NULLABLE", - "name": "best_oa_location", - "type": "RECORD", - "description": "The best OA Location Object we could find for this DOI. The \"best\" location is determined using an algorithm that prioritizes publisher-hosted content first (eg Hybrid or Gold), then prioritizes versions closer to the version of record (PublishedVersion over AcceptedVersion), then more authoritative repositories (PubMed Central over CiteSeerX). Returns null if we couldn't find any OA Locations." - }, - { - "mode": "NULLABLE", - "name": "data_standard", - "type": "INTEGER", - "description": "Indicates the data collection approaches used for this resource. Possible values: '1' First-generation hybrid detection. 
Uses only data from the Crossref API to determine hybrid status. Does a good job for Elsevier articles and a few other publishers, but most publishers are not checked for hybrid. '2' Second-generation hybrid detection. Uses additional sources, checks all publishers for hybrid. Gets about 10x as much hybrid. data_standard==2 is the version used in the paper we wrote about the dataset." - }, - { - "mode": "NULLABLE", - "name": "doi", - "type": "STRING", - "description": "The DOI of this resource. This is always lowercase." - }, - { - "mode": "NULLABLE", - "name": "doi_url", - "type": "STRING", - "description": "The DOI in hyperlink form. This field simply contains \"https://doi.org/\" prepended to the doi field. It expresses the DOI in its correct format according to the Crossref DOI display guidelines." - }, - { - "mode": "NULLABLE", - "name": "genre", - "type": "STRING", - "description": "The type of resource. Currently the genre is identical to the Crossref-reported type of a given resource. The \"journal-article\" type is most common, but there are many others." - }, - { - "mode": "NULLABLE", - "name": "is_oa", - "type": "BOOLEAN", - "description": "Is there an OA copy of this resource. Convenience attribute; returns true when best_oa_location is not null." - }, - { - "mode": "NULLABLE", - "name": "journal_is_in_doaj", - "type": "BOOLEAN", - "description": "Is this resource published in a DOAJ-indexed journal. Useful for defining whether a resource is Gold OA (depending on your definition, see also journal_is_oa)." - }, - { - "mode": "NULLABLE", - "name": "journal_is_oa", - "type": "BOOLEAN", - "description": "Is this resource published in a completely OA journal.\tUseful for defining whether a resource is Gold OA. Includes any fully-OA journal, regardless of inclusion in DOAJ. This includes journals by all-OA publishers and journals that would otherwise be all Hybrid or Bronze OA." 
- }, - { - "mode": "NULLABLE", - "name": "journal_issns", - "type": "STRING", - "description": "Any ISSNs assigned to the journal publishing this resource. Separate ISSNs are sometimes assigned to print and electronic versions of the same journal. If there are multiple ISSNs, they are separated by commas. Example: 1232-1203,1532-6203" - }, - { - "mode": "NULLABLE", - "name": "journal_issn_l", - "type": "STRING", - "description": "A single ISSN for the journal publishing this resource. An ISSN-L can be used as a primary key for a journal when more than one ISSN is assigned to it. Resources' journal_issns are mapped to ISSN-Ls using the issn.org table, with some manual corrections." - }, - { - "mode": "NULLABLE", - "name": "journal_name", - "type": "STRING", - "description": "The name of the journal publishing this resource. The same journal may have multiple name strings (eg, \"J. Foo\", \"Journal of Foo\", \"JOURNAL OF FOO\", etc). These have not been fully normalized within our database, so use with care." - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. 
See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. 
We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." - }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "oa_locations", - "type": "RECORD", - "description": "List of all the OA Location objects associated with this resource. This list is unnecessary for the vast majority of use-cases, since you probably just want the best_oa_location. It's included primarily for research purposes." - }, - { - "mode": "NULLABLE", - "name": "oa_status", - "type": "STRING", - "description": "The OA status, or color, of this resource. Classifies OA resources by location and license terms as one of: gold, hybrid, bronze, green or closed. See here for more information on how we assign an oa_status: https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean-" - }, - { - "mode": "NULLABLE", - "name": "published_date", - "type": "DATE", - "description": "The date this resource was published. As reported by the publishers, who unfortunately have inconsistent definitions of what counts as officially \"published.\" Returned as an ISO8601-formatted timestamp, generally with only year-month-day." - }, - { - "mode": "NULLABLE", - "name": "publisher", - "type": "STRING", - "description": "The name of this resource's publisher. Keep in mind that publisher name strings change over time, particularly as publishers are acquired or split up." - }, - { - "mode": "NULLABLE", - "name": "title", - "type": "STRING", - "description": "The title of this resource." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this resource was last updated. 
Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "year", - "type": "INTEGER", - "description": "The year this resource was published. Just the year part of the published_date" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "family", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "given", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "ORCID", - "type": "STRING", - "description": "URL-form of an ORCID identifier" - }, - { - "mode": "NULLABLE", - "name": "authenticated_orcid", - "type": "BOOLEAN", - "description": "If true, record owner asserts that the ORCID user completed ORCID OAuth authentication" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "affiliation", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "sequence", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "suffix", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "z_authors", - "type": "RECORD", - "description": "The authors of this resource. These are formatted as a list of Crossref Contributor objects, which are described in the Crossref API docs here: https://github.com/CrossRef/rest-api-doc/blob/master/api_format.md#contributor" - }, - { - "mode": "NULLABLE", - "name": "has_repository_copy", - "type": "BOOLEAN", - "description": "Is a full-text available in a repository?" 
- }, - { - "mode": "NULLABLE", - "name": "issn_l", - "type": "STRING" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "blank", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "x_reported_noncompliant_copies", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "x_error", - "type": "BOOLEAN" - } -] \ No newline at end of file diff --git a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2019-11-22.json b/academic_observatory_workflows/database/schema/unpaywall_snapshot_2019-11-22.json deleted file mode 100644 index 03d74a0e9..000000000 --- a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2019-11-22.json +++ /dev/null @@ -1,349 +0,0 @@ -[ - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. 
Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." 
- }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "NULLABLE", - "name": "best_oa_location", - "type": "RECORD", - "description": "The best OA Location Object we could find for this DOI. The \"best\" location is determined using an algorithm that prioritizes publisher-hosted content first (eg Hybrid or Gold), then prioritizes versions closer to the version of record (PublishedVersion over AcceptedVersion), then more authoritative repositories (PubMed Central over CiteSeerX). Returns null if we couldn't find any OA Locations." - }, - { - "mode": "NULLABLE", - "name": "data_standard", - "type": "INTEGER", - "description": "Indicates the data collection approaches used for this resource. Possible values: '1' First-generation hybrid detection. Uses only data from the Crossref API to determine hybrid status. Does a good job for Elsevier articles and a few other publishers, but most publishers are not checked for hybrid. '2' Second-generation hybrid detection. Uses additional sources, checks all publishers for hybrid. Gets about 10x as much hybrid. data_standard==2 is the version used in the paper we wrote about the dataset." - }, - { - "mode": "NULLABLE", - "name": "doi", - "type": "STRING", - "description": "The DOI of this resource. This is always lowercase." - }, - { - "mode": "NULLABLE", - "name": "doi_url", - "type": "STRING", - "description": "The DOI in hyperlink form. This field simply contains \"https://doi.org/\" prepended to the doi field. It expresses the DOI in its correct format according to the Crossref DOI display guidelines." - }, - { - "mode": "NULLABLE", - "name": "genre", - "type": "STRING", - "description": "The type of resource. Currently the genre is identical to the Crossref-reported type of a given resource. 
The \"journal-article\" type is most common, but there are many others." - }, - { - "mode": "NULLABLE", - "name": "is_oa", - "type": "BOOLEAN", - "description": "Is there an OA copy of this resource. Convenience attribute; returns true when best_oa_location is not null." - }, - { - "mode": "NULLABLE", - "name": "journal_is_in_doaj", - "type": "BOOLEAN", - "description": "Is this resource published in a DOAJ-indexed journal. Useful for defining whether a resource is Gold OA (depending on your definition, see also journal_is_oa)." - }, - { - "mode": "NULLABLE", - "name": "journal_is_oa", - "type": "BOOLEAN", - "description": "Is this resource published in a completely OA journal.\tUseful for defining whether a resource is Gold OA. Includes any fully-OA journal, regardless of inclusion in DOAJ. This includes journals by all-OA publishers and journals that would otherwise be all Hybrid or Bronze OA." - }, - { - "mode": "NULLABLE", - "name": "journal_issns", - "type": "STRING", - "description": "Any ISSNs assigned to the journal publishing this resource. Separate ISSNs are sometimes assigned to print and electronic versions of the same journal. If there are multiple ISSNs, they are separated by commas. Example: 1232-1203,1532-6203" - }, - { - "mode": "NULLABLE", - "name": "journal_issn_l", - "type": "STRING", - "description": "A single ISSN for the journal publishing this resource. An ISSN-L can be used as a primary key for a journal when more than one ISSN is assigned to it. Resources' journal_issns are mapped to ISSN-Ls using the issn.org table, with some manual corrections." - }, - { - "mode": "NULLABLE", - "name": "journal_name", - "type": "STRING", - "description": "The name of the journal publishing this resource. The same journal may have multiple name strings (eg, \"J. Foo\", \"Journal of Foo\", \"JOURNAL OF FOO\", etc). These have not been fully normalized within our database, so use with care." 
- }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. 
Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." - }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "oa_locations", - "type": "RECORD", - "description": "List of all the OA Location objects associated with this resource. This list is unnecessary for the vast majority of use-cases, since you probably just want the best_oa_location. It's included primarily for research purposes." - }, - { - "mode": "NULLABLE", - "name": "oa_status", - "type": "STRING", - "description": "The OA status, or color, of this resource. Classifies OA resources by location and license terms as one of: gold, hybrid, bronze, green or closed. 
See here for more information on how we assign an oa_status: https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean-" - }, - { - "mode": "NULLABLE", - "name": "published_date", - "type": "DATE", - "description": "The date this resource was published. As reported by the publishers, who unfortunately have inconsistent definitions of what counts as officially \"published.\" Returned as an ISO8601-formatted timestamp, generally with only year-month-day." - }, - { - "mode": "NULLABLE", - "name": "publisher", - "type": "STRING", - "description": "The name of this resource's publisher. Keep in mind that publisher name strings change over time, particularly as publishers are acquired or split up." - }, - { - "mode": "NULLABLE", - "name": "title", - "type": "STRING", - "description": "The title of this resource." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this resource was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "year", - "type": "INTEGER", - "description": "The year this resource was published. 
Just the year part of the published_date" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "family", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "given", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "ORCID", - "type": "STRING", - "description": "URL-form of an ORCID identifier" - }, - { - "mode": "NULLABLE", - "name": "authenticated_orcid", - "type": "BOOLEAN", - "description": "If true, record owner asserts that the ORCID user completed ORCID OAuth authentication" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "affiliation", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "sequence", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "suffix", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "z_authors", - "type": "RECORD", - "description": "The authors of this resource. These are formatted as a list of Crossref Contributor objects, which are described in the Crossref API docs here: https://github.com/CrossRef/rest-api-doc/blob/master/api_format.md#contributor" - }, - { - "mode": "NULLABLE", - "name": "has_repository_copy", - "type": "BOOLEAN", - "description": "Is a full-text available in a repository?" 
- }, - { - "mode": "NULLABLE", - "name": "issn_l", - "type": "STRING" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "blank", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "x_reported_noncompliant_copies", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "x_error", - "type": "BOOLEAN" - } -] \ No newline at end of file diff --git a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2020-02-25.json b/academic_observatory_workflows/database/schema/unpaywall_snapshot_2020-02-25.json deleted file mode 100644 index 66d83536f..000000000 --- a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2020-02-25.json +++ /dev/null @@ -1,355 +0,0 @@ -[ - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. 
Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." 
- }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "NULLABLE", - "name": "best_oa_location", - "type": "RECORD", - "description": "The best OA Location Object we could find for this DOI. The \"best\" location is determined using an algorithm that prioritizes publisher-hosted content first (eg Hybrid or Gold), then prioritizes versions closer to the version of record (PublishedVersion over AcceptedVersion), then more authoritative repositories (PubMed Central over CiteSeerX). Returns null if we couldn't find any OA Locations." - }, - { - "mode": "NULLABLE", - "name": "data_standard", - "type": "INTEGER", - "description": "Indicates the data collection approaches used for this resource. Possible values: '1' First-generation hybrid detection. Uses only data from the Crossref API to determine hybrid status. Does a good job for Elsevier articles and a few other publishers, but most publishers are not checked for hybrid. '2' Second-generation hybrid detection. Uses additional sources, checks all publishers for hybrid. Gets about 10x as much hybrid. data_standard==2 is the version used in the paper we wrote about the dataset." - }, - { - "mode": "NULLABLE", - "name": "doi", - "type": "STRING", - "description": "The DOI of this resource. This is always lowercase." - }, - { - "mode": "NULLABLE", - "name": "doi_url", - "type": "STRING", - "description": "The DOI in hyperlink form. This field simply contains \"https://doi.org/\" prepended to the doi field. It expresses the DOI in its correct format according to the Crossref DOI display guidelines." - }, - { - "mode": "NULLABLE", - "name": "genre", - "type": "STRING", - "description": "The type of resource. Currently the genre is identical to the Crossref-reported type of a given resource. 
The \"journal-article\" type is most common, but there are many others." - }, - { - "mode": "NULLABLE", - "name": "is_paratext", - "type": "BOOLEAN", - "description": "Is the item an ancillary part of a journal, like a table of contents? See here for more information on how we determine whether an article is paratext: https://support.unpaywall.org/support/solutions/articles/44001894783." - }, - { - "mode": "NULLABLE", - "name": "is_oa", - "type": "BOOLEAN", - "description": "Is there an OA copy of this resource. Convenience attribute; returns true when best_oa_location is not null." - }, - { - "mode": "NULLABLE", - "name": "journal_is_in_doaj", - "type": "BOOLEAN", - "description": "Is this resource published in a DOAJ-indexed journal. Useful for defining whether a resource is Gold OA (depending on your definition, see also journal_is_oa)." - }, - { - "mode": "NULLABLE", - "name": "journal_is_oa", - "type": "BOOLEAN", - "description": "Is this resource published in a completely OA journal.\tUseful for defining whether a resource is Gold OA. Includes any fully-OA journal, regardless of inclusion in DOAJ. This includes journals by all-OA publishers and journals that would otherwise be all Hybrid or Bronze OA." - }, - { - "mode": "NULLABLE", - "name": "journal_issns", - "type": "STRING", - "description": "Any ISSNs assigned to the journal publishing this resource. Separate ISSNs are sometimes assigned to print and electronic versions of the same journal. If there are multiple ISSNs, they are separated by commas. Example: 1232-1203,1532-6203" - }, - { - "mode": "NULLABLE", - "name": "journal_issn_l", - "type": "STRING", - "description": "A single ISSN for the journal publishing this resource. An ISSN-L can be used as a primary key for a journal when more than one ISSN is assigned to it. Resources' journal_issns are mapped to ISSN-Ls using the issn.org table, with some manual corrections." 
- }, - { - "mode": "NULLABLE", - "name": "journal_name", - "type": "STRING", - "description": "The name of the journal publishing this resource. The same journal may have multiple name strings (eg, \"J. Foo\", \"Journal of Foo\", \"JOURNAL OF FOO\", etc). These have not been fully normalized within our database, so use with care." - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. 
When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." 
- }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "oa_locations", - "type": "RECORD", - "description": "List of all the OA Location objects associated with this resource. This list is unnecessary for the vast majority of use-cases, since you probably just want the best_oa_location. It's included primarily for research purposes." - }, - { - "mode": "NULLABLE", - "name": "oa_status", - "type": "STRING", - "description": "The OA status, or color, of this resource. Classifies OA resources by location and license terms as one of: gold, hybrid, bronze, green or closed. See here for more information on how we assign an oa_status: https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean-" - }, - { - "mode": "NULLABLE", - "name": "published_date", - "type": "DATE", - "description": "The date this resource was published. As reported by the publishers, who unfortunately have inconsistent definitions of what counts as officially \"published.\" Returned as an ISO8601-formatted timestamp, generally with only year-month-day." - }, - { - "mode": "NULLABLE", - "name": "publisher", - "type": "STRING", - "description": "The name of this resource's publisher. Keep in mind that publisher name strings change over time, particularly as publishers are acquired or split up." - }, - { - "mode": "NULLABLE", - "name": "title", - "type": "STRING", - "description": "The title of this resource." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this resource was last updated. Returned as an ISO8601-formatted timestamp. 
Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "year", - "type": "INTEGER", - "description": "The year this resource was published. Just the year part of the published_date" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "family", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "given", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "ORCID", - "type": "STRING", - "description": "URL-form of an ORCID identifier" - }, - { - "mode": "NULLABLE", - "name": "authenticated_orcid", - "type": "BOOLEAN", - "description": "If true, record owner asserts that the ORCID user completed ORCID OAuth authentication" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "affiliation", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "sequence", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "suffix", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "z_authors", - "type": "RECORD", - "description": "The authors of this resource. These are formatted as a list of Crossref Contributor objects, which are described in the Crossref API docs here: https://github.com/CrossRef/rest-api-doc/blob/master/api_format.md#contributor" - }, - { - "mode": "NULLABLE", - "name": "has_repository_copy", - "type": "BOOLEAN", - "description": "Is a full-text available in a repository?" 
- }, - { - "mode": "NULLABLE", - "name": "issn_l", - "type": "STRING" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "blank", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "x_reported_noncompliant_copies", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "x_error", - "type": "BOOLEAN" - } -] \ No newline at end of file diff --git a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2020-10-06.json b/academic_observatory_workflows/database/schema/unpaywall_snapshot_2020-10-06.json deleted file mode 100644 index d75c34f18..000000000 --- a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2020-10-06.json +++ /dev/null @@ -1,456 +0,0 @@ -[ - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. 
Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "oa_date", - "type": "DATE", - "description": "When this document first became available at this location. oa_date is calculated differently for different host types and is not available for all oa_locations. See https://support.unpaywall.org/a/solutions/articles/44002063719 for details." - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. 
We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." - }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "NULLABLE", - "name": "best_oa_location", - "type": "RECORD", - "description": "The best OA Location Object we could find for this DOI. The \"best\" location is determined using an algorithm that prioritizes publisher-hosted content first (eg Hybrid or Gold), then prioritizes versions closer to the version of record (PublishedVersion over AcceptedVersion), then more authoritative repositories (PubMed Central over CiteSeerX). Returns null if we couldn't find any OA Locations." - }, - { - "mode": "NULLABLE", - "name": "data_standard", - "type": "INTEGER", - "description": "Indicates the data collection approaches used for this resource. Possible values: '1' First-generation hybrid detection. Uses only data from the Crossref API to determine hybrid status. Does a good job for Elsevier articles and a few other publishers, but most publishers are not checked for hybrid. '2' Second-generation hybrid detection. Uses additional sources, checks all publishers for hybrid. Gets about 10x as much hybrid. data_standard==2 is the version used in the paper we wrote about the dataset." - }, - { - "mode": "NULLABLE", - "name": "doi", - "type": "STRING", - "description": "The DOI of this resource. This is always lowercase." - }, - { - "mode": "NULLABLE", - "name": "doi_url", - "type": "STRING", - "description": "The DOI in hyperlink form. This field simply contains \"https://doi.org/\" prepended to the doi field. It expresses the DOI in its correct format according to the Crossref DOI display guidelines." 
- }, - { - "mode": "NULLABLE", - "name": "genre", - "type": "STRING", - "description": "The type of resource. Currently the genre is identical to the Crossref-reported type of a given resource. The \"journal-article\" type is most common, but there are many others." - }, - { - "mode": "NULLABLE", - "name": "is_paratext", - "type": "BOOLEAN", - "description": "Is the item an ancillary part of a journal, like a table of contents? See here for more information on how we determine whether an article is paratext: https://support.unpaywall.org/support/solutions/articles/44001894783." - }, - { - "mode": "NULLABLE", - "name": "is_oa", - "type": "BOOLEAN", - "description": "Is there an OA copy of this resource. Convenience attribute; returns true when best_oa_location is not null." - }, - { - "mode": "NULLABLE", - "name": "journal_is_in_doaj", - "type": "BOOLEAN", - "description": "Is this resource published in a DOAJ-indexed journal. Useful for defining whether a resource is Gold OA (depending on your definition, see also journal_is_oa)." - }, - { - "mode": "NULLABLE", - "name": "journal_is_oa", - "type": "BOOLEAN", - "description": "Is this resource published in a completely OA journal.\tUseful for defining whether a resource is Gold OA. Includes any fully-OA journal, regardless of inclusion in DOAJ. This includes journals by all-OA publishers and journals that would otherwise be all Hybrid or Bronze OA." - }, - { - "mode": "NULLABLE", - "name": "journal_issns", - "type": "STRING", - "description": "Any ISSNs assigned to the journal publishing this resource. Separate ISSNs are sometimes assigned to print and electronic versions of the same journal. If there are multiple ISSNs, they are separated by commas. Example: 1232-1203,1532-6203" - }, - { - "mode": "NULLABLE", - "name": "journal_issn_l", - "type": "STRING", - "description": "A single ISSN for the journal publishing this resource. 
An ISSN-L can be used as a primary key for a journal when more than one ISSN is assigned to it. Resources' journal_issns are mapped to ISSN-Ls using the issn.org table, with some manual corrections." - }, - { - "mode": "NULLABLE", - "name": "journal_name", - "type": "STRING", - "description": "The name of the journal publishing this resource. The same journal may have multiple name strings (eg, \"J. Foo\", \"Journal of Foo\", \"JOURNAL OF FOO\", etc). These have not been fully normalized within our database, so use with care." - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. 
When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "oa_date", - "type": "DATE", - "description": "When this document first became available at this location. oa_date is calculated differently for different host types and is not available for all oa_locations. See https://support.unpaywall.org/a/solutions/articles/44002063719 for details." - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." 
- }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "oa_locations", - "type": "RECORD", - "description": "List of all the OA Location objects associated with this resource. This list is unnecessary for the vast majority of use-cases, since you probably just want the best_oa_location. It's included primarily for research purposes." - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. 
When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "oa_date", - "type": "DATE", - "description": "When this document first became available at this location. oa_date is calculated differently for different host types and is not available for all oa_locations. See https://support.unpaywall.org/a/solutions/articles/44002063719 for details." - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." 
- }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "NULLABLE", - "name": "first_oa_location", - "type": "RECORD", - "description": "The OA Location Object with the earliest oa_date. Returns null if we couldn't find any OA Locations." - }, - { - "mode": "NULLABLE", - "name": "oa_status", - "type": "STRING", - "description": "The OA status, or color, of this resource. Classifies OA resources by location and license terms as one of: gold, hybrid, bronze, green or closed. See here for more information on how we assign an oa_status: https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean-" - }, - { - "mode": "NULLABLE", - "name": "published_date", - "type": "DATE", - "description": "The date this resource was published. As reported by the publishers, who unfortunately have inconsistent definitions of what counts as officially \"published.\" Returned as an ISO8601-formatted timestamp, generally with only year-month-day." - }, - { - "mode": "NULLABLE", - "name": "publisher", - "type": "STRING", - "description": "The name of this resource's publisher. Keep in mind that publisher name strings change over time, particularly as publishers are acquired or split up." - }, - { - "mode": "NULLABLE", - "name": "title", - "type": "STRING", - "description": "The title of this resource." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this resource was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "year", - "type": "INTEGER", - "description": "The year this resource was published. 
Just the year part of the published_date" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "family", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "given", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "ORCID", - "type": "STRING", - "description": "URL-form of an ORCID identifier" - }, - { - "mode": "NULLABLE", - "name": "authenticated_orcid", - "type": "BOOLEAN", - "description": "If true, record owner asserts that the ORCID user completed ORCID OAuth authentication" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "affiliation", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "sequence", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "suffix", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "z_authors", - "type": "RECORD", - "description": "The authors of this resource. These are formatted as a list of Crossref Contributor objects, which are described in the Crossref API docs here: https://github.com/CrossRef/rest-api-doc/blob/master/api_format.md#contributor" - }, - { - "mode": "NULLABLE", - "name": "has_repository_copy", - "type": "BOOLEAN", - "description": "Is a full-text available in a repository?" 
- }, - { - "mode": "NULLABLE", - "name": "issn_l", - "type": "STRING" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "blank", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "x_reported_noncompliant_copies", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "x_error", - "type": "BOOLEAN" - } -] \ No newline at end of file diff --git a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2021-02-18.json b/academic_observatory_workflows/database/schema/unpaywall_snapshot_2021-02-18.json deleted file mode 100644 index a3a76efb9..000000000 --- a/academic_observatory_workflows/database/schema/unpaywall_snapshot_2021-02-18.json +++ /dev/null @@ -1,540 +0,0 @@ -[ - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. 
Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "oa_date", - "type": "DATE", - "description": "When this document first became available at this location. oa_date is calculated differently for different host types and is not available for all oa_locations. See https://support.unpaywall.org/a/solutions/articles/44002063719 for details." - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. 
We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." - }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "NULLABLE", - "name": "best_oa_location", - "type": "RECORD", - "description": "The best OA Location Object we could find for this DOI. The \"best\" location is determined using an algorithm that prioritizes publisher-hosted content first (eg Hybrid or Gold), then prioritizes versions closer to the version of record (PublishedVersion over AcceptedVersion), then more authoritative repositories (PubMed Central over CiteSeerX). Returns null if we couldn't find any OA Locations." - }, - { - "mode": "NULLABLE", - "name": "data_standard", - "type": "INTEGER", - "description": "Indicates the data collection approaches used for this resource. Possible values: '1' First-generation hybrid detection. Uses only data from the Crossref API to determine hybrid status. Does a good job for Elsevier articles and a few other publishers, but most publishers are not checked for hybrid. '2' Second-generation hybrid detection. Uses additional sources, checks all publishers for hybrid. Gets about 10x as much hybrid. data_standard==2 is the version used in the paper we wrote about the dataset." - }, - { - "mode": "NULLABLE", - "name": "doi", - "type": "STRING", - "description": "The DOI of this resource. This is always lowercase." - }, - { - "mode": "NULLABLE", - "name": "doi_url", - "type": "STRING", - "description": "The DOI in hyperlink form. This field simply contains \"https://doi.org/\" prepended to the doi field. It expresses the DOI in its correct format according to the Crossref DOI display guidelines." 
- }, - { - "mode": "NULLABLE", - "name": "genre", - "type": "STRING", - "description": "The type of resource. Currently the genre is identical to the Crossref-reported type of a given resource. The \"journal-article\" type is most common, but there are many others." - }, - { - "mode": "NULLABLE", - "name": "is_paratext", - "type": "BOOLEAN", - "description": "Is the item an ancillary part of a journal, like a table of contents? See here for more information on how we determine whether an article is paratext: https://support.unpaywall.org/support/solutions/articles/44001894783." - }, - { - "mode": "NULLABLE", - "name": "is_oa", - "type": "BOOLEAN", - "description": "Is there an OA copy of this resource. Convenience attribute; returns true when best_oa_location is not null." - }, - { - "mode": "NULLABLE", - "name": "journal_is_in_doaj", - "type": "BOOLEAN", - "description": "Is this resource published in a DOAJ-indexed journal. Useful for defining whether a resource is Gold OA (depending on your definition, see also journal_is_oa)." - }, - { - "mode": "NULLABLE", - "name": "journal_is_oa", - "type": "BOOLEAN", - "description": "Is this resource published in a completely OA journal.\tUseful for defining whether a resource is Gold OA. Includes any fully-OA journal, regardless of inclusion in DOAJ. This includes journals by all-OA publishers and journals that would otherwise be all Hybrid or Bronze OA." - }, - { - "mode": "NULLABLE", - "name": "journal_issns", - "type": "STRING", - "description": "Any ISSNs assigned to the journal publishing this resource. Separate ISSNs are sometimes assigned to print and electronic versions of the same journal. If there are multiple ISSNs, they are separated by commas. Example: 1232-1203,1532-6203" - }, - { - "mode": "NULLABLE", - "name": "journal_issn_l", - "type": "STRING", - "description": "A single ISSN for the journal publishing this resource. 
An ISSN-L can be used as a primary key for a journal when more than one ISSN is assigned to it. Resources' journal_issns are mapped to ISSN-Ls using the issn.org table, with some manual corrections." - }, - { - "mode": "NULLABLE", - "name": "journal_name", - "type": "STRING", - "description": "The name of the journal publishing this resource. The same journal may have multiple name strings (eg, \"J. Foo\", \"Journal of Foo\", \"JOURNAL OF FOO\", etc). These have not been fully normalized within our database, so use with care." - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. 
When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "oa_date", - "type": "DATE", - "description": "When this document first became available at this location. oa_date is calculated differently for different host types and is not available for all oa_locations. See https://support.unpaywall.org/a/solutions/articles/44002063719 for details." - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." 
- }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "oa_locations", - "type": "RECORD", - "description": "List of all the OA Location objects associated with this resource. This list is unnecessary for the vast majority of use-cases, since you probably just want the best_oa_location. It's included primarily for research purposes." - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." - }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. 
When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "oa_date", - "type": "DATE", - "description": "When this document first became available at this location. oa_date is calculated differently for different host types and is not available for all oa_locations. See https://support.unpaywall.org/a/solutions/articles/44002063719 for details." - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." 
- }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "id", - "type": "STRING" - } - ], - "mode": "NULLABLE", - "name": "first_oa_location", - "type": "RECORD", - "description": "The OA Location Object with the earliest oa_date. Returns null if we couldn't find any OA Locations." - }, - { - "mode": "NULLABLE", - "name": "oa_status", - "type": "STRING", - "description": "The OA status, or color, of this resource. Classifies OA resources by location and license terms as one of: gold, hybrid, bronze, green or closed. See here for more information on how we assign an oa_status: https://support.unpaywall.org/support/solutions/articles/44001777288-what-do-the-types-of-oa-status-green-gold-hybrid-and-bronze-mean-" - }, - { - "mode": "NULLABLE", - "name": "published_date", - "type": "DATE", - "description": "The date this resource was published. As reported by the publishers, who unfortunately have inconsistent definitions of what counts as officially \"published.\" Returned as an ISO8601-formatted timestamp, generally with only year-month-day." - }, - { - "mode": "NULLABLE", - "name": "publisher", - "type": "STRING", - "description": "The name of this resource's publisher. Keep in mind that publisher name strings change over time, particularly as publishers are acquired or split up." - }, - { - "mode": "NULLABLE", - "name": "title", - "type": "STRING", - "description": "The title of this resource." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this resource was last updated. Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "year", - "type": "INTEGER", - "description": "The year this resource was published. 
Just the year part of the published_date" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "family", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "given", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "ORCID", - "type": "STRING", - "description": "URL-form of an ORCID identifier" - }, - { - "mode": "NULLABLE", - "name": "authenticated_orcid", - "type": "BOOLEAN", - "description": "If true, record owner asserts that the ORCID user completed ORCID OAuth authentication" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "affiliation", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "sequence", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "suffix", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "z_authors", - "type": "RECORD", - "description": "The authors of this resource. These are formatted as a list of Crossref Contributor objects, which are described in the Crossref API docs here: https://github.com/CrossRef/rest-api-doc/blob/master/api_format.md#contributor" - }, - { - "mode": "NULLABLE", - "name": "has_repository_copy", - "type": "BOOLEAN", - "description": "Is a full-text available in a repository?" - }, - { - "mode": "NULLABLE", - "name": "issn_l", - "type": "STRING" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "blank", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "x_reported_noncompliant_copies", - "type": "RECORD" - }, - { - "mode": "NULLABLE", - "name": "x_error", - "type": "BOOLEAN" - }, - { - "fields": [ - { - "mode": "NULLABLE", - "name": "evidence", - "type": "STRING", - "description": "How we found this OA location. Used for debugging. Don’t depend on the exact contents of this for anything, because values are subject to change without warning." 
- }, - { - "mode": "NULLABLE", - "name": "host_type", - "type": "STRING", - "description": "The type of host that serves this OA location. There are two possible values: 'publisher' means this location is served by the article’s publisher (in practice, this usually means it is hosted on the same domain the DOI resolves to). 'repository' means this location is served by an Open Access repository. Preprint servers are considered repositories even if the DOI resolves there." - }, - { - "mode": "NULLABLE", - "name": "is_best", - "type": "BOOLEAN", - "description": "Is this location the best_oa_location for its resource. See the DOI object's best_oa_location description for more on how we select which location is \"best.\"" - }, - { - "mode": "NULLABLE", - "name": "license", - "type": "STRING", - "description": "The license under which this copy is published. We return several types of licenses: Creative Commons licenses are uniformly abbreviated and lowercased. Example: 'cc-by-nc'. Publisher-specific licenses are normalized using this format: 'acs-specific: authorchoice/editors choice usage agreement'. When we have evidence that an OA license of some kind was used, but it’s not reported directly on the webpage at this location, this field returns 'implied-oa'" - }, - { - "mode": "NULLABLE", - "name": "oa_date", - "type": "DATE", - "description": "When this document first became available at this location. oa_date is calculated differently for different host types and is not available for all oa_locations. See https://support.unpaywall.org/a/solutions/articles/44002063719 for details." - }, - { - "mode": "NULLABLE", - "name": "pmh_id", - "type": "STRING", - "description": "OAI-PMH endpoint where we found this location. This is primarily for internal debugging. It's null for locations that weren't found using OAI-PMH." - }, - { - "mode": "NULLABLE", - "name": "updated", - "type": "TIMESTAMP", - "description": "Time when the data for this location was last updated. 
Returned as an ISO8601-formatted timestamp. Example: 2017-08-17T23:43:27.753663" - }, - { - "mode": "NULLABLE", - "name": "url", - "type": "STRING", - "description": "The url_for_pdf if there is one; otherwise landing page URL. When we can't find a url_for_pdf (or there isn't one), this field uses the url_for_landing_page, which is a useful fallback for some use cases." - }, - { - "mode": "NULLABLE", - "name": "url_for_landing_page", - "type": "STRING", - "description": "The URL for a landing page describing this OA copy. When the host_type is \"publisher\" the landing page usually includes HTML fulltext." - }, - { - "mode": "NULLABLE", - "name": "url_for_pdf", - "type": "STRING", - "description": "The URL with a PDF version of this OA copy." - }, - { - "mode": "NULLABLE", - "name": "version", - "type": "STRING", - "description": "The content version accessible at this location. We use the DRIVER Guidelines v2.0 VERSION standard (https://wiki.surfnet.nl/display/DRIVERguidelines/DRIVER-VERSION+Mappings) to define versions of a given article; see those docs for complete definitions of terms." - }, - { - "mode": "NULLABLE", - "name": "repository_institution", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "endpoint_id", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "oa_locations_embargoed", - "type": "RECORD", - "description": "List of OA Location objects associated with this resource that are not yet available. This list includes locations that we expect to be available in the future based on information like license metadata and journals' delayed OA policies. They do not affect the resource's oa_status and cannot be the best_oa_location or first_oa_location." 
- } -] \ No newline at end of file diff --git a/academic_observatory_workflows/database/schema/web_of_science_2020-09-01.json b/academic_observatory_workflows/database/schema/web_of_science/web_of_science_2020-09-01.json similarity index 100% rename from academic_observatory_workflows/database/schema/web_of_science_2020-09-01.json rename to academic_observatory_workflows/database/schema/web_of_science/web_of_science_2020-09-01.json diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/authors.json b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/authors.json index 723874c7c..a90eed227 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/authors.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/authors.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:afc8a5ccf0622898e380acfc448e92c5c58b1dabbc24e89765c34b6509cb8afe -size 3109 +oid sha256:3bd7abd62f450450d2a8a17645ee358af0fd4707c37b94ab5e6bbfe93729e072 +size 3549 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/concepts.json b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/concepts.json index c6867652e..f4a60de46 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/concepts.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/concepts.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:988200d47a2735a709a2e3f7ecd2a2c23fe67ea2176cf7baea9f48fbd5e84362 -size 56079 +oid sha256:e169145a2bf24ffcb0e44c20a7c1adcf86d6249f2ac6bd8b1b17af20965d2981 +size 63659 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/institutions.json b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/institutions.json index e4c86bd04..8f2e78cfd 100644 --- 
a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/institutions.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/institutions.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:feca1645be49678dd98e32e5ec0b4ffd7f6246ea311d12f01f50235851f5cce9 -size 38198 +oid sha256:15489b3de773811b568079c6faec6eed6b3eae93d5be839cd270ce73df73692f +size 40406 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/publishers.json b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/publishers.json index 375e75a25..29743dac5 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/publishers.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/publishers.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b49ae1aedf81fc4a8dd35379fa8a48dbf2ce13d02319c4691c5ed5a8e981da7 -size 7823 +oid sha256:073423dac00bcc41a554dece518f24ac7647fbc900da1417c8bfaeeb9066ad8b +size 8180 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/sources.json b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/sources.json index e2dbc29ff..22f864c3f 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/sources.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/sources.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ea484e9a3b91e514dba8fd52c42c9a2930996c78958f6590772faa2c453258b -size 23525 +oid sha256:a75e223b6812f803ae5b13340a53f6cb9703f9bcb0855bc6b0bc83d469e902e9 +size 23983 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/works.json b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/works.json index da2c8c66c..4f6c6fc64 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/works.json +++ 
b/academic_observatory_workflows/fixtures/openalex/2023-04-02/expected/works.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9da5df45d1358debe55ebd123d68ed37d7ac07d01f715cf138609a66733a7c77 -size 64610 +oid sha256:1467ed5c6399cdfeb03739ae3ed59c72525165c1c7a908eccc0cf20e296b1efc +size 55858 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/authors.json b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/authors.json index 51b95a259..38a5cf053 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/authors.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/authors.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c80d56a1935f59b2fd3c43e0acdbde24151997a72fa4fc4fcd618e4cbe2bcde3 -size 3220 +oid sha256:ade95bb53f2ca4fb19b4cd73becd361fa4099fb5738c412a055008441fd50c57 +size 3660 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/concepts.json b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/concepts.json index 9344466f3..2ad233117 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/concepts.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/concepts.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b84fdad17c941c182f7741f59ca528a138dbc863de0fee655107f34a87f9744 -size 67440 +oid sha256:fd3b6945eb25e2925921d59465a39c5d848fa1e6bbf9bc5cfc701cbd13871a9c +size 75948 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/institutions.json b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/institutions.json index d34754f9b..62f8ca72d 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/institutions.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/institutions.json @@ -1,3 +1,3 @@ 
version https://git-lfs.github.com/spec/v1 -oid sha256:bf405aa2cda7e680af83f5bed9e3d14ddd2e51eaf679f206fd0f8e3d5a183f53 -size 37563 +oid sha256:fdf6a182144f02c18d0ac02fc813a79f9f78062a2ea72bbf80aac909af60b869 +size 39267 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/publishers.json b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/publishers.json index cb509b74d..2a185c8fa 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/publishers.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/publishers.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d84cae48af88638403bb44254e872e9fd7d5a6258dce64ac5d109560645d0fa -size 9725 +oid sha256:73c4b4795ade705963241dfd4808c2f8f62395194b094ac93442d93b1489b3d9 +size 10176 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/sources.json b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/sources.json index 16bba62bb..2d0e789bc 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/sources.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/sources.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9dc4a2941f730087b8d8ba8668e51e3c35f136db2f68b6f0779e40694903fc79 -size 21697 +oid sha256:34587aa2c068d00e63bc9cac65d9c4562f1090e60221719b01d29df16db415f6 +size 22112 diff --git a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/works.json b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/works.json index ffe930f4b..9c90889e7 100644 --- a/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/works.json +++ b/academic_observatory_workflows/fixtures/openalex/2023-04-16/expected/works.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:922b592afb5d9b53dcdba737d5695b2447a32074ed62d326ef8456fef61c7e4c -size 53634 +oid sha256:eba5ae90442f039ce838f21726bfe822218a1007ef39f6bfe1c299fc7accd688 +size 46489 diff --git a/academic_observatory_workflows/fixtures/unpaywall/expected/run3_bq_upsert_records.json b/academic_observatory_workflows/fixtures/unpaywall/expected/run3_bq_upsert_records.json index 9c4adcf58..b06eab2aa 100644 --- a/academic_observatory_workflows/fixtures/unpaywall/expected/run3_bq_upsert_records.json +++ b/academic_observatory_workflows/fixtures/unpaywall/expected/run3_bq_upsert_records.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7362e353deee208ea2e27f2b7ed79ce8ee30a836e437069f01b7dec9c04bcb07 -size 58098 +oid sha256:dd759c19ead8b35cf0b6489a6a7460118b3e851bcff51211399a857cd0f88bfd +size 58096 diff --git a/academic_observatory_workflows/model.py b/academic_observatory_workflows/model.py index 0e6171864..2356cf698 100644 --- a/academic_observatory_workflows/model.py +++ b/academic_observatory_workflows/model.py @@ -1153,7 +1153,6 @@ def bq_load_observatory_dataset( mag_affiliation_override = load_jsonl(os.path.join(test_doi_path, "mag_affiliation_override.jsonl")) schema_path = schema_folder() - openalex_schema_path = os.path.join(schema_path, "openalex") with CliRunner().isolated_filesystem() as t: tables = [ Table( @@ -1161,105 +1160,135 @@ def bq_load_observatory_dataset( False, dataset_id_settings, repository, - bq_find_schema(path=schema_path, table_name="repository"), + bq_find_schema(path=os.path.join(schema_path, "doi"), table_name="repository"), ), Table( "crossref_events", False, dataset_id_all, crossref_events, - bq_find_schema(path=schema_path, table_name="crossref_events", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "crossref_events"), + table_name="crossref_events", + release_date=snapshot_date, + ), ), Table( "crossref_metadata", True, dataset_id_all, crossref_metadata, - 
bq_find_schema(path=schema_path, table_name="crossref_metadata", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "crossref_metadata"), + table_name="crossref_metadata", + release_date=snapshot_date, + ), ), Table( "crossref_fundref", True, dataset_id_all, crossref_fundref, - bq_find_schema(path=schema_path, table_name="crossref_fundref", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "crossref_fundref"), + table_name="crossref_fundref", + release_date=snapshot_date, + ), ), Table( "Affiliations", True, dataset_id_all, mag.affiliations, - bq_find_schema(path=schema_path, table_name="MagAffiliations", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), table_name="MagAffiliations", release_date=snapshot_date + ), ), Table( "FieldsOfStudy", True, dataset_id_all, mag.fields_of_study, - bq_find_schema(path=schema_path, table_name="MagFieldsOfStudy", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), table_name="MagFieldsOfStudy", release_date=snapshot_date + ), ), Table( "PaperAuthorAffiliations", True, dataset_id_all, mag.paper_author_affiliations, - bq_find_schema(path=schema_path, table_name="MagPaperAuthorAffiliations", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), + table_name="MagPaperAuthorAffiliations", + release_date=snapshot_date, + ), ), Table( "PaperFieldsOfStudy", True, dataset_id_all, mag.paper_fields_of_study, - bq_find_schema(path=schema_path, table_name="MagPaperFieldsOfStudy", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), + table_name="MagPaperFieldsOfStudy", + release_date=snapshot_date, + ), ), Table( "Papers", True, dataset_id_all, mag.papers, - bq_find_schema(path=schema_path, table_name="MagPapers", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), table_name="MagPapers", 
release_date=snapshot_date + ), ), Table( "open_citations", True, dataset_id_all, open_citations, - bq_find_schema(path=schema_path, table_name="open_citations", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "open_citations"), + table_name="open_citations", + release_date=snapshot_date, + ), ), Table( "unpaywall", False, dataset_id_all, unpaywall, - bq_find_schema(path=schema_path, table_name="unpaywall"), + bq_find_schema(path=os.path.join(schema_path, "unpaywall"), table_name="unpaywall"), ), Table( "ror", True, dataset_id_all, ror, - bq_find_schema(path=schema_path, table_name="ror", release_date=snapshot_date), + bq_find_schema(path=os.path.join(schema_path, "doi"), table_name="ror", release_date=snapshot_date), ), Table( "country", False, dataset_id_settings, country, - bq_find_schema(path=schema_path, table_name="country"), + bq_find_schema(path=os.path.join(schema_path, "doi"), table_name="country"), ), Table( "groupings", False, dataset_id_settings, groupings, - bq_find_schema(path=schema_path, table_name="groupings"), + bq_find_schema(path=os.path.join(schema_path, "doi"), table_name="groupings"), ), Table( "mag_affiliation_override", False, dataset_id_settings, mag_affiliation_override, - bq_find_schema(path=schema_path, table_name="mag_affiliation_override"), + bq_find_schema(path=os.path.join(schema_path, "doi"), table_name="mag_affiliation_override"), ), Table( "PaperAbstractsInvertedIndex", @@ -1267,7 +1296,9 @@ def bq_load_observatory_dataset( dataset_id_all, [], bq_find_schema( - path=schema_path, table_name="MagPaperAbstractsInvertedIndex", release_date=snapshot_date + path=os.path.join(schema_path, "mag"), + table_name="MagPaperAbstractsInvertedIndex", + release_date=snapshot_date, ), ), Table( @@ -1275,21 +1306,29 @@ def bq_load_observatory_dataset( True, dataset_id_all, [], - bq_find_schema(path=schema_path, table_name="MagJournals", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, 
"mag"), table_name="MagJournals", release_date=snapshot_date + ), ), Table( "ConferenceInstances", True, dataset_id_all, [], - bq_find_schema(path=schema_path, table_name="MagConferenceInstances", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), + table_name="MagConferenceInstances", + release_date=snapshot_date, + ), ), Table( "ConferenceSeries", True, dataset_id_all, [], - bq_find_schema(path=schema_path, table_name="MagConferenceSeries", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), table_name="MagConferenceSeries", release_date=snapshot_date + ), ), Table( "FieldOfStudyExtendedAttributes", @@ -1297,7 +1336,9 @@ def bq_load_observatory_dataset( dataset_id_all, [], bq_find_schema( - path=schema_path, table_name="MagFieldOfStudyExtendedAttributes", release_date=snapshot_date + path=os.path.join(schema_path, "mag"), + table_name="MagFieldOfStudyExtendedAttributes", + release_date=snapshot_date, ), ), Table( @@ -1305,42 +1346,52 @@ def bq_load_observatory_dataset( True, dataset_id_all, [], - bq_find_schema(path=schema_path, table_name="MagPaperExtendedAttributes", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), + table_name="MagPaperExtendedAttributes", + release_date=snapshot_date, + ), ), Table( "PaperResources", True, dataset_id_all, [], - bq_find_schema(path=schema_path, table_name="MagPaperResources", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), table_name="MagPaperResources", release_date=snapshot_date + ), ), Table( "PaperUrls", True, dataset_id_all, [], - bq_find_schema(path=schema_path, table_name="MagPaperUrls", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_path, "mag"), table_name="MagPaperUrls", release_date=snapshot_date + ), ), Table( "PaperMeSH", True, dataset_id_all, [], - bq_find_schema(path=schema_path, table_name="MagPaperMeSH", release_date=snapshot_date), + 
bq_find_schema( + path=os.path.join(schema_path, "mag"), table_name="MagPaperMeSH", release_date=snapshot_date + ), ), Table( "orcid", False, dataset_id_all, [], - bq_find_schema(path=schema_path, table_name="orcid", release_date=snapshot_date), + bq_find_schema(path=os.path.join(schema_path, "orcid"), table_name="orcid", release_date=snapshot_date), ), Table( "works", False, dataset_id_all, openalex, - bq_find_schema(path=openalex_schema_path, table_name="works"), + bq_find_schema(path=os.path.join(schema_path, "openalex"), table_name="works"), ), ] diff --git a/academic_observatory_workflows/workflows/crossref_fundref_telescope.py b/academic_observatory_workflows/workflows/crossref_fundref_telescope.py index c941e118a..c110cb792 100644 --- a/academic_observatory_workflows/workflows/crossref_fundref_telescope.py +++ b/academic_observatory_workflows/workflows/crossref_fundref_telescope.py @@ -79,7 +79,7 @@ def __init__( bq_dataset_id: str = "crossref", bq_table_name: str = "crossref_fundref", api_dataset_id: str = "crossref_fundref", - schema_folder: str = default_schema_folder(), + schema_folder: str = os.path.join(default_schema_folder(), "crossref_fundref"), dataset_description: str = "Datasets created by Crossref: https://www.crossref.org/", table_description: str = "The Crossref Funder Registry dataset: https://www.crossref.org/services/funder-registry/", observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API, diff --git a/academic_observatory_workflows/workflows/crossref_metadata_telescope.py b/academic_observatory_workflows/workflows/crossref_metadata_telescope.py index 6b1e43166..c1f7dd5e0 100644 --- a/academic_observatory_workflows/workflows/crossref_metadata_telescope.py +++ b/academic_observatory_workflows/workflows/crossref_metadata_telescope.py @@ -94,7 +94,7 @@ def __init__( bq_dataset_id: str = "crossref", bq_table_name: str = "crossref_metadata", api_dataset_id: str = "crossref_metadata", - schema_folder: str = default_schema_folder(), +
schema_folder: str = os.path.join(default_schema_folder(), "crossref_metadata"), dataset_description: str = "Datasets created by Crossref: https://www.crossref.org/", table_description: str = "The Crossref Metadata Plus dataset: https://www.crossref.org/services/metadata-retrieval/metadata-plus/", crossref_metadata_conn_id: str = "crossref_metadata", diff --git a/academic_observatory_workflows/workflows/geonames_telescope.py b/academic_observatory_workflows/workflows/geonames_telescope.py index 1cddb61d5..54a18e7d7 100644 --- a/academic_observatory_workflows/workflows/geonames_telescope.py +++ b/academic_observatory_workflows/workflows/geonames_telescope.py @@ -98,7 +98,7 @@ def __init__( bq_dataset_id: str = "geonames", bq_table_name: str = "geonames", api_dataset_id: str = "geonames", - schema_folder: str = default_schema_folder(), + schema_folder: str = os.path.join(default_schema_folder(), "geonames"), dataset_description: str = "The GeoNames geographical database: https://www.geonames.org/", table_description: str = "The GeoNames geographical database: https://www.geonames.org/", observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API, diff --git a/academic_observatory_workflows/workflows/open_citations_telescope.py b/academic_observatory_workflows/workflows/open_citations_telescope.py index 00d1e406a..d16edad71 100644 --- a/academic_observatory_workflows/workflows/open_citations_telescope.py +++ b/academic_observatory_workflows/workflows/open_citations_telescope.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import os.path # Author: James Diprose, Tuan Chien import zipfile @@ -70,7 +70,7 @@ def __init__( bq_dataset_id: str = "open_citations", bq_table_name: str = "open_citations", api_dataset_id: str = "open_citations", - schema_folder: str = default_schema_folder(), + schema_folder: str = os.path.join(default_schema_folder(), "open_citations"), dataset_description: str = "The OpenCitations Indexes: http://opencitations.net/", table_description: str = "The OpenCitations COCI CSV table: http://opencitations.net/", observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API, diff --git a/academic_observatory_workflows/workflows/ror_telescope.py b/academic_observatory_workflows/workflows/ror_telescope.py index 87c6b9d44..1b4be120e 100644 --- a/academic_observatory_workflows/workflows/ror_telescope.py +++ b/academic_observatory_workflows/workflows/ror_telescope.py @@ -83,7 +83,7 @@ def __init__( bq_dataset_id: str = "ror", bq_table_name: str = "ror", api_dataset_id: str = "ror", - schema_folder: str = default_schema_folder(), + schema_folder: str = os.path.join(default_schema_folder(), "ror"), dataset_description: str = "The Research Organization Registry (ROR) database: https://ror.org/", table_description: str = "The Research Organization Registry (ROR) database: https://ror.org/", observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API, diff --git a/academic_observatory_workflows/workflows/scopus_telescope.py b/academic_observatory_workflows/workflows/scopus_telescope.py index dd67c1c6b..2fae4fd39 100644 --- a/academic_observatory_workflows/workflows/scopus_telescope.py +++ b/academic_observatory_workflows/workflows/scopus_telescope.py @@ -104,7 +104,7 @@ def __init__( bq_dataset_id: str = "elsevier", bq_table_name: str = "scopus", api_dataset_id: str = "scopus", - schema_folder: str = default_schema_folder(), + schema_folder: str = os.path.join(default_schema_folder(), "scopus"), dataset_description: str = "The Scopus citation database: https://www.scopus.com", 
table_description: str = "The Scopus citation database: https://www.scopus.com", observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API, diff --git a/academic_observatory_workflows/workflows/tests/test_oa_web_workflow.py b/academic_observatory_workflows/workflows/tests/test_oa_web_workflow.py index d436a4a50..e9621f929 100644 --- a/academic_observatory_workflows/workflows/tests/test_oa_web_workflow.py +++ b/academic_observatory_workflows/workflows/tests/test_oa_web_workflow.py @@ -433,7 +433,6 @@ def setup_tables( institution = load_jsonl(test_fixtures_folder(self.oa_web_fixtures, "institution.jsonl.gz")) settings_country = load_jsonl(test_fixtures_folder("doi", "country.jsonl")) - analysis_schema_path = schema_folder() oa_web_schema_path = test_fixtures_folder(self.oa_web_fixtures, "schema") with CliRunner().isolated_filesystem() as t: tables = [ @@ -442,28 +441,32 @@ def setup_tables( True, dataset_id_all, ror, - bq_find_schema(path=analysis_schema_path, table_name="ror", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_folder(), "ror"), table_name="ror", release_date=snapshot_date + ), ), Table( "country", True, dataset_id_all, country, - bq_find_schema(path=oa_web_schema_path, table_name="country"), + bq_find_schema(path=os.path.join(schema_folder(), "doi"), table_name="country"), ), Table( "institution", True, dataset_id_all, institution, - bq_find_schema(path=oa_web_schema_path, table_name="institution", release_date=snapshot_date), + bq_find_schema( + path=os.path.join(schema_folder(), "doi"), table_name="institution", release_date=snapshot_date + ), ), Table( "country", False, dataset_id_settings, settings_country, - bq_find_schema(path=analysis_schema_path, table_name="country"), + bq_find_schema(path=os.path.join(schema_folder(), "doi"), table_name="country"), ), ] diff --git a/academic_observatory_workflows/workflows/unpaywall_telescope.py b/academic_observatory_workflows/workflows/unpaywall_telescope.py index 
19319c78f..a17bf1463 100644 --- a/academic_observatory_workflows/workflows/unpaywall_telescope.py +++ b/academic_observatory_workflows/workflows/unpaywall_telescope.py @@ -170,7 +170,7 @@ def __init__( bq_dataset_id: str = "our_research", bq_table_name: str = "unpaywall", api_dataset_id: str = "unpaywall", - schema_folder: str = default_schema_folder(), + schema_folder: str = os.path.join(default_schema_folder(), "unpaywall"), dataset_description: str = "Our Research datasets: http://ourresearch.org/", table_description: str = "Unpaywall Data Feed: https://unpaywall.org/products/data-feed", merge_primary_key: str = "doi", diff --git a/academic_observatory_workflows/workflows/web_of_science_telescope.py b/academic_observatory_workflows/workflows/web_of_science_telescope.py index b9bd16e24..a7308a682 100644 --- a/academic_observatory_workflows/workflows/web_of_science_telescope.py +++ b/academic_observatory_workflows/workflows/web_of_science_telescope.py @@ -102,7 +102,7 @@ def __init__( bq_dataset_id: str = "clarivate", bq_table_name: str = "web_of_science", api_dataset_id: str = "web_of_science", - schema_folder: str = default_schema_folder(), + schema_folder: str = os.path.join(default_schema_folder(), "web_of_science"), dataset_description: str = "The Web of Science citation database: https://clarivate.com/webofsciencegroup/solutions/web-of-science", table_description: str = "The Web of Science citation database: https://clarivate.com/webofsciencegroup/solutions/web-of-science", observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API,