From 15bb575208e200537511c41c66cd389345d0be93 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Fri, 17 May 2024 16:29:37 -0400 Subject: [PATCH 01/32] [Institution] Adding AuthorInstitution model --- src/user/migrations/0106_authorinstitution.py | 61 +++++++++++++++++++ src/user/related_models/author_institution.py | 25 ++++++++ src/user/related_models/author_model.py | 1 + 3 files changed, 87 insertions(+) create mode 100644 src/user/migrations/0106_authorinstitution.py create mode 100644 src/user/related_models/author_institution.py diff --git a/src/user/migrations/0106_authorinstitution.py b/src/user/migrations/0106_authorinstitution.py new file mode 100644 index 000000000..5a5a16802 --- /dev/null +++ b/src/user/migrations/0106_authorinstitution.py @@ -0,0 +1,61 @@ +# Generated by Django 4.1 on 2024-05-17 20:27 + +import django.contrib.postgres.fields +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("institution", "0004_institution_openalex_created_date_and_more"), + ("user", "0105_remove_author_academic_verification_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="AuthorInstitution", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("created_date", models.DateTimeField(auto_now_add=True)), + ("updated_date", models.DateTimeField(auto_now=True)), + ( + "years", + django.contrib.postgres.fields.ArrayField( + base_field=models.IntegerField(), + blank=True, + default=list, + size=None, + ), + ), + ("is_primary", models.BooleanField(default=False)), + ( + "author", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="institutions", + to="user.author", + ), + ), + ( + "institution", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="author_institutions", + to="institution.institution", + ), + ), + ], + options={ + "unique_together": {("author", "institution")}, + }, + ), + ] diff --git a/src/user/related_models/author_institution.py b/src/user/related_models/author_institution.py new file mode 100644 index 000000000..85202aef1 --- /dev/null +++ b/src/user/related_models/author_institution.py @@ -0,0 +1,25 @@ +from django.contrib.postgres.fields import ArrayField +from django.db import models + +from utils.models import DefaultModel + + +class AuthorInstitution(DefaultModel): + author = models.ForeignKey( + "user.Author", + on_delete=models.CASCADE, + related_name="institutions", + ) + + institution = models.ForeignKey( + "institution.Institution", + on_delete=models.CASCADE, + related_name="author_institutions", + ) + + years = ArrayField(models.IntegerField(), blank=True, default=list) + + is_primary = models.BooleanField(default=False) + + class Meta: + unique_together = ["author", "institution"] diff --git a/src/user/related_models/author_model.py b/src/user/related_models/author_model.py index 6f44ed307..4909ced71 100644 --- a/src/user/related_models/author_model.py +++ b/src/user/related_models/author_model.py @@ -9,6 +9,7 @@ from paper.utils import PAPER_SCORE_Q_ANNOTATION from purchase.related_models.purchase_model import Purchase from researchhub_case.constants.case_constants import APPROVED +from user.related_models.author_institution import AuthorInstitution from user.related_models.profile_image_storage import ProfileImageStorage from user.related_models.school_model import University from user.related_models.user_model 
import User From ba64987c2a891329c011b2505582d8d878e16b8f Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Fri, 17 May 2024 18:31:04 -0400 Subject: [PATCH 02/32] [Author] Adding 2 yr mean citedeness --- .../0107_author_two_year_mean_citedness.py | 18 ++++++++++++++++++ src/user/related_models/author_model.py | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 src/user/migrations/0107_author_two_year_mean_citedness.py diff --git a/src/user/migrations/0107_author_two_year_mean_citedness.py b/src/user/migrations/0107_author_two_year_mean_citedness.py new file mode 100644 index 000000000..c8a93051b --- /dev/null +++ b/src/user/migrations/0107_author_two_year_mean_citedness.py @@ -0,0 +1,18 @@ +# Generated by Django 4.1 on 2024-05-17 22:30 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0106_authorinstitution"), + ] + + operations = [ + migrations.AddField( + model_name="author", + name="two_year_mean_citedness", + field=models.FloatField(default=0), + ), + ] diff --git a/src/user/related_models/author_model.py b/src/user/related_models/author_model.py index 4909ced71..a222a2dd6 100644 --- a/src/user/related_models/author_model.py +++ b/src/user/related_models/author_model.py @@ -72,6 +72,9 @@ class Author(models.Model): max_length=20, ) + # AKA Impact Factor. Derived from OpenAlex: https://en.wikipedia.org/wiki/Impact_factor + two_year_mean_citedness = models.FloatField(default=0) + def __str__(self): university = self.university if university is None: From d146ba052bc8c9fac0a905eaf9a0f24ce9b8c3b5 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Fri, 17 May 2024 18:36:36 -0400 Subject: [PATCH 03/32] [Author] Remove AuthorCitation model --- src/user/models.py | 2 -- src/user/related_models/author_citation_model.py | 12 ------------ 2 files changed, 14 deletions(-) delete mode 100644 src/user/related_models/author_citation_model.py diff --git a/src/user/models.py b/src/user/models.py index 4c682bb91..6787abee5 100644 --- a/src/user/models.py +++ b/src/user/models.py @@ -1,5 +1,4 @@ from .related_models.action_model import Action -from .related_models.author_citation_model import AuthorCitation from .related_models.author_model import Author from .related_models.follow_model import Follow from .related_models.gatekeeper_model import Gatekeeper @@ -13,7 +12,6 @@ migratables = ( Action, Author, - AuthorCitation, Follow, Major, ProfileImageStorage, diff --git a/src/user/related_models/author_citation_model.py b/src/user/related_models/author_citation_model.py deleted file mode 100644 index ddabcba36..000000000 --- a/src/user/related_models/author_citation_model.py +++ /dev/null @@ -1,12 +0,0 @@ -from django.db import models - - -class AuthorCitation(models.Model): - author = models.ForeignKey( - "user.Author", related_name="citations", on_delete=models.CASCADE - ) - citation_count = models.IntegerField(default=0) - citation_name = models.CharField(max_length=256) - cited_by_url = models.URLField(max_length=256, null=True) - publish_year = models.CharField(max_length=4) - title = models.CharField(max_length=256) From 10aaeef1516b2d9afbc748df5024b604192aaad1 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Fri, 17 May 2024 18:51:39 -0400 Subject: [PATCH 04/32] Removing author citation --- .../migrations/0108_delete_authorcitation.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 src/user/migrations/0108_delete_authorcitation.py diff --git a/src/user/migrations/0108_delete_authorcitation.py 
b/src/user/migrations/0108_delete_authorcitation.py new file mode 100644 index 000000000..7333e07a4 --- /dev/null +++ b/src/user/migrations/0108_delete_authorcitation.py @@ -0,0 +1,16 @@ +# Generated by Django 4.1 on 2024-05-17 22:51 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0107_author_two_year_mean_citedness"), + ] + + operations = [ + migrations.DeleteModel( + name="AuthorCitation", + ), + ] From 5a53908f2b17794c5e072450a1047c004c70c8c3 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Fri, 17 May 2024 22:06:04 -0400 Subject: [PATCH 05/32] WIP Creating authorships --- src/paper/openalex_util.py | 44 ++++++++++++++++++- .../commands/load_works_from_openalex.py | 1 + 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 2ecae9fe7..42388fa32 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -8,8 +8,6 @@ import utils.sentry as sentry from paper.utils import get_cache_key -from purchase.models import Wallet -from tag.models import Concept from utils.http import check_url_contains_pdf from utils.openalex import OpenAlex @@ -44,8 +42,13 @@ def process_openalex_works(works): + from institution.models import Institution from paper.models import Paper from paper.paper_upload_tasks import create_paper_related_tags + from paper.related_models.authorship_model import Authorship + from purchase.models import Wallet + from tag.models import Concept + from user.related_models.author_model import Author open_alex = OpenAlex() @@ -156,3 +159,40 @@ def process_openalex_works(works): for paper_id, topics_and_concepts in paper_to_topics_and_concepts.items(): openalex_concepts, openalex_topics = topics_and_concepts create_paper_related_tags(paper_id, openalex_concepts, openalex_topics) + + openalex_authorships = work.get("authorships") + + print("work:", work) + + for oa_authorship in openalex_authorships: + author_position = oa_authorship.get("author_position") + author_openalex_id = oa_authorship.get("author", {}).get("id") + + author = None + try: + author = Author.objects.get(openalex_ids__contains=[author_openalex_id]) + except Author.DoesNotExist: + author_name_parts = ( + oa_authorship.get("author", {}).get("display_name").split(" ") + ) + author = Author.objects.create( + first_name=author_name_parts[0], + last_name=author_name_parts[-1], + openalex_ids=[author_openalex_id], + ) + Wallet.objects.create(author=author) + + # Create authorship + affiliated_institutions = Institution.objects.filter( + openalex_id__in=[ + inst["id"] for inst in oa_authorship.get("institutions", []) + ] + ) + authorship = Authorship.objects.create( + author=author, + author_position=author_position, + paper=paper, + institutions=affiliated_institutions, + is_corresponding=oa_authorship.get("is_corresponding"), + raw_author_name=oa_authorship.get("author", {}).get("display_name"), + ) diff --git a/src/researchhub_document/management/commands/load_works_from_openalex.py b/src/researchhub_document/management/commands/load_works_from_openalex.py index 5a8293cb2..194d6af68 100644 --- a/src/researchhub_document/management/commands/load_works_from_openalex.py +++ b/src/researchhub_document/management/commands/load_works_from_openalex.py @@ -95,3 +95,4 @@ def handle(self, *args, **kwargs): works, cursor = OA.get_works(source_id=source, next_cursor=cursor) process_openalex_works(works) page += 1 + return From f1d0e6bcc67d20f0f74472e2bad93fd832388267 Mon Sep 17 
00:00:00 2001 From: Kobe Attias Date: Sat, 18 May 2024 22:34:08 -0400 Subject: [PATCH 06/32] [OpenAlex] Create authorship records --- src/paper/openalex_util.py | 96 ++++++++++++------- .../commands/load_works_from_openalex.py | 4 +- 2 files changed, 63 insertions(+), 37 deletions(-) diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 42388fa32..ffc216020 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -1,3 +1,4 @@ +import copy import logging from django.apps import apps @@ -85,16 +86,18 @@ def process_openalex_works(works): else: create_papers.append(work) - paper_to_topics_and_concepts = {} + paper_to_openalex_data = {} # Create new papers with transaction.atomic(): for work in create_papers: + _work = copy.deepcopy(work) + ( data, openalex_concepts, openalex_topics, - ) = open_alex.build_paper_from_openalex_work(work) + ) = open_alex.build_paper_from_openalex_work(_work) paper = Paper(**data) @@ -120,18 +123,20 @@ def process_openalex_works(works): e, message=f"Failed to save paper, unexpected error: {paper.doi}" ) - paper_to_topics_and_concepts[paper.id] = ( - openalex_concepts, - openalex_topics, - ) + paper_to_openalex_data[paper.id] = { + "openalex_concepts": openalex_concepts, + "openalex_topics": openalex_topics, + "openalex_work": work, + } # Prepare papers for batch update for existing_paper, work in update_papers: + _work = copy.deepcopy(work) ( data, openalex_concepts, openalex_topics, - ) = open_alex.build_paper_from_openalex_work(work) + ) = open_alex.build_paper_from_openalex_work(_work) # we didn't fetch all fields in the initial paper query (we used .only()), # so we need to explicitly fetch them if we want to update them. @@ -141,10 +146,11 @@ def process_openalex_works(works): for field in PAPER_FIELDS_ALLOWED_TO_UPDATE: setattr(existing_paper, field, data.get(field)) - paper_to_topics_and_concepts[existing_paper.id] = ( - openalex_concepts, - openalex_topics, - ) + paper_to_openalex_data[existing_paper.id] = { + "openalex_concepts": openalex_concepts, + "openalex_topics": openalex_topics, + "openalex_work": work, + } # perform batch update if update_papers and len(update_papers) > 0: @@ -156,31 +162,30 @@ def process_openalex_works(works): sentry.log_error(e, message="Failed to bulk update papers") # Upsert concepts and associate to papers - for paper_id, topics_and_concepts in paper_to_topics_and_concepts.items(): - openalex_concepts, openalex_topics = topics_and_concepts - create_paper_related_tags(paper_id, openalex_concepts, openalex_topics) - - openalex_authorships = work.get("authorships") + for paper_id, paper_data in paper_to_openalex_data.items(): + create_paper_related_tags( + paper_id, paper_data["openalex_concepts"], paper_data["openalex_topics"] + ) - print("work:", work) + openalex_authorships = paper_data["openalex_work"].get("authorships") - for oa_authorship in openalex_authorships: - author_position = oa_authorship.get("author_position") - author_openalex_id = oa_authorship.get("author", {}).get("id") + for oa_authorship in openalex_authorships: + author_position = oa_authorship.get("author_position") + author_openalex_id = oa_authorship.get("author", {}).get("id") - author = None - try: - author = Author.objects.get(openalex_ids__contains=[author_openalex_id]) - except Author.DoesNotExist: - author_name_parts = ( - oa_authorship.get("author", {}).get("display_name").split(" ") - ) - author = Author.objects.create( - first_name=author_name_parts[0], - last_name=author_name_parts[-1], - 
openalex_ids=[author_openalex_id], - ) - Wallet.objects.create(author=author) + author = None + try: + author = Author.objects.get(openalex_ids__contains=[author_openalex_id]) + except Author.DoesNotExist: + author_name_parts = ( + oa_authorship.get("author", {}).get("display_name").split(" ") + ) + author = Author.objects.create( + first_name=author_name_parts[0], + last_name=author_name_parts[-1], + openalex_ids=[author_openalex_id], + ) + Wallet.objects.create(author=author) # Create authorship affiliated_institutions = Institution.objects.filter( @@ -188,11 +193,30 @@ def process_openalex_works(works): inst["id"] for inst in oa_authorship.get("institutions", []) ] ) - authorship = Authorship.objects.create( + + # Find or create authorship + authorship, created = Authorship.objects.get_or_create( author=author, author_position=author_position, - paper=paper, - institutions=affiliated_institutions, + paper_id=paper_id, is_corresponding=oa_authorship.get("is_corresponding"), raw_author_name=oa_authorship.get("author", {}).get("display_name"), ) + + # Get affiliated institutions + affiliated_institutions = Institution.objects.filter( + openalex_id__in=[ + inst["id"] for inst in oa_authorship.get("institutions", []) + ] + ) + + # Set institutions to authorship if they are not already set + existing_institutions = authorship.institutions.all() + new_institutions = [ + inst + for inst in affiliated_institutions + if inst not in existing_institutions + ] + + if new_institutions: + authorship.institutions.add(*new_institutions) diff --git a/src/researchhub_document/management/commands/load_works_from_openalex.py b/src/researchhub_document/management/commands/load_works_from_openalex.py index 194d6af68..863a95c9e 100644 --- a/src/researchhub_document/management/commands/load_works_from_openalex.py +++ b/src/researchhub_document/management/commands/load_works_from_openalex.py @@ -92,7 +92,9 @@ def handle(self, *args, **kwargs): page = 1 while cursor: print("Processing page " + str(page)) - works, cursor = OA.get_works(source_id=source, next_cursor=cursor) + works, cursor = OA.get_works( + source_id=source, type="article", next_cursor=cursor + ) process_openalex_works(works) page += 1 return From 8c995c08d77f40ce3300efedd4d53cb5260ef7c7 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Sun, 19 May 2024 18:50:38 -0400 Subject: [PATCH 07/32] [Author] Fetch and set additional data from OpenAlex --- src/paper/openalex_util.py | 34 ++++++++++++++++++++++++++-------- src/utils/openalex.py | 24 ++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index ffc216020..d24a39cab 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -163,16 +163,21 @@ def process_openalex_works(works): # Upsert concepts and associate to papers for paper_id, paper_data in paper_to_openalex_data.items(): + work = paper_data["openalex_work"] create_paper_related_tags( paper_id, paper_data["openalex_concepts"], paper_data["openalex_topics"] ) - openalex_authorships = paper_data["openalex_work"].get("authorships") + openalex_authorships = work.get("authorships") + authors_need_additional_data_fetch = [] for oa_authorship in openalex_authorships: author_position = oa_authorship.get("author_position") author_openalex_id = oa_authorship.get("author", {}).get("id") + just_id = author_openalex_id.split("/")[-1] + authors_need_additional_data_fetch.append(just_id) + author = None try: author = 
Author.objects.get(openalex_ids__contains=[author_openalex_id]) @@ -187,13 +192,6 @@ def process_openalex_works(works): ) Wallet.objects.create(author=author) - # Create authorship - affiliated_institutions = Institution.objects.filter( - openalex_id__in=[ - inst["id"] for inst in oa_authorship.get("institutions", []) - ] - ) - # Find or create authorship authorship, created = Authorship.objects.get_or_create( author=author, @@ -220,3 +218,23 @@ def process_openalex_works(works): if new_institutions: authorship.institutions.add(*new_institutions) + + # Update authors with additional data + oa_authors, _ = open_alex.get_authors( + openalex_ids=authors_need_additional_data_fetch + ) + for oa_author in oa_authors: + try: + author = Author.objects.get( + openalex_ids__contains=[oa_author.get("id")] + ) + except Author.DoesNotExist: + continue + + author.i10_index = oa_author.get("summary_stats", {}).get("i10_index") + author.h_index = oa_author.get("summary_stats", {}).get("h_index") + author.two_year_mean_citedness = oa_author.get("summary_stats", {}).get( + "2yr_mean_citedness" + ) + author.orcid_id = oa_author.get("orcid") + author.save() diff --git a/src/utils/openalex.py b/src/utils/openalex.py index 492270696..a31a8841d 100644 --- a/src/utils/openalex.py +++ b/src/utils/openalex.py @@ -310,6 +310,30 @@ def get_topics(self, next_cursor="*", page=1, batch_size=100): cursor = next_cursor if next_cursor != "*" else None return topics, cursor + def get_authors( + self, + next_cursor="*", + batch_size=100, + openalex_ids=None, + ): + # Build the filter + oa_filters = [] + + if openalex_ids: + oa_filters.append(f"ids.openalex:{'|'.join(openalex_ids)}") + + filters = { + "filter": ",".join(oa_filters), + "per-page": batch_size, + "cursor": next_cursor, + } + + response = self._get("authors", filters=filters) + authors = response.get("results", []) + next_cursor = response.get("meta", {}).get("next_cursor") + cursor = next_cursor if next_cursor != "*" else None + return authors, cursor + def get_works( self, since_date=None, From 5e99352be05b113cc96102c6f30f1dbffef0da44 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Mon, 20 May 2024 10:42:09 -0400 Subject: [PATCH 08/32] [Author] Associate paper with authors when processing --- src/paper/openalex_util.py | 144 +++++++++--------- .../tests/test_process_openalex_works.py | 12 ++ 2 files changed, 88 insertions(+), 68 deletions(-) diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index d24a39cab..07aa5ff1d 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -1,15 +1,10 @@ import copy -import logging -from django.apps import apps -from django.core.cache import cache from django.core.exceptions import ValidationError from django.db import IntegrityError, transaction from django.db.models import Q import utils.sentry as sentry -from paper.utils import get_cache_key -from utils.http import check_url_contains_pdf from utils.openalex import OpenAlex # Only these particular fields will be updated when an OpenAlex @@ -43,13 +38,8 @@ def process_openalex_works(works): - from institution.models import Institution from paper.models import Paper from paper.paper_upload_tasks import create_paper_related_tags - from paper.related_models.authorship_model import Authorship - from purchase.models import Wallet - from tag.models import Concept - from user.related_models.author_model import Author open_alex = OpenAlex() @@ -169,72 +159,90 @@ def process_openalex_works(works): ) openalex_authorships = work.get("authorships") + 
process_openalex_authorships(openalex_authorships, paper_id) - authors_need_additional_data_fetch = [] - for oa_authorship in openalex_authorships: - author_position = oa_authorship.get("author_position") - author_openalex_id = oa_authorship.get("author", {}).get("id") - just_id = author_openalex_id.split("/")[-1] - authors_need_additional_data_fetch.append(just_id) +def process_openalex_authorships(openalex_authorships, related_paper_id): + """ + Iterates through authorships associated with an OpenAlex work and create related objects such as + AuthorInstitution, Authorship, and Author objects. Related models will be updated if they already exist. + https://docs.openalex.org/api-entities/works/work-object/authorship-object + """ + from institution.models import Institution + from paper.models import Paper + from paper.related_models.authorship_model import Authorship + from purchase.models import Wallet + from user.related_models.author_model import Author - author = None - try: - author = Author.objects.get(openalex_ids__contains=[author_openalex_id]) - except Author.DoesNotExist: - author_name_parts = ( - oa_authorship.get("author", {}).get("display_name").split(" ") - ) - author = Author.objects.create( - first_name=author_name_parts[0], - last_name=author_name_parts[-1], - openalex_ids=[author_openalex_id], - ) - Wallet.objects.create(author=author) - - # Find or create authorship - authorship, created = Authorship.objects.get_or_create( - author=author, - author_position=author_position, - paper_id=paper_id, - is_corresponding=oa_authorship.get("is_corresponding"), - raw_author_name=oa_authorship.get("author", {}).get("display_name"), - ) + open_alex = OpenAlex() + related_paper = Paper.objects.get(id=related_paper_id) + + authors_need_additional_data_fetch = [] + for oa_authorship in openalex_authorships: + author_position = oa_authorship.get("author_position") + author_openalex_id = oa_authorship.get("author", {}).get("id") - # Get affiliated institutions - affiliated_institutions = Institution.objects.filter( - openalex_id__in=[ - inst["id"] for inst in oa_authorship.get("institutions", []) - ] + just_id = author_openalex_id.split("/")[-1] + authors_need_additional_data_fetch.append(just_id) + + author = None + try: + author = Author.objects.get(openalex_ids__contains=[author_openalex_id]) + except Author.DoesNotExist: + author_name_parts = ( + oa_authorship.get("author", {}).get("display_name").split(" ") ) + author = Author.objects.create( + first_name=author_name_parts[0], + last_name=author_name_parts[-1], + openalex_ids=[author_openalex_id], + ) + Wallet.objects.create(author=author) + + # Find or create authorship + authorship, created = Authorship.objects.get_or_create( + author=author, + author_position=author_position, + paper=related_paper, + is_corresponding=oa_authorship.get("is_corresponding"), + raw_author_name=oa_authorship.get("author", {}).get("display_name"), + ) - # Set institutions to authorship if they are not already set - existing_institutions = authorship.institutions.all() - new_institutions = [ - inst - for inst in affiliated_institutions - if inst not in existing_institutions + # Get affiliated institutions + affiliated_institutions = Institution.objects.filter( + openalex_id__in=[ + inst["id"] for inst in oa_authorship.get("institutions", []) ] + ) + + # Set institutions associated with authorships if they do not already exist + existing_institutions = authorship.institutions.all() + new_institutions = [ + inst + for inst in affiliated_institutions + if 
inst not in existing_institutions + ] + + if new_institutions: + authorship.institutions.add(*new_institutions) - if new_institutions: - authorship.institutions.add(*new_institutions) + # Update authors with additional metadata from OpenAlex + oa_authors, _ = open_alex.get_authors( + openalex_ids=authors_need_additional_data_fetch + ) + for oa_author in oa_authors: + try: + author = Author.objects.get(openalex_ids__contains=[oa_author.get("id")]) + except Author.DoesNotExist: + continue - # Update authors with additional data - oa_authors, _ = open_alex.get_authors( - openalex_ids=authors_need_additional_data_fetch + author.i10_index = oa_author.get("summary_stats", {}).get("i10_index") + author.h_index = oa_author.get("summary_stats", {}).get("h_index") + author.two_year_mean_citedness = oa_author.get("summary_stats", {}).get( + "2yr_mean_citedness" ) - for oa_author in oa_authors: - try: - author = Author.objects.get( - openalex_ids__contains=[oa_author.get("id")] - ) - except Author.DoesNotExist: - continue + author.orcid_id = oa_author.get("orcid") + author.save() - author.i10_index = oa_author.get("summary_stats", {}).get("i10_index") - author.h_index = oa_author.get("summary_stats", {}).get("h_index") - author.two_year_mean_citedness = oa_author.get("summary_stats", {}).get( - "2yr_mean_citedness" - ) - author.orcid_id = oa_author.get("orcid") - author.save() + # Associate paper with author + related_paper.authors.add(author) diff --git a/src/paper/tests/test_process_openalex_works.py b/src/paper/tests/test_process_openalex_works.py index 4806731e2..755f7a903 100644 --- a/src/paper/tests/test_process_openalex_works.py +++ b/src/paper/tests/test_process_openalex_works.py @@ -4,6 +4,7 @@ from paper.models import Paper from paper.openalex_util import process_openalex_works +from user.related_models.author_model import Author class ProcessOpenAlexWorksTests(APITestCase): @@ -70,3 +71,14 @@ def test_updating_existing_papers_from_openalex_works(self): self.assertEqual(updated_paper.title, "New title") self.assertEqual(updated_paper.paper_title, "New title") + + def test_create_authors_when_processing_work(self): + process_openalex_works(self.works) + + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + created_papers = Paper.objects.filter(doi__in=dois) + + # Sample the first paper to ensure it has authors + paper_authors = created_papers.first().authors.all() + self.assertGreater(len(paper_authors), 0) From c4237fb3d7e25cfda0363199f19f6a99f6bac2a3 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Mon, 20 May 2024 15:32:45 -0400 Subject: [PATCH 09/32] [Author] Be able to create Institution from dehydrated OpenAlex object --- ...tution_associated_institutions_and_more.py | 42 +++++++++++++++++++ src/institution/models.py | 17 ++++---- src/paper/openalex_util.py | 20 ++------- .../tests/test_process_openalex_works.py | 21 ++++++++++ 4 files changed, 76 insertions(+), 24 deletions(-) create mode 100644 src/institution/migrations/0005_alter_institution_associated_institutions_and_more.py diff --git a/src/institution/migrations/0005_alter_institution_associated_institutions_and_more.py b/src/institution/migrations/0005_alter_institution_associated_institutions_and_more.py new file mode 100644 index 000000000..93c2288eb --- /dev/null +++ b/src/institution/migrations/0005_alter_institution_associated_institutions_and_more.py @@ -0,0 +1,42 @@ +# Generated by Django 4.1 on 2024-05-20 19:24 + +import django.contrib.postgres.fields +from 
django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("institution", "0004_institution_openalex_created_date_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="institution", + name="associated_institutions", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.CharField(blank=True, max_length=500, null=True), + size=None, + ), + ), + migrations.AlterField( + model_name="institution", + name="h_index", + field=models.IntegerField(blank=True, default=0, null=True), + ), + migrations.AlterField( + model_name="institution", + name="i10_index", + field=models.IntegerField(blank=True, default=0, null=True), + ), + migrations.AlterField( + model_name="institution", + name="two_year_mean_citedness", + field=models.FloatField(blank=True, default=0, null=True), + ), + migrations.AlterField( + model_name="institution", + name="works_count", + field=models.IntegerField(blank=True, default=0, null=True), + ), + ] diff --git a/src/institution/models.py b/src/institution/models.py index 841c5d4b7..a0fc55d1b 100644 --- a/src/institution/models.py +++ b/src/institution/models.py @@ -89,20 +89,20 @@ class Institution(DefaultModel): ) # https://docs.openalex.org/api-entities/institutions/institution-object#2yr_mean_citedness - two_year_mean_citedness = models.FloatField(blank=False, null=False, default=0) + two_year_mean_citedness = models.FloatField(blank=True, null=True, default=0) # https://docs.openalex.org/api-entities/institutions/institution-object#summary_stats - h_index = models.IntegerField(blank=False, null=False, default=0) + h_index = models.IntegerField(blank=True, null=True, default=0) # https://docs.openalex.org/api-entities/institutions/institution-object#summary_stats - i10_index = models.IntegerField(blank=False, null=False, default=0) + i10_index = models.IntegerField(blank=True, null=True, default=0) # https://docs.openalex.org/api-entities/institutions/institution-object#works_count - works_count = models.IntegerField(blank=False, null=False, default=0) + works_count = models.IntegerField(blank=True, null=True, default=0) # https://docs.openalex.org/api-entities/institutions/institution-object#associated_institutions associated_institutions = ArrayField( - models.CharField(blank=False, null=False, max_length=500) + models.CharField(blank=True, null=True, max_length=500) ) # https://docs.openalex.org/api-entities/institutions/institution-object#display_name_alternatives @@ -164,11 +164,12 @@ def upsert_from_openalex(oa_institution): "works_count": oa_institution.get("works_count"), "associated_institutions": list( map( - lambda obj: obj["id"], oa_institution.get("associated_institutions") + lambda obj: obj["id"], + oa_institution.get("associated_institutions", []), ) ), "display_name_alternatives": oa_institution.get( - "display_name_alternatives" + "display_name_alternatives", [] ), } @@ -176,6 +177,6 @@ def upsert_from_openalex(oa_institution): for key, value in mapped.items(): setattr(institution, key, value) institution.save() - else: + elif not institution: institution = Institution.objects.create(**mapped) return institution diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 07aa5ff1d..3b7c9bba5 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -208,23 +208,11 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): raw_author_name=oa_authorship.get("author", {}).get("display_name"), ) - # Get affiliated institutions - 
affiliated_institutions = Institution.objects.filter( - openalex_id__in=[ - inst["id"] for inst in oa_authorship.get("institutions", []) - ] - ) - # Set institutions associated with authorships if they do not already exist - existing_institutions = authorship.institutions.all() - new_institutions = [ - inst - for inst in affiliated_institutions - if inst not in existing_institutions - ] - - if new_institutions: - authorship.institutions.add(*new_institutions) + for oa_inst in oa_authorship.get("institutions", []): + institution = Institution.upsert_from_openalex(oa_inst) + if institution: + authorship.institutions.add(institution) # Update authors with additional metadata from OpenAlex oa_authors, _ = open_alex.get_authors( diff --git a/src/paper/tests/test_process_openalex_works.py b/src/paper/tests/test_process_openalex_works.py index 755f7a903..e213dff4b 100644 --- a/src/paper/tests/test_process_openalex_works.py +++ b/src/paper/tests/test_process_openalex_works.py @@ -82,3 +82,24 @@ def test_create_authors_when_processing_work(self): # Sample the first paper to ensure it has authors paper_authors = created_papers.first().authors.all() self.assertGreater(len(paper_authors), 0) + + def test_create_authorships_when_processing_work(self): + process_openalex_works(self.works) + + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + paper = Paper.objects.filter(doi__in=dois).first() + + authorships = paper.authorships.all() + self.assertGreater(len(authorships), 0) + + def create_authorship_institutions_when_processing_work(self): + process_openalex_works(self.works) + + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + paper = Paper.objects.filter(doi__in=dois).first() + + authorship = paper.authorships.first() + institutions = authorship.institutions.all() + self.assertGreater(len(institutions), 0) From f5774811cd463ad93cbd6c8d68f6eb02b8f974d7 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Mon, 20 May 2024 16:10:42 -0400 Subject: [PATCH 10/32] [Author] Test that orcid values are set --- src/paper/tests/openalex_authors.json | 1309 ++++++++++ src/paper/tests/openalex_works.json | 2114 +++++++++-------- .../tests/test_process_openalex_works.py | 18 + 3 files changed, 2406 insertions(+), 1035 deletions(-) create mode 100644 src/paper/tests/openalex_authors.json diff --git a/src/paper/tests/openalex_authors.json b/src/paper/tests/openalex_authors.json new file mode 100644 index 000000000..a0683c762 --- /dev/null +++ b/src/paper/tests/openalex_authors.json @@ -0,0 +1,1309 @@ +{ + "meta": { + "count": 1, + "db_response_time_ms": 15, + "page": 1, + "per_page": 25, + "groups_count": null + }, + "results": [ + { + "id": "https://openalex.org/A5053648717", + "orcid": "https://orcid.org/0000-0001-5679-8902", + "display_name": "Matthias Ernzerhof", + "display_name_alternatives": [ + "M. 
Ernzerhof", + "Matthias Ernzerhof" + ], + "works_count": 119, + "cited_by_count": 205672, + "summary_stats": { + "2yr_mean_citedness": 2.0, + "h_index": 35, + "i10_index": 72 + }, + "ids": { + "openalex": "https://openalex.org/A5053648717", + "orcid": "https://orcid.org/0000-0001-5679-8902" + }, + "affiliations": [ + { + "institution": { + "id": "https://openalex.org/I70931966", + "ror": "https://ror.org/0161xgx34", + "display_name": "Universit\u00e9 de Montr\u00e9al", + "country_code": "CA", + "type": "education", + "lineage": [ + "https://openalex.org/I70931966" + ] + }, + "years": [ + 2023, + 2022, + 2021, + 2020, + 2019, + 2018, + 2017, + 2016, + 2015, + 2014 + ] + }, + { + "institution": { + "id": "https://openalex.org/I4210115901", + "ror": "https://ror.org/029dwxd81", + "display_name": "Centre in Green Chemistry and Catalysis", + "country_code": "CA", + "type": "facility", + "lineage": [ + "https://openalex.org/I4210115901" + ] + }, + "years": [ + 2021, + 2019 + ] + }, + { + "institution": { + "id": "https://openalex.org/I159129438", + "ror": "https://ror.org/002rjbv21", + "display_name": "Universit\u00e9 du Qu\u00e9bec \u00e0 Montr\u00e9al", + "country_code": "CA", + "type": "education", + "lineage": [ + "https://openalex.org/I159129438", + "https://openalex.org/I49663120" + ] + }, + "years": [ + 2020 + ] + }, + { + "institution": { + "id": "https://openalex.org/I4210099103", + "ror": "https://ror.org/010rs2a38", + "display_name": "D\u00e9partement de Chimie Mol\u00e9culaire", + "country_code": "FR", + "type": "facility", + "lineage": [ + "https://openalex.org/I1294671590", + "https://openalex.org/I4210099103", + "https://openalex.org/I4210128300" + ] + }, + "years": [ + 2019 + ] + }, + { + "institution": { + "id": "https://openalex.org/I4210144791", + "ror": "https://ror.org/04dbzz632", + "display_name": "Institut N\u00e9el", + "country_code": "FR", + "type": "facility", + "lineage": [ + "https://openalex.org/I1294671590", + "https://openalex.org/I4210098836", + "https://openalex.org/I4210144791", + "https://openalex.org/I899635006" + ] + }, + "years": [ + 2013 + ] + }, + { + "institution": { + "id": "https://openalex.org/I114832834", + "ror": "https://ror.org/04vmvtb21", + "display_name": "Tulane University", + "country_code": "US", + "type": "education", + "lineage": [ + "https://openalex.org/I114832834" + ] + }, + "years": [ + 2008, + 1998, + 1997, + 1996 + ] + }, + { + "institution": { + "id": "https://openalex.org/I74775410", + "ror": "https://ror.org/008zs3103", + "display_name": "Rice University", + "country_code": "US", + "type": "education", + "lineage": [ + "https://openalex.org/I74775410" + ] + }, + "years": [ + 2001, + 2000, + 1999 + ] + }, + { + "institution": { + "id": "https://openalex.org/I4210094722", + "ror": "https://ror.org/00tbs6p91", + "display_name": "Center for Nanoscale Science and Technology", + "country_code": "US", + "type": "government", + "lineage": [ + "https://openalex.org/I1321296531", + "https://openalex.org/I1343035065", + "https://openalex.org/I4210094722" + ] + }, + "years": [ + 2001 + ] + }, + { + "institution": { + "id": "https://openalex.org/I135140700", + "ror": "https://ror.org/041nas322", + "display_name": "University of Bonn", + "country_code": "DE", + "type": "education", + "lineage": [ + "https://openalex.org/I135140700" + ] + }, + "years": [ + 1996, + 1994, + 1992 + ] + }, + { + "institution": { + "id": "https://openalex.org/I62916508", + "ror": "https://ror.org/02kkvpp62", + "display_name": "Technical University of Munich", + 
"country_code": "DE", + "type": "education", + "lineage": [ + "https://openalex.org/I62916508" + ] + }, + "years": [ + 1996, + 1995 + ] + } + ], + "last_known_institutions": [ + { + "id": "https://openalex.org/I70931966", + "ror": "https://ror.org/0161xgx34", + "display_name": "Universit\u00e9 de Montr\u00e9al", + "country_code": "CA", + "type": "education", + "lineage": [ + "https://openalex.org/I70931966" + ] + } + ], + "topics": [ + { + "id": "https://openalex.org/T10002", + "display_name": "Advancements in Density Functional Theory", + "count": 63, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10913", + "display_name": "Molecular Electronic Devices and Systems", + "count": 54, + "subfield": { + "id": "https://openalex.org/subfields/2208", + "display_name": "Electrical and Electronic Engineering" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11177", + "display_name": "Quantum Coherence in Photosynthesis and Aqueous Systems", + "count": 38, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10382", + "display_name": "Semiconductor Spintronics and Quantum Computing", + "count": 24, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10045", + "display_name": "Organic Solar Cell Technology", + "count": 10, + "subfield": { + "id": "https://openalex.org/subfields/2208", + "display_name": "Electrical and Electronic Engineering" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10083", + "display_name": "Graphene: Properties, Synthesis, and Applications", + "count": 9, + "subfield": { + "id": "https://openalex.org/subfields/2505", + "display_name": "Materials Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11809", + "display_name": "NMR Spectroscopy Techniques", + "count": 8, + "subfield": { + "id": "https://openalex.org/subfields/1607", + "display_name": "Spectroscopy" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": 
"https://openalex.org/T12202", + "display_name": "Ice Nucleation and Melting Phenomena", + "count": 8, + "subfield": { + "id": "https://openalex.org/subfields/1902", + "display_name": "Atmospheric Science" + }, + "field": { + "id": "https://openalex.org/fields/19", + "display_name": "Earth and Planetary Sciences" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10074", + "display_name": "Carbon Nanotubes and their Applications", + "count": 6, + "subfield": { + "id": "https://openalex.org/subfields/2505", + "display_name": "Materials Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11129", + "display_name": "Excited-State Proton Transfer Mechanisms and Applications", + "count": 6, + "subfield": { + "id": "https://openalex.org/subfields/1606", + "display_name": "Physical and Theoretical Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12044", + "display_name": "Computational Chemistry and Polymer Physics", + "count": 6, + "subfield": { + "id": "https://openalex.org/subfields/1606", + "display_name": "Physical and Theoretical Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11948", + "display_name": "Accelerating Materials Innovation through Informatics", + "count": 5, + "subfield": { + "id": "https://openalex.org/subfields/2505", + "display_name": "Materials Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11965", + "display_name": "Quantum Effects in Helium Nanodroplets and Solids", + "count": 5, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11825", + "display_name": "Catalytic Dehydrogenation of Light Alkanes", + "count": 5, + "subfield": { + "id": "https://openalex.org/subfields/1503", + "display_name": "Catalysis" + }, + "field": { + "id": "https://openalex.org/fields/15", + "display_name": "Chemical Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10321", + "display_name": "Applications of Quantum Dots in Nanotechnology", + "count": 4, + "subfield": { + "id": "https://openalex.org/subfields/2505", + "display_name": "Materials Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12251", + 
"display_name": "Catalytic C-H Amination Reactions", + "count": 4, + "subfield": { + "id": "https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10660", + "display_name": "Conducting Polymer Research", + "count": 4, + "subfield": { + "id": "https://openalex.org/subfields/2507", + "display_name": "Polymers and Plastics" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11298", + "display_name": "Aromaticity in Organic Molecules and Materials", + "count": 3, + "subfield": { + "id": "https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10354", + "display_name": "Transition-Metal-Catalyzed C\u2013H Bond Functionalization", + "count": 3, + "subfield": { + "id": "https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10037", + "display_name": "High-Temperature Superconductivity", + "count": 3, + "subfield": { + "id": "https://openalex.org/subfields/3104", + "display_name": "Condensed Matter Physics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10558", + "display_name": "Nanoelectronics and Transistors", + "count": 3, + "subfield": { + "id": "https://openalex.org/subfields/2208", + "display_name": "Electrical and Electronic Engineering" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12588", + "display_name": "Emergent Phenomena at Oxide Interfaces", + "count": 2, + "subfield": { + "id": "https://openalex.org/subfields/2505", + "display_name": "Materials Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10923", + "display_name": "Atomic Force Microscopy Techniques", + "count": 2, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10295", + "display_name": "Plasmonics and Nanophotonics Research", + "count": 2, + "subfield": { + "id": 
"https://openalex.org/subfields/2204", + "display_name": "Biomedical Engineering" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12852", + "display_name": "Thermochemical Properties of Organic Compounds", + "count": 2, + "subfield": { + "id": "https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + } + ], + "topic_share": [ + { + "id": "https://openalex.org/T10913", + "display_name": "Molecular Electronic Devices and Systems", + "value": 0.0001811, + "subfield": { + "id": "https://openalex.org/subfields/2208", + "display_name": "Electrical and Electronic Engineering" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10002", + "display_name": "Advancements in Density Functional Theory", + "value": 0.0001207, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11177", + "display_name": "Quantum Coherence in Photosynthesis and Aqueous Systems", + "value": 9.96e-05, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10382", + "display_name": "Semiconductor Spintronics and Quantum Computing", + "value": 8.32e-05, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12044", + "display_name": "Computational Chemistry and Polymer Physics", + "value": 7.22e-05, + "subfield": { + "id": "https://openalex.org/subfields/1606", + "display_name": "Physical and Theoretical Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10045", + "display_name": "Organic Solar Cell Technology", + "value": 6.82e-05, + "subfield": { + "id": "https://openalex.org/subfields/2208", + "display_name": "Electrical and Electronic Engineering" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T14318", + "display_name": "Polydiacetylene Supramolecular 
Chemosensors", + "value": 5.53e-05, + "subfield": { + "id": "https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12612", + "display_name": "Bose-Einstein Condensation of Polaritons", + "value": 5.36e-05, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12202", + "display_name": "Ice Nucleation and Melting Phenomena", + "value": 4.58e-05, + "subfield": { + "id": "https://openalex.org/subfields/1902", + "display_name": "Atmospheric Science" + }, + "field": { + "id": "https://openalex.org/fields/19", + "display_name": "Earth and Planetary Sciences" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11948", + "display_name": "Accelerating Materials Innovation through Informatics", + "value": 4.46e-05, + "subfield": { + "id": "https://openalex.org/subfields/2505", + "display_name": "Materials Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11809", + "display_name": "NMR Spectroscopy Techniques", + "value": 4.32e-05, + "subfield": { + "id": "https://openalex.org/subfields/1607", + "display_name": "Spectroscopy" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11298", + "display_name": "Aromaticity in Organic Molecules and Materials", + "value": 4e-05, + "subfield": { + "id": "https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10074", + "display_name": "Carbon Nanotubes and their Applications", + "value": 3.49e-05, + "subfield": { + "id": "https://openalex.org/subfields/2505", + "display_name": "Materials Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10083", + "display_name": "Graphene: Properties, Synthesis, and Applications", + "value": 3.25e-05, + "subfield": { + "id": "https://openalex.org/subfields/2505", + "display_name": "Materials Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/25", + "display_name": "Materials Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12251", + "display_name": "Catalytic C-H Amination Reactions", + "value": 3.15e-05, + "subfield": { + "id": 
"https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11565", + "display_name": "Thermoelastic Damping and Heat Conduction", + "value": 2.89e-05, + "subfield": { + "id": "https://openalex.org/subfields/2211", + "display_name": "Mechanics of Materials" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11129", + "display_name": "Excited-State Proton Transfer Mechanisms and Applications", + "value": 2.64e-05, + "subfield": { + "id": "https://openalex.org/subfields/1606", + "display_name": "Physical and Theoretical Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T13182", + "display_name": "Design and Simulation of Quantum-dot Cellular Automata", + "value": 2.49e-05, + "subfield": { + "id": "https://openalex.org/subfields/1703", + "display_name": "Computational Theory and Mathematics" + }, + "field": { + "id": "https://openalex.org/fields/17", + "display_name": "Computer Science" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11683", + "display_name": "Foundations of Electromagnetic Theory and Quantum Field Theory", + "value": 2.49e-05, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11965", + "display_name": "Quantum Effects in Helium Nanodroplets and Solids", + "value": 2.15e-05, + "subfield": { + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" + }, + "field": { + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12189", + "display_name": "Transition-Metal-Catalyzed Sulfur Chemistry", + "value": 1.92e-05, + "subfield": { + "id": "https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10354", + "display_name": "Transition-Metal-Catalyzed C\u2013H Bond Functionalization", + "value": 1.9e-05, + "subfield": { + "id": "https://openalex.org/subfields/1605", + "display_name": "Organic Chemistry" + }, + "field": { + "id": "https://openalex.org/fields/16", + "display_name": "Chemistry" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T12285", + "display_name": "Engineering of Surface Nanostructures", 
+ "value": 1.86e-05, + "subfield": { + "id": "https://openalex.org/subfields/2204", + "display_name": "Biomedical Engineering" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T10295", + "display_name": "Plasmonics and Nanophotonics Research", + "value": 1.73e-05, + "subfield": { + "id": "https://openalex.org/subfields/2204", + "display_name": "Biomedical Engineering" + }, + "field": { + "id": "https://openalex.org/fields/22", + "display_name": "Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + }, + { + "id": "https://openalex.org/T11825", + "display_name": "Catalytic Dehydrogenation of Light Alkanes", + "value": 1.73e-05, + "subfield": { + "id": "https://openalex.org/subfields/1503", + "display_name": "Catalysis" + }, + "field": { + "id": "https://openalex.org/fields/15", + "display_name": "Chemical Engineering" + }, + "domain": { + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" + } + } + ], + "x_concepts": [ + { + "id": "https://openalex.org/C121332964", + "wikidata": "https://www.wikidata.org/wiki/Q413", + "display_name": "Physics", + "level": 0, + "score": 95.8 + }, + { + "id": "https://openalex.org/C62520636", + "wikidata": "https://www.wikidata.org/wiki/Q944", + "display_name": "Quantum mechanics", + "level": 1, + "score": 89.9 + }, + { + "id": "https://openalex.org/C185592680", + "wikidata": "https://www.wikidata.org/wiki/Q2329", + "display_name": "Chemistry", + "level": 0, + "score": 73.9 + }, + { + "id": "https://openalex.org/C33923547", + "wikidata": "https://www.wikidata.org/wiki/Q395", + "display_name": "Mathematics", + "level": 0, + "score": 68.9 + }, + { + "id": "https://openalex.org/C178790620", + "wikidata": "https://www.wikidata.org/wiki/Q11351", + "display_name": "Organic chemistry", + "level": 1, + "score": 48.7 + }, + { + "id": "https://openalex.org/C26873012", + "wikidata": "https://www.wikidata.org/wiki/Q214781", + "display_name": "Condensed matter physics", + "level": 1, + "score": 47.9 + }, + { + "id": "https://openalex.org/C121864883", + "wikidata": "https://www.wikidata.org/wiki/Q677916", + "display_name": "Statistical physics", + "level": 1, + "score": 47.1 + }, + { + "id": "https://openalex.org/C185544564", + "wikidata": "https://www.wikidata.org/wiki/Q81197", + "display_name": "Nuclear physics", + "level": 1, + "score": 47.1 + }, + { + "id": "https://openalex.org/C147597530", + "wikidata": "https://www.wikidata.org/wiki/Q369472", + "display_name": "Computational chemistry", + "level": 1, + "score": 46.2 + }, + { + "id": "https://openalex.org/C127413603", + "wikidata": "https://www.wikidata.org/wiki/Q11023", + "display_name": "Engineering", + "level": 0, + "score": 45.4 + }, + { + "id": "https://openalex.org/C184779094", + "wikidata": "https://www.wikidata.org/wiki/Q26383", + "display_name": "Atomic physics", + "level": 1, + "score": 40.3 + }, + { + "id": "https://openalex.org/C192562407", + "wikidata": "https://www.wikidata.org/wiki/Q228736", + "display_name": "Materials science", + "level": 0, + "score": 39.5 + }, + { + "id": "https://openalex.org/C147120987", + "wikidata": "https://www.wikidata.org/wiki/Q2225", + "display_name": "Electron", + "level": 2, + "score": 37.8 + }, + { + "id": "https://openalex.org/C41008148", + "wikidata": "https://www.wikidata.org/wiki/Q21198", + 
"display_name": "Computer science", + "level": 0, + "score": 37.0 + }, + { + "id": "https://openalex.org/C152365726", + "wikidata": "https://www.wikidata.org/wiki/Q1048589", + "display_name": "Density functional theory", + "level": 2, + "score": 36.1 + }, + { + "id": "https://openalex.org/C32909587", + "wikidata": "https://www.wikidata.org/wiki/Q11369", + "display_name": "Molecule", + "level": 2, + "score": 33.6 + }, + { + "id": "https://openalex.org/C2524010", + "wikidata": "https://www.wikidata.org/wiki/Q8087", + "display_name": "Geometry", + "level": 1, + "score": 26.9 + }, + { + "id": "https://openalex.org/C97355855", + "wikidata": "https://www.wikidata.org/wiki/Q11473", + "display_name": "Thermodynamics", + "level": 1, + "score": 25.2 + }, + { + "id": "https://openalex.org/C119599485", + "wikidata": "https://www.wikidata.org/wiki/Q43035", + "display_name": "Electrical engineering", + "level": 1, + "score": 25.2 + }, + { + "id": "https://openalex.org/C159985019", + "wikidata": "https://www.wikidata.org/wiki/Q181790", + "display_name": "Composite material", + "level": 1, + "score": 22.7 + }, + { + "id": "https://openalex.org/C105795698", + "wikidata": "https://www.wikidata.org/wiki/Q12483", + "display_name": "Statistics", + "level": 1, + "score": 21.0 + }, + { + "id": "https://openalex.org/C138885662", + "wikidata": "https://www.wikidata.org/wiki/Q5891", + "display_name": "Philosophy", + "level": 0, + "score": 21.0 + }, + { + "id": "https://openalex.org/C86025842", + "wikidata": "https://www.wikidata.org/wiki/Q5358432", + "display_name": "Electronic structure", + "level": 2, + "score": 20.2 + } + ], + "counts_by_year": [ + { + "year": 2024, + "works_count": 0, + "cited_by_count": 9680 + }, + { + "year": 2023, + "works_count": 2, + "cited_by_count": 21995 + }, + { + "year": 2022, + "works_count": 2, + "cited_by_count": 20970 + }, + { + "year": 2021, + "works_count": 3, + "cited_by_count": 19543 + }, + { + "year": 2020, + "works_count": 2, + "cited_by_count": 17911 + }, + { + "year": 2019, + "works_count": 4, + "cited_by_count": 15531 + }, + { + "year": 2018, + "works_count": 3, + "cited_by_count": 13992 + }, + { + "year": 2017, + "works_count": 4, + "cited_by_count": 12948 + }, + { + "year": 2016, + "works_count": 3, + "cited_by_count": 11613 + }, + { + "year": 2015, + "works_count": 2, + "cited_by_count": 10294 + }, + { + "year": 2014, + "works_count": 3, + "cited_by_count": 9029 + }, + { + "year": 2013, + "works_count": 1, + "cited_by_count": 7834 + }, + { + "year": 2012, + "works_count": 3, + "cited_by_count": 6756 + } + ], + "works_api_url": "https://api.openalex.org/works?filter=author.id:A5053648717", + "updated_date": "2024-05-19T03:50:19.398319", + "created_date": "2023-07-21" + } + ], + "group_by": [] +} diff --git a/src/paper/tests/openalex_works.json b/src/paper/tests/openalex_works.json index 476dbc611..182228bc5 100644 --- a/src/paper/tests/openalex_works.json +++ b/src/paper/tests/openalex_works.json @@ -1,113 +1,86 @@ { "meta": { - "count": 253809574, - "db_response_time_ms": 56, + "count": 107299944, + "db_response_time_ms": 132, "page": 1, "per_page": 2, "groups_count": null }, "results": [ { - "id": "https://openalex.org/W1775749144", - "doi": "https://doi.org/10.1016/s0021-9258(19)52451-6", - "title": "PROTEIN MEASUREMENT WITH THE FOLIN PHENOL REAGENT", - "display_name": "PROTEIN MEASUREMENT WITH THE FOLIN PHENOL REAGENT", - "publication_year": 1951, - "publication_date": "1951-11-01", + "id": "https://openalex.org/W1981368803", + "doi": 
"https://doi.org/10.1103/physrevlett.77.3865", + "title": "Generalized Gradient Approximation Made Simple", + "display_name": "Generalized Gradient Approximation Made Simple", + "publication_year": 1996, + "publication_date": "1996-10-28", "ids": { - "openalex": "https://openalex.org/W1775749144", - "doi": "https://doi.org/10.1016/s0021-9258(19)52451-6", - "mag": "1775749144", - "pmid": "https://pubmed.ncbi.nlm.nih.gov/14907713" + "openalex": "https://openalex.org/W1981368803", + "doi": "https://doi.org/10.1103/physrevlett.77.3865", + "mag": "1981368803", + "pmid": "https://pubmed.ncbi.nlm.nih.gov/10062328" }, "language": "en", "primary_location": { - "is_oa": true, - "landing_page_url": "https://doi.org/10.1016/s0021-9258(19)52451-6", + "is_oa": false, + "landing_page_url": "https://doi.org/10.1103/physrevlett.77.3865", "pdf_url": null, "source": { - "id": "https://openalex.org/S140251998", - "display_name": "Journal of biological chemistry/\u0098The \u009cJournal of biological chemistry", - "issn_l": "0021-9258", + "id": "https://openalex.org/S24807848", + "display_name": "Physical review letters", + "issn_l": "0031-9007", "issn": [ - "0021-9258", - "1067-8816", - "1083-351X" + "0031-9007", + "1079-7114", + "1092-0145" ], - "is_oa": true, - "is_in_doaj": true, - "host_organization": "https://openalex.org/P4310320990", - "host_organization_name": "Elsevier BV", + "is_oa": false, + "is_in_doaj": false, + "host_organization": "https://openalex.org/P4310320261", + "host_organization_name": "American Physical Society", "host_organization_lineage": [ - "https://openalex.org/P4310320990" + "https://openalex.org/P4310320261" ], "host_organization_lineage_names": [ - "Elsevier BV" + "American Physical Society" ], "type": "journal" }, - "license": "cc-by", - "license_id": "https://openalex.org/licenses/cc-by", - "version": "publishedVersion", - "is_accepted": true, - "is_published": true + "license": null, + "license_id": null, + "version": null, + "is_accepted": false, + "is_published": false }, "type": "article", "type_crossref": "journal-article", "indexed_in": [ - "crossref" + "crossref", + "pubmed" ], "open_access": { - "is_oa": true, - "oa_status": "hybrid", - "oa_url": "https://doi.org/10.1016/s0021-9258(19)52451-6", - "any_repository_has_fulltext": true + "is_oa": false, + "oa_status": "closed", + "oa_url": null, + "any_repository_has_fulltext": false }, "authorships": [ { "author_position": "first", "author": { - "id": "https://openalex.org/A5067833651", - "display_name": "Oliver H. Lowry", - "orcid": null - }, - "institutions": [ - { - "id": "https://openalex.org/I204465549", - "display_name": "Washington University in St. Louis", - "ror": "https://ror.org/01yc7t268", - "country_code": "US", - "type": "education", - "lineage": [ - "https://openalex.org/I204465549" - ] - } - ], - "countries": [ - "US" - ], - "is_corresponding": false, - "raw_author_name": "OliverH. Lowry", - "raw_affiliation_strings": [ - "the Department of Pharmacology, Washington University School oj Medicine, St. Louis, Missouri" - ] - }, - { - "author_position": "middle", - "author": { - "id": "https://openalex.org/A5032482932", - "display_name": "N. J. Rosebrough", - "orcid": null + "id": "https://openalex.org/A5014987686", + "display_name": "John P. Perdew", + "orcid": "https://orcid.org/0000-0003-4237-824X" }, "institutions": [ { - "id": "https://openalex.org/I204465549", - "display_name": "Washington University in St. 
Louis", - "ror": "https://ror.org/01yc7t268", + "id": "https://openalex.org/I114832834", + "display_name": "Tulane University", + "ror": "https://ror.org/04vmvtb21", "country_code": "US", "type": "education", "lineage": [ - "https://openalex.org/I204465549" + "https://openalex.org/I114832834" ] } ], @@ -115,27 +88,27 @@ "US" ], "is_corresponding": false, - "raw_author_name": "NiraJ. Rosebrough", + "raw_author_name": "John P. Perdew", "raw_affiliation_strings": [ - "the Department of Pharmacology, Washington University School oj Medicine, St. Louis, Missouri" + "Department of Physics and Quantum Theory Group, Tulane University, New Orleans, Louisiana 70118" ] }, { "author_position": "middle", "author": { - "id": "https://openalex.org/A5004071084", - "display_name": "A. Farr", - "orcid": null + "id": "https://openalex.org/A5082307304", + "display_name": "Kieron Burke", + "orcid": "https://orcid.org/0000-0002-6159-0054" }, "institutions": [ { - "id": "https://openalex.org/I204465549", - "display_name": "Washington University in St. Louis", - "ror": "https://ror.org/01yc7t268", + "id": "https://openalex.org/I114832834", + "display_name": "Tulane University", + "ror": "https://ror.org/04vmvtb21", "country_code": "US", "type": "education", "lineage": [ - "https://openalex.org/I204465549" + "https://openalex.org/I114832834" ] } ], @@ -143,27 +116,27 @@ "US" ], "is_corresponding": false, - "raw_author_name": "A. Lewis Farr", + "raw_author_name": "Kieron Burke", "raw_affiliation_strings": [ - "the Department of Pharmacology, Washington University School oj Medicine, St. Louis, Missouri" + "Department of Physics and Quantum Theory Group, Tulane University, New Orleans, Louisiana 70118" ] }, { "author_position": "last", "author": { - "id": "https://openalex.org/A5074535928", - "display_name": "Rose J. Randall", - "orcid": null + "id": "https://openalex.org/A5053648717", + "display_name": "Matthias Ernzerhof", + "orcid": "https://orcid.org/0000-0001-5679-8902" }, "institutions": [ { - "id": "https://openalex.org/I204465549", - "display_name": "Washington University in St. Louis", - "ror": "https://ror.org/01yc7t268", + "id": "https://openalex.org/I114832834", + "display_name": "Tulane University", + "ror": "https://ror.org/04vmvtb21", "country_code": "US", "type": "education", "lineage": [ - "https://openalex.org/I204465549" + "https://openalex.org/I114832834" ] } ], @@ -171,9 +144,9 @@ "US" ], "is_corresponding": false, - "raw_author_name": "RoseJ. Randall", + "raw_author_name": "Matthias Ernzerhof", "raw_affiliation_strings": [ - "the Department of Pharmacology, Washington University School oj Medicine, St. 
Louis, Missouri" + "Department of Physics and Quantum Theory Group, Tulane University, New Orleans, Louisiana 70118" ] } ], @@ -182,1020 +155,677 @@ "corresponding_author_ids": [], "corresponding_institution_ids": [], "apc_list": { - "value": 2500, + "value": 3750, "currency": "USD", - "value_usd": 2500, + "value_usd": 3750, "provenance": "doaj" }, "apc_paid": { - "value": 2500, + "value": 3750, "currency": "USD", - "value_usd": 2500, + "value_usd": 3750, "provenance": "doaj" }, "has_fulltext": true, - "fulltext_origin": "pdf", - "cited_by_count": 305961, + "fulltext_origin": "ngrams", + "cited_by_count": 160375, "cited_by_percentile_year": { "min": 99, "max": 100 }, "biblio": { - "volume": "193", - "issue": "1", - "first_page": "265", - "last_page": "275" + "volume": "77", + "issue": "18", + "first_page": "3865", + "last_page": "3868" }, "is_retracted": false, "is_paratext": false, "primary_topic": { - "id": "https://openalex.org/T10602", - "display_name": "Glycosylation in Health and Disease", - "score": 0.9678, + "id": "https://openalex.org/T10002", + "display_name": "Advancements in Density Functional Theory", + "score": 0.9998, "subfield": { - "id": "https://openalex.org/subfields/1312", - "display_name": "Molecular Biology" + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" }, "field": { - "id": "https://openalex.org/fields/13", - "display_name": "Biochemistry, Genetics and Molecular Biology" + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" }, "domain": { - "id": "https://openalex.org/domains/1", - "display_name": "Life Sciences" + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" } }, "topics": [ { - "id": "https://openalex.org/T10602", - "display_name": "Glycosylation in Health and Disease", - "score": 0.9678, + "id": "https://openalex.org/T10002", + "display_name": "Advancements in Density Functional Theory", + "score": 0.9998, "subfield": { - "id": "https://openalex.org/subfields/1312", - "display_name": "Molecular Biology" + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" }, "field": { - "id": "https://openalex.org/fields/13", - "display_name": "Biochemistry, Genetics and Molecular Biology" + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" }, "domain": { - "id": "https://openalex.org/domains/1", - "display_name": "Life Sciences" + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" } }, { - "id": "https://openalex.org/T11399", - "display_name": "Protein Metabolism in Exercise and Nutrition", - "score": 0.951, + "id": "https://openalex.org/T11177", + "display_name": "Quantum Coherence in Photosynthesis and Aqueous Systems", + "score": 0.9997, "subfield": { - "id": "https://openalex.org/subfields/1307", - "display_name": "Cell Biology" + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" }, "field": { - "id": "https://openalex.org/fields/13", - "display_name": "Biochemistry, Genetics and Molecular Biology" + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" }, "domain": { - "id": "https://openalex.org/domains/1", - "display_name": "Life Sciences" + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" } }, { - "id": "https://openalex.org/T14135", - "display_name": "Oxidation States in Biochemistry and Medicine", - "score": 0.9502, + "id": 
"https://openalex.org/T11965", + "display_name": "Quantum Effects in Helium Nanodroplets and Solids", + "score": 0.9994, "subfield": { - "id": "https://openalex.org/subfields/1312", - "display_name": "Molecular Biology" + "id": "https://openalex.org/subfields/3107", + "display_name": "Atomic and Molecular Physics, and Optics" }, "field": { - "id": "https://openalex.org/fields/13", - "display_name": "Biochemistry, Genetics and Molecular Biology" + "id": "https://openalex.org/fields/31", + "display_name": "Physics and Astronomy" }, "domain": { - "id": "https://openalex.org/domains/1", - "display_name": "Life Sciences" + "id": "https://openalex.org/domains/3", + "display_name": "Physical Sciences" } } ], "keywords": [ { - "id": "https://openalex.org/keywords/proteins", - "display_name": "Proteins", - "score": 0.536585 + "id": "https://openalex.org/keywords/electronic-structure-calculations", + "display_name": "Electronic Structure Calculations", + "score": 0.512978 } ], "concepts": [ { - "id": "https://openalex.org/C40875361", - "wikidata": "https://www.wikidata.org/wiki/Q2356542", - "display_name": "Reagent", + "id": "https://openalex.org/C2780586882", + "wikidata": "https://www.wikidata.org/wiki/Q7520643", + "display_name": "Simple (philosophy)", "level": 2, - "score": 0.97429305 + "score": 0.82514447 }, { - "id": "https://openalex.org/C185592680", - "wikidata": "https://www.wikidata.org/wiki/Q2329", - "display_name": "Chemistry", + "id": "https://openalex.org/C99844830", + "wikidata": "https://www.wikidata.org/wiki/Q102441924", + "display_name": "Scaling", + "level": 2, + "score": 0.6654375 + }, + { + "id": "https://openalex.org/C121864883", + "wikidata": "https://www.wikidata.org/wiki/Q677916", + "display_name": "Statistical physics", + "level": 1, + "score": 0.6102829 + }, + { + "id": "https://openalex.org/C170122806", + "wikidata": "https://www.wikidata.org/wiki/Q1914828", + "display_name": "Linear scale", + "level": 2, + "score": 0.55408573 + }, + { + "id": "https://openalex.org/C121332964", + "wikidata": "https://www.wikidata.org/wiki/Q413", + "display_name": "Physics", "level": 0, - "score": 0.8413831 + "score": 0.49338427 }, { - "id": "https://openalex.org/C2777702071", - "wikidata": "https://www.wikidata.org/wiki/Q130336", - "display_name": "Phenol", + "id": "https://openalex.org/C3018272316", + "wikidata": "https://www.wikidata.org/wiki/Q905186", + "display_name": "Spin density", "level": 2, - "score": 0.56796163 + "score": 0.48516518 }, { - "id": "https://openalex.org/C43617362", - "wikidata": "https://www.wikidata.org/wiki/Q170050", - "display_name": "Chromatography", + "id": "https://openalex.org/C42704618", + "wikidata": "https://www.wikidata.org/wiki/Q910917", + "display_name": "Spin (aerodynamics)", + "level": 2, + "score": 0.43200958 + }, + { + "id": "https://openalex.org/C9249130", + "wikidata": "https://www.wikidata.org/wiki/Q898241", + "display_name": "Local-density approximation", + "level": 3, + "score": 0.43061215 + }, + { + "id": "https://openalex.org/C9059619", + "wikidata": "https://www.wikidata.org/wiki/Q3272140", + "display_name": "Kohn\u2013Sham equations", + "level": 3, + "score": 0.42569804 + }, + { + "id": "https://openalex.org/C62520636", + "wikidata": "https://www.wikidata.org/wiki/Q944", + "display_name": "Quantum mechanics", + "level": 1, + "score": 0.35535908 + }, + { + "id": "https://openalex.org/C86025842", + "wikidata": "https://www.wikidata.org/wiki/Q5358432", + "display_name": "Electronic structure", + "level": 2, + "score": 0.31023443 + }, + { + 
"id": "https://openalex.org/C152365726", + "wikidata": "https://www.wikidata.org/wiki/Q1048589", + "display_name": "Density functional theory", + "level": 2, + "score": 0.29765612 + }, + { + "id": "https://openalex.org/C26873012", + "wikidata": "https://www.wikidata.org/wiki/Q214781", + "display_name": "Condensed matter physics", + "level": 1, + "score": 0.2565789 + }, + { + "id": "https://openalex.org/C33923547", + "wikidata": "https://www.wikidata.org/wiki/Q395", + "display_name": "Mathematics", + "level": 0, + "score": 0.20775574 + }, + { + "id": "https://openalex.org/C97355855", + "wikidata": "https://www.wikidata.org/wiki/Q11473", + "display_name": "Thermodynamics", "level": 1, - "score": 0.5565455 + "score": 0.1649805 + }, + { + "id": "https://openalex.org/C2524010", + "wikidata": "https://www.wikidata.org/wiki/Q8087", + "display_name": "Geometry", + "level": 1, + "score": 0.11068845 + }, + { + "id": "https://openalex.org/C13280743", + "wikidata": "https://www.wikidata.org/wiki/Q131089", + "display_name": "Geodesy", + "level": 1, + "score": 0.0 + }, + { + "id": "https://openalex.org/C205649164", + "wikidata": "https://www.wikidata.org/wiki/Q1071", + "display_name": "Geography", + "level": 0, + "score": 0.0 + }, + { + "id": "https://openalex.org/C138885662", + "wikidata": "https://www.wikidata.org/wiki/Q5891", + "display_name": "Philosophy", + "level": 0, + "score": 0.0 }, { - "id": "https://openalex.org/C178790620", - "wikidata": "https://www.wikidata.org/wiki/Q11351", - "display_name": "Organic chemistry", + "id": "https://openalex.org/C111472728", + "wikidata": "https://www.wikidata.org/wiki/Q9471", + "display_name": "Epistemology", "level": 1, - "score": 0.27057263 + "score": 0.0 } ], "mesh": [], "locations_count": 2, "locations": [ { - "is_oa": true, - "landing_page_url": "https://doi.org/10.1016/s0021-9258(19)52451-6", + "is_oa": false, + "landing_page_url": "https://doi.org/10.1103/physrevlett.77.3865", "pdf_url": null, "source": { - "id": "https://openalex.org/S140251998", - "display_name": "Journal of biological chemistry/\u0098The \u009cJournal of biological chemistry", - "issn_l": "0021-9258", + "id": "https://openalex.org/S24807848", + "display_name": "Physical review letters", + "issn_l": "0031-9007", "issn": [ - "0021-9258", - "1067-8816", - "1083-351X" + "0031-9007", + "1079-7114", + "1092-0145" ], - "is_oa": true, - "is_in_doaj": true, - "host_organization": "https://openalex.org/P4310320990", - "host_organization_name": "Elsevier BV", + "is_oa": false, + "is_in_doaj": false, + "host_organization": "https://openalex.org/P4310320261", + "host_organization_name": "American Physical Society", "host_organization_lineage": [ - "https://openalex.org/P4310320990" + "https://openalex.org/P4310320261" ], "host_organization_lineage_names": [ - "Elsevier BV" + "American Physical Society" ], "type": "journal" }, - "license": "cc-by", - "license_id": "https://openalex.org/licenses/cc-by", - "version": "publishedVersion", - "is_accepted": true, - "is_published": true + "license": null, + "license_id": null, + "version": null, + "is_accepted": false, + "is_published": false }, { - "is_oa": true, - "landing_page_url": "https://www.jbc.org/article/S0021-9258(19)52451-6/pdf", - "pdf_url": "https://www.jbc.org/article/S0021-9258(19)52451-6/pdf", - "source": null, + "is_oa": false, + "landing_page_url": "https://pubmed.ncbi.nlm.nih.gov/10062328", + "pdf_url": null, + "source": { + "id": "https://openalex.org/S4306525036", + "display_name": "PubMed", + "issn_l": null, + "issn": null, + 
"is_oa": false, + "is_in_doaj": false, + "host_organization": "https://openalex.org/I1299303238", + "host_organization_name": "National Institutes of Health", + "host_organization_lineage": [ + "https://openalex.org/I1299303238" + ], + "host_organization_lineage_names": [ + "National Institutes of Health" + ], + "type": "repository" + }, "license": null, "license_id": null, - "version": "publishedVersion", - "is_accepted": true, - "is_published": true + "version": null, + "is_accepted": false, + "is_published": false } ], - "best_oa_location": { - "is_oa": true, - "landing_page_url": "https://doi.org/10.1016/s0021-9258(19)52451-6", - "pdf_url": null, - "source": { - "id": "https://openalex.org/S140251998", - "display_name": "Journal of biological chemistry/\u0098The \u009cJournal of biological chemistry", - "issn_l": "0021-9258", - "issn": [ - "0021-9258", - "1067-8816", - "1083-351X" - ], - "is_oa": true, - "is_in_doaj": true, - "host_organization": "https://openalex.org/P4310320990", - "host_organization_name": "Elsevier BV", - "host_organization_lineage": [ - "https://openalex.org/P4310320990" - ], - "host_organization_lineage_names": [ - "Elsevier BV" - ], - "type": "journal" - }, - "license": "cc-by", - "license_id": "https://openalex.org/licenses/cc-by", - "version": "publishedVersion", - "is_accepted": true, - "is_published": true - }, + "best_oa_location": null, "sustainable_development_goals": [ { - "display_name": "Clean water and sanitation", - "id": "https://metadata.un.org/sdg/6", - "score": 0.65 + "display_name": "Affordable and clean energy", + "score": 0.91, + "id": "https://metadata.un.org/sdg/7" } ], "grants": [], "datasets": [], "versions": [], - "referenced_works_count": 18, + "referenced_works_count": 30, "referenced_works": [ - "https://openalex.org/W1507976594", - "https://openalex.org/W1515052776", - "https://openalex.org/W1535643256", - "https://openalex.org/W1540384225", - "https://openalex.org/W1557790601", - "https://openalex.org/W1573055659", - "https://openalex.org/W1576013682", - "https://openalex.org/W195762992", - "https://openalex.org/W1979011732", - "https://openalex.org/W1979607356", - "https://openalex.org/W2051330591", - "https://openalex.org/W2149417143", - "https://openalex.org/W2415297835", - "https://openalex.org/W59369663", - "https://openalex.org/W8863503", - "https://openalex.org/W89669270", - "https://openalex.org/W951165088", - "https://openalex.org/W97662572" + "https://openalex.org/W1970549136", + "https://openalex.org/W1979040280", + "https://openalex.org/W1983648185", + "https://openalex.org/W1984849570", + "https://openalex.org/W1987736292", + "https://openalex.org/W1997176674", + "https://openalex.org/W1997563366", + "https://openalex.org/W1998015449", + "https://openalex.org/W1998954599", + "https://openalex.org/W1998959670", + "https://openalex.org/W2000822641", + "https://openalex.org/W2004333877", + "https://openalex.org/W2010338931", + "https://openalex.org/W2012664688", + "https://openalex.org/W2017070764", + "https://openalex.org/W2023664897", + "https://openalex.org/W2039715700", + "https://openalex.org/W2040891489", + "https://openalex.org/W2041205942", + "https://openalex.org/W2046379387", + "https://openalex.org/W2049079467", + "https://openalex.org/W2049852309", + "https://openalex.org/W2061724428", + "https://openalex.org/W2070470894", + "https://openalex.org/W2071602054", + "https://openalex.org/W2076751112", + "https://openalex.org/W2086957099", + "https://openalex.org/W2088370223", + "https://openalex.org/W2230728100", + 
"https://openalex.org/W3206606417" ], "related_works": [ - "https://openalex.org/W4387497383", - "https://openalex.org/W2948807893", - "https://openalex.org/W2778153218", - "https://openalex.org/W2748952813", - "https://openalex.org/W2527526854", - "https://openalex.org/W2078814861", - "https://openalex.org/W2062208111", - "https://openalex.org/W1976181487", - "https://openalex.org/W170893625", - "https://openalex.org/W1531601525" + "https://openalex.org/W4319299196", + "https://openalex.org/W3183731174", + "https://openalex.org/W3110971345", + "https://openalex.org/W3104851593", + "https://openalex.org/W2060532266", + "https://openalex.org/W2051648115", + "https://openalex.org/W2047899451", + "https://openalex.org/W2046701024", + "https://openalex.org/W2032719166", + "https://openalex.org/W1966874709" ], - "ngrams_url": "https://api.openalex.org/works/W1775749144/ngrams", + "ngrams_url": "https://api.openalex.org/works/W1981368803/ngrams", "abstract_inverted_index": { - "Since": [ + "Generalized": [ 0 ], - "1922": [ + "gradient": [ 1 ], - "when": [ + "approximations": [ 2 ], - "Wu": [ + "(GGA's)": [ 3 ], - "proposed": [ + "for": [ 4 ], "the": [ 5, - 8, - 13, - 31, + 10, 46, - 58, - 76, - 150, - 206, - 238, - 244 - ], - "use": [ + 50, + 65, + 69 + ], + "exchange-correlation": [ 6 ], - "of": [ - 7, - 15, - 20, - 33, - 60, - 110, - 115, - 119, - 123, - 152, - 158, - 189, - 195, - 214, - 240, - 282 - ], - "Folin": [ - 9, - 218, - 245 + "energy": [ + 7 ], - "phenol": [ - 10, - 221 + "improve": [ + 8 ], - "reagent": [ - 11, - 26, - 47, - 222, - 246 + "upon": [ + 9 ], - "for": [ - 12, - 30, - 72, - 85, - 98, - 135, - 149, - 212 - ], - "measurement": [ + "local": [ + 11 + ], + "spin": [ + 12 + ], + "density": [ + 13 + ], + "(LSD)": [ 14 ], - "proteins": [ + "description": [ + 15, + 63 + ], + "of": [ 16, - 34 + 26, + 45, + 64, + 68 ], - "(l),": [ + "atoms,": [ 17 ], - "a": [ - 18, - 235, - 279 + "molecules,": [ + 18 ], - "number": [ - 19 + "and": [ + 19, + 78 ], - "modified": [ + "solids.": [ + 20 + ], + "We": [ 21 ], - "analytical": [ + "present": [ 22 ], - "procedures": [ - 23 - ], - "ut.ilizing": [ - 24 - ], - "this": [ - 25, - 79, - 241 + "a": [ + 23, + 27, + 79 ], - "have": [ - 27 + "simple": [ + 24, + 28 ], - "been": [ - 28, - 104 + "derivation": [ + 25 ], - "reported": [ + "GGA,": [ 29 ], - "determination": [ - 32, - 151 - ], "in": [ - 35, - 38, - 43, - 112, - 127, - 138, - 166, - 175, - 254, - 273 - ], - "serum": [ - 36, - 262 - ], - "(2-G),": [ + 30, 37 ], - "antigen-antibody": [ - 39 + "which": [ + 31 ], - "precipitates": [ + "all": [ + 32 + ], + "parameters": [ + 33 + ], + "(other": [ + 34 + ], + "than": [ + 35 + ], + "those": [ + 36 + ], + "LSD)": [ + 38 + ], + "are": [ + 39, + 55 + ], + "fundamental": [ 40 ], - "(7-9),": [ + "constants.": [ 41 ], - "and": [ - 42, - 57, - 92, - 117, - 130, - 148, - 225, - 287 + "Only": [ + 42 + ], + "general": [ + 43 ], - "insulin": [ + "features": [ 44 ], - "(10).Although": [ - 45 + "detailed": [ + 47 ], - "would": [ + "construction": [ 48 ], - "seem": [ + "underlying": [ 49 ], - "to": [ - 50, - 95, - 249, - 268 - ], - "be": [ - 51, - 96, - 258, - 276 + "Perdew-Wang": [ + 51 ], - "recommended": [ + "1991": [ 52 ], - "by": [ + "(PW91)": [ 53 ], - "its": [ - 54, - 64, - 90, - 99 - ], - "great": [ - 55, - 70 + "GGA": [ + 54 ], - "sensitivity": [ + "invoked.": [ 56 ], - "simplicity": [ - 59 + "Improvements": [ + 57 ], - "procedure": [ - 61 + "over": [ + 58 ], - "possible": [ - 62 + "PW91": [ + 59 ], - "with": [ - 63, - 106, - 192, - 232 + "include": [ + 
60 ], - "use,": [ - 65 + "an": [ + 61 ], - "it": [ - 66, - 102, - 251 + "accurate": [ + 62 ], - "has": [ - 67, - 82, - 103 + "linear": [ + 66 ], - "not": [ - 68 + "response": [ + 67 ], - "found": [ - 69 + "uniform": [ + 70, + 76 ], - "favor": [ + "electron": [ 71 ], - "general": [ + "gas,": [ + 72 + ], + "correct": [ 73 ], - "biochemical": [ + "behavior": [ 74 ], - "purposes.In": [ + "under": [ 75 ], - "belief": [ + "scaling,": [ 77 ], - "that": [ - 78, - 89 - ], - "reagent,": [ + "smoother": [ 80 ], - "nevertheless,": [ + "potential.": [ 81 - ], - "considerable": [ - 83 - ], - "merit": [ - 84 - ], - "certain": [ - 86 - ], - "application,": [ - 87 - ], - "but": [ - 88 - ], - "peculiarities": [ - 91 - ], - "limitations": [ - 93 - ], - "need": [ - 94 - ], - "understood": [ - 97 - ], - "fullest": [ - 100 - ], - "exploitation,": [ - 101 - ], - "studied": [ - 105 - ], - "regard": [ - 107 - ], - "t.o": [ - 108, - 234 - ], - "effects": [ - 109 - ], - "variations": [ - 111 - ], - "pH,": [ - 113 - ], - "time": [ - 114 - ], - "reaction,": [ - 116 - ], - "concentration": [ - 118 - ], - "reactants,": [ - 120 - ], - "permissible": [ - 121 - ], - "levels": [ - 122 - ], - "reagents": [ - 124 - ], - "commonly": [ - 125 - ], - "used": [ - 126 - ], - "handling": [ - 128 - ], - "proteins,": [ - 129 - ], - "interfering": [ - 131 - ], - "subst.ances.Procedures": [ - 132 - ], - "are": [ - 133 - ], - "described": [ - 134 - ], - "measuring": [ - 136 - ], - "protein": [ - 137 - ], - "solution": [ - 139, - 281 - ], - "or": [ - 140, - 145, - 180 - ], - "after": [ - 141, - 199 - ], - "precipitation": [ - 142 - ], - "wit,h": [ - 143 - ], - "acids": [ - 144 - ], - "other": [ - 146 - ], - "agents,": [ - 147 - ], - "as": [ - 153, - 155, - 208 - ], - "little": [ - 154 - ], - "0.2": [ - 156 - ], - "y": [ - 157, - 270 - ], - "protein.": [ - 159 - ], - "MethodReagents-Reagent": [ - 160 - ], - "A,": [ - 161 - ], - "2": [ - 162 - ], - "per": [ - 163, - 172, - 177, - 271 - ], - "cent": [ - 164, - 173, - 178 - ], - "N&OX": [ - 165 - ], - "0.10": [ - 167 - ], - "N": [ - 168, - 253 - ], - "NaOH.Reagent": [ - 169, - 215 - ], - "B,": [ - 170 - ], - "0.5": [ - 171 - ], - "CuS04.5Hz0": [ - 174 - ], - "1": [ - 176, - 193, - 200, - 252 - ], - "sodium": [ - 179 - ], - "potassium": [ - 181 - ], - "tartrabe.Reagent": [ - 182 - ], - "C,": [ - 183 - ], - "alkaline": [ - 184 - ], - "copper": [ - 185 - ], - "solution.Mix": [ - 186 - ], - "50": [ - 187 - ], - "ml.": [ - 188, - 194 - ], - "Reagent": [ - 190, - 196, - 209 - ], - "A": [ - 191 - ], - "B.": [ - 197 - ], - "Discard": [ - 198 - ], - "day.Reagent": [ - 201 - ], - "D,": [ - 202 - ], - "carbonate-copper": [ - 203 - ], - "solution,": [ - 204 - ], - "is": [ - 205 - ], - "same": [ - 207 - ], - "C": [ - 210 - ], - "except": [ - 211 - ], - "omission": [ - 213 - ], - "E,": [ - 216 - ], - "diluted": [ - 217, - 263 - ], - "reagent.Titrate": [ - 219 - ], - "Folin-Ciocalteu": [ - 220 - ], - "((II),": [ - 223 - ], - "Eimer": [ - 224 - ], - "Amend,": [ - 226 - ], - "Fisher": [ - 227 - ], - "Scientific": [ - 228 - ], - "Company,": [ - 229 - ], - "New": [ - 230 - ], - "York)": [ - 231 - ], - "NaOH": [ - 233 - ], - "phenolphthalein": [ - 236 - ], - "end-point.On": [ - 237 - ], - "basis": [ - 239 - ], - "titration": [ - 242 - ], - "dilute": [ - 243 - ], - "(about": [ - 247 - ], - "2-fold)": [ - 248 - ], - "make": [ - 250 - ], - "acid.Working": [ - 255 - ], - "standards": [ - 256 - ], - "may": [ - 257, - 275 - ], - "prepared": [ - 259 - ], - "from": [ - 260 - ], - "human": [ - 261 - ], - "IOO-to": [ - 
264 - ], - "lOOO-fold": [ - 265 - ], - "(approximately": [ - 266 - ], - "700": [ - 267 - ], - "70": [ - 269 - ], - "ml.).These": [ - 272 - ], - "turn": [ - 274 - ], - "checked": [ - 277 - ], - "against": [ - 278 - ], - "standard": [ - 280 - ], - "crystalline": [ - 283 - ], - "bovine": [ - 284 - ], - "albumin": [ - 285 - ], - "(Armour": [ - 286 ] }, - "cited_by_api_url": "https://api.openalex.org/works?filter=cites:W1775749144", + "cited_by_api_url": "https://api.openalex.org/works?filter=cites:W1981368803", "counts_by_year": [ { "year": 2024, - "cited_by_count": 930 + "cited_by_count": 7184 }, { "year": 2023, - "cited_by_count": 2779 + "cited_by_count": 17542 }, { "year": 2022, - "cited_by_count": 3027 + "cited_by_count": 16694 }, { "year": 2021, - "cited_by_count": 3171 + "cited_by_count": 15502 }, { "year": 2020, - "cited_by_count": 3251 + "cited_by_count": 13823 }, { "year": 2019, - "cited_by_count": 3041 + "cited_by_count": 12184 }, { "year": 2018, - "cited_by_count": 3113 + "cited_by_count": 10853 }, { "year": 2017, - "cited_by_count": 3082 + "cited_by_count": 9921 }, { "year": 2016, - "cited_by_count": 3603 + "cited_by_count": 8821 }, { "year": 2015, - "cited_by_count": 4227 + "cited_by_count": 7847 }, { "year": 2014, - "cited_by_count": 4690 + "cited_by_count": 6836 }, { "year": 2013, - "cited_by_count": 4728 + "cited_by_count": 5974 }, { "year": 2012, - "cited_by_count": 4684 + "cited_by_count": 5168 } ], - "updated_date": "2024-05-14T18:58:37.540846", + "updated_date": "2024-05-15T09:45:55.437331", "created_date": "2016-06-24" }, { - "id": "https://openalex.org/W2100837269", - "doi": "https://doi.org/10.1038/227680a0", - "title": "Cleavage of Structural Proteins during the Assembly of the Head of Bacteriophage T4", - "display_name": "Cleavage of Structural Proteins during the Assembly of the Head of Bacteriophage T4", - "publication_year": 1970, - "publication_date": "1970-08-01", + "id": "https://openalex.org/W2107277218", + "doi": "https://doi.org/10.1006/meth.2001.1262", + "title": "Analysis of Relative Gene Expression Data Using Real-Time Quantitative PCR and the 2\u2212\u0394\u0394CT Method", + "display_name": "Analysis of Relative Gene Expression Data Using Real-Time Quantitative PCR and the 2\u2212\u0394\u0394CT Method", + "publication_year": 2001, + "publication_date": "2001-12-01", "ids": { - "openalex": "https://openalex.org/W2100837269", - "doi": "https://doi.org/10.1038/227680a0", - "mag": "2100837269", - "pmid": "https://pubmed.ncbi.nlm.nih.gov/5432063" + "openalex": "https://openalex.org/W2107277218", + "doi": "https://doi.org/10.1006/meth.2001.1262", + "mag": "2107277218", + "pmid": "https://pubmed.ncbi.nlm.nih.gov/11846609" }, "language": "en", "primary_location": { "is_oa": false, - "landing_page_url": "https://doi.org/10.1038/227680a0", + "landing_page_url": "https://doi.org/10.1006/meth.2001.1262", "pdf_url": null, "source": { - "id": "https://openalex.org/S137773608", - "display_name": "Nature", - "issn_l": "0028-0836", + "id": "https://openalex.org/S73539393", + "display_name": "Methods", + "issn_l": "1046-2023", "issn": [ - "0028-0836", - "1476-4687" + "1046-2023", + "1095-9130" ], "is_oa": false, "is_in_doaj": false, - "host_organization": "https://openalex.org/P4310319908", - "host_organization_name": "Nature Portfolio", + "host_organization": "https://openalex.org/P4310320990", + "host_organization_name": "Elsevier BV", "host_organization_lineage": [ - "https://openalex.org/P4310319908", - "https://openalex.org/P4310319965" + 
"https://openalex.org/P4310320990" ], "host_organization_lineage_names": [ - "Nature Portfolio", - "Springer Nature" + "Elsevier BV" ], "type": "journal" }, @@ -1208,7 +838,8 @@ "type": "article", "type_crossref": "journal-article", "indexed_in": [ - "crossref" + "crossref", + "pubmed" ], "open_access": { "is_oa": false, @@ -1220,107 +851,119 @@ { "author_position": "first", "author": { - "id": "https://openalex.org/A5030122660", - "display_name": "Ulrich K. Laemmli", - "orcid": null + "id": "https://openalex.org/A5055590197", + "display_name": "Kenneth J. Livak", + "orcid": "https://orcid.org/0000-0001-9105-5856" + }, + "institutions": [], + "countries": [ + "US" + ], + "is_corresponding": false, + "raw_author_name": "Kenneth J. Livak", + "raw_affiliation_strings": [ + "Applied Biosystems, Foster City, California, 94404" + ] + }, + { + "author_position": "last", + "author": { + "id": "https://openalex.org/A5001854674", + "display_name": "Thomas D. Schmittgen", + "orcid": "https://orcid.org/0000-0002-2812-8546" }, "institutions": [ { - "id": "https://openalex.org/I170203145", - "display_name": "MRC Laboratory of Molecular Biology", - "ror": "https://ror.org/00tw3jy02", - "country_code": "GB", - "type": "facility", + "id": "https://openalex.org/I72951846", + "display_name": "Washington State University", + "ror": "https://ror.org/05dk0ce17", + "country_code": "US", + "type": "education", "lineage": [ - "https://openalex.org/I170203145", - "https://openalex.org/I4210087105", - "https://openalex.org/I90344618" + "https://openalex.org/I72951846" ] } ], "countries": [ - "GB" + "US" ], - "is_corresponding": true, - "raw_author_name": "Ulrich K. Laemmli", + "is_corresponding": false, + "raw_author_name": "Thomas D. Schmittgen", "raw_affiliation_strings": [ - "MRC Laboratory of Molecular Biology, Hills Road, Cambridge" + "Department of Pharmaceutical Sciences, College of Pharmacy, Washington State University, Pullman, Washington, 99164-6534" ] } ], "countries_distinct_count": 1, "institutions_distinct_count": 1, - "corresponding_author_ids": [ - "https://openalex.org/A5030122660" - ], - "corresponding_institution_ids": [ - "https://openalex.org/I170203145" - ], + "corresponding_author_ids": [], + "corresponding_institution_ids": [], "apc_list": { - "value": 9750, - "currency": "EUR", - "value_usd": 11690, + "value": 3390, + "currency": "USD", + "value_usd": 3390, "provenance": "doaj" }, "apc_paid": { - "value": 9750, - "currency": "EUR", - "value_usd": 11690, + "value": 3390, + "currency": "USD", + "value_usd": 3390, "provenance": "doaj" }, - "has_fulltext": false, - "cited_by_count": 234506, + "has_fulltext": true, + "fulltext_origin": "ngrams", + "cited_by_count": 146346, "cited_by_percentile_year": { "min": 99, "max": 100 }, "biblio": { - "volume": "227", - "issue": "5259", - "first_page": "680", - "last_page": "685" + "volume": "25", + "issue": "4", + "first_page": "402", + "last_page": "408" }, "is_retracted": false, "is_paratext": false, "primary_topic": { - "id": "https://openalex.org/T11048", - "display_name": "Ecology and Evolution of Viruses in Ecosystems", - "score": 0.998, + "id": "https://openalex.org/T11970", + "display_name": "Real-Time Polymerase Chain Reaction", + "score": 1.0, "subfield": { - "id": "https://openalex.org/subfields/2303", - "display_name": "Ecology" + "id": "https://openalex.org/subfields/1312", + "display_name": "Molecular Biology" }, "field": { - "id": "https://openalex.org/fields/23", - "display_name": "Environmental Science" + "id": 
"https://openalex.org/fields/13", + "display_name": "Biochemistry, Genetics and Molecular Biology" }, "domain": { - "id": "https://openalex.org/domains/3", - "display_name": "Physical Sciences" + "id": "https://openalex.org/domains/1", + "display_name": "Life Sciences" } }, "topics": [ { - "id": "https://openalex.org/T11048", - "display_name": "Ecology and Evolution of Viruses in Ecosystems", - "score": 0.998, + "id": "https://openalex.org/T11970", + "display_name": "Real-Time Polymerase Chain Reaction", + "score": 1.0, "subfield": { - "id": "https://openalex.org/subfields/2303", - "display_name": "Ecology" + "id": "https://openalex.org/subfields/1312", + "display_name": "Molecular Biology" }, "field": { - "id": "https://openalex.org/fields/23", - "display_name": "Environmental Science" + "id": "https://openalex.org/fields/13", + "display_name": "Biochemistry, Genetics and Molecular Biology" }, "domain": { - "id": "https://openalex.org/domains/3", - "display_name": "Physical Sciences" + "id": "https://openalex.org/domains/1", + "display_name": "Life Sciences" } }, { - "id": "https://openalex.org/T10015", - "display_name": "RNA Sequencing Data Analysis", - "score": 0.9838, + "id": "https://openalex.org/T10604", + "display_name": "Regulation of RNA Processing and Function", + "score": 0.963, "subfield": { "id": "https://openalex.org/subfields/1312", "display_name": "Molecular Biology" @@ -1335,9 +978,9 @@ } }, { - "id": "https://openalex.org/T10044", - "display_name": "Protein Structure Prediction and Analysis", - "score": 0.9836, + "id": "https://openalex.org/T10885", + "display_name": "Microarray Data Analysis and Gene Expression Profiling", + "score": 0.9514, "subfield": { "id": "https://openalex.org/subfields/1312", "display_name": "Molecular Biology" @@ -1354,123 +997,225 @@ ], "keywords": [ { - "id": "https://openalex.org/keywords/bacteriophage", - "display_name": "Bacteriophage", - "score": 0.499433 + "id": "https://openalex.org/keywords/real-time-pcr", + "display_name": "Real-Time PCR", + "score": 0.57883 + }, + { + "id": "https://openalex.org/keywords/quantitative", + "display_name": "Quantitative", + "score": 0.528305 } ], "concepts": [ { - "id": "https://openalex.org/C2776441376", - "wikidata": "https://www.wikidata.org/wiki/Q165028", - "display_name": "Bacteriophage", - "level": 4, - "score": 0.8466327 - }, - { - "id": "https://openalex.org/C175156509", - "wikidata": "https://www.wikidata.org/wiki/Q1990218", - "display_name": "Cleavage (geology)", + "id": "https://openalex.org/C48023723", + "wikidata": "https://www.wikidata.org/wiki/Q856198", + "display_name": "Real-time polymerase chain reaction", "level": 3, - "score": 0.73753864 + "score": 0.69824404 }, { - "id": "https://openalex.org/C116084860", - "wikidata": "https://www.wikidata.org/wiki/Q48255", - "display_name": "Gel electrophoresis", + "id": "https://openalex.org/C95986675", + "wikidata": "https://www.wikidata.org/wiki/Q185168", + "display_name": "Quantitative analysis (chemistry)", "level": 2, - "score": 0.48121265 + "score": 0.5139794 }, { - "id": "https://openalex.org/C185592680", - "wikidata": "https://www.wikidata.org/wiki/Q2329", - "display_name": "Chemistry", - "level": 0, - "score": 0.46676797 + "id": "https://openalex.org/C5072461", + "wikidata": "https://www.wikidata.org/wiki/Q49506", + "display_name": "Delta", + "level": 2, + "score": 0.51295155 }, { - "id": "https://openalex.org/C2780312720", - "wikidata": "https://www.wikidata.org/wiki/Q5689100", - "display_name": "Head (geology)", - "level": 2, - "score": 
0.4361011 + "id": "https://openalex.org/C70721500", + "wikidata": "https://www.wikidata.org/wiki/Q177005", + "display_name": "Computational biology", + "level": 1, + "score": 0.49055308 }, { - "id": "https://openalex.org/C104317684", - "wikidata": "https://www.wikidata.org/wiki/Q7187", - "display_name": "Gene", + "id": "https://openalex.org/C198531522", + "wikidata": "https://www.wikidata.org/wiki/Q485146", + "display_name": "Sample (material)", "level": 2, - "score": 0.41767645 + "score": 0.41298348 }, { "id": "https://openalex.org/C86803240", "wikidata": "https://www.wikidata.org/wiki/Q420", "display_name": "Biology", "level": 0, - "score": 0.39367613 + "score": 0.39520985 }, { - "id": "https://openalex.org/C55493867", - "wikidata": "https://www.wikidata.org/wiki/Q7094", - "display_name": "Biochemistry", + "id": "https://openalex.org/C186060115", + "wikidata": "https://www.wikidata.org/wiki/Q30336093", + "display_name": "Biological system", "level": 1, - "score": 0.3236053 + "score": 0.35528326 }, { - "id": "https://openalex.org/C70721500", - "wikidata": "https://www.wikidata.org/wiki/Q177005", - "display_name": "Computational biology", + "id": "https://openalex.org/C41008148", + "wikidata": "https://www.wikidata.org/wiki/Q21198", + "display_name": "Computer science", + "level": 0, + "score": 0.33155608 + }, + { + "id": "https://openalex.org/C153911025", + "wikidata": "https://www.wikidata.org/wiki/Q7202", + "display_name": "Molecular biology", "level": 1, - "score": 0.32033193 + "score": 0.3302499 }, { - "id": "https://openalex.org/C547475151", - "wikidata": "https://www.wikidata.org/wiki/Q25419", - "display_name": "Escherichia coli", - "level": 3, - "score": 0.1328496 + "id": "https://openalex.org/C104317684", + "wikidata": "https://www.wikidata.org/wiki/Q7187", + "display_name": "Gene", + "level": 2, + "score": 0.32891792 }, { - "id": "https://openalex.org/C151730666", - "wikidata": "https://www.wikidata.org/wiki/Q7205", - "display_name": "Paleontology", + "id": "https://openalex.org/C43617362", + "wikidata": "https://www.wikidata.org/wiki/Q170050", + "display_name": "Chromatography", "level": 1, - "score": 0.0 + "score": 0.22815666 }, { - "id": "https://openalex.org/C43369102", - "wikidata": "https://www.wikidata.org/wiki/Q2307625", - "display_name": "Fracture (geology)", - "level": 2, + "id": "https://openalex.org/C185592680", + "wikidata": "https://www.wikidata.org/wiki/Q2329", + "display_name": "Chemistry", + "level": 0, + "score": 0.22768846 + }, + { + "id": "https://openalex.org/C54355233", + "wikidata": "https://www.wikidata.org/wiki/Q7162", + "display_name": "Genetics", + "level": 1, + "score": 0.22186163 + }, + { + "id": "https://openalex.org/C121332964", + "wikidata": "https://www.wikidata.org/wiki/Q413", + "display_name": "Physics", + "level": 0, + "score": 0.17523903 + }, + { + "id": "https://openalex.org/C1276947", + "wikidata": "https://www.wikidata.org/wiki/Q333", + "display_name": "Astronomy", + "level": 1, "score": 0.0 } ], - "mesh": [], - "locations_count": 1, + "mesh": [ + { + "descriptor_ui": "D016133", + "descriptor_name": "Polymerase Chain Reaction", + "qualifier_ui": "Q000379", + "qualifier_name": "methods", + "is_major_topic": true + }, + { + "descriptor_ui": "D020133", + "descriptor_name": "Reverse Transcriptase Polymerase Chain Reaction", + "qualifier_ui": "", + "qualifier_name": null, + "is_major_topic": true + }, + { + "descriptor_ui": "D000465", + "descriptor_name": "Algorithms", + "qualifier_ui": "", + "qualifier_name": null, + "is_major_topic": false + 
}, + { + "descriptor_ui": "D001921", + "descriptor_name": "Brain", + "qualifier_ui": "", + "qualifier_name": null, + "is_major_topic": false + }, + { + "descriptor_ui": "D001921", + "descriptor_name": "Brain", + "qualifier_ui": "Q000378", + "qualifier_name": "metabolism", + "is_major_topic": false + }, + { + "descriptor_ui": "D002460", + "descriptor_name": "Cell Line", + "qualifier_ui": "", + "qualifier_name": null, + "is_major_topic": false + }, + { + "descriptor_ui": "D018076", + "descriptor_name": "DNA, Complementary", + "qualifier_ui": "", + "qualifier_name": null, + "is_major_topic": false + }, + { + "descriptor_ui": "D018076", + "descriptor_name": "DNA, Complementary", + "qualifier_ui": "Q000378", + "qualifier_name": "metabolism", + "is_major_topic": false + }, + { + "descriptor_ui": "D006801", + "descriptor_name": "Humans", + "qualifier_ui": "", + "qualifier_name": null, + "is_major_topic": false + }, + { + "descriptor_ui": "D016133", + "descriptor_name": "Polymerase Chain Reaction", + "qualifier_ui": "", + "qualifier_name": null, + "is_major_topic": false + }, + { + "descriptor_ui": "D013997", + "descriptor_name": "Time Factors", + "qualifier_ui": "", + "qualifier_name": null, + "is_major_topic": false + } + ], + "locations_count": 2, "locations": [ { "is_oa": false, - "landing_page_url": "https://doi.org/10.1038/227680a0", + "landing_page_url": "https://doi.org/10.1006/meth.2001.1262", "pdf_url": null, "source": { - "id": "https://openalex.org/S137773608", - "display_name": "Nature", - "issn_l": "0028-0836", + "id": "https://openalex.org/S73539393", + "display_name": "Methods", + "issn_l": "1046-2023", "issn": [ - "0028-0836", - "1476-4687" + "1046-2023", + "1095-9130" ], "is_oa": false, "is_in_doaj": false, - "host_organization": "https://openalex.org/P4310319908", - "host_organization_name": "Nature Portfolio", + "host_organization": "https://openalex.org/P4310320990", + "host_organization_name": "Elsevier BV", "host_organization_lineage": [ - "https://openalex.org/P4310319908", - "https://openalex.org/P4310319965" + "https://openalex.org/P4310320990" ], "host_organization_lineage_names": [ - "Nature Portfolio", - "Springer Nature" + "Elsevier BV" ], "type": "journal" }, @@ -1479,104 +1224,403 @@ "version": null, "is_accepted": false, "is_published": false + }, + { + "is_oa": false, + "landing_page_url": "https://pubmed.ncbi.nlm.nih.gov/11846609", + "pdf_url": null, + "source": { + "id": "https://openalex.org/S4306525036", + "display_name": "PubMed", + "issn_l": null, + "issn": null, + "is_oa": false, + "is_in_doaj": false, + "host_organization": "https://openalex.org/I1299303238", + "host_organization_name": "National Institutes of Health", + "host_organization_lineage": [ + "https://openalex.org/I1299303238" + ], + "host_organization_lineage_names": [ + "National Institutes of Health" + ], + "type": "repository" + }, + "license": null, + "license_id": null, + "version": null, + "is_accepted": false, + "is_published": false } ], "best_oa_location": null, "sustainable_development_goals": [], "grants": [], - "referenced_works_count": 20, + "datasets": [], + "versions": [], + "referenced_works_count": 10, "referenced_works": [ - "https://openalex.org/W1510011675", - "https://openalex.org/W1591624429", - "https://openalex.org/W1625660792", - "https://openalex.org/W1974447189", - "https://openalex.org/W1981333007", - "https://openalex.org/W1987077239", - "https://openalex.org/W1999197280", - "https://openalex.org/W2009651234", - "https://openalex.org/W2016311541", - 
"https://openalex.org/W2030138849", - "https://openalex.org/W2032258362", - "https://openalex.org/W2050132031", - "https://openalex.org/W2051897666", - "https://openalex.org/W2056070745", - "https://openalex.org/W2058000726", - "https://openalex.org/W2065627978", - "https://openalex.org/W2088608143", - "https://openalex.org/W2111301002", - "https://openalex.org/W2330047747", - "https://openalex.org/W2886607623" + "https://openalex.org/W1983241347", + "https://openalex.org/W2014588425", + "https://openalex.org/W2069943574", + "https://openalex.org/W2109970232", + "https://openalex.org/W2123325948", + "https://openalex.org/W2128088040", + "https://openalex.org/W2134343377", + "https://openalex.org/W2145879504", + "https://openalex.org/W2164578725", + "https://openalex.org/W4232459722" ], "related_works": [ - "https://openalex.org/W3097502728", - "https://openalex.org/W4220903595", - "https://openalex.org/W1846827988", - "https://openalex.org/W2081138942", - "https://openalex.org/W2914091207", - "https://openalex.org/W2078051322", - "https://openalex.org/W1999543360", - "https://openalex.org/W2127903644", - "https://openalex.org/W4292513206", - "https://openalex.org/W2380469046" + "https://openalex.org/W4297616317", + "https://openalex.org/W4297616267", + "https://openalex.org/W4297615481", + "https://openalex.org/W4239373651", + "https://openalex.org/W4221046490", + "https://openalex.org/W3195406774", + "https://openalex.org/W3149097030", + "https://openalex.org/W3111070561", + "https://openalex.org/W2502651140", + "https://openalex.org/W2381983889" ], - "ngrams_url": "https://api.openalex.org/works/W2100837269/ngrams", - "abstract_inverted_index": null, - "cited_by_api_url": "https://api.openalex.org/works?filter=cites:W2100837269", + "ngrams_url": "https://api.openalex.org/works/W2107277218/ngrams", + "abstract_inverted_index": { + "The": [ + 0, + 61, + 81 + ], + "two": [ + 1, + 107 + ], + "most": [ + 2 + ], + "commonly": [ + 3 + ], + "used": [ + 4 + ], + "methods": [ + 5 + ], + "to": [ + 6, + 33, + 51, + 68, + 87 + ], + "analyze": [ + 7, + 69 + ], + "data": [ + 8 + ], + "from": [ + 9, + 76 + ], + "real-time,": [ + 10, + 121 + ], + "quantitative": [ + 11, + 78, + 122 + ], + "PCR": [ + 12, + 31, + 41, + 79, + 123 + ], + "experiments": [ + 13 + ], + "are": [ + 14 + ], + "absolute": [ + 15 + ], + "quantification": [ + 16, + 21, + 38 + ], + "and": [ + 17, + 92, + 104 + ], + "relative": [ + 18, + 71 + ], + "quantification.": [ + 19 + ], + "Absolute": [ + 20 + ], + "determines": [ + 22 + ], + "the": [ + 23, + 30, + 40, + 44, + 70, + 89, + 95, + 102, + 110, + 118 + ], + "input": [ + 24 + ], + "copy": [ + 25 + ], + "number,": [ + 26 + ], + "usually": [ + 27 + ], + "by": [ + 28 + ], + "relating": [ + 29 + ], + "signal": [ + 32, + 42 + ], + "a": [ + 34, + 48, + 65 + ], + "standard": [ + 35 + ], + "curve.": [ + 36 + ], + "Relative": [ + 37 + ], + "relates": [ + 39 + ], + "of": [ + 43, + 53, + 83, + 94, + 106, + 109, + 120 + ], + "target": [ + 45 + ], + "transcript": [ + 46 + ], + "in": [ + 47, + 73, + 117 + ], + "treatment": [ + 49 + ], + "group": [ + 50 + ], + "that": [ + 52, + 113 + ], + "another": [ + 54 + ], + "sample": [ + 55 + ], + "such": [ + 56 + ], + "as": [ + 57 + ], + "an": [ + 58 + ], + "untreated": [ + 59 + ], + "control.": [ + 60 + ], + "2\u2212\u0394\u0394CT": [ + 62, + 96, + 111 + ], + "method": [ + 63, + 112 + ], + "is": [ + 64, + 86 + ], + "convenient": [ + 66 + ], + "way": [ + 67 + ], + "changes": [ + 72 + ], + "gene": [ + 74 + ], + "expression": [ + 75 + ], + "real-time": [ + 77 + 
], + "experiments.": [ + 80 + ], + "purpose": [ + 82 + ], + "this": [ + 84 + ], + "report": [ + 85 + ], + "present": [ + 88, + 101 + ], + "derivation,": [ + 90 + ], + "assumptions,": [ + 91 + ], + "applications": [ + 93, + 105 + ], + "method.": [ + 97 + ], + "In": [ + 98 + ], + "addition,": [ + 99 + ], + "we": [ + 100 + ], + "derivation": [ + 103 + ], + "variations": [ + 108 + ], + "may": [ + 114 + ], + "be": [ + 115 + ], + "useful": [ + 116 + ], + "analysis": [ + 119 + ], + "data.": [ + 124 + ] + }, + "cited_by_api_url": "https://api.openalex.org/works?filter=cites:W2107277218", "counts_by_year": [ { "year": 2024, - "cited_by_count": 662 + "cited_by_count": 4588 }, { "year": 2023, - "cited_by_count": 1879 + "cited_by_count": 13082 }, { "year": 2022, - "cited_by_count": 2231 + "cited_by_count": 14530 }, { "year": 2021, - "cited_by_count": 2410 + "cited_by_count": 13942 }, { "year": 2020, - "cited_by_count": 2560 + "cited_by_count": 13022 }, { "year": 2019, - "cited_by_count": 2469 + "cited_by_count": 12263 }, { "year": 2018, - "cited_by_count": 2690 + "cited_by_count": 11221 }, { "year": 2017, - "cited_by_count": 2767 + "cited_by_count": 9988 }, { "year": 2016, - "cited_by_count": 3085 + "cited_by_count": 8488 }, { "year": 2015, - "cited_by_count": 3545 + "cited_by_count": 7654 }, { "year": 2014, - "cited_by_count": 3906 + "cited_by_count": 6898 }, { "year": 2013, - "cited_by_count": 4387 + "cited_by_count": 6254 }, { "year": 2012, - "cited_by_count": 4254 + "cited_by_count": 5452 } ], - "updated_date": "2024-05-08T22:51:42.412644", + "updated_date": "2024-05-15T22:29:26.487685", "created_date": "2016-06-24" } ], diff --git a/src/paper/tests/test_process_openalex_works.py b/src/paper/tests/test_process_openalex_works.py index e213dff4b..e44a80ad9 100644 --- a/src/paper/tests/test_process_openalex_works.py +++ b/src/paper/tests/test_process_openalex_works.py @@ -1,10 +1,12 @@ import json +from unittest.mock import patch from rest_framework.test import APITestCase from paper.models import Paper from paper.openalex_util import process_openalex_works from user.related_models.author_model import Author +from utils.openalex import OpenAlex class ProcessOpenAlexWorksTests(APITestCase): @@ -103,3 +105,19 @@ def create_authorship_institutions_when_processing_work(self): authorship = paper.authorships.first() institutions = authorship.institutions.all() self.assertGreater(len(institutions), 0) + + @patch.object(OpenAlex, "get_authors") + def test_add_orcid_to_author_when_processing_work(self, mock_get_authors): + # Note: In actuality orcid value could be null but the payload in this + # test has an orcid value in order to test if orcid is set properly when exists in payload + + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) + + open_alex = OpenAlex() + open_alex.get_authors() + + process_openalex_works(self.works) + author = Author.objects.filter(orcid_id__isnull=False).first() + self.assertIsNotNone(author.orcid_id) From f1f481fd6c718d1efdba27f6f7faeb6284790fb7 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Mon, 20 May 2024 16:26:41 -0400 Subject: [PATCH 11/32] [Author] Test to ensure summary stats such as h_index are set --- src/paper/tests/test_process_openalex_works.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/paper/tests/test_process_openalex_works.py b/src/paper/tests/test_process_openalex_works.py index e44a80ad9..c80d33f42 100644 --- 
a/src/paper/tests/test_process_openalex_works.py +++ b/src/paper/tests/test_process_openalex_works.py @@ -121,3 +121,21 @@ def test_add_orcid_to_author_when_processing_work(self, mock_get_authors): process_openalex_works(self.works) author = Author.objects.filter(orcid_id__isnull=False).first() self.assertIsNotNone(author.orcid_id) + + @patch.object(OpenAlex, "get_authors") + def test_author_summary_stats_are_set(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) + + open_alex = OpenAlex() + open_alex.get_authors() + + process_openalex_works(self.works) + author = Author.objects.filter( + openalex_ids__contains=[mock_data["results"][0]["id"]] + ).first() + + self.assertGreater(author.h_index, 0) + self.assertGreater(author.two_year_mean_citedness, 0) + self.assertGreater(author.i10_index, 0) From 838d926857feace4fbe472eddfdd7f0a4e4d80d1 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Mon, 20 May 2024 20:06:30 -0400 Subject: [PATCH 12/32] [Author] Adding Co-author model --- src/user/migrations/0109_coauthor.py | 56 +++++++++++++++++++++++ src/user/related_models/author_model.py | 1 + src/user/related_models/coauthor_model.py | 16 +++++++ 3 files changed, 73 insertions(+) create mode 100644 src/user/migrations/0109_coauthor.py create mode 100644 src/user/related_models/coauthor_model.py diff --git a/src/user/migrations/0109_coauthor.py b/src/user/migrations/0109_coauthor.py new file mode 100644 index 000000000..0c37c9635 --- /dev/null +++ b/src/user/migrations/0109_coauthor.py @@ -0,0 +1,56 @@ +# Generated by Django 4.1 on 2024-05-21 00:05 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("paper", "0126_authorship_delete_workauthorship"), + ("user", "0108_delete_authorcitation"), + ] + + operations = [ + migrations.CreateModel( + name="CoAuthor", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "author", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="coauthor_relationships", + to="user.author", + ), + ), + ( + "coauthor", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="coauthored_with", + to="user.author", + ), + ), + ( + "paper", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="coauthorships", + to="paper.paper", + ), + ), + ], + options={ + "unique_together": {("author", "coauthor", "paper")}, + }, + ), + ] diff --git a/src/user/related_models/author_model.py b/src/user/related_models/author_model.py index a222a2dd6..75a1a122e 100644 --- a/src/user/related_models/author_model.py +++ b/src/user/related_models/author_model.py @@ -10,6 +10,7 @@ from purchase.related_models.purchase_model import Purchase from researchhub_case.constants.case_constants import APPROVED from user.related_models.author_institution import AuthorInstitution +from user.related_models.coauthor_model import CoAuthor from user.related_models.profile_image_storage import ProfileImageStorage from user.related_models.school_model import University from user.related_models.user_model import User diff --git a/src/user/related_models/coauthor_model.py b/src/user/related_models/coauthor_model.py new file mode 100644 index 000000000..478c4741e --- /dev/null +++ b/src/user/related_models/coauthor_model.py @@ 
-0,0 +1,16 @@ +from django.db import models + + +class CoAuthor(models.Model): + author = models.ForeignKey( + "user.Author", related_name="coauthor_relationships", on_delete=models.CASCADE + ) + coauthor = models.ForeignKey( + "user.Author", related_name="coauthored_with", on_delete=models.CASCADE + ) + paper = models.ForeignKey( + "paper.Paper", related_name="coauthorships", on_delete=models.CASCADE + ) + + class Meta: + unique_together = ("author", "coauthor", "paper") From f92be6ac156f8fcc7a0e577861b1280bccd0752c Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Mon, 20 May 2024 20:59:47 -0400 Subject: [PATCH 13/32] [Author] updating tests to include mock author data --- src/paper/openalex_util.py | 12 ++ .../tests/test_process_openalex_works.py | 188 ++++++++++++------ .../migrations/0110_alter_coauthor_author.py | 23 +++ src/user/related_models/coauthor_model.py | 2 +- 4 files changed, 158 insertions(+), 67 deletions(-) create mode 100644 src/user/migrations/0110_alter_coauthor_author.py diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 3b7c9bba5..960900c4b 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -5,6 +5,7 @@ from django.db.models import Q import utils.sentry as sentry +from user.related_models.coauthor_model import CoAuthor from utils.openalex import OpenAlex # Only these particular fields will be updated when an OpenAlex @@ -178,6 +179,7 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): related_paper = Paper.objects.get(id=related_paper_id) authors_need_additional_data_fetch = [] + authors_in_this_work = [] for oa_authorship in openalex_authorships: author_position = oa_authorship.get("author_position") author_openalex_id = oa_authorship.get("author", {}).get("id") @@ -208,6 +210,8 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): raw_author_name=oa_authorship.get("author", {}).get("display_name"), ) + authors_in_this_work.append(author) + # Set institutions associated with authorships if they do not already exist for oa_inst in oa_authorship.get("institutions", []): institution = Institution.upsert_from_openalex(oa_inst) @@ -234,3 +238,11 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): # Associate paper with author related_paper.authors.add(author) + + # Create co-author relationships + for i, author in enumerate(authors_in_this_work): + for coauthor in authors_in_this_work: + if author != coauthor: + CoAuthor.objects.get_or_create( + author=author, coauthor=coauthor, paper_id=related_paper_id + ) diff --git a/src/paper/tests/test_process_openalex_works.py b/src/paper/tests/test_process_openalex_works.py index c80d33f42..c136abe65 100644 --- a/src/paper/tests/test_process_openalex_works.py +++ b/src/paper/tests/test_process_openalex_works.py @@ -15,96 +15,136 @@ def setUp(self): response = json.load(file) self.works = response.get("results") - def test_create_papers_from_openalex_works(self): - process_openalex_works(self.works) + @patch.object(OpenAlex, "get_authors") + def test_create_papers_from_openalex_works(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) - dois = [work.get("doi") for work in self.works] - dois = [doi.replace("https://doi.org/", "") for doi in dois] + process_openalex_works(self.works) - created_papers = Paper.objects.filter(doi__in=dois) - self.assertEqual(len(created_papers), 2) + 
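The nested loop added to process_openalex_authorships above writes one CoAuthor row per ordered (author, coauthor) pair on a work, so a paper with n authors yields n*(n-1) rows, deduplicated across re-runs by get_or_create and the unique_together constraint. A small usage sketch under that assumption (coauthors_on_paper is a hypothetical helper; it queries the model directly so it does not depend on any particular related_name):

from user.related_models.coauthor_model import CoAuthor

def coauthors_on_paper(author, paper):
    # All CoAuthor rows where `author` appears on the given paper,
    # with the related co-author records fetched in the same query.
    return CoAuthor.objects.filter(author=author, paper=paper).select_related("coauthor")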
dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] - def test_creating_papers_should_create_related_topics(self): - process_openalex_works(self.works) + created_papers = Paper.objects.filter(doi__in=dois) + self.assertEqual(len(created_papers), 2) - dois = [work.get("doi") for work in self.works] - dois = [doi.replace("https://doi.org/", "") for doi in dois] - created_papers = Paper.objects.filter(doi__in=dois) + @patch.object(OpenAlex, "get_authors") + def test_creating_papers_should_create_related_topics(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) - # Sample the first paper to ensure it has concepts - paper_concepts = created_papers.first().unified_document.concepts.all() - self.assertGreater(len(paper_concepts), 0) + process_openalex_works(self.works) - def test_creating_papers_should_create_related_concepts(self): - process_openalex_works(self.works) + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + created_papers = Paper.objects.filter(doi__in=dois) - dois = [work.get("doi") for work in self.works] - dois = [doi.replace("https://doi.org/", "") for doi in dois] - created_papers = Paper.objects.filter(doi__in=dois) + # Sample the first paper to ensure it has concepts + paper_concepts = created_papers.first().unified_document.concepts.all() + self.assertGreater(len(paper_concepts), 0) - # Sample the first paper to ensure it has topics - paper_topics = created_papers.first().unified_document.topics.all() - self.assertGreater(len(paper_topics), 0) + @patch.object(OpenAlex, "get_authors") + def test_creating_papers_should_create_related_concepts(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) - def test_creating_papers_should_create_related_hubs(self): - process_openalex_works(self.works) + process_openalex_works(self.works) - dois = [work.get("doi") for work in self.works] - dois = [doi.replace("https://doi.org/", "") for doi in dois] - created_papers = Paper.objects.filter(doi__in=dois) + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + created_papers = Paper.objects.filter(doi__in=dois) - # Sample the first paper to ensure it has topics - paper_hubs = created_papers.first().unified_document.hubs.all() - self.assertGreater(len(paper_hubs), 0) + # Sample the first paper to ensure it has topics + paper_topics = created_papers.first().unified_document.topics.all() + self.assertGreater(len(paper_topics), 0) - def test_updating_existing_papers_from_openalex_works(self): - # First create paper - work = self.works[0] - work["title"] = "Old title" - process_openalex_works([work]) + @patch.object(OpenAlex, "get_authors") + def test_creating_papers_should_create_related_hubs(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) - # Update paper - work["title"] = "New title" - process_openalex_works([work]) + process_openalex_works(self.works) - dois = [work.get("doi") for work in self.works] - dois = [doi.replace("https://doi.org/", "") for doi in dois] - updated_paper = Paper.objects.filter(doi__in=dois).first() + 
dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + created_papers = Paper.objects.filter(doi__in=dois) - self.assertEqual(updated_paper.title, "New title") - self.assertEqual(updated_paper.paper_title, "New title") + # Sample the first paper to ensure it has topics + paper_hubs = created_papers.first().unified_document.hubs.all() + self.assertGreater(len(paper_hubs), 0) - def test_create_authors_when_processing_work(self): - process_openalex_works(self.works) + @patch.object(OpenAlex, "get_authors") + def test_updating_existing_papers_from_openalex_works(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) - dois = [work.get("doi") for work in self.works] - dois = [doi.replace("https://doi.org/", "") for doi in dois] - created_papers = Paper.objects.filter(doi__in=dois) + # First create paper + work = self.works[0] + work["title"] = "Old title" + process_openalex_works([work]) - # Sample the first paper to ensure it has authors - paper_authors = created_papers.first().authors.all() - self.assertGreater(len(paper_authors), 0) + # Update paper + work["title"] = "New title" + process_openalex_works([work]) - def test_create_authorships_when_processing_work(self): - process_openalex_works(self.works) + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + updated_paper = Paper.objects.filter(doi__in=dois).first() - dois = [work.get("doi") for work in self.works] - dois = [doi.replace("https://doi.org/", "") for doi in dois] - paper = Paper.objects.filter(doi__in=dois).first() + self.assertEqual(updated_paper.title, "New title") + self.assertEqual(updated_paper.paper_title, "New title") - authorships = paper.authorships.all() - self.assertGreater(len(authorships), 0) + @patch.object(OpenAlex, "get_authors") + def test_create_authors_when_processing_work(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) - def create_authorship_institutions_when_processing_work(self): - process_openalex_works(self.works) + process_openalex_works(self.works) - dois = [work.get("doi") for work in self.works] - dois = [doi.replace("https://doi.org/", "") for doi in dois] - paper = Paper.objects.filter(doi__in=dois).first() + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + created_papers = Paper.objects.filter(doi__in=dois) - authorship = paper.authorships.first() - institutions = authorship.institutions.all() - self.assertGreater(len(institutions), 0) + # Sample the first paper to ensure it has authors + paper_authors = created_papers.first().authors.all() + self.assertGreater(len(paper_authors), 0) + + @patch.object(OpenAlex, "get_authors") + def test_create_authorships_when_processing_work(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) + + process_openalex_works(self.works) + + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + paper = Paper.objects.filter(doi__in=dois).first() + + authorships = paper.authorships.all() + self.assertGreater(len(authorships), 0) + + 
@patch.object(OpenAlex, "get_authors") + def create_authorship_institutions_when_processing_work(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) + + process_openalex_works(self.works) + + dois = [work.get("doi") for work in self.works] + dois = [doi.replace("https://doi.org/", "") for doi in dois] + paper = Paper.objects.filter(doi__in=dois).first() + + authorship = paper.authorships.first() + institutions = authorship.institutions.all() + self.assertGreater(len(institutions), 0) @patch.object(OpenAlex, "get_authors") def test_add_orcid_to_author_when_processing_work(self, mock_get_authors): @@ -139,3 +179,19 @@ def test_author_summary_stats_are_set(self, mock_get_authors): self.assertGreater(author.h_index, 0) self.assertGreater(author.two_year_mean_citedness, 0) self.assertGreater(author.i10_index, 0) + + @patch.object(OpenAlex, "get_authors") + def test_create_coauthor_relationship(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) + + open_alex = OpenAlex() + open_alex.get_authors() + + process_openalex_works(self.works) + author = Author.objects.filter( + openalex_ids__contains=[mock_data["results"][0]["id"]] + ).first() + + self.assertGreater(author.coauthors.count(), 0) diff --git a/src/user/migrations/0110_alter_coauthor_author.py b/src/user/migrations/0110_alter_coauthor_author.py new file mode 100644 index 000000000..4f48deef1 --- /dev/null +++ b/src/user/migrations/0110_alter_coauthor_author.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1 on 2024-05-21 00:39 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0109_coauthor"), + ] + + operations = [ + migrations.AlterField( + model_name="coauthor", + name="author", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="coauthors", + to="user.author", + ), + ), + ] diff --git a/src/user/related_models/coauthor_model.py b/src/user/related_models/coauthor_model.py index 478c4741e..14b3f0b88 100644 --- a/src/user/related_models/coauthor_model.py +++ b/src/user/related_models/coauthor_model.py @@ -3,7 +3,7 @@ class CoAuthor(models.Model): author = models.ForeignKey( - "user.Author", related_name="coauthor_relationships", on_delete=models.CASCADE + "user.Author", related_name="coauthors", on_delete=models.CASCADE ) coauthor = models.ForeignKey( "user.Author", related_name="coauthored_with", on_delete=models.CASCADE From 23025d26f3dea178e2a7503fb9774a2c3b8f863c Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Mon, 20 May 2024 23:14:00 -0400 Subject: [PATCH 14/32] [Author] Fixing issue where missing authorships would fetch random authors --- src/paper/openalex_util.py | 48 +++++++++++++++++++++++++++++++++----- src/utils/openalex.py | 4 ++-- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 960900c4b..0db743617 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -1,10 +1,12 @@ import copy +import logging from django.core.exceptions import ValidationError from django.db import IntegrityError, transaction from django.db.models import Q import utils.sentry as sentry +from user.related_models.author_institution import AuthorInstitution from 
user.related_models.coauthor_model import CoAuthor from utils.openalex import OpenAlex @@ -200,6 +202,11 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): openalex_ids=[author_openalex_id], ) Wallet.objects.create(author=author) + except Exception as e: + continue + + # Associate paper with author + related_paper.authors.add(author) # Find or create authorship authorship, created = Authorship.objects.get_or_create( @@ -219,13 +226,27 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): authorship.institutions.add(institution) # Update authors with additional metadata from OpenAlex - oa_authors, _ = open_alex.get_authors( - openalex_ids=authors_need_additional_data_fetch - ) + oa_authors = [] + if len(authors_need_additional_data_fetch) > 0: + oa_authors, _ = open_alex.get_authors( + openalex_ids=authors_need_additional_data_fetch + ) + + print("authors_need_additional_data_fetch", authors_need_additional_data_fetch) + print("oa_authors", [a["id"] for a in oa_authors]) + for oa_author in oa_authors: try: author = Author.objects.get(openalex_ids__contains=[oa_author.get("id")]) - except Author.DoesNotExist: + except Author.DoesNotExist as e: + # This should not happen but hey, anything can happen! + logging.warning( + f"Author with OpenAlex ID not found: {oa_author.get('id')}", + ) + sentry.log_error( + e, + message=f"Author with OpenAlex ID {oa_author.get('id')} not found", + ) continue author.i10_index = oa_author.get("summary_stats", {}).get("i10_index") @@ -236,8 +257,23 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): author.orcid_id = oa_author.get("orcid") author.save() - # Associate paper with author - related_paper.authors.add(author) + # Load all the institutions author is associated with + affiliations = oa_author.get("affiliations", []) + for affiliation in affiliations: + oa_institution = affiliation.get("institution") + years = affiliation.get("years", []) + + institution = None + try: + institution = Institution.objects.get(openalex_id=oa_institution["id"]) + except Institution.DoesNotExist as e: + continue + + author_inst = AuthorInstitution.objects.get_or_create( + author=author, + institution=institution, + years=years, + ) # Create co-author relationships for i, author in enumerate(authors_in_this_work): diff --git a/src/utils/openalex.py b/src/utils/openalex.py index a31a8841d..4e7fb1bcd 100644 --- a/src/utils/openalex.py +++ b/src/utils/openalex.py @@ -319,7 +319,7 @@ def get_authors( # Build the filter oa_filters = [] - if openalex_ids: + if isinstance(openalex_ids, list): oa_filters.append(f"ids.openalex:{'|'.join(openalex_ids)}") filters = { @@ -356,7 +356,7 @@ def get_works( formatted_date = since_date.strftime("%Y-%m-%d") oa_filters.append(f"from_created_date:{formatted_date}") - if openalex_ids: + if isinstance(openalex_ids, list): oa_filters.append(f"ids.openalex:{'|'.join(openalex_ids)}") filters = { From 310d687936b5e542c303769803722cafa93cd6c2 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Mon, 20 May 2024 23:17:57 -0400 Subject: [PATCH 15/32] Removing print statements --- src/paper/openalex_util.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 0db743617..bc50c8941 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -232,9 +232,6 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): openalex_ids=authors_need_additional_data_fetch ) - 
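The isinstance checks added to get_authors and get_works above mean the ids.openalex clause is only built when an explicit list is passed; together with the new length guard on authors_need_additional_data_fetch, this is what stops an empty fetch list from pulling back unrelated authors. A minimal sketch of the filter construction, following the pipe-joined convention shown in the diff (build_openalex_id_filter is an illustrative name, not a function in the codebase, and the IDs below are placeholders):

def build_openalex_id_filter(openalex_ids):
    # Only add the clause for a real list; None (or anything else) skips it entirely.
    if isinstance(openalex_ids, list):
        return f"ids.openalex:{'|'.join(openalex_ids)}"
    return None

# build_openalex_id_filter(["https://openalex.org/A123", "https://openalex.org/A456"])
# -> "ids.openalex:https://openalex.org/A123|https://openalex.org/A456"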
print("authors_need_additional_data_fetch", authors_need_additional_data_fetch) - print("oa_authors", [a["id"] for a in oa_authors]) - for oa_author in oa_authors: try: author = Author.objects.get(openalex_ids__contains=[oa_author.get("id")]) From 49807793c4aa296fadea5c4ec841290acc8ba89f Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Tue, 21 May 2024 17:14:59 -0400 Subject: [PATCH 16/32] [Author] Removing unique institutions from orcid, author institution --- src/paper/openalex_util.py | 2 + src/paper/paper_upload_tasks.py | 18 ++++--- ...84_alter_distribution_distribution_type.py | 52 +++++++++++++++++++ .../commands/load_works_from_openalex.py | 25 ++++++++- ...alter_authorinstitution_unique_together.py | 17 ++++++ .../migrations/0112_alter_author_orcid_id.py | 20 +++++++ src/user/related_models/author_institution.py | 3 -- src/user/related_models/author_model.py | 2 +- src/utils/openalex.py | 6 +-- 9 files changed, 130 insertions(+), 15 deletions(-) create mode 100644 src/reputation/migrations/0084_alter_distribution_distribution_type.py create mode 100644 src/user/migrations/0111_alter_authorinstitution_unique_together.py create mode 100644 src/user/migrations/0112_alter_author_orcid_id.py diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index bc50c8941..95dc88fa9 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -156,7 +156,9 @@ def process_openalex_works(works): # Upsert concepts and associate to papers for paper_id, paper_data in paper_to_openalex_data.items(): + print("processing work: " + paper_data["openalex_work"]["id"]) work = paper_data["openalex_work"] + create_paper_related_tags( paper_id, paper_data["openalex_concepts"], paper_data["openalex_topics"] ) diff --git a/src/paper/paper_upload_tasks.py b/src/paper/paper_upload_tasks.py index 6e4450a8b..5f97d3b78 100644 --- a/src/paper/paper_upload_tasks.py +++ b/src/paper/paper_upload_tasks.py @@ -707,13 +707,17 @@ def create_paper_related_tags(paper_id, openalex_concepts=[], openalex_topics=[] with transaction.atomic(): biorxiv_hub_id = 436 - try: - paper.hubs.add(biorxiv_hub_id) - paper.unified_document.hubs.add(biorxiv_hub_id) - except Exception as e: - sentry.log_error( - e, message=f"Failed to add paper to biorXiv hub: {paper.id}" - ) + if Hub.objects.filter(id=biorxiv_hub_id).exists(): + try: + paper.hubs.add(biorxiv_hub_id) + paper.unified_document.hubs.add(biorxiv_hub_id) + except Exception as e: + sentry.log_error( + e, message=f"Failed to add paper to biorXiv hub: {paper.id}" + ) + else: + # bioRxiv hub does not exist. 
This must be a staging / dev environment + pass @app.task(queue=QUEUE_PAPER_METADATA) diff --git a/src/reputation/migrations/0084_alter_distribution_distribution_type.py b/src/reputation/migrations/0084_alter_distribution_distribution_type.py new file mode 100644 index 000000000..5081855e1 --- /dev/null +++ b/src/reputation/migrations/0084_alter_distribution_distribution_type.py @@ -0,0 +1,52 @@ +# Generated by Django 4.1 on 2024-05-21 00:05 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("reputation", "0083_delete_rewardmodel"), + ] + + operations = [ + migrations.AlterField( + model_name="distribution", + name="distribution_type", + field=models.CharField( + choices=[ + ("FLAG_PAPER", "FLAG_PAPER"), + ("PAPER_UPVOTED", "PAPER_UPVOTED"), + ("PAPER_Downvoted", "PAPER_Downvoted"), + ("CREATE_BULLET_POINT", "CREATE_BULLET_POINT"), + ("BULLET_POINT_FLAGGED", "BULLET_POINT_FLAGGED"), + ("BULLET_POINT_UPVOTED", "BULLET_POINT_UPVOTED"), + ("BULLET_POINT_DOWNVOTED", "BULLET_POINT_DOWNVOTED"), + ("COMMENT_CENSORED", "COMMENT_CENSORED"), + ("COMMENT_FLAGGED", "COMMENT_FLAGGED"), + ("COMMENT_UPVOTED", "COMMENT_UPVOTED"), + ("COMMENT_DOWNVOTED", "COMMENT_DOWNVOTED"), + ("REPLY_CENSORED", "REPLY_CENSORED"), + ("REPLY_FLAGGED", "REPLY_FLAGGED"), + ("REPLY_UPVOTED", "REPLY_UPVOTED"), + ("REPLY_DOWNVOTED", "REPLY_DOWNVOTED"), + ("THREAD_CENSORED", "THREAD_CENSORED"), + ("THREAD_FLAGGED", "THREAD_FLAGGED"), + ("THREAD_UPVOTED", "THREAD_UPVOTED"), + ("THREAD_DOWNVOTED", "THREAD_DOWNVOTED"), + ("CREATE_SUMMARY", "CREATE_SUMMARY"), + ("SUMMARY_UPVOTED", "SUMMARY_UPVOTED"), + ("SUMMARY_DOWNVOTED", "SUMMARY_DOWNVOTED"), + ("HYPOTHESIS_UPVOTED", "HYPOTHESIS_UPVOTED"), + ("HYPOTHESIS_DOWNVOTED", "HYPOTHESIS_DOWNVOTED"), + ("UPVOTE_RSC_POT", "UPVOTE_RSC_POT"), + ("STORED_PAPER_POT", "STORED_PAPER_POT"), + ("REWARD", "REWARD"), + ("PURCHASE", "PURCHASE"), + ("EDITOR_COMPENSATION", "EDITOR_COMPENSATION"), + ("EDITOR_PAYOUT", "EDITOR_PAYOUT"), + ], + max_length=255, + ), + ), + ] diff --git a/src/researchhub_document/management/commands/load_works_from_openalex.py b/src/researchhub_document/management/commands/load_works_from_openalex.py index 863a95c9e..37c7555f4 100644 --- a/src/researchhub_document/management/commands/load_works_from_openalex.py +++ b/src/researchhub_document/management/commands/load_works_from_openalex.py @@ -46,6 +46,12 @@ def add_arguments(self, parser): type=str, help="The paper respository source to pull from", ) + parser.add_argument( + "--openalex_id", + default=None, + type=str, + help="The OpenAlex ID to pull", + ) parser.add_argument( "--mode", default="backfill", @@ -56,6 +62,7 @@ def add_arguments(self, parser): def handle(self, *args, **kwargs): start_id = kwargs["start_id"] to_id = kwargs["to_id"] + openalex_id = kwargs["openalex_id"] mode = kwargs["mode"] source = kwargs["source"] batch_size = 100 @@ -90,11 +97,27 @@ def handle(self, *args, **kwargs): cursor = "*" page = 1 + openalex_ids = None + openalex_types = None + if openalex_id: + print("Fetching single work with id: " + openalex_id) + openalex_ids = [openalex_id] + else: + openalex_types = [ + "article", + "preprint", + "review", + ] + while cursor: print("Processing page " + str(page)) works, cursor = OA.get_works( - source_id=source, type="article", next_cursor=cursor + source_id=source, + types=openalex_types, + next_cursor=cursor, + openalex_ids=openalex_ids, ) + process_openalex_works(works) page += 1 return diff --git 
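With the new --openalex_id flag added to the command above, a single work can be backfilled instead of paging through an entire source. A hedged usage sketch via Django's call_command, assuming the command's other options keep their defaults; the work ID shown is the one referenced by the test fixture earlier in this series:

from django.core.management import call_command

# Pull exactly one OpenAlex work and run it through process_openalex_works.
call_command(
    "load_works_from_openalex",
    openalex_id="https://openalex.org/W2107277218",
)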
a/src/user/migrations/0111_alter_authorinstitution_unique_together.py b/src/user/migrations/0111_alter_authorinstitution_unique_together.py new file mode 100644 index 000000000..4f32e67d5 --- /dev/null +++ b/src/user/migrations/0111_alter_authorinstitution_unique_together.py @@ -0,0 +1,17 @@ +# Generated by Django 4.1 on 2024-05-21 19:45 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0110_alter_coauthor_author"), + ] + + operations = [ + migrations.AlterUniqueTogether( + name="authorinstitution", + unique_together=set(), + ), + ] diff --git a/src/user/migrations/0112_alter_author_orcid_id.py b/src/user/migrations/0112_alter_author_orcid_id.py new file mode 100644 index 000000000..f4b40c168 --- /dev/null +++ b/src/user/migrations/0112_alter_author_orcid_id.py @@ -0,0 +1,20 @@ +# Generated by Django 4.1 on 2024-05-21 20:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0111_alter_authorinstitution_unique_together"), + ] + + operations = [ + migrations.AlterField( + model_name="author", + name="orcid_id", + field=models.CharField( + blank=True, default=None, max_length=1024, null=True + ), + ), + ] diff --git a/src/user/related_models/author_institution.py b/src/user/related_models/author_institution.py index 85202aef1..c9e012f97 100644 --- a/src/user/related_models/author_institution.py +++ b/src/user/related_models/author_institution.py @@ -20,6 +20,3 @@ class AuthorInstitution(DefaultModel): years = ArrayField(models.IntegerField(), blank=True, default=list) is_primary = models.BooleanField(default=False) - - class Meta: - unique_together = ["author", "institution"] diff --git a/src/user/related_models/author_model.py b/src/user/related_models/author_model.py index 75a1a122e..1d7ea3d13 100644 --- a/src/user/related_models/author_model.py +++ b/src/user/related_models/author_model.py @@ -46,7 +46,7 @@ class Author(models.Model): University, on_delete=models.SET_NULL, null=True, blank=True ) orcid_id = models.CharField( - max_length=1024, default=None, null=True, blank=True, unique=True + max_length=1024, default=None, null=True, blank=True, unique=False ) openalex_ids = ArrayField( models.CharField( diff --git a/src/utils/openalex.py b/src/utils/openalex.py index 4e7fb1bcd..c3870a2ef 100644 --- a/src/utils/openalex.py +++ b/src/utils/openalex.py @@ -337,7 +337,7 @@ def get_authors( def get_works( self, since_date=None, - type=None, + types=None, next_cursor="*", batch_size=100, openalex_ids=None, @@ -345,8 +345,8 @@ def get_works( ): # Build the filter oa_filters = [] - if type: - oa_filters.append(f"type:{type}") + if isinstance(types, list): + oa_filters.append(f"type:{'|'.join(types)}") if source_id: oa_filters.append(f"primary_location.source.id:{source_id}") From 9ecbc70a0d5d5de4423c35e9b4cc73d8f148e07b Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 05:58:48 -0400 Subject: [PATCH 17/32] Removing accidentally comitted rep migration --- ...84_alter_distribution_distribution_type.py | 52 ------------------- 1 file changed, 52 deletions(-) delete mode 100644 src/reputation/migrations/0084_alter_distribution_distribution_type.py diff --git a/src/reputation/migrations/0084_alter_distribution_distribution_type.py b/src/reputation/migrations/0084_alter_distribution_distribution_type.py deleted file mode 100644 index 5081855e1..000000000 --- a/src/reputation/migrations/0084_alter_distribution_distribution_type.py +++ /dev/null @@ -1,52 
+0,0 @@ -# Generated by Django 4.1 on 2024-05-21 00:05 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("reputation", "0083_delete_rewardmodel"), - ] - - operations = [ - migrations.AlterField( - model_name="distribution", - name="distribution_type", - field=models.CharField( - choices=[ - ("FLAG_PAPER", "FLAG_PAPER"), - ("PAPER_UPVOTED", "PAPER_UPVOTED"), - ("PAPER_Downvoted", "PAPER_Downvoted"), - ("CREATE_BULLET_POINT", "CREATE_BULLET_POINT"), - ("BULLET_POINT_FLAGGED", "BULLET_POINT_FLAGGED"), - ("BULLET_POINT_UPVOTED", "BULLET_POINT_UPVOTED"), - ("BULLET_POINT_DOWNVOTED", "BULLET_POINT_DOWNVOTED"), - ("COMMENT_CENSORED", "COMMENT_CENSORED"), - ("COMMENT_FLAGGED", "COMMENT_FLAGGED"), - ("COMMENT_UPVOTED", "COMMENT_UPVOTED"), - ("COMMENT_DOWNVOTED", "COMMENT_DOWNVOTED"), - ("REPLY_CENSORED", "REPLY_CENSORED"), - ("REPLY_FLAGGED", "REPLY_FLAGGED"), - ("REPLY_UPVOTED", "REPLY_UPVOTED"), - ("REPLY_DOWNVOTED", "REPLY_DOWNVOTED"), - ("THREAD_CENSORED", "THREAD_CENSORED"), - ("THREAD_FLAGGED", "THREAD_FLAGGED"), - ("THREAD_UPVOTED", "THREAD_UPVOTED"), - ("THREAD_DOWNVOTED", "THREAD_DOWNVOTED"), - ("CREATE_SUMMARY", "CREATE_SUMMARY"), - ("SUMMARY_UPVOTED", "SUMMARY_UPVOTED"), - ("SUMMARY_DOWNVOTED", "SUMMARY_DOWNVOTED"), - ("HYPOTHESIS_UPVOTED", "HYPOTHESIS_UPVOTED"), - ("HYPOTHESIS_DOWNVOTED", "HYPOTHESIS_DOWNVOTED"), - ("UPVOTE_RSC_POT", "UPVOTE_RSC_POT"), - ("STORED_PAPER_POT", "STORED_PAPER_POT"), - ("REWARD", "REWARD"), - ("PURCHASE", "PURCHASE"), - ("EDITOR_COMPENSATION", "EDITOR_COMPENSATION"), - ("EDITOR_PAYOUT", "EDITOR_PAYOUT"), - ], - max_length=255, - ), - ), - ] From 44a7fc34231e1930af19587861241400168d92fc Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 10:27:45 -0400 Subject: [PATCH 18/32] [Author] Adding contribution summary --- src/paper/openalex_util.py | 23 ++++++++ .../0113_authorcontributionsummary.py | 52 +++++++++++++++++++ .../author_contribution_summary_model.py | 22 ++++++++ src/user/related_models/author_model.py | 3 ++ 4 files changed, 100 insertions(+) create mode 100644 src/user/migrations/0113_authorcontributionsummary.py create mode 100644 src/user/related_models/author_contribution_summary_model.py diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 95dc88fa9..01534b3c9 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -6,6 +6,9 @@ from django.db.models import Q import utils.sentry as sentry +from user.related_models.author_contribution_summary_model import ( + AuthorContributionSummary, +) from user.related_models.author_institution import AuthorInstitution from user.related_models.coauthor_model import CoAuthor from utils.openalex import OpenAlex @@ -248,6 +251,7 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): ) continue + # Set misc author metadata author.i10_index = oa_author.get("summary_stats", {}).get("i10_index") author.h_index = oa_author.get("summary_stats", {}).get("h_index") author.two_year_mean_citedness = oa_author.get("summary_stats", {}).get( @@ -256,6 +260,25 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): author.orcid_id = oa_author.get("orcid") author.save() + # Set author contribution/citation activity + activity_by_year = oa_author.get("counts_by_year", []) + for activity in activity_by_year: + try: + AuthorContributionSummary.objects.update_or_create( + source=AuthorContributionSummary.SOURCE_OPENALEX, + author=author, + year=activity.get("year"), + 
defaults={ + "works_count": activity.get("works_count", None), + "citation_count": activity.get("cited_by_count", None), + }, + ) + except Exception as e: + sentry.log_error( + e, + message=f"Failed to upsert author contribution summary for author: {author.id}", + ) + # Load all the institutions author is associated with affiliations = oa_author.get("affiliations", []) for affiliation in affiliations: diff --git a/src/user/migrations/0113_authorcontributionsummary.py b/src/user/migrations/0113_authorcontributionsummary.py new file mode 100644 index 000000000..3e1e7764a --- /dev/null +++ b/src/user/migrations/0113_authorcontributionsummary.py @@ -0,0 +1,52 @@ +# Generated by Django 4.1 on 2024-05-22 14:14 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0112_alter_author_orcid_id"), + ] + + operations = [ + migrations.CreateModel( + name="AuthorContributionSummary", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "source", + models.CharField( + choices=[ + ("OPENALEX", "OpenAlex"), + ("RESEARCHHUB", "ResearchHub"), + ], + max_length=20, + ), + ), + ("works_count", models.IntegerField(blank=True, null=True)), + ("citation_count", models.IntegerField(blank=True, null=True)), + ("year", models.IntegerField()), + ( + "author", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="contribution_summaries", + to="user.author", + ), + ), + ], + options={ + "unique_together": {("source", "author", "year")}, + }, + ), + ] diff --git a/src/user/related_models/author_contribution_summary_model.py b/src/user/related_models/author_contribution_summary_model.py new file mode 100644 index 000000000..208fd7cd1 --- /dev/null +++ b/src/user/related_models/author_contribution_summary_model.py @@ -0,0 +1,22 @@ +from django.db import models + + +class AuthorContributionSummary(models.Model): + SOURCE_OPENALEX = "OPENALEX" + SOURCE_RESEARCHHUB = "RESEARCHHUB" + SOURCE_CHOICES = [ + (SOURCE_OPENALEX, "OpenAlex"), + (SOURCE_RESEARCHHUB, "ResearchHub"), + ] + source = models.CharField( + max_length=20, null=False, blank=False, choices=SOURCE_CHOICES + ) + author = models.ForeignKey( + "user.Author", on_delete=models.CASCADE, related_name="contribution_summaries" + ) + works_count = models.IntegerField(null=True, blank=True) + citation_count = models.IntegerField(null=True, blank=True) + year = models.IntegerField(null=False, blank=False) + + class Meta: + unique_together = ("source", "author", "year") diff --git a/src/user/related_models/author_model.py b/src/user/related_models/author_model.py index 1d7ea3d13..9bd1d0bb9 100644 --- a/src/user/related_models/author_model.py +++ b/src/user/related_models/author_model.py @@ -9,6 +9,9 @@ from paper.utils import PAPER_SCORE_Q_ANNOTATION from purchase.related_models.purchase_model import Purchase from researchhub_case.constants.case_constants import APPROVED +from user.related_models.author_contribution_summary_model import ( + AuthorContributionSummary, +) from user.related_models.author_institution import AuthorInstitution from user.related_models.coauthor_model import CoAuthor from user.related_models.profile_image_storage import ProfileImageStorage From 1daee7cda266384eeae91e0dd5fad752c81804a1 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 10:30:43 -0400 Subject: [PATCH 19/32] str() to safeguard from type error --- 
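The update_or_create loop above keys each row on (source, author, year), matching the unique_together on AuthorContributionSummary, so re-running the ingest refreshes counts rather than duplicating them. A small illustrative query over those rows, assuming only the fields defined in the model above (openalex_totals is a hypothetical helper, not part of the patch):

from django.db.models import Sum

from user.related_models.author_contribution_summary_model import (
    AuthorContributionSummary,
)

def openalex_totals(author):
    # Sum the per-year OpenAlex counts into lifetime totals for one author.
    return AuthorContributionSummary.objects.filter(
        author=author,
        source=AuthorContributionSummary.SOURCE_OPENALEX,
    ).aggregate(
        total_works=Sum("works_count"),
        total_citations=Sum("citation_count"),
    )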
src/paper/openalex_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 01534b3c9..d980e2146 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -276,7 +276,7 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): except Exception as e: sentry.log_error( e, - message=f"Failed to upsert author contribution summary for author: {author.id}", + message=f"Failed to upsert author contribution summary for author: {str(author.id)}", ) # Load all the institutions author is associated with From 96a70de15cb71d494b0270b1d051e932dcd9db5e Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 16:17:12 -0400 Subject: [PATCH 20/32] [Author] Adding test to ensure creation of contribution summary --- src/paper/tests/test_process_openalex_works.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/paper/tests/test_process_openalex_works.py b/src/paper/tests/test_process_openalex_works.py index c136abe65..baaf943a8 100644 --- a/src/paper/tests/test_process_openalex_works.py +++ b/src/paper/tests/test_process_openalex_works.py @@ -5,6 +5,9 @@ from paper.models import Paper from paper.openalex_util import process_openalex_works +from user.related_models.author_contribution_summary_model import ( + AuthorContributionSummary, +) from user.related_models.author_model import Author from utils.openalex import OpenAlex @@ -195,3 +198,16 @@ def test_create_coauthor_relationship(self, mock_get_authors): ).first() self.assertGreater(author.coauthors.count(), 0) + + @patch.object(OpenAlex, "get_authors") + def test_create_contribution_activity(self, mock_get_authors): + with open("./paper/tests/openalex_authors.json", "r") as file: + mock_data = json.load(file) + mock_get_authors.return_value = (mock_data["results"], None) + + process_openalex_works(self.works) + author = Author.objects.filter( + openalex_ids__contains=[mock_data["results"][0]["id"]] + ).first() + + self.assertGreater(len(author.contribution_summaries.all()), 0) From 404fad28ba0d987a1f333d63911ef7fbf979526c Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 16:37:15 -0400 Subject: [PATCH 21/32] [Author] Removing is_primary from Author Institution as it may not be totally necessary --- .../0113_remove_authorinstitution_is_primary.py | 17 +++++++++++++++++ src/user/related_models/author_institution.py | 2 -- 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 src/user/migrations/0113_remove_authorinstitution_is_primary.py diff --git a/src/user/migrations/0113_remove_authorinstitution_is_primary.py b/src/user/migrations/0113_remove_authorinstitution_is_primary.py new file mode 100644 index 000000000..291be3951 --- /dev/null +++ b/src/user/migrations/0113_remove_authorinstitution_is_primary.py @@ -0,0 +1,17 @@ +# Generated by Django 4.1 on 2024-05-22 20:35 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0112_alter_author_orcid_id"), + ] + + operations = [ + migrations.RemoveField( + model_name="authorinstitution", + name="is_primary", + ), + ] diff --git a/src/user/related_models/author_institution.py b/src/user/related_models/author_institution.py index c9e012f97..d0edbea52 100644 --- a/src/user/related_models/author_institution.py +++ b/src/user/related_models/author_institution.py @@ -18,5 +18,3 @@ class AuthorInstitution(DefaultModel): ) years = ArrayField(models.IntegerField(), blank=True, 
default=list) - - is_primary = models.BooleanField(default=False) From fb937448e824d285133c1a6b92178c09f803eb66 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Tue, 21 May 2024 21:41:11 -0400 Subject: [PATCH 22/32] [Profile] Creating an endpoint for fetching profile data --- src/institution/serializers.py | 12 + src/researchhub/urls.py | 4 +- src/user/serializers.py | 67 +++ src/user/views/__init__.py | 1 + src/user/views/author_views.py | 976 +++++++++++++++++++++++++++++++++ src/user/views/user_views.py | 831 ---------------------------- 6 files changed, 1058 insertions(+), 833 deletions(-) create mode 100644 src/institution/serializers.py create mode 100644 src/user/views/author_views.py diff --git a/src/institution/serializers.py b/src/institution/serializers.py new file mode 100644 index 000000000..940352575 --- /dev/null +++ b/src/institution/serializers.py @@ -0,0 +1,12 @@ +from rest_framework.serializers import SerializerMethodField + +from institution.models import Institution +from researchhub.serializers import DynamicModelFieldSerializer + + +class DynamicInstitutionSerializer(DynamicModelFieldSerializer): + institutions = SerializerMethodField() + + class Meta: + model = Institution + fields = "__all__" diff --git a/src/researchhub/urls.py b/src/researchhub/urls.py index de285bb9e..a2130583a 100644 --- a/src/researchhub/urls.py +++ b/src/researchhub/urls.py @@ -45,7 +45,7 @@ from researchhub.settings import INSTALLED_APPS, USE_DEBUG_TOOLBAR from researchhub_comment.views.rh_comment_view import RhCommentViewSet from review.views.review_view import ReviewViewSet -from user.views import editor_views +from user.views import author_views, editor_views router = routers.DefaultRouter() @@ -81,7 +81,7 @@ basename="paper_submission", ) -router.register(r"author", user.views.AuthorViewSet, basename="author") +router.register(r"author", author_views.AuthorViewSet, basename="author") router.register(r"hub", hub.views.HubViewSet, basename="hub") diff --git a/src/user/serializers.py b/src/user/serializers.py index 6e1ce3c31..077e9e1ad 100644 --- a/src/user/serializers.py +++ b/src/user/serializers.py @@ -19,6 +19,7 @@ from hub.models import Hub from hub.serializers import DynamicHubSerializer, HubSerializer, SimpleHubSerializer from hypothesis.models import Hypothesis +from institution.serializers import DynamicInstitutionSerializer from paper.models import Paper, PaperSubmission from purchase.models import Purchase from reputation.models import Bounty, Contribution, Withdrawal @@ -39,6 +40,8 @@ UserApiToken, Verdict, ) +from user.related_models.author_institution import AuthorInstitution +from user.related_models.coauthor_model import CoAuthor from user.related_models.gatekeeper_model import Gatekeeper from utils import sentry @@ -1001,3 +1004,67 @@ def get_flag(self, verdict): def get_flagged_content_name(self, verdict): return verdict.flag.content_type.name + + +class DynamicAuthorInstitutionSerializer(DynamicModelFieldSerializer): + institution = SerializerMethodField() + + class Meta: + model = AuthorInstitution + fields = "__all__" + + def get_institution(self, author_institution): + context = self.context + _context_fields = context.get("author_institution::get_institution", {}) + + institution = author_institution.institution + serializer = DynamicInstitutionSerializer( + institution, context=context, **_context_fields + ) + return serializer.data + + +class DynamicCoAuthorSerializer(DynamicModelFieldSerializer): + coauthor = SerializerMethodField() + + class Meta: + model = 
CoAuthor + fields = "__all__" + + def get_coauthor(self, coauthor): + context = self.context + _context_fields = context.get("coauthor::get_coauthor", {}) + + serializer = DynamicAuthorSerializer( + coauthor.coauthor, context=context, **_context_fields + ) + return serializer.data + + +class DynamicAuthorProfileSerializer(DynamicModelFieldSerializer): + institutions = SerializerMethodField() + coauthors = SerializerMethodField() + + class Meta: + model = Author + fields = "__all__" + + def get_institutions(self, author): + context = self.context + _context_fields = context.get("author_profile::get_institutions", {}) + + serializer = DynamicAuthorInstitutionSerializer( + author.institutions, context=context, many=True, **_context_fields + ) + return serializer.data + + def get_coauthors(self, author): + context = self.context + _context_fields = context.get("author_profile::get_coauthors", {}) + serializer = DynamicAuthorSerializer( + [co.coauthor for co in author.coauthors.all()], + context=context, + many=True, + **_context_fields, + ) + return serializer.data diff --git a/src/user/views/__init__.py b/src/user/views/__init__.py index ae3412fb9..67d0b76e0 100644 --- a/src/user/views/__init__.py +++ b/src/user/views/__init__.py @@ -1,5 +1,6 @@ # flake8: noqa from user.views.audit_views import AuditViewSet +from user.views.author_views import AuthorViewSet from user.views.contribution_views import ContributionViewSet from user.views.editor_views import * from user.views.gatekeeper_view import GatekeeperViewSet diff --git a/src/user/views/author_views.py b/src/user/views/author_views.py new file mode 100644 index 000000000..5ab9521b5 --- /dev/null +++ b/src/user/views/author_views.py @@ -0,0 +1,976 @@ +import hmac +from datetime import datetime, timedelta +from hashlib import sha1 + +from allauth.account.models import EmailAddress +from django.contrib.contenttypes.models import ContentType +from django.core.cache import cache +from django.db import IntegrityError, models, transaction +from django.db.models import Exists, F, OuterRef, Q, Sum +from django.db.models.functions import Coalesce +from django.shortcuts import get_object_or_404 +from django.utils import timezone +from django.utils.decorators import method_decorator +from django.views.decorators.cache import cache_page +from django_filters.rest_framework import DjangoFilterBackend +from requests.exceptions import HTTPError +from rest_framework import status, viewsets +from rest_framework.decorators import action, api_view, permission_classes +from rest_framework.filters import OrderingFilter, SearchFilter +from rest_framework.permissions import ( + AllowAny, + IsAuthenticated, + IsAuthenticatedOrReadOnly, +) +from rest_framework.response import Response +from rest_framework.utils.urls import replace_query_param + +from discussion.models import Comment, Reply, Thread +from discussion.serializers import DynamicThreadSerializer +from hypothesis.related_models.hypothesis import Hypothesis +from paper.models import Paper +from paper.serializers import DynamicPaperSerializer +from paper.tasks import pull_openalex_author_works +from paper.utils import PAPER_SCORE_Q_ANNOTATION, get_cache_key +from paper.views import PaperViewSet +from reputation.models import Bounty, BountySolution, Contribution, Distribution +from reputation.serializers import ( + DynamicBountySerializer, + DynamicBountySolutionSerializer, + DynamicContributionSerializer, +) +from reputation.views import BountyViewSet +from researchhub.settings import ( + EMAIL_WHITELIST, + 
SIFT_MODERATION_WHITELIST, + SIFT_WEBHOOK_SECRET_KEY, +) +from researchhub_comment.models import RhCommentModel +from researchhub_document.related_models.researchhub_post_model import ResearchhubPost +from researchhub_document.serializers import DynamicPostSerializer +from review.models.review_model import Review +from user.filters import AuthorFilter, UserFilter +from user.models import Author, Follow, Major, University, User, UserApiToken +from user.permissions import ( + Censor, + DeleteAuthorPermission, + DeleteUserPermission, + HasVerificationPermission, + RequestorIsOwnUser, + UpdateAuthor, +) +from user.serializers import ( + AuthorEditableSerializer, + AuthorSerializer, + DynamicAuthorProfileSerializer, + DynamicUserSerializer, + MajorSerializer, + UniversitySerializer, + UserActions, + UserEditableSerializer, + UserSerializer, +) +from user.tasks import handle_spam_user_task, reinstate_user_task +from user.utils import calculate_show_referral, reset_latest_acitvity_cache +from utils.http import POST, RequestMethods +from utils.openalex import OpenAlex +from utils.permissions import CreateOrUpdateIfAllowed +from utils.sentry import log_error, log_info +from utils.throttles import THROTTLE_CLASSES + + +class AuthorViewSet(viewsets.ModelViewSet): + queryset = Author.objects.all() + serializer_class = AuthorSerializer + filter_backends = (SearchFilter, DjangoFilterBackend, OrderingFilter) + filter_class = AuthorFilter + search_fields = ("first_name", "last_name") + permission_classes = [ + (IsAuthenticatedOrReadOnly & UpdateAuthor & CreateOrUpdateIfAllowed) + | DeleteAuthorPermission + ] + throttle_classes = THROTTLE_CLASSES + + def create(self, request, *args, **kwargs): + """Override to use an editable serializer.""" + serializer = AuthorEditableSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + self.perform_create(serializer) + headers = self.get_success_headers(serializer.data) + return Response( + serializer.data, status=status.HTTP_201_CREATED, headers=headers + ) + + def update(self, request, *args, **kwargs): + """Override to use an editable serializer.""" + partial = kwargs.pop("partial", False) + instance = self.get_object() + serializer = AuthorEditableSerializer( + instance, data=request.data, partial=partial + ) + serializer.is_valid(raise_exception=True) + self.perform_update(serializer) + + if getattr(instance, "_prefetched_objects_cache", None): + # If 'prefetch_related' has been applied to a queryset, we need to + # forcibly invalidate the prefetch cache on the instance. 
+ instance._prefetched_objects_cache = {} + + return Response(serializer.data) + + @action(detail=True, methods=["get"], permission_classes=[AllowAny]) + def profile(self, request, pk=None): + author = self.get_object() + serializer = DynamicAuthorProfileSerializer( + author, + context={ + "author_institution::get_institution": { + "_include_fields": [ + "id", + "display_name", + "region", + "city", + "latitude", + "longitude", + "image_url", + "image_thumbnail_url", + ] + }, + "author_profile::get_institutions": { + "_include_fields": [ + "id", + "years", + "is_primary", + "institution", + ] + }, + "author_profile::get_coauthors": { + "_include_fields": [ + "id", + "first_name", + "last_name", + ] + }, + "coauthor::get_coauthor": { + "_include_fields": [ + "id", + "first_name", + "last_name", + "profile_image", + "paper", + ] + }, + }, + _include_fields=( + "id", + "openalex_ids", + "first_name", + "last_name", + "description", + "headline", + "profile_image", + "orcid_id", + "h_index", + "i10_index", + "google_scholar", + "linkedin", + "twitter", + "two_year_mean_citedness", + "institutions", + "created_date", + "country_code", + "coauthors", + ), + ) + return Response(serializer.data, status=200) + + @action( + detail=True, + methods=["get"], + ) + def get_authored_papers(self, request, pk=None): + author = self.get_object() + prefetch_lookups = PaperViewSet.prefetch_lookups(self) + authored_papers = ( + author.authored_papers.filter(is_removed=False) + .prefetch_related( + *prefetch_lookups, + ) + .annotate(paper_score=PAPER_SCORE_Q_ANNOTATION) + .order_by("-paper_score") + ) + context = self._get_authored_papers_context() + page = self.paginate_queryset(authored_papers) + serializer = DynamicPaperSerializer( + page, + _include_fields=[ + "id", + "abstract", + "authors", + "boost_amount", + "file", + "first_preview", + "hubs", + "paper_title", + "score", + "title", + "uploaded_by", + "uploaded_date", + "url", + "paper_publish_date", + "slug", + "created_date", + ], + many=True, + context=context, + ) + response = self.get_paginated_response(serializer.data) + return response + + def _get_authored_papers_context(self): + context = { + "pap_dps_get_authors": { + "_include_fields": [ + "id", + "first_name", + "last_name", + "profile_image", + ] + }, + "pap_dps_get_uploaded_by": { + "_include_fields": [ + "id", + "author_profile", + ] + }, + "pap_dps_get_first_preview": { + "_include_fields": [ + "file", + ] + }, + "pap_dps_get_hubs": { + "_include_fields": ( + "id", + "slug", + "name", + ) + }, + "usr_dus_get_author_profile": { + "_include_fields": ["id", "first_name", "last_name", "profile_image"] + }, + "doc_duds_get_hubs": { + "_include_fields": [ + "id", + "name", + "slug", + "hub_image", + ] + }, + } + return context + + def _get_contribution_context(self, filter_by_user_id): + context = { + "request": self.request, + "_config": { + "filter_by_user_id": filter_by_user_id, + }, + "doc_dps_get_created_by": { + "_include_fields": [ + "id", + "author_profile", + ] + }, + "doc_duds_get_created_by": { + "_include_fields": [ + "id", + "author_profile", + ] + }, + "doc_dps_get_hubs": { + "_include_fields": [ + "name", + "slug", + ] + }, + "pap_dps_get_uploaded_by": { + "_include_fields": [ + "id", + "author_profile", + ] + }, + "dis_dts_get_created_by": { + "_include_fields": [ + "id", + "author_profile", + ] + }, + "dis_dts_get_review": { + "_include_fields": [ + "id", + "score", + ] + }, + "dis_dcs_get_created_by": { + "_include_fields": [ + "author_profile", + "id", + ] + }, + 
"dis_drs_get_created_by": { + "_include_fields": [ + "author_profile", + "id", + ] + }, + "pap_dps_get_user_vote": { + "_include_fields": [ + "id", + "created_by", + "created_date", + "vote_type", + ] + }, + "pap_dps_get_hubs": { + "_include_fields": [ + "name", + "slug", + ] + }, + "pap_dpvs_paper": {"_exclude_fields": "__all__"}, + "doc_dps_get_user_vote": { + "_include_fields": [ + "id", + "content_type", + "created_by", + "created_date", + "vote_type", + "item", + ] + }, + "dis_drs_get_user_vote": { + "_include_fields": [ + "id", + "content_type", + "created_by", + "created_date", + "vote_type", + "item", + ] + }, + "dis_dcs_get_user_vote": { + "_include_fields": [ + "id", + "content_type", + "created_by", + "created_date", + "vote_type", + "item", + ] + }, + "dis_dts_get_user_vote": { + "_include_fields": [ + "id", + "content_type", + "created_by", + "created_date", + "vote_type", + "item", + ] + }, + "dis_dts_get_comments": { + "_include_fields": [ + "created_by", + "created_date", + "updated_date", + "created_location", + "external_metadata", + "id", + "is_created_by_editor", + "is_public", + "is_removed", + "paper_id", + "parent", + "plain_text", + "promoted", + "replies", + "reply_count", + "score", + "source", + "text", + "thread_id", + "user_flag", + "user_vote", + "was_edited", + ] + }, + "dis_dcs_get_replies": { + "_include_fields": [ + "created_by", + "created_location", + "id", + "is_created_by_editor", + "is_public", + "is_removed", + "paper_id", + "parent", + "plain_text", + "promoted", + "score", + "text", + "thread_id", + "user_flag", + "user_vote", + "created_date", + "updated_date", + ] + }, + "doc_duds_get_documents": { + "_include_fields": [ + "promoted", + "abstract", + "aggregate_citation_consensus", + "created_by", + "created_date", + "hot_score", + "hubs", + "id", + "discussion_count", + "paper_title", + "preview_img", + "renderable_text", + "score", + "slug", + "title", + "uploaded_by", + "uploaded_date", + "user_vote", + ] + }, + "doc_duds_get_bounties": {"_include_fields": ["id"]}, + "doc_duds_get_document_filter": { + "_include_fields": [ + "answered", + "bounty_open", + "bounty_total_amount", + ] + }, + "rep_dcs_get_author": { + "_include_fields": [ + "id", + "first_name", + "last_name", + "profile_image", + ] + }, + "rep_dcs_get_unified_document": { + "_include_fields": [ + "id", + "document_type", + "documents", + "hubs", + "document_filter", + ] + }, + "rep_dcs_get_source": { + "_include_fields": [ + "amount", + "citation", + "comment_count", + "comment_content_json", + # 02-18-24 Kobe: Temporarily commenting this out as it leads to a fatal error. + # I'm not sure this key is necessary for the client. 
Only time will tell + # "children", + "content_type", + "created_by", + "created_date", + "created_location", + "discussion_type", + "document_meta", + "external_metadata", + "hypothesis", + "id", + "is_public", + "is_removed", + "paper_slug", + "paper_title", + "paper", + "plain_text", + "post_slug", + "post", + "promoted", + "replies", + "review", + "score", + "slug", + "source", + "text", + "title", + "user_flag", + "user_vote", + "vote", + "bet_amount", + ] + }, + "rep_dbs_get_item": { + "_include_fields": [ + "created_by", + "created_date", + "updated_date", + "created_location", + "external_metadata", + "id", + "is_created_by_editor", + "is_public", + "is_removed", + "paper_id", + "parent", + "plain_text", + "promoted", + "replies", + "reply_count", + "score", + "source", + "text", + "thread_id", + "paper", + "post", + "awarded_bounty_amount", + "unified_document", + "user_flag", + "user_vote", + "was_edited", + ] + }, + "rep_dbss_get_item": { + "_include_fields": [ + "created_by", + "created_date", + "updated_date", + "created_location", + "external_metadata", + "id", + "is_created_by_editor", + "is_public", + "is_removed", + "paper_id", + "parent", + "plain_text", + "promoted", + "replies", + "reply_count", + "score", + "source", + "text", + "awarded_bounty_amount", + "thread_id", + "user_flag", + "user_vote", + "was_edited", + ] + }, + "rep_dbs_get_created_by": {"_include_fields": ["author_profile", "id"]}, + "dis_dts_get_bounties": { + "_include_fields": [ + "amount", + "created_by", + ] + }, + "dis_dts_get_paper": { + "_include_fields": [ + "id", + "slug", + ] + }, + "dis_dts_get_post": { + "_include_fields": [ + "id", + "slug", + ] + }, + "doc_duds_get_hubs": { + "_include_fields": [ + "name", + "slug", + ] + }, + "hyp_dhs_get_hubs": { + "_include_fields": [ + "name", + "slug", + ] + }, + "hyp_dhs_get_created_by": { + "_include_fields": [ + "author_profile", + "id", + ] + }, + "rhc_dcs_get_created_by": { + "_include_fields": [ + "first_name", + "last_name", + "author_profile", + ] + }, + "rhc_dcs_get_children": { + "_exclude_fields": [ + "thread", + "comment_content_src", + "promoted", + "user_endorsement", + "user_flag", + "purchases", + ] + }, + "rhc_dcs_get_purchases": { + "_include_fields": [ + "amount", + "user", + ] + }, + "rev_drs_get_created_by": { + "_include_fields": [ + "author_profile", + "id", + ] + }, + "pch_dps_get_user": { + "_include_fields": [ + "author_profile", + "id", + ] + }, + } + return context + + @action( + detail=True, + methods=["get"], + ) + def contributions(self, request, pk=None): + author = self.get_object() + + query_params = request.query_params + ordering = query_params.get("ordering", "-created_date") + asset_type = query_params.get("type", "overview") + contributions = self._get_author_contribution_queryset( + author.id, ordering, asset_type + ) + + page = self.paginate_queryset(contributions) + context = self._get_contribution_context(author.user_id) + serializer = DynamicContributionSerializer( + page, + _include_fields=[ + "contribution_type", + "created_date", + "id", + "source", + "created_by", + "unified_document", + "author", + ], + context=context, + many=True, + ) + data = serializer.data + response = self.get_paginated_response(data) + if asset_type == "bounty_offered": + total_bounty_amount = contributions.aggregate( + total_amount=Sum("bounty__amount") + ) + response.data["total_bounty_amount"] = total_bounty_amount.get( + "total_amount", 0 + ) + + return response + + def _get_author_comments(self, author_id): + author = 
self.get_object() + user = author.user + + if user: + user_threads = RhCommentModel.objects.filter(Q(created_by=user)) + return user_threads + return [] + + def _get_author_contribution_queryset(self, author_id, ordering, asset_type): + author_comments = self._get_author_comments(author_id) + rh_comment_content_type = ContentType.objects.get_for_model(RhCommentModel) + post_content_type = ContentType.objects.get_for_model(ResearchhubPost) + paper_content_type = ContentType.objects.get_for_model(Paper) + hypothesis_content_type = ContentType.objects.get_for_model(Hypothesis) + review_content_type = ContentType.objects.get_for_model(Review) + bounty_content_type = ContentType.objects.get_for_model(Bounty) + bounty_solution_content_type = ContentType.objects.get_for_model(BountySolution) + + types = asset_type.split(",") + + query = Q() + for asset_type in types: + if asset_type == "overview": + query |= Q( + Q( + unified_document__is_removed=False, + content_type=rh_comment_content_type, + # we filter by object_id instead of author_profile because + # sometimes there's contributions without a matching comment. + # this method ensures the comments exists. + object_id__in=author_comments, + contribution_type__in=[ + Contribution.COMMENTER, + ], + ) + | Q( + unified_document__is_removed=False, + user__author_profile=author_id, + content_type_id__in=[ + hypothesis_content_type, + paper_content_type, + post_content_type, + review_content_type, + ], + contribution_type__in=[ + Contribution.SUBMITTER, + Contribution.SUPPORTER, + ], + ) + ) + elif asset_type == "discussion": + query |= Q( + unified_document__is_removed=False, + user__author_profile=author_id, + content_type_id=post_content_type, + contribution_type__in=[Contribution.SUBMITTER], + ) + elif asset_type == "hypothesis": + query |= Q( + unified_document__is_removed=False, + user__author_profile=author_id, + content_type_id=hypothesis_content_type, + contribution_type__in=[Contribution.SUBMITTER], + ) + elif asset_type == "comment": + query |= Q( + unified_document__is_removed=False, + content_type=rh_comment_content_type, + # we filter by object_id instead of author_profile because + # sometimes there's contributions without a matching comment. + # this method ensures the comments exists. 
+ object_id__in=author_comments, + contribution_type__in=[Contribution.COMMENTER], + ) + elif asset_type == "paper": + query |= Q( + unified_document__is_removed=False, + user__author_profile=author_id, + content_type_id=paper_content_type, + contribution_type__in=[Contribution.SUBMITTER], + ) + elif asset_type == "bounty_offered": + query |= Q( + unified_document__is_removed=False, + user__author_profile=author_id, + content_type_id=bounty_content_type, + contribution_type__in=[Contribution.BOUNTY_CREATED], + ) + elif asset_type == "bounty_earned": + query |= Q( + unified_document__is_removed=False, + user__author_profile=author_id, + content_type_id=bounty_solution_content_type, + contribution_type__in=[Contribution.BOUNTY_SOLUTION], + ) + else: + raise Exception("Unrecognized asset type: {}".format(asset_type)) + + qs = ( + Contribution.objects.filter(query) + .select_related( + "content_type", + "user", + "user__author_profile", + "unified_document", + ) + .order_by(ordering) + ) + + return qs + + @action( + detail=True, + methods=["get"], + ) + def get_user_discussions(self, request, pk=None): + author = self.get_object() + user = author.user + + if user: + user_discussions = user.thread_set.filter(is_removed=False).order_by("-id") + else: + user_discussions = self.queryset.none() + + page = self.paginate_queryset(user_discussions) + context = self._get_user_discussion_context() + serializer = DynamicThreadSerializer( + page, + _include_fields=[ + "id", + "comment_count", + "created_by", + "created_date", + "paper", + "post", + "score", + "text", + ], + many=True, + context=context, + ) + return self.get_paginated_response(serializer.data) + + def _get_user_discussion_context(self): + context = { + "dis_dts_get_created_by": { + "_include_fields": [ + "id", + "author_profile", + ] + }, + "dis_dts_get_paper": { + "_include_fields": [ + "id", + "slug", + ] + }, + "dis_dts_get_post": { + "_include_fields": [ + "id", + "slug", + ] + }, + "usr_dus_get_author_profile": { + "_include_fields": ["id", "first_name", "last_name", "profile_image"] + }, + } + return context + + @action( + detail=True, + methods=["get"], + ) + def get_user_contributions(self, request, pk=None): + author = self.get_object() + user = author.user + + if user: + prefetch_lookups = PaperViewSet.prefetch_lookups(self) + user_paper_uploads = user.papers.filter(is_removed=False).prefetch_related( + *prefetch_lookups + ) + else: + user_paper_uploads = self.queryset.none() + + context = self._get_user_contributions_context() + page = self.paginate_queryset(user_paper_uploads) + serializer = DynamicPaperSerializer( + page, + _include_fields=[ + "id", + "abstract", + "boost_amount", + "file", + "hubs", + "paper_title", + "score", + "title", + "slug", + "uploaded_by", + "uploaded_date", + ], + many=True, + context=context, + ) + response = self.get_paginated_response(serializer.data) + + return response + + def _get_user_contributions_context(self): + context = { + "pap_dps_get_uploaded_by": { + "_include_fields": [ + "id", + "author_profile", + ] + }, + "usr_dus_get_author_profile": { + "_include_fields": ["id", "first_name", "last_name", "profile_image"] + }, + "doc_duds_get_hubs": { + "_include_fields": [ + "id", + "name", + "slug", + "hub_image", + ] + }, + } + return context + + @action( + detail=True, + methods=["get"], + ) + def get_user_posts(self, request, pk=None): + author = self.get_object() + user = author.user + + if user: + user_posts = user.created_posts.all().prefetch_related( + "unified_document", "purchases" + 
) + else: + user_posts = self.queryset.none() + + context = self._get_user_posts_context() + page = self.paginate_queryset(user_posts) + serializer = DynamicPostSerializer( + page, + _include_fields=[ + "id", + "created_by", + "hubs", + "boost_amount", + "renderable_text", + "score", + "slug", + "title", + ], + many=True, + context=context, + ) + response = self.get_paginated_response(serializer.data) + return response + + def _get_user_posts_context(self): + context = { + "doc_dps_get_created_by": { + "_include_fields": [ + "id", + "author_profile", + ] + }, + "usr_dus_get_author_profile": { + "_include_fields": ["id", "first_name", "last_name", "profile_image"] + }, + "doc_dps_get_hubs": { + "_include_fields": [ + "id", + "name", + "slug", + "hub_image", + ] + }, + } + return context diff --git a/src/user/views/user_views.py b/src/user/views/user_views.py index 75e533233..f0e15aa3d 100644 --- a/src/user/views/user_views.py +++ b/src/user/views/user_views.py @@ -914,834 +914,3 @@ class MajorViewSet(viewsets.ReadOnlyModelViewSet): filter_backends = (SearchFilter, DjangoFilterBackend, OrderingFilter) search_fields = ("major", "major_category") permission_classes = [AllowAny] - - -class AuthorViewSet(viewsets.ModelViewSet): - queryset = Author.objects.all() - serializer_class = AuthorSerializer - filter_backends = (SearchFilter, DjangoFilterBackend, OrderingFilter) - filter_class = AuthorFilter - search_fields = ("first_name", "last_name") - permission_classes = [ - (IsAuthenticatedOrReadOnly & UpdateAuthor & CreateOrUpdateIfAllowed) - | DeleteAuthorPermission - ] - throttle_classes = THROTTLE_CLASSES - - def create(self, request, *args, **kwargs): - """Override to use an editable serializer.""" - serializer = AuthorEditableSerializer(data=request.data) - serializer.is_valid(raise_exception=True) - self.perform_create(serializer) - headers = self.get_success_headers(serializer.data) - return Response( - serializer.data, status=status.HTTP_201_CREATED, headers=headers - ) - - def update(self, request, *args, **kwargs): - """Override to use an editable serializer.""" - partial = kwargs.pop("partial", False) - instance = self.get_object() - serializer = AuthorEditableSerializer( - instance, data=request.data, partial=partial - ) - serializer.is_valid(raise_exception=True) - self.perform_update(serializer) - - if getattr(instance, "_prefetched_objects_cache", None): - # If 'prefetch_related' has been applied to a queryset, we need to - # forcibly invalidate the prefetch cache on the instance. 
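For orientation, here is a brief usage sketch of the author endpoints defined in the new author_views.py above. It is an editor's illustration rather than part of the patch series: the /api/author/ route prefix, the author id 123, and the default paginated response shape are all assumptions, since the router registration is not shown in this series.

    from rest_framework.test import APIClient

    client = APIClient()

    # The profile action is AllowAny, so no authentication is needed.
    profile = client.get("/api/author/123/profile/").data
    print(profile["first_name"], profile["institutions"], profile["coauthors"])

    # Authored papers come back paginated, ordered by the annotated paper_score.
    papers = client.get("/api/author/123/get_authored_papers/").data["results"]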
- instance._prefetched_objects_cache = {} - - return Response(serializer.data) - - @action( - detail=True, - methods=["get"], - ) - def get_authored_papers(self, request, pk=None): - author = self.get_object() - prefetch_lookups = PaperViewSet.prefetch_lookups(self) - authored_papers = ( - author.authored_papers.filter(is_removed=False) - .prefetch_related( - *prefetch_lookups, - ) - .annotate(paper_score=PAPER_SCORE_Q_ANNOTATION) - .order_by("-paper_score") - ) - context = self._get_authored_papers_context() - page = self.paginate_queryset(authored_papers) - serializer = DynamicPaperSerializer( - page, - _include_fields=[ - "id", - "abstract", - "authors", - "boost_amount", - "file", - "first_preview", - "hubs", - "paper_title", - "score", - "title", - "uploaded_by", - "uploaded_date", - "url", - "paper_publish_date", - "slug", - "created_date", - ], - many=True, - context=context, - ) - response = self.get_paginated_response(serializer.data) - return response - - def _get_authored_papers_context(self): - context = { - "pap_dps_get_authors": { - "_include_fields": [ - "id", - "first_name", - "last_name", - "profile_image", - ] - }, - "pap_dps_get_uploaded_by": { - "_include_fields": [ - "id", - "author_profile", - ] - }, - "pap_dps_get_first_preview": { - "_include_fields": [ - "file", - ] - }, - "pap_dps_get_hubs": { - "_include_fields": ( - "id", - "slug", - "name", - ) - }, - "usr_dus_get_author_profile": { - "_include_fields": ["id", "first_name", "last_name", "profile_image"] - }, - "doc_duds_get_hubs": { - "_include_fields": [ - "id", - "name", - "slug", - "hub_image", - ] - }, - } - return context - - def _get_contribution_context(self, filter_by_user_id): - context = { - "request": self.request, - "_config": { - "filter_by_user_id": filter_by_user_id, - }, - "doc_dps_get_created_by": { - "_include_fields": [ - "id", - "author_profile", - ] - }, - "doc_duds_get_created_by": { - "_include_fields": [ - "id", - "author_profile", - ] - }, - "doc_dps_get_hubs": { - "_include_fields": [ - "name", - "slug", - ] - }, - "pap_dps_get_uploaded_by": { - "_include_fields": [ - "id", - "author_profile", - ] - }, - "dis_dts_get_created_by": { - "_include_fields": [ - "id", - "author_profile", - ] - }, - "dis_dts_get_review": { - "_include_fields": [ - "id", - "score", - ] - }, - "dis_dcs_get_created_by": { - "_include_fields": [ - "author_profile", - "id", - ] - }, - "dis_drs_get_created_by": { - "_include_fields": [ - "author_profile", - "id", - ] - }, - "pap_dps_get_user_vote": { - "_include_fields": [ - "id", - "created_by", - "created_date", - "vote_type", - ] - }, - "pap_dps_get_hubs": { - "_include_fields": [ - "name", - "slug", - ] - }, - "pap_dpvs_paper": {"_exclude_fields": "__all__"}, - "doc_dps_get_user_vote": { - "_include_fields": [ - "id", - "content_type", - "created_by", - "created_date", - "vote_type", - "item", - ] - }, - "dis_drs_get_user_vote": { - "_include_fields": [ - "id", - "content_type", - "created_by", - "created_date", - "vote_type", - "item", - ] - }, - "dis_dcs_get_user_vote": { - "_include_fields": [ - "id", - "content_type", - "created_by", - "created_date", - "vote_type", - "item", - ] - }, - "dis_dts_get_user_vote": { - "_include_fields": [ - "id", - "content_type", - "created_by", - "created_date", - "vote_type", - "item", - ] - }, - "dis_dts_get_comments": { - "_include_fields": [ - "created_by", - "created_date", - "updated_date", - "created_location", - "external_metadata", - "id", - "is_created_by_editor", - "is_public", - "is_removed", - "paper_id", - 
"parent", - "plain_text", - "promoted", - "replies", - "reply_count", - "score", - "source", - "text", - "thread_id", - "user_flag", - "user_vote", - "was_edited", - ] - }, - "dis_dcs_get_replies": { - "_include_fields": [ - "created_by", - "created_location", - "id", - "is_created_by_editor", - "is_public", - "is_removed", - "paper_id", - "parent", - "plain_text", - "promoted", - "score", - "text", - "thread_id", - "user_flag", - "user_vote", - "created_date", - "updated_date", - ] - }, - "doc_duds_get_documents": { - "_include_fields": [ - "promoted", - "abstract", - "aggregate_citation_consensus", - "created_by", - "created_date", - "hot_score", - "hubs", - "id", - "discussion_count", - "paper_title", - "preview_img", - "renderable_text", - "score", - "slug", - "title", - "uploaded_by", - "uploaded_date", - "user_vote", - ] - }, - "doc_duds_get_bounties": {"_include_fields": ["id"]}, - "doc_duds_get_document_filter": { - "_include_fields": [ - "answered", - "bounty_open", - "bounty_total_amount", - ] - }, - "rep_dcs_get_author": { - "_include_fields": [ - "id", - "first_name", - "last_name", - "profile_image", - ] - }, - "rep_dcs_get_unified_document": { - "_include_fields": [ - "id", - "document_type", - "documents", - "hubs", - "document_filter", - ] - }, - "rep_dcs_get_source": { - "_include_fields": [ - "amount", - "citation", - "comment_count", - "comment_content_json", - # 02-18-24 Kobe: Temporarily commenting this out as it leads to a fatal error. - # I'm not sure this key is necessary for the client. Only time will tell - # "children", - "content_type", - "created_by", - "created_date", - "created_location", - "discussion_type", - "document_meta", - "external_metadata", - "hypothesis", - "id", - "is_public", - "is_removed", - "paper_slug", - "paper_title", - "paper", - "plain_text", - "post_slug", - "post", - "promoted", - "replies", - "review", - "score", - "slug", - "source", - "text", - "title", - "user_flag", - "user_vote", - "vote", - "bet_amount", - ] - }, - "rep_dbs_get_item": { - "_include_fields": [ - "created_by", - "created_date", - "updated_date", - "created_location", - "external_metadata", - "id", - "is_created_by_editor", - "is_public", - "is_removed", - "paper_id", - "parent", - "plain_text", - "promoted", - "replies", - "reply_count", - "score", - "source", - "text", - "thread_id", - "paper", - "post", - "awarded_bounty_amount", - "unified_document", - "user_flag", - "user_vote", - "was_edited", - ] - }, - "rep_dbss_get_item": { - "_include_fields": [ - "created_by", - "created_date", - "updated_date", - "created_location", - "external_metadata", - "id", - "is_created_by_editor", - "is_public", - "is_removed", - "paper_id", - "parent", - "plain_text", - "promoted", - "replies", - "reply_count", - "score", - "source", - "text", - "awarded_bounty_amount", - "thread_id", - "user_flag", - "user_vote", - "was_edited", - ] - }, - "rep_dbs_get_created_by": {"_include_fields": ["author_profile", "id"]}, - "dis_dts_get_bounties": { - "_include_fields": [ - "amount", - "created_by", - ] - }, - "dis_dts_get_paper": { - "_include_fields": [ - "id", - "slug", - ] - }, - "dis_dts_get_post": { - "_include_fields": [ - "id", - "slug", - ] - }, - "doc_duds_get_hubs": { - "_include_fields": [ - "name", - "slug", - ] - }, - "hyp_dhs_get_hubs": { - "_include_fields": [ - "name", - "slug", - ] - }, - "hyp_dhs_get_created_by": { - "_include_fields": [ - "author_profile", - "id", - ] - }, - "rhc_dcs_get_created_by": { - "_include_fields": [ - "first_name", - "last_name", - 
"author_profile", - ] - }, - "rhc_dcs_get_children": { - "_exclude_fields": [ - "thread", - "comment_content_src", - "promoted", - "user_endorsement", - "user_flag", - "purchases", - ] - }, - "rhc_dcs_get_purchases": { - "_include_fields": [ - "amount", - "user", - ] - }, - "rev_drs_get_created_by": { - "_include_fields": [ - "author_profile", - "id", - ] - }, - "pch_dps_get_user": { - "_include_fields": [ - "author_profile", - "id", - ] - }, - } - return context - - @action( - detail=True, - methods=["get"], - ) - def contributions(self, request, pk=None): - author = self.get_object() - - query_params = request.query_params - ordering = query_params.get("ordering", "-created_date") - asset_type = query_params.get("type", "overview") - contributions = self._get_author_contribution_queryset( - author.id, ordering, asset_type - ) - - page = self.paginate_queryset(contributions) - context = self._get_contribution_context(author.user_id) - serializer = DynamicContributionSerializer( - page, - _include_fields=[ - "contribution_type", - "created_date", - "id", - "source", - "created_by", - "unified_document", - "author", - ], - context=context, - many=True, - ) - data = serializer.data - response = self.get_paginated_response(data) - if asset_type == "bounty_offered": - total_bounty_amount = contributions.aggregate( - total_amount=Sum("bounty__amount") - ) - response.data["total_bounty_amount"] = total_bounty_amount.get( - "total_amount", 0 - ) - - return response - - def _get_author_comments(self, author_id): - author = self.get_object() - user = author.user - - if user: - user_threads = RhCommentModel.objects.filter(Q(created_by=user)) - return user_threads - return [] - - def _get_author_contribution_queryset(self, author_id, ordering, asset_type): - author_comments = self._get_author_comments(author_id) - rh_comment_content_type = ContentType.objects.get_for_model(RhCommentModel) - post_content_type = ContentType.objects.get_for_model(ResearchhubPost) - paper_content_type = ContentType.objects.get_for_model(Paper) - hypothesis_content_type = ContentType.objects.get_for_model(Hypothesis) - review_content_type = ContentType.objects.get_for_model(Review) - bounty_content_type = ContentType.objects.get_for_model(Bounty) - bounty_solution_content_type = ContentType.objects.get_for_model(BountySolution) - - types = asset_type.split(",") - - query = Q() - for asset_type in types: - if asset_type == "overview": - query |= Q( - Q( - unified_document__is_removed=False, - content_type=rh_comment_content_type, - # we filter by object_id instead of author_profile because - # sometimes there's contributions without a matching comment. - # this method ensures the comments exists. 
- object_id__in=author_comments, - contribution_type__in=[ - Contribution.COMMENTER, - ], - ) - | Q( - unified_document__is_removed=False, - user__author_profile=author_id, - content_type_id__in=[ - hypothesis_content_type, - paper_content_type, - post_content_type, - review_content_type, - ], - contribution_type__in=[ - Contribution.SUBMITTER, - Contribution.SUPPORTER, - ], - ) - ) - elif asset_type == "discussion": - query |= Q( - unified_document__is_removed=False, - user__author_profile=author_id, - content_type_id=post_content_type, - contribution_type__in=[Contribution.SUBMITTER], - ) - elif asset_type == "hypothesis": - query |= Q( - unified_document__is_removed=False, - user__author_profile=author_id, - content_type_id=hypothesis_content_type, - contribution_type__in=[Contribution.SUBMITTER], - ) - elif asset_type == "comment": - query |= Q( - unified_document__is_removed=False, - content_type=rh_comment_content_type, - # we filter by object_id instead of author_profile because - # sometimes there's contributions without a matching comment. - # this method ensures the comments exists. - object_id__in=author_comments, - contribution_type__in=[Contribution.COMMENTER], - ) - elif asset_type == "paper": - query |= Q( - unified_document__is_removed=False, - user__author_profile=author_id, - content_type_id=paper_content_type, - contribution_type__in=[Contribution.SUBMITTER], - ) - elif asset_type == "bounty_offered": - query |= Q( - unified_document__is_removed=False, - user__author_profile=author_id, - content_type_id=bounty_content_type, - contribution_type__in=[Contribution.BOUNTY_CREATED], - ) - elif asset_type == "bounty_earned": - query |= Q( - unified_document__is_removed=False, - user__author_profile=author_id, - content_type_id=bounty_solution_content_type, - contribution_type__in=[Contribution.BOUNTY_SOLUTION], - ) - else: - raise Exception("Unrecognized asset type: {}".format(asset_type)) - - qs = ( - Contribution.objects.filter(query) - .select_related( - "content_type", - "user", - "user__author_profile", - "unified_document", - ) - .order_by(ordering) - ) - - return qs - - @action( - detail=True, - methods=["get"], - ) - def get_user_discussions(self, request, pk=None): - author = self.get_object() - user = author.user - - if user: - user_discussions = user.thread_set.filter(is_removed=False).order_by("-id") - else: - user_discussions = self.queryset.none() - - page = self.paginate_queryset(user_discussions) - context = self._get_user_discussion_context() - serializer = DynamicThreadSerializer( - page, - _include_fields=[ - "id", - "comment_count", - "created_by", - "created_date", - "paper", - "post", - "score", - "text", - ], - many=True, - context=context, - ) - return self.get_paginated_response(serializer.data) - - def _get_user_discussion_context(self): - context = { - "dis_dts_get_created_by": { - "_include_fields": [ - "id", - "author_profile", - ] - }, - "dis_dts_get_paper": { - "_include_fields": [ - "id", - "slug", - ] - }, - "dis_dts_get_post": { - "_include_fields": [ - "id", - "slug", - ] - }, - "usr_dus_get_author_profile": { - "_include_fields": ["id", "first_name", "last_name", "profile_image"] - }, - } - return context - - @action( - detail=True, - methods=["get"], - ) - def get_user_contributions(self, request, pk=None): - author = self.get_object() - user = author.user - - if user: - prefetch_lookups = PaperViewSet.prefetch_lookups(self) - user_paper_uploads = user.papers.filter(is_removed=False).prefetch_related( - *prefetch_lookups - ) - else: - 
user_paper_uploads = self.queryset.none() - - context = self._get_user_contributions_context() - page = self.paginate_queryset(user_paper_uploads) - serializer = DynamicPaperSerializer( - page, - _include_fields=[ - "id", - "abstract", - "boost_amount", - "file", - "hubs", - "paper_title", - "score", - "title", - "slug", - "uploaded_by", - "uploaded_date", - ], - many=True, - context=context, - ) - response = self.get_paginated_response(serializer.data) - - return response - - def _get_user_contributions_context(self): - context = { - "pap_dps_get_uploaded_by": { - "_include_fields": [ - "id", - "author_profile", - ] - }, - "usr_dus_get_author_profile": { - "_include_fields": ["id", "first_name", "last_name", "profile_image"] - }, - "doc_duds_get_hubs": { - "_include_fields": [ - "id", - "name", - "slug", - "hub_image", - ] - }, - } - return context - - @action( - detail=True, - methods=["get"], - ) - def get_user_posts(self, request, pk=None): - author = self.get_object() - user = author.user - - if user: - user_posts = user.created_posts.all().prefetch_related( - "unified_document", "purchases" - ) - else: - user_posts = self.queryset.none() - - context = self._get_user_posts_context() - page = self.paginate_queryset(user_posts) - serializer = DynamicPostSerializer( - page, - _include_fields=[ - "id", - "created_by", - "hubs", - "boost_amount", - "renderable_text", - "score", - "slug", - "title", - ], - many=True, - context=context, - ) - response = self.get_paginated_response(serializer.data) - return response - - def _get_user_posts_context(self): - context = { - "doc_dps_get_created_by": { - "_include_fields": [ - "id", - "author_profile", - ] - }, - "usr_dus_get_author_profile": { - "_include_fields": ["id", "first_name", "last_name", "profile_image"] - }, - "doc_dps_get_hubs": { - "_include_fields": [ - "id", - "name", - "slug", - "hub_image", - ] - }, - } - return context From ed2f59b3346f314ac4e4287f2b3facec716f4a5b Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 21:04:51 -0400 Subject: [PATCH 23/32] [Profile] Adding count to each coauthor to indicate coauthor frequency --- src/user/serializers.py | 26 +++++++++++++++++++++++++- src/user/views/author_views.py | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/user/serializers.py b/src/user/serializers.py index 077e9e1ad..d93ebb424 100644 --- a/src/user/serializers.py +++ b/src/user/serializers.py @@ -221,6 +221,8 @@ class Meta: class DynamicAuthorSerializer(DynamicModelFieldSerializer): + count = IntegerField(read_only=True) + class Meta: model = Author fields = "__all__" @@ -1059,12 +1061,34 @@ def get_institutions(self, author): return serializer.data def get_coauthors(self, author): + from django.db.models import Count + context = self.context _context_fields = context.get("author_profile::get_coauthors", {}) + + coauthors = ( + CoAuthor.objects.filter(author=author) + .values("coauthor", "coauthor__first_name", "coauthor__last_name") + .annotate(count=Count("coauthor")) + .order_by("-count") + ) + + # Prepare the data for serialization + coauthor_data = [ + { + "id": co["coauthor"], + "first_name": co["coauthor__first_name"], + "last_name": co["coauthor__last_name"], + "count": co["count"], + } + for co in coauthors + ] + serializer = DynamicAuthorSerializer( - [co.coauthor for co in author.coauthors.all()], + coauthor_data, context=context, many=True, **_context_fields, ) + return serializer.data diff --git a/src/user/views/author_views.py b/src/user/views/author_views.py index 
5ab9521b5..c63affc06 100644 --- a/src/user/views/author_views.py +++ b/src/user/views/author_views.py @@ -149,6 +149,7 @@ def profile(self, request, pk=None): "id", "first_name", "last_name", + "count", ] }, "coauthor::get_coauthor": { From 3a0a190b4820d77c7fc535c81ca605090a964dc9 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 22:03:29 -0400 Subject: [PATCH 24/32] [Profile] Need to do use a different approach for serializing coauthors --- src/user/serializers.py | 14 +++++++++++--- src/user/views/author_views.py | 11 +++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/user/serializers.py b/src/user/serializers.py index d93ebb424..f80a0f593 100644 --- a/src/user/serializers.py +++ b/src/user/serializers.py @@ -1068,17 +1068,26 @@ def get_coauthors(self, author): coauthors = ( CoAuthor.objects.filter(author=author) - .values("coauthor", "coauthor__first_name", "coauthor__last_name") + .values( + "coauthor", + "coauthor__first_name", + "coauthor__last_name", + "coauthor__is_verified", + "coauthor__headline", + "coauthor__description", + ) .annotate(count=Count("coauthor")) .order_by("-count") ) - # Prepare the data for serialization coauthor_data = [ { "id": co["coauthor"], "first_name": co["coauthor__first_name"], "last_name": co["coauthor__last_name"], + "is_verified": co["coauthor__is_verified"], + "headline": co["coauthor__headline"], + "description": co["coauthor__description"], "count": co["count"], } for co in coauthors @@ -1090,5 +1099,4 @@ def get_coauthors(self, author): many=True, **_context_fields, ) - return serializer.data diff --git a/src/user/views/author_views.py b/src/user/views/author_views.py index c63affc06..d61ae1698 100644 --- a/src/user/views/author_views.py +++ b/src/user/views/author_views.py @@ -150,15 +150,10 @@ def profile(self, request, pk=None): "first_name", "last_name", "count", - ] - }, - "coauthor::get_coauthor": { - "_include_fields": [ - "id", - "first_name", - "last_name", + "is_verified", "profile_image", - "paper", + "headline", + "description", ] }, }, From 97b7aba0171217f706c2745686777cd4a0cc3cd4 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 22:26:35 -0400 Subject: [PATCH 25/32] [Profile] Adding activity by year to endpoint --- src/user/serializers.py | 22 ++++++++++++++++++++++ src/user/views/author_views.py | 8 ++++++++ 2 files changed, 30 insertions(+) diff --git a/src/user/serializers.py b/src/user/serializers.py index f80a0f593..d59d3ab68 100644 --- a/src/user/serializers.py +++ b/src/user/serializers.py @@ -40,6 +40,9 @@ UserApiToken, Verdict, ) +from user.related_models.author_contribution_summary_model import ( + AuthorContributionSummary, +) from user.related_models.author_institution import AuthorInstitution from user.related_models.coauthor_model import CoAuthor from user.related_models.gatekeeper_model import Gatekeeper @@ -1046,11 +1049,24 @@ def get_coauthor(self, coauthor): class DynamicAuthorProfileSerializer(DynamicModelFieldSerializer): institutions = SerializerMethodField() coauthors = SerializerMethodField() + activity_by_year = SerializerMethodField() class Meta: model = Author fields = "__all__" + def get_activity_by_year(self, author): + context = self.context + _context_fields = context.get("author_profile::activity_by_year", {}) + + serializer = DynamicAuthorContributionSummarySerializer( + author.contribution_summaries.all(), + context=context, + many=True, + **_context_fields, + ) + return serializer.data + def get_institutions(self, author): context = 
self.context _context_fields = context.get("author_profile::get_institutions", {}) @@ -1100,3 +1116,9 @@ def get_coauthors(self, author): **_context_fields, ) return serializer.data + + +class DynamicAuthorContributionSummarySerializer(DynamicModelFieldSerializer): + class Meta: + model = AuthorContributionSummary + fields = "__all__" diff --git a/src/user/views/author_views.py b/src/user/views/author_views.py index d61ae1698..0156ffc92 100644 --- a/src/user/views/author_views.py +++ b/src/user/views/author_views.py @@ -144,6 +144,13 @@ def profile(self, request, pk=None): "institution", ] }, + "author_profile::activity_by_year": { + "_include_fields": [ + "year", + "works_count", + "citation_count", + ] + }, "author_profile::get_coauthors": { "_include_fields": [ "id", @@ -176,6 +183,7 @@ def profile(self, request, pk=None): "created_date", "country_code", "coauthors", + "activity_by_year", ), ) return Response(serializer.data, status=200) From 9c3fd381fe8cf9c4b5727b8bd61e2d70bb0707f5 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Wed, 22 May 2024 23:09:57 -0400 Subject: [PATCH 26/32] [Profile] Adding summary stats --- .../migrations/0114_merge_20240523_0259.py | 13 +++++++++++ ...ributionsummary_citation_count_and_more.py | 23 +++++++++++++++++++ .../author_contribution_summary_model.py | 4 ++-- src/user/serializers.py | 17 ++++++++++++++ src/user/views/author_views.py | 1 + 5 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 src/user/migrations/0114_merge_20240523_0259.py create mode 100644 src/user/migrations/0115_alter_authorcontributionsummary_citation_count_and_more.py diff --git a/src/user/migrations/0114_merge_20240523_0259.py b/src/user/migrations/0114_merge_20240523_0259.py new file mode 100644 index 000000000..b7bc8ebad --- /dev/null +++ b/src/user/migrations/0114_merge_20240523_0259.py @@ -0,0 +1,13 @@ +# Generated by Django 4.1 on 2024-05-23 02:59 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0113_authorcontributionsummary"), + ("user", "0113_remove_authorinstitution_is_primary"), + ] + + operations = [] diff --git a/src/user/migrations/0115_alter_authorcontributionsummary_citation_count_and_more.py b/src/user/migrations/0115_alter_authorcontributionsummary_citation_count_and_more.py new file mode 100644 index 000000000..715e1422c --- /dev/null +++ b/src/user/migrations/0115_alter_authorcontributionsummary_citation_count_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1 on 2024-05-23 03:00 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0114_merge_20240523_0259"), + ] + + operations = [ + migrations.AlterField( + model_name="authorcontributionsummary", + name="citation_count", + field=models.IntegerField(blank=True, default=0), + ), + migrations.AlterField( + model_name="authorcontributionsummary", + name="works_count", + field=models.IntegerField(blank=True, default=0), + ), + ] diff --git a/src/user/related_models/author_contribution_summary_model.py b/src/user/related_models/author_contribution_summary_model.py index 208fd7cd1..43e6808aa 100644 --- a/src/user/related_models/author_contribution_summary_model.py +++ b/src/user/related_models/author_contribution_summary_model.py @@ -14,8 +14,8 @@ class AuthorContributionSummary(models.Model): author = models.ForeignKey( "user.Author", on_delete=models.CASCADE, related_name="contribution_summaries" ) - works_count = models.IntegerField(null=True, blank=True) - 
citation_count = models.IntegerField(null=True, blank=True) + works_count = models.IntegerField(null=False, blank=True, default=0) + citation_count = models.IntegerField(null=False, blank=True, default=0) year = models.IntegerField(null=False, blank=False) class Meta: diff --git a/src/user/serializers.py b/src/user/serializers.py index d59d3ab68..4d97eec01 100644 --- a/src/user/serializers.py +++ b/src/user/serializers.py @@ -2,6 +2,7 @@ import dj_rest_auth.registration.serializers as rest_auth_serializers from django.contrib.contenttypes.models import ContentType +from django.db import models from rest_framework.serializers import ( CharField, IntegerField, @@ -1050,11 +1051,27 @@ class DynamicAuthorProfileSerializer(DynamicModelFieldSerializer): institutions = SerializerMethodField() coauthors = SerializerMethodField() activity_by_year = SerializerMethodField() + works_count = SerializerMethodField() + citation_count = SerializerMethodField() + summary_stats = SerializerMethodField() class Meta: model = Author fields = "__all__" + def get_summary_stats(self, author): + activity = author.contribution_summaries.all() + agg = activity.aggregate( + works_count=models.Sum("works_count"), + citation_count=models.Sum("citation_count"), + ) + + return { + "works_count": agg["works_count"], + "citation_count": agg["citation_count"], + "two_year_mean_citedness": author.two_year_mean_citedness, + } + def get_activity_by_year(self, author): context = self.context _context_fields = context.get("author_profile::activity_by_year", {}) diff --git a/src/user/views/author_views.py b/src/user/views/author_views.py index 0156ffc92..114d047c8 100644 --- a/src/user/views/author_views.py +++ b/src/user/views/author_views.py @@ -183,6 +183,7 @@ def profile(self, request, pk=None): "created_date", "country_code", "coauthors", + "summary_stats", "activity_by_year", ), ) From b6281b6d9862d854ce4e2e5c8a0d75144a0481a1 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Thu, 23 May 2024 21:01:16 -0400 Subject: [PATCH 27/32] [Author] Added created_source field to indicate if author was created externally (e.g. 
OpenAlex) --- src/paper/openalex_util.py | 1 + src/user/migrations/0114_merge_20240524_0059.py | 13 +++++++++++++ src/user/related_models/author_model.py | 15 +++++++++++++++ 3 files changed, 29 insertions(+) create mode 100644 src/user/migrations/0114_merge_20240524_0059.py diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index d980e2146..5914628c0 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -205,6 +205,7 @@ def process_openalex_authorships(openalex_authorships, related_paper_id): first_name=author_name_parts[0], last_name=author_name_parts[-1], openalex_ids=[author_openalex_id], + created_source=Author.SOURCE_OPENALEX, ) Wallet.objects.create(author=author) except Exception as e: diff --git a/src/user/migrations/0114_merge_20240524_0059.py b/src/user/migrations/0114_merge_20240524_0059.py new file mode 100644 index 000000000..72a8aae22 --- /dev/null +++ b/src/user/migrations/0114_merge_20240524_0059.py @@ -0,0 +1,13 @@ +# Generated by Django 4.1 on 2024-05-24 00:59 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0113_authorcontributionsummary"), + ("user", "0113_remove_authorinstitution_is_primary"), + ] + + operations = [] diff --git a/src/user/related_models/author_model.py b/src/user/related_models/author_model.py index 9bd1d0bb9..385a8544c 100644 --- a/src/user/related_models/author_model.py +++ b/src/user/related_models/author_model.py @@ -22,6 +22,12 @@ class Author(models.Model): + SOURCE_OPENALEX = "OPENALEX" + SOURCE_RESEARCHHUB = "RESEARCHHUB" + SOURCE_CHOICES = [ + (SOURCE_OPENALEX, "OpenAlex"), + (SOURCE_RESEARCHHUB, "ResearchHub"), + ] user = models.OneToOneField( User, related_name="author_profile", @@ -76,6 +82,15 @@ class Author(models.Model): max_length=20, ) + # Indicates whether the user was created through the RH platform or through another source such as OpenAlex + created_source = models.CharField( + max_length=20, + null=False, + blank=False, + choices=SOURCE_CHOICES, + default=SOURCE_RESEARCHHUB, + ) + # AKA Impact Factor. 
Derived from OpenAlex: https://en.wikipedia.org/wiki/Impact_factor two_year_mean_citedness = models.FloatField(default=0) From ee781bfad290e0332e188a9fbb33a726b1e61fc9 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Thu, 23 May 2024 21:07:57 -0400 Subject: [PATCH 28/32] [Author] Adding a field indicating the last time we fetched data from OpenAlex --- .../0115_author_created_source_and_more.py | 27 +++++++++++++++++++ src/user/related_models/author_model.py | 3 +++ 2 files changed, 30 insertions(+) create mode 100644 src/user/migrations/0115_author_created_source_and_more.py diff --git a/src/user/migrations/0115_author_created_source_and_more.py b/src/user/migrations/0115_author_created_source_and_more.py new file mode 100644 index 000000000..48e3b8b44 --- /dev/null +++ b/src/user/migrations/0115_author_created_source_and_more.py @@ -0,0 +1,27 @@ +# Generated by Django 4.1 on 2024-05-24 01:07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0114_merge_20240524_0059"), + ] + + operations = [ + migrations.AddField( + model_name="author", + name="created_source", + field=models.CharField( + choices=[("OPENALEX", "OpenAlex"), ("RESEARCHHUB", "ResearchHub")], + default="RESEARCHHUB", + max_length=20, + ), + ), + migrations.AddField( + model_name="author", + name="last_full_fetch_from_openalex", + field=models.DateTimeField(blank=True, null=True), + ), + ] diff --git a/src/user/related_models/author_model.py b/src/user/related_models/author_model.py index 385a8544c..12916401f 100644 --- a/src/user/related_models/author_model.py +++ b/src/user/related_models/author_model.py @@ -91,6 +91,9 @@ class Author(models.Model): default=SOURCE_RESEARCHHUB, ) + # Indicates the last time we did a full fetch from OpenAlex which includes all the works + last_full_fetch_from_openalex = models.DateTimeField(null=True, blank=True) + # AKA Impact Factor. 
Derived from OpenAlex: https://en.wikipedia.org/wiki/Impact_factor two_year_mean_citedness = models.FloatField(default=0) From 60f8c9b901ba9c116a69a321285830f7f7a81689 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Thu, 23 May 2024 22:36:41 -0400 Subject: [PATCH 29/32] [Author] Updating openalex fetch script to support author id as argument --- .../commands/load_works_from_openalex.py | 23 ++++++++++++------- src/user/serializers.py | 16 ++++++------- src/user/views/author_views.py | 1 - src/utils/openalex.py | 4 ++++ 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/src/researchhub_document/management/commands/load_works_from_openalex.py b/src/researchhub_document/management/commands/load_works_from_openalex.py index 37c7555f4..7a753247f 100644 --- a/src/researchhub_document/management/commands/load_works_from_openalex.py +++ b/src/researchhub_document/management/commands/load_works_from_openalex.py @@ -52,6 +52,12 @@ def add_arguments(self, parser): type=str, help="The OpenAlex ID to pull", ) + parser.add_argument( + "--openalex_author_id", + default=None, + type=str, + help="The OpenAlex Author ID to pull", + ) parser.add_argument( "--mode", default="backfill", @@ -63,6 +69,7 @@ def handle(self, *args, **kwargs): start_id = kwargs["start_id"] to_id = kwargs["to_id"] openalex_id = kwargs["openalex_id"] + openalex_author_id = kwargs["openalex_author_id"] mode = kwargs["mode"] source = kwargs["source"] batch_size = 100 @@ -98,16 +105,16 @@ def handle(self, *args, **kwargs): cursor = "*" page = 1 openalex_ids = None - openalex_types = None + openalex_types = [ + "article", + "preprint", + "review", + ] if openalex_id: print("Fetching single work with id: " + openalex_id) openalex_ids = [openalex_id] - else: - openalex_types = [ - "article", - "preprint", - "review", - ] + elif openalex_author_id: + print("Fetching full author works for author: " + openalex_author_id) while cursor: print("Processing page " + str(page)) @@ -116,8 +123,8 @@ def handle(self, *args, **kwargs): types=openalex_types, next_cursor=cursor, openalex_ids=openalex_ids, + openalex_author_id=openalex_author_id, ) process_openalex_works(works) page += 1 - return diff --git a/src/user/serializers.py b/src/user/serializers.py index 4d97eec01..3ed96a278 100644 --- a/src/user/serializers.py +++ b/src/user/serializers.py @@ -1060,15 +1060,15 @@ class Meta: fields = "__all__" def get_summary_stats(self, author): - activity = author.contribution_summaries.all() - agg = activity.aggregate( - works_count=models.Sum("works_count"), - citation_count=models.Sum("citation_count"), - ) + from django.db.models import Sum + + citation_count = author.authored_papers.aggregate( + total_citations=Sum("citations") + )["total_citations"] return { - "works_count": agg["works_count"], - "citation_count": agg["citation_count"], + "works_count": author.authored_papers.count(), + "citation_count": citation_count, "two_year_mean_citedness": author.two_year_mean_citedness, } @@ -1110,7 +1110,7 @@ def get_coauthors(self, author): "coauthor__description", ) .annotate(count=Count("coauthor")) - .order_by("-count") + .order_by("-count")[:10] ) coauthor_data = [ diff --git a/src/user/views/author_views.py b/src/user/views/author_views.py index 114d047c8..3c41847ad 100644 --- a/src/user/views/author_views.py +++ b/src/user/views/author_views.py @@ -178,7 +178,6 @@ def profile(self, request, pk=None): "google_scholar", "linkedin", "twitter", - "two_year_mean_citedness", "institutions", "created_date", "country_code", diff --git 
a/src/utils/openalex.py b/src/utils/openalex.py index c3870a2ef..af894ca4d 100644 --- a/src/utils/openalex.py +++ b/src/utils/openalex.py @@ -342,6 +342,7 @@ def get_works( batch_size=100, openalex_ids=None, source_id=None, + openalex_author_id=None, ): # Build the filter oa_filters = [] @@ -359,6 +360,9 @@ def get_works( if isinstance(openalex_ids, list): oa_filters.append(f"ids.openalex:{'|'.join(openalex_ids)}") + if openalex_author_id: + oa_filters.append(f"author.id:{openalex_author_id}") + filters = { "filter": ",".join(oa_filters), "per-page": batch_size, From ac8a7f864ce3142608c9d715735406ff39838044 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Fri, 24 May 2024 18:54:44 -0400 Subject: [PATCH 30/32] [Author] Merging migrations --- src/user/migrations/0116_merge_20240524_2253.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/user/migrations/0116_merge_20240524_2253.py diff --git a/src/user/migrations/0116_merge_20240524_2253.py b/src/user/migrations/0116_merge_20240524_2253.py new file mode 100644 index 000000000..7e65244d2 --- /dev/null +++ b/src/user/migrations/0116_merge_20240524_2253.py @@ -0,0 +1,13 @@ +# Generated by Django 4.1 on 2024-05-24 22:53 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0115_alter_authorcontributionsummary_citation_count_and_more"), + ("user", "0115_author_created_source_and_more"), + ] + + operations = [] From b393ee592d6440148dc69c21e23e1f50dd3aaf97 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Fri, 24 May 2024 20:52:14 -0400 Subject: [PATCH 31/32] [Author] Saving timestamp for when full works were fetched for author --- .../management/commands/load_works_from_openalex.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/researchhub_document/management/commands/load_works_from_openalex.py b/src/researchhub_document/management/commands/load_works_from_openalex.py index 7a753247f..be355d35f 100644 --- a/src/researchhub_document/management/commands/load_works_from_openalex.py +++ b/src/researchhub_document/management/commands/load_works_from_openalex.py @@ -1,8 +1,10 @@ from django.core.management.base import BaseCommand +from django.utils import timezone from paper.openalex_util import process_openalex_works from paper.related_models.paper_model import Paper from topic.models import Topic +from user.related_models.author_model import Author from utils.openalex import OpenAlex # To pull papers from bioRxiv use source param: @@ -128,3 +130,10 @@ def handle(self, *args, **kwargs): process_openalex_works(works) page += 1 + + if openalex_author_id: + print("Finished fetching all works for author: " + openalex_author_id) + full_openalex_id = "https://openalex.org/" + openalex_author_id + author = Author.objects.get(openalex_ids__contains=[full_openalex_id]) + author.last_full_fetch_from_openalex = timezone.now() + author.save() From b710603d4d5526e1603a2026013ed6920ec6dc49 Mon Sep 17 00:00:00 2001 From: Kobe Attias Date: Sat, 25 May 2024 07:36:26 -0400 Subject: [PATCH 32/32] [OpenAlex] Remove improperly placed transaction block when processing papers --- src/paper/openalex_util.py | 82 ++++++++++++------- .../commands/load_works_from_openalex.py | 7 +- src/utils/openalex.py | 6 ++ 3 files changed, 63 insertions(+), 32 deletions(-) diff --git a/src/paper/openalex_util.py b/src/paper/openalex_util.py index 5914628c0..4af4efbc5 100644 --- a/src/paper/openalex_util.py +++ b/src/paper/openalex_util.py @@ -66,7 +66,15 @@ def process_openalex_works(works): 
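A quick usage sketch for the author backfill flow added in PATCH 29 and PATCH 31 above (an editor's illustration, not part of the series; the OpenAlex author id is a made-up placeholder):

    # Fetch all works for a single author, then stamp the author row.
    python manage.py load_works_from_openalex --openalex_author_id A5075662890

    # The command records when the full sync happened, which can be checked from a shell:
    # Author.objects.get(
    #     openalex_ids__contains=["https://openalex.org/A5075662890"]
    # ).last_full_fetch_from_openalex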
# Split works into two buckets: create and update create_papers = [] update_papers = [] + for work in works: + # When fetched in batch, OpneAlex will truncate authors beyond 100. + # If this is the case, we need to fetch the full work + # https://docs.openalex.org/api-entities/authors/limitations + if work.get("is_authors_truncated", False): + just_id = work.get("id").split("/")[-1] + work = open_alex.get_work(just_id) + doi = work.get("doi") if doi is None: print(f"No Doi for result: {work.get('id')}") @@ -85,45 +93,46 @@ def process_openalex_works(works): paper_to_openalex_data = {} # Create new papers - with transaction.atomic(): - for work in create_papers: - _work = copy.deepcopy(work) - - ( - data, - openalex_concepts, - openalex_topics, - ) = open_alex.build_paper_from_openalex_work(_work) + for work in create_papers: + _work = copy.deepcopy(work) + ( + data, + openalex_concepts, + openalex_topics, + ) = open_alex.build_paper_from_openalex_work(_work) - paper = Paper(**data) + paper = Paper(**data) - # Validate paper - try: - paper.clean_fields() - paper.clean() - except ValidationError as e: - sentry.log_error( - e, - message=f"Failed to validate paper: {paper.doi}, {work.get('id')}", - ) - continue + # Validate paper + try: + paper.clean_fields() + paper.clean() + except ValidationError as e: + sentry.log_error( + e, + message=f"Failed to validate paper: {paper.doi}, {work.get('id')}", + ) + continue - try: - paper.save() - except IntegrityError as e: - sentry.log_error( - e, message=f"Failed to save paper, DOI already exists: {paper.doi}" - ) - except Exception as e: - sentry.log_error( - e, message=f"Failed to save paper, unexpected error: {paper.doi}" - ) + try: + paper.save() + # Succeessfully saved paper, add to map paper_to_openalex_data[paper.id] = { "openalex_concepts": openalex_concepts, "openalex_topics": openalex_topics, "openalex_work": work, } + except IntegrityError as e: + sentry.log_error( + e, message=f"Failed to save paper, DOI already exists: {paper.doi}" + ) + continue + except Exception as e: + sentry.log_error( + e, message=f"Failed to save paper, unexpected error: {paper.doi}" + ) + continue # Prepare papers for batch update for existing_paper, work in update_papers: @@ -167,7 +176,18 @@ def process_openalex_works(works): ) openalex_authorships = work.get("authorships") - process_openalex_authorships(openalex_authorships, paper_id) + if openalex_authorships and paper_id: + try: + process_openalex_authorships(openalex_authorships, paper_id) + except Exception as e: + sentry.log_error( + e, message=f"Failed to process authorships for paper_id: {paper_id}" + ) + else: + sentry.log_error( + None, + message=f"Authorships data is missing or paper_id is None for work: {work.get('id')}", + ) def process_openalex_authorships(openalex_authorships, related_paper_id): diff --git a/src/researchhub_document/management/commands/load_works_from_openalex.py b/src/researchhub_document/management/commands/load_works_from_openalex.py index be355d35f..f1cc9e21a 100644 --- a/src/researchhub_document/management/commands/load_works_from_openalex.py +++ b/src/researchhub_document/management/commands/load_works_from_openalex.py @@ -112,9 +112,14 @@ def handle(self, *args, **kwargs): "preprint", "review", ] + if openalex_id: print("Fetching single work with id: " + openalex_id) - openalex_ids = [openalex_id] + work = OA.get_work( + openalex_id=openalex_id, + ) + + process_openalex_works([work]) elif openalex_author_id: print("Fetching full author works for author: " + 
openalex_author_id) diff --git a/src/utils/openalex.py b/src/utils/openalex.py index af894ca4d..f74bbe399 100644 --- a/src/utils/openalex.py +++ b/src/utils/openalex.py @@ -334,6 +334,12 @@ def get_authors( cursor = next_cursor if next_cursor != "*" else None return authors, cursor + def get_work( + self, + openalex_id=None, + ): + return self._get(f"works/{openalex_id}") + def get_works( self, since_date=None,
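As a closing illustration, the new OpenAlex client helpers could be exercised directly from a Django shell. This is an editor's sketch rather than part of the series: the ids are placeholders and the OpenAlex() constructor is assumed to need no required arguments.

    from utils.openalex import OpenAlex

    oa = OpenAlex()

    # Single work by id, the call PATCH 32 uses when a batched work arrives
    # with truncated authorships.
    work = oa.get_work(openalex_id="W2741809807")

    # One page of works for a single author, as used by the updated
    # load_works_from_openalex management command.
    works, next_cursor = oa.get_works(
        openalex_author_id="A5075662890",
        types=["article", "preprint", "review"],
    )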