Merge pull request #1608 from ResearchHub/author/overview

[Author] Author overview endpoint returns top works
ResearchHub · May 29, 2024 · fe7a0f8
2 parents 78fd250 + 6cadebf
commit fe7a0f8
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 0 deletions.
diff --git a/src/user/serializers.py b/src/user/serializers.py
@@ -1054,11 +1054,48 @@ class DynamicAuthorProfileSerializer(DynamicModelFieldSerializer):
     works_count = SerializerMethodField()
     citation_count = SerializerMethodField()
     summary_stats = SerializerMethodField()
+    open_access_pct = SerializerMethodField()
+    achievements = SerializerMethodField()
+    headline = SerializerMethodField()
 
     class Meta:
         # Binds the serializer to the Author model and declares every field
         # via "__all__" (presumably narrowed per request by the dynamic
         # field serializer base class - confirm against its implementation).
         model = Author
         fields = "__all__"
 
+    def get_headline(self, author):
+        from collections import Counter
+
+        if author.headline:
+            return author.headline
+
+        all_topics = []
+        authored_papers = author.authored_papers.all()
+
+        for p in authored_papers:
+            unified_document = p.unified_document
+            all_topics += list(unified_document.topics.all())
+
+        topic_counts = Counter(all_topics)
+
+        # Sort topics by frequency
+        sorted_topics = sorted(topic_counts.items(), key=lambda x: x[1], reverse=True)
+
+        # Extract topics from sorted list
+        sorted_topics = [topic for topic, count in sorted_topics]
+
+        return "Author with expertise in " + sorted_topics[0].display_name
+
+    def get_achievements(self, author):
+        summary_stats = self.get_summary_stats(author)
+        open_access_pct = self.get_open_access_pct(author)
+        achivements = []
+        if summary_stats["citation_count"] >= 1:
+            achivements.append("CITED_AUTHOR")
+        if open_access_pct >= 0.5:
+            achivements.append("OPEN_ACCESS")
+
+        return achivements
+
     def get_summary_stats(self, author):
         from django.db.models import Sum
 
@@ -1084,6 +1121,13 @@ def get_activity_by_year(self, author):
         )
         return serializer.data
 
+    def get_open_access_pct(self, author):
+        total_paper_count = author.authored_papers.count()
+        return (
+            author.authored_papers.filter(is_open_access=True).count()
+            / total_paper_count
+        )
+
     def get_institutions(self, author):
         context = self.context
         _context_fields = context.get("author_profile::get_institutions", {})

diff --git a/src/user/tests/test_views.py b/src/user/tests/test_views.py
@@ -1,7 +1,12 @@
+import json
+from unittest.mock import patch
+
 from django.test import TestCase
 from rest_framework.test import APITestCase
 
+from paper.openalex_util import process_openalex_works
 from user.tests.helpers import create_random_authenticated_user, create_user
+from utils.openalex import OpenAlex
 from utils.test_helpers import (
     get_authenticated_get_response,
     get_authenticated_patch_response,
@@ -33,6 +38,34 @@ def test_set_has_seen_first_coin_modal(self):
         user.refresh_from_db()
         self.assertTrue(user.has_seen_first_coin_modal)
 
+    @patch.object(OpenAlex, "get_authors")
+    def test_author_overview(self, mock_get_authors):
+        from paper.models import Paper
+
+        works = None
+        with open("./paper/tests/openalex_works.json", "r") as file:
+            response = json.load(file)
+            works = response.get("results")
+
+        with open("./paper/tests/openalex_authors.json", "r") as file:
+            mock_data = json.load(file)
+            mock_get_authors.return_value = (mock_data["results"], None)
+
+            process_openalex_works(works)
+
+            dois = [work.get("doi") for work in works]
+            dois = [doi.replace("https://doi.org/", "") for doi in dois]
+
+            papers = Paper.objects.filter(doi__in=dois)
+            first_author = papers.first().authors.first()
+
+            url = f"/api/author/{first_author.id}/overview/"
+            response = self.client.get(
+                url,
+            )
+
+            self.assertGreater(response.data["count"], 0)
+
     def get_actions_response(self, user):
         """Request the actions endpoint of ``user`` via get_authenticated_get_response."""
         return get_authenticated_get_response(
             user,
             f"/api/user/{user.id}/actions/",
         )

diff --git a/src/user/views/author_views.py b/src/user/views/author_views.py
@@ -47,7 +47,16 @@
 )
 from researchhub_comment.models import RhCommentModel
 from researchhub_document.related_models.researchhub_post_model import ResearchhubPost
+from researchhub_document.related_models.researchhub_unified_document_model import (
+    ResearchhubUnifiedDocument,
+)
 from researchhub_document.serializers import DynamicPostSerializer
+from researchhub_document.serializers.researchhub_unified_document_serializer import (
+    DynamicUnifiedDocumentSerializer,
+)
+from researchhub_document.views.researchhub_unified_document_views import (
+    ResearchhubUnifiedDocumentViewSet,
+)
 from review.models.review_model import Review
 from user.filters import AuthorFilter, UserFilter
 from user.models import Author, Follow, Major, University, User, UserApiToken
@@ -184,6 +193,8 @@ def profile(self, request, pk=None):
                 "coauthors",
                 "summary_stats",
                 "activity_by_year",
+                "open_access_pct",
+                "achievements",
             ),
         )
         return Response(serializer.data, status=200)
@@ -648,6 +659,55 @@ def _get_contribution_context(self, filter_by_user_id):
         }
         return context
 
+    @action(
+        detail=True,
+        methods=["get"],
+    )
+    def overview(self, request, pk=None):
+        author = self.get_object()
+
+        # We want to only return a few documents for the overview section
+        NUM_DOCUMENTS_TO_FETCH = 4
+
+        # Fetch the authored papers and order by citations
+        authored_doc_ids = list(
+            author.authored_papers.filter(is_removed=False)
+            .order_by("-citations")
+            .values_list("unified_document_id", flat=True)
+        )[:NUM_DOCUMENTS_TO_FETCH]
+
+        documents = ResearchhubUnifiedDocument.objects.filter(id__in=authored_doc_ids)
+
+        # Maintain the ordering authored papers
+        documents_ordered = sorted(
+            documents, key=lambda x: authored_doc_ids.index(x.id)
+        )
+
+        context = ResearchhubUnifiedDocumentViewSet._get_serializer_context(self)
+        page = self.paginate_queryset(documents_ordered)
+
+        serializer = DynamicUnifiedDocumentSerializer(
+            page,
+            _include_fields=[
+                "id",
+                "created_date",
+                "documents",
+                "document_filter",
+                "document_type",
+                "hot_score",
+                "hubs",
+                "reviews",
+                "score",
+                "fundraise",
+            ],
+            many=True,
+            context=context,
+        )
+
+        serializer_data = serializer.data
+
+        return self.get_paginated_response(serializer_data)
+
     @action(
         detail=True,
         methods=["get"],