Skip to content

Commit

Permalink
Merge pull request #1608 from ResearchHub/author/overview
Browse files Browse the repository at this point in the history
[Author] Author overview endpoint returns top works
  • Loading branch information
yattias committed May 29, 2024
2 parents 78fd250 + 6cadebf commit fe7a0f8
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 0 deletions.
44 changes: 44 additions & 0 deletions src/user/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1054,11 +1054,48 @@ class DynamicAuthorProfileSerializer(DynamicModelFieldSerializer):
works_count = SerializerMethodField()
citation_count = SerializerMethodField()
summary_stats = SerializerMethodField()
open_access_pct = SerializerMethodField()
achievements = SerializerMethodField()
headline = SerializerMethodField()

class Meta:
model = Author
fields = "__all__"

def get_headline(self, author):
    """Return the author's headline, deriving a default one when unset.

    If ``author.headline`` is truthy it is returned unchanged. Otherwise
    the topics attached to the unified documents of the author's papers
    are tallied and the most frequent topic is used to build a default
    headline string.

    Returns ``None`` when no headline is stored and no topic can be found
    (for example, the author has no papers), instead of raising
    ``IndexError``.
    """
    from collections import Counter

    if author.headline:
        return author.headline

    topic_counts = Counter()
    for paper in author.authored_papers.all():
        unified_document = paper.unified_document
        # Skip papers that have no linked unified document rather than
        # failing on the attribute access below.
        if unified_document is None:
            continue
        topic_counts.update(unified_document.topics.all())

    if not topic_counts:
        return None

    # most_common keeps first-seen order among equal counts, which matches
    # a stable descending sort by frequency.
    top_topic, _ = topic_counts.most_common(1)[0]
    return "Author with expertise in " + top_topic.display_name

def get_achievements(self, author):
    """Return the list of achievement codes earned by ``author``.

    ``"CITED_AUTHOR"`` is included when the ``citation_count`` entry of
    ``get_summary_stats`` is at least 1, and ``"OPEN_ACCESS"`` when
    ``get_open_access_pct`` is at least 0.5. The codes are returned in
    that order.
    """
    summary_stats = self.get_summary_stats(author)
    open_access_pct = self.get_open_access_pct(author)

    # Treat a missing/None value as zero so the comparisons cannot raise
    # TypeError (e.g. an aggregate over an author with no papers).
    citation_count = summary_stats.get("citation_count") or 0
    open_access_pct = open_access_pct or 0

    achievements = []
    if citation_count >= 1:
        achievements.append("CITED_AUTHOR")
    if open_access_pct >= 0.5:
        achievements.append("OPEN_ACCESS")

    return achievements

def get_summary_stats(self, author):
from django.db.models import Sum

Expand All @@ -1084,6 +1121,13 @@ def get_activity_by_year(self, author):
)
return serializer.data

def get_open_access_pct(self, author):
    """Return the share of the author's papers that are open access.

    Despite the ``pct`` name, the value is a fraction between 0.0 and 1.0
    (open-access paper count divided by total paper count).

    Returns ``0.0`` when the author has no papers, instead of raising
    ``ZeroDivisionError``.
    """
    authored_papers = author.authored_papers
    total_paper_count = authored_papers.count()
    if total_paper_count == 0:
        return 0.0

    open_access_count = authored_papers.filter(is_open_access=True).count()
    return open_access_count / total_paper_count

def get_institutions(self, author):
context = self.context
_context_fields = context.get("author_profile::get_institutions", {})
Expand Down
33 changes: 33 additions & 0 deletions src/user/tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import json
from unittest.mock import patch

from django.test import TestCase
from rest_framework.test import APITestCase

from paper.openalex_util import process_openalex_works
from user.tests.helpers import create_random_authenticated_user, create_user
from utils.openalex import OpenAlex
from utils.test_helpers import (
get_authenticated_get_response,
get_authenticated_patch_response,
Expand Down Expand Up @@ -33,6 +38,34 @@ def test_set_has_seen_first_coin_modal(self):
user.refresh_from_db()
self.assertTrue(user.has_seen_first_coin_modal)

@patch.object(OpenAlex, "get_authors")
def test_author_overview(self, mock_get_authors):
    """The author overview endpoint returns at least one document.

    Loads OpenAlex works and authors from JSON fixtures, feeds the works
    through ``process_openalex_works`` (with ``OpenAlex.get_authors``
    patched to return the authors fixture), then requests the overview
    endpoint for the first author of the first matching paper.
    """
    from paper.models import Paper

    with open("./paper/tests/openalex_works.json", "r") as file:
        works = json.load(file).get("results")

    with open("./paper/tests/openalex_authors.json", "r") as file:
        mock_data = json.load(file)
        mock_get_authors.return_value = (mock_data["results"], None)

    process_openalex_works(works)

    # Works without a DOI cannot be looked up by DOI; skip them instead of
    # calling .replace() on None.
    dois = [
        work["doi"].replace("https://doi.org/", "")
        for work in works
        if work.get("doi")
    ]

    papers = Paper.objects.filter(doi__in=dois)

    # Fail with a clear message if the fixtures produced no paper/author,
    # rather than with an AttributeError on None.
    first_paper = papers.first()
    self.assertIsNotNone(first_paper)
    first_author = first_paper.authors.first()
    self.assertIsNotNone(first_author)

    url = f"/api/author/{first_author.id}/overview/"
    response = self.client.get(
        url,
    )

    # Check the status first so a failing request is reported as such and
    # not as a missing "count" key.
    self.assertEqual(response.status_code, 200)
    self.assertGreater(response.data["count"], 0)

def get_actions_response(self, user):
    """Issue an authenticated GET to the actions endpoint of ``user``.

    The request is made on behalf of ``user`` and the helper's response
    is returned unchanged.
    """
    actions_endpoint = f"/api/user/{user.id}/actions/"
    response = get_authenticated_get_response(user, actions_endpoint)
    return response
Expand Down
60 changes: 60 additions & 0 deletions src/user/views/author_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,16 @@
)
from researchhub_comment.models import RhCommentModel
from researchhub_document.related_models.researchhub_post_model import ResearchhubPost
from researchhub_document.related_models.researchhub_unified_document_model import (
ResearchhubUnifiedDocument,
)
from researchhub_document.serializers import DynamicPostSerializer
from researchhub_document.serializers.researchhub_unified_document_serializer import (
DynamicUnifiedDocumentSerializer,
)
from researchhub_document.views.researchhub_unified_document_views import (
ResearchhubUnifiedDocumentViewSet,
)
from review.models.review_model import Review
from user.filters import AuthorFilter, UserFilter
from user.models import Author, Follow, Major, University, User, UserApiToken
Expand Down Expand Up @@ -184,6 +193,8 @@ def profile(self, request, pk=None):
"coauthors",
"summary_stats",
"activity_by_year",
"open_access_pct",
"achievements",
),
)
return Response(serializer.data, status=200)
Expand Down Expand Up @@ -648,6 +659,55 @@ def _get_contribution_context(self, filter_by_user_id):
}
return context

@action(
    detail=True,
    methods=["get"],
)
def overview(self, request, pk=None):
    """Return the author's most-cited documents for the overview section.

    Looks up the unified documents of the author's non-removed papers,
    keeps the few with the highest ``citations`` value (most cited first)
    and returns them serialized in a paginated response.
    """
    author = self.get_object()

    # We want to only return a few documents for the overview section
    NUM_DOCUMENTS_TO_FETCH = 4

    # Fetch the authored papers ordered by citations. The queryset is
    # sliced *before* it is turned into a list so the limit is applied by
    # the database instead of loading every document id first.
    authored_doc_ids = list(
        author.authored_papers.filter(is_removed=False)
        .order_by("-citations")
        .values_list("unified_document_id", flat=True)[:NUM_DOCUMENTS_TO_FETCH]
    )

    documents = ResearchhubUnifiedDocument.objects.filter(id__in=authored_doc_ids)

    # Maintain the ordering of the authored papers: an ``id__in`` lookup
    # does not preserve the order of the id list.
    documents_ordered = sorted(
        documents, key=lambda x: authored_doc_ids.index(x.id)
    )

    # Reuse the unified-document viewset's serializer context.
    context = ResearchhubUnifiedDocumentViewSet._get_serializer_context(self)
    page = self.paginate_queryset(documents_ordered)

    serializer = DynamicUnifiedDocumentSerializer(
        page,
        _include_fields=[
            "id",
            "created_date",
            "documents",
            "document_filter",
            "document_type",
            "hot_score",
            "hubs",
            "reviews",
            "score",
            "fundraise",
        ],
        many=True,
        context=context,
    )

    serializer_data = serializer.data

    return self.get_paginated_response(serializer_data)

@action(
detail=True,
methods=["get"],
Expand Down

0 comments on commit fe7a0f8

Please sign in to comment.