diff --git a/apps/reviews/cron.py b/apps/reviews/cron.py index a9a2d4709f5..f50b43b7d6f 100644 --- a/apps/reviews/cron.py +++ b/apps/reviews/cron.py @@ -4,6 +4,7 @@ import cronjobs from amo.utils import chunked +from addons.models import Addon from . import tasks from .models import Review @@ -18,3 +19,13 @@ def reviews_denorm(): with establish_connection() as conn: for chunk in chunked(pairs, 50): tasks.update_denorm.apply_async(args=chunk, connection=conn) + + +@cronjobs.register +def addon_reviews_ratings(): + """Update all add-on total_reviews and average/bayesian ratings.""" + addons = Addon.objects.values_list('id', flat=True) + with establish_connection() as conn: + for chunk in chunked(addons, 100): + tasks.cron_review_aggregate.apply_async(args=chunk, + connection=conn) diff --git a/apps/reviews/models.py b/apps/reviews/models.py index 4714a07edbd..0616ce4a553 100644 --- a/apps/reviews/models.py +++ b/apps/reviews/models.py @@ -4,7 +4,7 @@ from django.utils import translation import amo.models -from translations.fields import TranslatedField, TranslatedFieldMixin +from translations.fields import TranslatedField from translations.models import Translation from users.models import UserProfile from versions.models import Version @@ -16,6 +16,10 @@ def get_query_set(self): qs = super(ReviewManager, self).get_query_set() return qs.transform(Review.transformer) + def valid(self): + """Get all reviews with rating > 0 that aren't replies.""" + return self.filter(reply_to=None, rating__gt=0) + class Review(amo.models.ModelBase): addon = models.ForeignKey('addons.Addon', related_name='_reviews') @@ -77,6 +81,7 @@ def post_delete(sender, instance, **kwargs): from . import tasks pair = instance.addon_id, instance.user_id tasks.update_denorm(pair) + tasks.addon_review_aggregates.delay(instance.addon_id) @staticmethod def transformer(reviews): diff --git a/apps/reviews/tasks.py b/apps/reviews/tasks.py index c61f0890db4..9de72b92774 100644 --- a/apps/reviews/tasks.py +++ b/apps/reviews/tasks.py @@ -1,7 +1,11 @@ import logging +from django.db.models import Count, Avg, Sum + from celery.decorators import task +import caching.base as caching +from addons.models import Addon from .models import Review log = logging.getLogger('z.task') @@ -18,6 +22,9 @@ def update_denorm(*pairs, **kw): for addon, user in pairs: reviews = list(Review.uncached.filter(addon=addon, user=user) .filter(reply_to=None).order_by('created')) + if not reviews: + continue + for idx, review in enumerate(reviews): review.previous_count = idx review.is_latest = False @@ -25,3 +32,50 @@ def update_denorm(*pairs, **kw): for review in reviews: review.save() + + +@task +def addon_review_aggregates(*addons, **kw): + log.info('[%s@%s] Updating total reviews.' % + (len(addons), addon_review_aggregates.rate_limit)) + stats = (Review.objects.valid().filter(addon__in=addons) + .values_list('addon').annotate(Count('addon'))) + for addon, count in stats: + Addon.objects.filter(id=addon).update(total_reviews=count) + + log.info('[%s@%s] Updating average ratings.' % + (len(addons), addon_review_aggregates.rate_limit)) + stats = (Review.objects.valid().filter(addon__in=addons) + .values_list('addon').annotate(Avg('rating'))) + for addon, avg in stats: + Addon.objects.filter(id=addon).update(average_rating=avg) + + # Delay bayesian calculations to avoid slave lag. + addon_bayesian_rating.apply_async(args=addons, countdown=5) + + +@task +def addon_bayesian_rating(*addons, **kw): + log.info('[%s@%s] Updating bayesian ratings.' % + (len(addons), addon_bayesian_rating.rate_limit)) + f = lambda: Addon.objects.aggregate(rating=Avg('average_rating'), + reviews=Avg('total_reviews')) + avg = caching.cached(f, 'task.bayes.avg', 60 * 60 * 60) + qs = (Addon.uncached.filter(id__in=addons, total_reviews__gt=0) + .annotate(sum_ratings=Sum('_reviews__rating'))) + for addon in qs: + if addon.total_reviews and addon.sum_ratings: + num = (avg['reviews'] * avg['rating']) + addon.sum_ratings + denom = avg['reviews'] + addon.total_reviews + addon.bayesian_rating = float(num) / denom + else: + addon.bayesian_rating = 0 + addon.save() + + +@task(rate_limit='10/m') +def cron_review_aggregate(*addons, **kw): + log.info('[%s@%s] Updating addon review aggregates.' % + (len(addons), cron_review_aggregate.rate_limit)) + # We have this redundant task to get rate limiting for big chunks. + addon_review_aggregates(*addons) diff --git a/apps/reviews/tests/test_cron.py b/apps/reviews/tests/test_cron.py index 34661ee16ba..689f35db103 100644 --- a/apps/reviews/tests/test_cron.py +++ b/apps/reviews/tests/test_cron.py @@ -2,6 +2,7 @@ from nose.tools import eq_ import test_utils +from addons.models import Addon from reviews import cron, tasks from reviews.models import Review @@ -11,6 +12,8 @@ class TestDenormalization(test_utils.TestCase): def setUp(self): Review.objects.update(is_latest=True, previous_count=0) + Addon.objects.update(total_reviews=0, average_rating=0, + bayesian_rating=0) def _check(self): reviews = list(Review.objects.order_by('created')) @@ -21,11 +24,14 @@ def _check(self): r.is_latest = True r.previous_count = len(reviews) - 1 - @mock.patch('reviews.tasks.update_denorm.apply_async') - def test_denorms(self, async): + def _check_addon(self): + addon = Addon.objects.get(id=72) + eq_(addon.total_reviews, 3) + eq_(addon.average_rating, '2.3333') + eq_(addon.bayesian_rating, 2.25) + + def test_denorms(self): cron.reviews_denorm() - kwargs = async.call_args[1] - tasks.update_denorm(*kwargs['args']) self._check() def test_denorm_on_save(self): @@ -37,3 +43,11 @@ def test_denorm_on_delete(self): r = Review.objects.order_by('created')[1] r.delete() self._check() + + def test_addon_review_aggregates(self): + tasks.addon_review_aggregates(72, 3) + self._check_addon() + + def test_cron_review_aggregate(self): + cron.addon_reviews_ratings() + self._check_addon()