Skip to content

Commit

Permalink
tasks for updating aggregate ratings and total reviews on add-ons
Browse files Browse the repository at this point in the history
  • Loading branch information
jbalogh committed Jun 23, 2010
1 parent 4fb8bb2 commit 2c4525a
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 5 deletions.
11 changes: 11 additions & 0 deletions apps/reviews/cron.py
Expand Up @@ -4,6 +4,7 @@

import cronjobs
from amo.utils import chunked
from addons.models import Addon

from . import tasks
from .models import Review
Expand All @@ -18,3 +19,13 @@ def reviews_denorm():
with establish_connection() as conn:
for chunk in chunked(pairs, 50):
tasks.update_denorm.apply_async(args=chunk, connection=conn)


@cronjobs.register
def addon_reviews_ratings():
"""Update all add-on total_reviews and average/bayesian ratings."""
addons = Addon.objects.values_list('id', flat=True)
with establish_connection() as conn:
for chunk in chunked(addons, 100):
tasks.cron_review_aggregate.apply_async(args=chunk,
connection=conn)
7 changes: 6 additions & 1 deletion apps/reviews/models.py
Expand Up @@ -4,7 +4,7 @@
from django.utils import translation

import amo.models
from translations.fields import TranslatedField, TranslatedFieldMixin
from translations.fields import TranslatedField
from translations.models import Translation
from users.models import UserProfile
from versions.models import Version
Expand All @@ -16,6 +16,10 @@ def get_query_set(self):
qs = super(ReviewManager, self).get_query_set()
return qs.transform(Review.transformer)

def valid(self):
"""Get all reviews with rating > 0 that aren't replies."""
return self.filter(reply_to=None, rating__gt=0)


class Review(amo.models.ModelBase):
addon = models.ForeignKey('addons.Addon', related_name='_reviews')
Expand Down Expand Up @@ -77,6 +81,7 @@ def post_delete(sender, instance, **kwargs):
from . import tasks
pair = instance.addon_id, instance.user_id
tasks.update_denorm(pair)
tasks.addon_review_aggregates.delay(instance.addon_id)

@staticmethod
def transformer(reviews):
Expand Down
54 changes: 54 additions & 0 deletions apps/reviews/tasks.py
@@ -1,7 +1,11 @@
import logging

from django.db.models import Count, Avg, Sum

from celery.decorators import task
import caching.base as caching

from addons.models import Addon
from .models import Review

log = logging.getLogger('z.task')
Expand All @@ -18,10 +22,60 @@ def update_denorm(*pairs, **kw):
for addon, user in pairs:
reviews = list(Review.uncached.filter(addon=addon, user=user)
.filter(reply_to=None).order_by('created'))
if not reviews:
continue

for idx, review in enumerate(reviews):
review.previous_count = idx
review.is_latest = False
reviews[-1].is_latest = True

for review in reviews:
review.save()


@task
def addon_review_aggregates(*addons, **kw):
log.info('[%s@%s] Updating total reviews.' %
(len(addons), addon_review_aggregates.rate_limit))
stats = (Review.objects.valid().filter(addon__in=addons)
.values_list('addon').annotate(Count('addon')))
for addon, count in stats:
Addon.objects.filter(id=addon).update(total_reviews=count)

log.info('[%s@%s] Updating average ratings.' %
(len(addons), addon_review_aggregates.rate_limit))
stats = (Review.objects.valid().filter(addon__in=addons)
.values_list('addon').annotate(Avg('rating')))
for addon, avg in stats:
Addon.objects.filter(id=addon).update(average_rating=avg)

# Delay bayesian calculations to avoid slave lag.
addon_bayesian_rating.apply_async(args=addons, countdown=5)


@task
def addon_bayesian_rating(*addons, **kw):
log.info('[%s@%s] Updating bayesian ratings.' %
(len(addons), addon_bayesian_rating.rate_limit))
f = lambda: Addon.objects.aggregate(rating=Avg('average_rating'),
reviews=Avg('total_reviews'))
avg = caching.cached(f, 'task.bayes.avg', 60 * 60 * 60)
qs = (Addon.uncached.filter(id__in=addons, total_reviews__gt=0)
.annotate(sum_ratings=Sum('_reviews__rating')))
for addon in qs:
if addon.total_reviews and addon.sum_ratings:
num = (avg['reviews'] * avg['rating']) + addon.sum_ratings
denom = avg['reviews'] + addon.total_reviews
addon.bayesian_rating = float(num) / denom
else:
addon.bayesian_rating = 0
addon.save()


@task(rate_limit='10/m')
def cron_review_aggregate(*addons, **kw):
log.info('[%s@%s] Updating addon review aggregates.' %
(len(addons), cron_review_aggregate.rate_limit))
# We have this redundant task to get rate limiting for big chunks.
addon_review_aggregates(*addons)
22 changes: 18 additions & 4 deletions apps/reviews/tests/test_cron.py
Expand Up @@ -2,6 +2,7 @@
from nose.tools import eq_
import test_utils

from addons.models import Addon
from reviews import cron, tasks
from reviews.models import Review

Expand All @@ -11,6 +12,8 @@ class TestDenormalization(test_utils.TestCase):

def setUp(self):
Review.objects.update(is_latest=True, previous_count=0)
Addon.objects.update(total_reviews=0, average_rating=0,
bayesian_rating=0)

def _check(self):
reviews = list(Review.objects.order_by('created'))
Expand All @@ -21,11 +24,14 @@ def _check(self):
r.is_latest = True
r.previous_count = len(reviews) - 1

@mock.patch('reviews.tasks.update_denorm.apply_async')
def test_denorms(self, async):
def _check_addon(self):
addon = Addon.objects.get(id=72)
eq_(addon.total_reviews, 3)
eq_(addon.average_rating, '2.3333')
eq_(addon.bayesian_rating, 2.25)

def test_denorms(self):
cron.reviews_denorm()
kwargs = async.call_args[1]
tasks.update_denorm(*kwargs['args'])
self._check()

def test_denorm_on_save(self):
Expand All @@ -37,3 +43,11 @@ def test_denorm_on_delete(self):
r = Review.objects.order_by('created')[1]
r.delete()
self._check()

def test_addon_review_aggregates(self):
tasks.addon_review_aggregates(72, 3)
self._check_addon()

def test_cron_review_aggregate(self):
cron.addon_reviews_ratings()
self._check_addon()

0 comments on commit 2c4525a

Please sign in to comment.