
Commit 5e4c144
Move the vectors into the cache: this reduces DB queries and gives more predictable behavior
Fantomas42 committed May 5, 2015
1 parent 631aef0 commit 5e4c144
Showing 5 changed files with 39 additions and 32 deletions.
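
The heart of the change is a get-or-build lookup against Django's cache framework, replacing the old scheme in which every access re-counted the queryset (generate_key) to decide whether the vectors were stale. A minimal sketch of the pattern, with an illustrative helper name (the commit itself goes through zinnia's get_comparison_cache() and the key 'vectors'):

    from django.core.cache import caches

    def get_or_build(build, key='vectors'):
        # Return the cached value for `key`, building and storing it on a miss.
        cache = caches['default']
        value = cache.get(key)
        if value is None:
            value = build()  # the single expensive pass over the queryset
            cache.set(key, value)
        return value

Building once and serving every later read from the cache is what the lowered query counts in the updated tests below reflect.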
40 changes: 17 additions & 23 deletions zinnia/comparison.py
@@ -76,9 +76,6 @@ class VectorBuilder(object):
     """
 
     def __init__(self, queryset, fields):
-        self.key = ''
-        self._columns = []
-        self._dataset = {}
         self.clustered_model = ClusteredModel(queryset, fields)
 
     def build_dataset(self):
@@ -98,40 +95,37 @@ def build_dataset(self):
                 words_item_total[word] += 1
             data[instance] = words_item_total
 
-        self._dataset = {}
-        self._columns = list(words_total.keys())
+        dataset = {}
+        columns = list(words_total.keys())
         for instance in data.keys():
-            self._dataset[instance] = [data[instance].get(word, 0)
-                                       for word in self._columns]
-        self.key = self.generate_key()
-
-    def generate_key(self):
-        """
-        Generate key for this list of vectors.
-        """
-        return self.clustered_model.queryset.count()
+            dataset[instance] = [data[instance].get(word, 0)
+                                 for word in columns]
+        return columns, dataset
 
-    def flush(self):
+    def columns_dataset(self):
         """
-        Flush the dataset if required.
+        Cache system for columns and dataset.
         """
-        if self.key != self.generate_key():
-            self.build_dataset()
-        return self._columns, self._dataset
+        cache = get_comparison_cache()
+        columns_dataset = cache.get('vectors')
+        if not columns_dataset:
+            columns_dataset = self.build_dataset()
+            cache.set('vectors', columns_dataset)
+        return columns_dataset
 
     @property
     def columns(self):
         """
-        Access to columns in a secure manner.
+        Access to columns.
         """
-        return self.flush()[0]
+        return self.columns_dataset()[0]
 
     @property
     def dataset(self):
         """
-        Access to dataset in a secure manner.
+        Access to dataset.
         """
-        return self.flush()[1]
+        return self.columns_dataset()[1]
 
 
 def compute_related(object_id, dataset):
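
After this refactor build_dataset() is side-effect free: it returns the (columns, dataset) pair instead of mutating instance state, and both properties read through the shared cache entry. A usage sketch, assuming the import paths zinnia exposed at the time of this commit:

    from zinnia.models.entry import Entry
    from zinnia.comparison import VectorBuilder

    vectors = VectorBuilder(Entry.objects.all(),
                            ['title', 'excerpt', 'content'])
    columns = vectors.columns  # cache miss: builds the vectors, stores them under 'vectors'
    dataset = vectors.dataset  # cache hit: no queryset evaluation, no extra DB query

Because the key is shared, every VectorBuilder instance sees the same cached vectors until the key is flushed, which is what the signal change below takes care of.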
2 changes: 1 addition & 1 deletion zinnia/signals.py
@@ -81,7 +81,7 @@ def flush_similar_cache_handler(sender, **kwargs):
     entry = kwargs['instance']
     if entry.is_visible:
         cache = get_comparison_cache()
-        cache.delete('related_entries')
+        cache.delete_many(['related_entries', 'vectors'])
 
 
 def count_discussions_handler(sender, **kwargs):
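
Since the vectors now persist in the cache across requests, they must be invalidated together with the related-entries cache whenever a visible entry is saved; delete_many() drops both keys in a single backend call. A sketch of the effect, assuming the handler is connected as zinnia.signals does in production:

    entry.title = 'Updated title'
    entry.save()
    # post_save fires flush_similar_cache_handler, which calls
    # cache.delete_many(['related_entries', 'vectors']); the next
    # similarity lookup rebuilds both from the database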
2 changes: 1 addition & 1 deletion zinnia/templatetags/zinnia.py
@@ -151,7 +151,7 @@ def get_similar_entries(context, number=5,
 
     if cache_key not in cache_related:
         related_entry_pks = compute_related(
-            entry.pk, VECTORS._dataset)[:number]
+            entry.pk, VECTORS.dataset)[:number]
         related_entries = sorted(
             Entry.objects.filter(pk__in=related_entry_pks),
             key=lambda x: related_entry_pks.index(x.pk))
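
By reading the public dataset property instead of reaching into the private _dataset attribute, the template tag now funnels through columns_dataset() and therefore through the cache; VECTORS is the module-level VectorBuilder shared by get_similar_entries. The read path, sketched as comments:

    VECTORS.dataset
    # -> VECTORS.columns_dataset()   cache hit, or build_dataset() then cache.set()
    # -> (columns, dataset)[1]       the {entry_pk: vector} mapping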
12 changes: 8 additions & 4 deletions zinnia/tests/test_comparison.py
@@ -6,6 +6,7 @@
 from zinnia.comparison import compute_related
 from zinnia.comparison import VectorBuilder
 from zinnia.comparison import ClusteredModel
+from zinnia.comparison import get_comparison_cache
 from zinnia.signals import disconnect_entry_signals
 
 
@@ -45,6 +46,8 @@ def test_clustered_model(self):
                                 ' entry 2 content 2 zinnia test']))
 
     def test_vector_builder(self):
+        cache = get_comparison_cache()
+        cache.delete('vectors')
         vectors = VectorBuilder(Entry.objects.all(),
                                 ['title', 'excerpt', 'content'])
         self.assertEqual(vectors.dataset, {})
@@ -57,12 +60,13 @@ def test_vector_builder(self):
                  'My second entry',
                  'slug': 'my-entry-2'}
         e2 = Entry.objects.create(**params)
-        self.assertEqual(vectors._dataset, {})
-        self.assertEqual(vectors._columns, [])
+        self.assertEqual(vectors.dataset, {})
+        self.assertEqual(vectors.columns, [])
+        cache.delete('vectors')
         self.assertEqual(sorted(vectors.columns), sorted(
             ['1', '2', 'content', 'entry']))
-        self.assertEqual(sorted(vectors._dataset[e1.pk]), [0, 1, 1, 1])
-        self.assertEqual(sorted(vectors._dataset[e2.pk]), [0, 0, 1, 2])
+        self.assertEqual(sorted(vectors.dataset[e1.pk]), [0, 1, 1, 1])
+        self.assertEqual(sorted(vectors.dataset[e2.pk]), [0, 0, 1, 2])
 
     def test_compute_related(self):
         dataset = {1: [1, 2, 3],
15 changes: 12 additions & 3 deletions zinnia/tests/test_templatetags.py
@@ -6,6 +6,7 @@
 from django.template import Context
 from django.template import Template
 from django.template import TemplateSyntaxError
+from django.db.models.signals import post_save
 from django.core.paginator import Paginator
 from django.core.urlresolvers import reverse
 from django.contrib.sites.models import Site
@@ -27,6 +28,8 @@
 from zinnia.tests.utils import urlEqual
 from zinnia.signals import disconnect_entry_signals
 from zinnia.signals import disconnect_discussion_signals
+from zinnia.signals import flush_similar_cache_handler
+from zinnia.signals import ENTRY_PS_FLUSH_SIMILAR_CACHE
 from zinnia.templatetags.zinnia import widont
 from zinnia.templatetags.zinnia import week_number
 from zinnia.templatetags.zinnia import get_authors
@@ -248,6 +251,9 @@ def test_get_popular_entries(self):
         self.assertEqual(list(context['entries']), [second_entry])
 
     def test_get_similar_entries(self):
+        post_save.connect(
+            flush_similar_cache_handler, sender=Entry,
+            dispatch_uid=ENTRY_PS_FLUSH_SIMILAR_CACHE)
         self.publish_entry()
         source_context = Context({'object': self.entry})
         with self.assertNumQueries(0):
@@ -257,7 +263,7 @@ def test_get_similar_entries(self):
                          'zinnia/tags/entries_similar.html')
 
         source_context = Context({'entry': self.entry})
-        with self.assertNumQueries(3):
+        with self.assertNumQueries(1):
             context = get_similar_entries(source_context)
         self.assertEqual(len(context['entries']), 0)
         self.assertEqual(context['template'],
@@ -280,15 +286,18 @@ def test_get_similar_entries(self):
         third_entry.sites.add(self.site)
 
         source_context = Context({'entry': second_entry})
-        with self.assertNumQueries(4):
+        with self.assertNumQueries(2):
             context = get_similar_entries(source_context, 3,
                                           'custom_template.html')
         self.assertEqual(len(context['entries']), 2)
         self.assertEqual(context['entries'][0].pk, third_entry.pk)
         self.assertEqual(context['template'], 'custom_template.html')
-        with self.assertNumQueries(1):
+        with self.assertNumQueries(0):
             context = get_similar_entries(source_context, 3,
                                           'custom_template.html')
+        post_save.disconnect(
+            sender=Entry,
+            dispatch_uid=ENTRY_PS_FLUSH_SIMILAR_CACHE)
 
     def test_get_archives_entries(self):
         with self.assertNumQueries(0):
