0
-Sphinx Search Engine ORM for Django models
0
-http://www.sphinxsearch.com/
0
-Developed and maintained by David Cramer <dcramer@gmail.com>
0
-To add a search manager to your model:
0
- search = SphinxSearch([index=<string>, weight=[<int>,], mode=<string>])
0
-To query the engine and retrieve objects:
0
- MyModel.search.query('my string')
0
-To use multiple index support, you need to define a "content_type" field in your SQL
0
-clause. Each index also needs to have the exact same fields. The rules are almost identical
0
-to that of an SQL UNION query.
0
- SELECT id, name, 1 as content_type FROM model_myapp
0
- SELECT id, name, 2 as content_type FROM model_myotherapp
0
- search_results = SphinxSearch()
0
- search_results.on_index('model_myapp model_myotherapp')
0
- search_results.query('hello')
0
-default settings.py values
0
- SPHINX_SERVER = 'localhost'
0
- from sphinxapi import SPH_SORT_RELEVANCE, SPH_GROUPBY_DAY, SPH_MATCH_EXTENDED, VER_COMMAND_SEARCH, SphinxClient
0
- from api117 import SPH_SORT_RELEVANCE, SPH_GROUPBY_DAY, SPH_MATCH_EXTENDED, VER_COMMAND_SEARCH, SphinxClient
0
-from django.db.models.query import QuerySet
0
-from django.conf import settings
0
-__all__ = ('SearchError', 'ConnectionError', 'SphinxSearch')
0
-from django.contrib.contenttypes.models import ContentType
0
-SPHINX_SERVER = getattr(settings, 'SPHINX_SERVER', 'localhost')
0
-SPHINX_PORT = getattr(settings, 'SPHINX_PORT', 3312)
0
-# These require search API 1.19 (Sphinx 0.9.8)
0
-SPHINX_RETRIES = getattr(settings, 'SPHINX_RETRIES', 0)
0
-SPHINX_RETRIES_DELAY = getattr(settings, 'SPHINX_RETRIES_DELAY', 5)
0
class SearchError(Exception):
    """Error carrying a message reported by the Sphinx client.

    The message is kept on ``self.message`` and is also handed to
    ``Exception`` so that ``args``, ``repr()`` and pickling see it.
    """

    def __init__(self, message):
        # Without this call ``args`` stays empty and repr() loses the message.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return str(self.message)
0
class ConnectionError(Exception):
    """Error type exported by this module for connection problems.

    NOTE(review): nothing in the visible code raises it; presumably it is
    raised when the search daemon cannot be reached -- confirm with callers.

    The message is kept on ``self.message`` and is also handed to
    ``Exception`` so that ``args``, ``repr()`` and pickling see it.
    """

    def __init__(self, message):
        # Without this call ``args`` stays empty and repr() loses the message.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return str(self.message)
0
class SphinxProxy(object):
    """
    Acts exactly like a normal instance of an object except that
    it will handle any special sphinx attributes in a `_sphinx` class.

    Every attribute access, item access and operator is forwarded to the
    wrapped ``instance``; only ``_sphinx`` (the per-result attributes passed
    to the constructor) lives on the proxy itself.
    """
    __slots__ = ('__dict__', '__instance__', '_sphinx')

    def __init__(self, instance, attributes):
        # Bypass our own __setattr__, which would forward to the instance.
        object.__setattr__(self, '__instance__', instance)
        object.__setattr__(self, '_sphinx', attributes)

    def _get_current_object(self):
        """
        Return the current object.  This is useful if you want the real object
        behind the proxy at a time for performance reasons or because you want
        to pass the object into a different context.
        """
        return self.__instance__
    __current_object = property(_get_current_object)

    def __dict__(self):
        try:
            return self.__current_object.__dict__
        except RuntimeError:
            # Must be raised (not returned) so hasattr()/getattr() behave.
            raise AttributeError('__dict__')
    __dict__ = property(__dict__)

    def __repr__(self):
        try:
            obj = self.__current_object
        except RuntimeError:
            return '<%s unbound>' % self.__class__.__name__
        return repr(obj)

    def __nonzero__(self):
        try:
            return bool(self.__current_object)
        except RuntimeError:
            return False

    def __unicode__(self):
        try:
            return unicode(self.__current_object)
        except RuntimeError:
            return repr(self)

    def __dir__(self):
        try:
            return dir(self.__current_object)
        except RuntimeError:
            return []

    def __getattr__(self, name, value=None):
        # Only reached when normal lookup on the proxy fails.
        if name == '__members__':
            return dir(self.__current_object)
        elif name == '_sphinx':
            # ``object`` has no __getattr__; __getattribute__ is the real
            # hook and raises AttributeError if the slot was never set.
            return object.__getattribute__(self, '_sphinx')
        return getattr(self.__current_object, name)

    def __setattr__(self, name, value):
        # ``_sphinx`` belongs to the proxy; everything else to the instance.
        if name == '_sphinx':
            return object.__setattr__(self, '_sphinx', value)
        return setattr(self.__current_object, name, value)

    def __setitem__(self, key, value):
        self.__current_object[key] = value

    def __delitem__(self, key):
        del self.__current_object[key]

    def __setslice__(self, i, j, seq):
        self.__current_object[i:j] = seq

    def __delslice__(self, i, j):
        del self.__current_object[i:j]

    # Plain forwarding of the remaining special methods.  (__unicode__ is
    # defined once, above; a second lambda definition here used to shadow it.)
    __delattr__ = lambda x, n: delattr(x.__current_object, n)
    __str__ = lambda x: str(x.__current_object)
    __lt__ = lambda x, o: x.__current_object < o
    __le__ = lambda x, o: x.__current_object <= o
    __eq__ = lambda x, o: x.__current_object == o
    __ne__ = lambda x, o: x.__current_object != o
    __gt__ = lambda x, o: x.__current_object > o
    __ge__ = lambda x, o: x.__current_object >= o
    __cmp__ = lambda x, o: cmp(x.__current_object, o)
    __hash__ = lambda x: hash(x.__current_object)
    __call__ = lambda x, *a, **kw: x.__current_object(*a, **kw)
    __len__ = lambda x: len(x.__current_object)
    __getitem__ = lambda x, i: x.__current_object[i]
    __iter__ = lambda x: iter(x.__current_object)
    __contains__ = lambda x, i: i in x.__current_object
    __getslice__ = lambda x, i, j: x.__current_object[i:j]
    __add__ = lambda x, o: x.__current_object + o
    __sub__ = lambda x, o: x.__current_object - o
    __mul__ = lambda x, o: x.__current_object * o
    __floordiv__ = lambda x, o: x.__current_object // o
    __mod__ = lambda x, o: x.__current_object % o
    __divmod__ = lambda x, o: x.__current_object.__divmod__(o)
    __pow__ = lambda x, o: x.__current_object ** o
    __lshift__ = lambda x, o: x.__current_object << o
    __rshift__ = lambda x, o: x.__current_object >> o
    __and__ = lambda x, o: x.__current_object & o
    __xor__ = lambda x, o: x.__current_object ^ o
    __or__ = lambda x, o: x.__current_object | o
    __div__ = lambda x, o: x.__current_object.__div__(o)
    __truediv__ = lambda x, o: x.__current_object.__truediv__(o)
    __neg__ = lambda x: -(x.__current_object)
    __pos__ = lambda x: +(x.__current_object)
    __abs__ = lambda x: abs(x.__current_object)
    __invert__ = lambda x: ~(x.__current_object)
    __complex__ = lambda x: complex(x.__current_object)
    __int__ = lambda x: int(x.__current_object)
    __long__ = lambda x: long(x.__current_object)
    __float__ = lambda x: float(x.__current_object)
    __oct__ = lambda x: oct(x.__current_object)
    __hex__ = lambda x: hex(x.__current_object)
    __index__ = lambda x: x.__current_object.__index__()
    # These three must target the wrapped object; calling the same method
    # on the proxy itself recurses without end.
    __coerce__ = lambda x, o: x.__current_object.__coerce__(o)
    __enter__ = lambda x: x.__current_object.__enter__()
    __exit__ = lambda x, *a, **kw: x.__current_object.__exit__(*a, **kw)
0
-class SphinxSearch(object):
0
- def __init__(self, index=None, **kwargs):
0
- self.mode(kwargs['mode'])
0
- if 'weights' in kwargs:
0
- self.weights(kwargs['weights'])
0
- def _clone(self, **kwargs):
0
- # Clones the queryset passing any changed args
0
- c.__dict__.update(self.__dict__)
0
- c.__dict__.update(kwargs)
0
- self._select_related = False
0
- self._select_related_args = {}
0
- self._select_related_fields = []
0
- self.__metadata = None
0
- self._filter_range = None
0
- self._maxmatches = 1000
0
- self._result_cache = None
0
- self._mode = SPH_MATCH_EXTENDED
0
- def __get__(self, instance, instance_model, **kwargs):
0
- raise AttributeError, "Manager isn't accessible via %s instances" % type.__name__
0
- self._model = instance_model
0
- if not self._index and self._model:
0
- self._index = self._model._meta.db_table
0
- return repr(self._get_data())
0
- return len(self._get_data())
0
- return iter(self._get_data())
0
- def __getitem__(self, k):
0
- if not isinstance(k, (slice, int)):
0
- assert (not isinstance(k, slice) and (k >= 0)) \
0
- or (isinstance(k, slice) and (k.start is None or k.start >= 0) and (k.stop is None or k.stop >= 0)), \
0
- "Negative indexing is not supported."
0
- if self._offset < k.start or k.stop-k.start > self._limit:
0
- self._result_cache = None
0
- if k not in range(self._offset, self._limit+self._offset):
0
- self._result_cache = None
0
- if self._result_cache is None:
0
- self._offset = k.start
0
- self._limit = k.stop-k.start
0
- return self._get_results()
0
- return self._get_results()[0]
0
- return self._result_cache[k]
0
def query(self, string):
    """Return the result of ``self._clone`` with ``_query`` set to ``string``."""
    overrides = {'_query': string}
    return self._clone(**overrides)
0
- return self._clone(_mode=mode)
0
def group_by(self, attribute, func, groupsort='@group desc'):
    """Return the result of ``self._clone`` with the grouping options set.

    ``attribute``, ``func`` and ``groupsort`` are stored as ``_groupby``,
    ``_groupfunc`` and ``_groupsort`` respectively.
    """
    grouping = dict(
        _groupby=attribute,
        _groupfunc=func,
        _groupsort=groupsort,
    )
    return self._clone(**grouping)
0
def weights(self, weights):
    """Return the result of ``self._clone`` with ``_weights`` set to ``weights``."""
    overrides = {'_weights': weights}
    return self._clone(**overrides)
0
- # only works on attributes
0
- def filter(self, **kwargs):
0
- filters = self._filters.copy()
0
- for k,v in kwargs.iteritems():
0
- if not isinstance(v, list):
0
- v = [isinstance(value, bool) and value and 1 or 0 or int(value) for value in v]
0
- filters.setdefault(k, []).append(v)
0
- return self._clone(_filters=filters)
0
def geoanchor(self, **kwargs):
    """Return the result of ``self._clone`` with ``_anchor`` set to ``kwargs``.

    Raises AssertionError when ``VER_COMMAND_SEARCH`` is older than 0x113.
    """
    # Written as ``assert(cond, msg)`` this asserted a two-item tuple, which
    # is always true, so the version guard never fired.
    assert VER_COMMAND_SEARCH >= 0x113, "You must upgrade sphinxapi to version 1.19 to use Geo Anchoring."
    return self._clone(_anchor=kwargs)
0
def on_index(self, index):
    """Return the result of ``self._clone`` with ``_index`` set to ``index``."""
    overrides = {'_index': index}
    return self._clone(**overrides)
0
- # this actually does nothing, it's just a passthru to
0
- # keep things looking/working generally the same
0
- # only works on attributes
0
- def exclude(self, **kwargs):
0
- filters = self._excludes.copy()
0
- for k,v in kwargs.iteritems():
0
- if not isinstance(v, list):
0
- v = [isinstance(value, bool) and value and 1 or 0 or int(value) for value in v]
0
- filters.setdefault(k, []).append(v)
0
- return self._clone(_excludes=filters)
0
- # you cannot order by @weight (it always orders in descending)
0
- # keywords are @id, @weight, @rank, and @relevance
0
- def order_by(self, *args):
0
- sort_by.append('%s %s' % (arg, sort))
0
- return self._clone(_sort=(SPH_SORT_EXTENDED, ', '.join(sort_by)))
0
- # pass these thru on the queryset and let django handle it
0
- def select_related(self, *args, **kwargs):
0
- _args = self._select_related_fields[:]
0
- _kwargs = self._select_related_args.copy()
0
- _kwargs.update(kwargs)
0
- _select_related_fields=_args,
0
- _select_related_args=_kwargs,
0
- def extra(self, **kwargs):
0
- extra = self._extra.copy()
0
- return self._clone(_extra=extra)
0
- return self._get_sphinx_results()['total_found']
0
- if not self.__metadata:
0
- # We have to force execution if this is accessed beforehand
0
- return self.__metadata
0
- _sphinx = property(_sphinx)
0
- # need to find a way to make this work yet
0
- if self._result_cache is None:
0
- self._result_cache = list(self._get_results())
0
- return self._result_cache
0
- def _get_sphinx_results(self):
0
- client = SphinxClient()
0
- client.SetServer(SPHINX_SERVER, SPHINX_PORT)
0
- client.SetSortMode(*self._sort)
0
- client.SetWeights(self._weights)
0
- for name, values in self._filters:
0
- client.SetFilter(name, values)
0
- for name, values in self._excludes:
0
- client.SetFilter(name, values, exclude=1)
0
- if self._filter_range:
0
- client.SetIDRange(*self._filter_range)
0
- client.SetGroupBy(self._groupby, self._groupfunc, self._groupsort)
0
- client.SetGeoAnchor(self._anchor)
0
- client.SetLimits(self._offset, self._limit, self._maxmatches)
0
- if VER_COMMAND_SEARCH >= 0x113:
0
- client.SetRetries(SPHINX_RETRIES, SPHINX_RETRIES_DELAY)
0
- results = client.Query(self._query, self._index)
0
- # The Sphinx API doesn't raise exceptions
0
- if not results and client.GetLastError():
0
- raise SearchError, client.GetLastError()
0
- def _get_results(self):
0
- results = self._get_sphinx_results()
0
- if results['matches'] and self._model:
0
- qs = self._model.objects.filter(pk__in=[r['id'] for r in results['matches']])
0
- if self._select_related:
0
- qs = qs.select_related(*self._select_related_fields, **self._select_related_args)
0
- qs = qs.extra(**self._extra)
0
- queryset = dict([(o.id, o) for o in qs])
0
- 'total': results['total'],
0
- 'total_found': results['total_found'],
0
- 'words': results['words'],
0
- results = [SphinxProxy(queryset[k['id']], {'weight': k['weight']}) for k in results['matches'] if k['id'] in queryset]
0
- elif results['matches']:
0
- "We did a query without a model, lets see if there's a content_type"
0
- if 'content_type' in results['attrs']:
0
- "Now we have to do one query per content_type"
0
- x = results['attrs'].index('content_type')
0
- for r in results['matches']:
0
- if ct not in objcache:
0
- objcache[ct][r['doc']] = None
0
- qs = ContentType.objects.get(pk=ct).model_class().objects.filter(pk__in=objcache[ct])
0
- objcache[ct][o.id] = o
0
- results = [objcache[r['attrs'][x]][r['doc']] for r in results['matches']]
0
- results = results['matches']
0
- self._result_cache = results