/
search_general.py
154 lines (123 loc) · 5.71 KB
/
search_general.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#
# Freesound is (c) MUSIC TECHNOLOGY GROUP, UNIVERSITAT POMPEU FABRA
#
# Freesound is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Freesound is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# See AUTHORS file.
#
from solr import Solr, SolrException, SolrResponseInterpreter
import sounds
from django.conf import settings
from search.views import search_prepare_sort, search_prepare_query
from search.forms import SEARCH_SORT_OPTIONS_WEB
import logging
logger = logging.getLogger("search")
def convert_to_solr_document(sound):
    """Build the Solr document (a plain dict) describing ``sound``.

    The document is assembled from the sound's own fields and from its
    related user, license, tags, pack, geotag, comments, remix relations
    and file locations.

    Args:
        sound: object exposing the attributes and managers read below
            (``id``, ``user``, ``tags``, ``license``, ``pack``,
            ``locations()``, ...).

    Returns:
        dict mapping Solr field names to values. ``"pack"`` is only present
        when the sound belongs to a pack, and ``"geotag"`` only when the
        sound has a geotag.
    """
    logger.info("creating solr XML from sound %d" % sound.id)

    document = {}

    document["id"] = sound.id
    document["username"] = sound.user.username
    document["created"] = sound.created
    document["original_filename"] = sound.original_filename

    document["description"] = sound.description
    document["tag"] = list(sound.tags.select_related("tag").values_list('tag__name', flat=True))

    document["license"] = sound.license.name

    document["is_remix"] = bool(sound.sources.count())
    document["was_remixed"] = bool(sound.remixes.count())

    # Sounds in a pack are grouped under "<pack id>_<pack name>"; a sound
    # without a pack forms its own group, keyed by its id.
    if sound.pack:
        document["pack"] = sound.pack.name
        document["grouping_pack"] = str(sound.pack.id) + "_" + sound.pack.name
    else:
        document["grouping_pack"] = str(sound.id)

    # Test the foreign-key column so that the related geotag row is only
    # loaded when there actually is one.
    has_geotag = sound.geotag_id is not None
    document["is_geotagged"] = has_geotag
    if has_geotag:
        # Stored as "<lon> <lat>".
        document["geotag"] = str(sound.geotag.lon) + " " + str(sound.geotag.lat)

    document["type"] = sound.type

    document["duration"] = sound.duration
    # Missing bit depth / bit rate are indexed as 0.
    document["bitdepth"] = sound.bitdepth if sound.bitdepth is not None else 0
    document["bitrate"] = sound.bitrate if sound.bitrate is not None else 0
    document["samplerate"] = int(sound.samplerate)
    document["filesize"] = sound.filesize
    document["channels"] = sound.channels
    document["md5"] = sound.md5

    document["num_downloads"] = sound.num_downloads

    document["avg_rating"] = sound.avg_rating
    document["num_ratings"] = sound.num_ratings

    document["comment"] = list(sound.comments.values_list('comment', flat=True))
    document["comments"] = sound.num_comments

    # Resolve the locations once instead of once per path.
    locations = sound.locations()
    display = locations["display"]
    document["waveform_path_m"] = display["wave"]["M"]["path"]
    document["waveform_path_l"] = display["wave"]["L"]["path"]
    document["spectral_path_m"] = display["spectral"]["M"]["path"]
    document["spectral_path_l"] = display["spectral"]["L"]["path"]
    document["preview_path"] = locations["preview"]["LQ"]["mp3"]["path"]

    return document
def add_sound_to_solr(sound):
    """Index a single sound in Solr.

    The sound is converted with ``convert_to_solr_document`` and posted to
    the Solr instance at ``settings.SOLR_URL``. A ``SolrException`` is
    logged as an error and not re-raised; other exceptions propagate.

    Args:
        sound: the sound to index.
    """
    logger.info("adding single sound to solr index")
    try:
        solr = Solr(settings.SOLR_URL)
        documents = [convert_to_solr_document(sound)]
        solr.add(documents)
    except SolrException as e:
        reason = str(e)
        logger.error("failed to add sound %d to solr index, reason: %s" % (sound.id, reason))
def add_sounds_to_solr(sounds):
    """Convert every sound in ``sounds`` and post them with one ``solr.add`` call.

    Exceptions raised while converting or posting are not caught here; they
    propagate to the caller.

    Args:
        sounds: iterable of sounds accepted by ``convert_to_solr_document``.
            (Inside this function the parameter shadows the module-level
            ``sounds`` import.)
    """
    logger.info("adding multiple sounds to solr index")
    solr = Solr(settings.SOLR_URL)

    logger.info("creating XML")
    documents = [convert_to_solr_document(sound) for sound in sounds]

    logger.info("posting to Solr")
    solr.add(documents)

    # No index optimization is performed here, so no "optimizing" message is
    # logged either.
    logger.info("done")
def add_all_sounds_to_solr(sound_queryset, slice_size=4000, mark_index_clean=False):
    """Index every sound of ``sound_queryset`` in Solr, slice by slice.

    The queryset is passed in by the caller so that this module does not
    need a reference to the Sound class, which causes circular imports.

    Args:
        sound_queryset: queryset of the sounds to index.
        slice_size: number of sounds posted to Solr per slice.
        mark_index_clean: when true, the sounds of each posted slice get
            ``is_index_dirty=False``.

    Returns:
        The number of sounds in the slices that were processed without a
        ``SolrException``. A failing slice is logged and skipped.
    """
    # NOTE(review): if sound_queryset itself filters on is_index_dirty,
    # marking a slice clean may shift the offsets of the following slices --
    # confirm against the callers.
    total = sound_queryset.count()
    indexed = 0
    for offset in range(0, total, slice_size):
        print("Adding %i sounds to solr, slice %i" % (slice_size, offset))
        try:
            batch = sound_queryset[offset:offset + slice_size]
            add_sounds_to_solr(batch)
            if mark_index_clean:
                logger.info("Marking sounds as clean.")
                batch_ids = [snd.id for snd in batch]
                sounds.models.Sound.objects.filter(pk__in=batch_ids).update(is_index_dirty=False)
            indexed = indexed + len(batch)
        except SolrException as e:
            reason = str(e)
            logger.error("failed to add sound batch to solr index, reason: %s" % reason)
    return indexed
def get_all_sound_ids_from_solr(limit=False):
    """Return the sorted ids of the sounds found in the Solr index.

    The index is read page by page (``PAGE_SIZE`` results per query, sorted
    by ``'created asc'``) until as many ids as Solr reports in ``num_found``
    have been collected, the limit is reached, or a page comes back empty.

    Args:
        limit: stop requesting pages once at least this many ids have been
            collected. Any falsy value (the default) means "no limit". The
            limit is only checked between pages, so the result may hold
            more than ``limit`` ids.

    Returns:
        Sorted list of the ``id`` values of the returned Solr documents.

    Raises:
        Any exception raised by the query helpers or the Solr client
        propagates unchanged, keeping its original type and traceback.
    """
    logger.info("getting all sound ids from solr.")
    solr = Solr(settings.SOLR_URL)
    solr_ids = []
    solr_count = None  # unknown until the first response arrives
    PAGE_SIZE = 2000
    current_page = 1
    while ((solr_count is None or len(solr_ids) < solr_count)
           and (not limit or len(solr_ids) < limit)):
        sort = search_prepare_sort('created asc', SEARCH_SORT_OPTIONS_WEB)
        query = search_prepare_query('', '', sort, current_page, PAGE_SIZE, include_facets=False)
        response = SolrResponseInterpreter(solr.select(unicode(query)))
        page_ids = [element['id'] for element in response.docs]
        if not page_ids:
            # An empty page means no further progress is possible; stop
            # instead of requesting pages forever when num_found stays
            # larger than the number of ids actually returned.
            break
        solr_ids += page_ids
        solr_count = response.num_found
        current_page += 1
    return sorted(solr_ids)
def delete_sound_from_solr(sound):
    """Remove ``sound`` from the Solr index, identified by its id.

    Any exception raised while deleting is logged as an error and not
    re-raised.

    Args:
        sound: the sound whose document is deleted; only ``sound.id`` is read.
    """
    logger.info("deleting sound with id %d" % sound.id)
    try:
        solr = Solr(settings.SOLR_URL)
        solr.delete_by_id(sound.id)
    except Exception as e:
        message = 'could not delete sound with id %s (%s).' % (sound.id, e)
        logger.error(message)