/
tests.py
447 lines (371 loc) · 19.2 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
# coding: utf-8
import re
import urllib2
import datetime
from django.db import models
from django.contrib.auth.models import User
from django.conf import settings
from django.core.exceptions import MultipleObjectsReturned
from django.test import TestCase
from django.contrib.contenttypes.models import ContentType
from autolex.models import Translation, TranslatedItem
from autolex.detection import LangDetect
from autolex.utils import make_translations, fetch_google_translation, break_into_chunks, communicate_with_google, get_translated_version, check_google_translate_errors
import autolex.utils as autolex_utils
def suite():
    """Return a unittest suite containing every test in ``autolex_test``.

    Test runners that look for a module-level ``suite()`` callable expect it
    to hand back a runnable suite. Returning anything else (for example this
    function object itself) cannot be run as a test.

    Returns:
        A ``unittest.TestSuite`` built from the ``autolex_test`` test case
        defined in this module.
    """
    # Imported locally so this function does not depend on a module-level
    # ``import unittest`` being present in the file.
    import unittest

    # ``autolex_test`` is defined further down in this module; the name is
    # resolved when suite() is called, by which time the module is loaded.
    return unittest.TestLoader().loadTestsFromTestCase(autolex_test)
class TestRegularItem(models.Model):
    """Fixture model for items that are not translated.

    Unlike ``TestTranslatedItem`` this class derives directly from
    ``models.Model``, so it serves as the "wrong kind of object" in tests
    that feed a non-translated item to the translation helpers.
    """

    id = models.AutoField(primary_key=True)
    text1 = models.TextField()

    # Class-wide counter: each new instance takes the current value as its id
    # and then bumps the counter for the next one.
    next_id = 1

    def __init__(self, text1="Text in this object will never be translated."):
        """Assign the next sequential id and store ``text1``."""
        # NOTE(review): the base models.Model.__init__ is not called here --
        # confirm that is intentional for the Django version in use.
        cls = self.__class__
        self.id = cls.next_id
        cls.next_id = cls.next_id + 1
        self.text1 = text1

    def __unicode__(self):
        """Return a fixed human-readable label for this fixture."""
        return "Test RegularItem"
class TestTranslatedItem(TranslatedItem):
    """Fixture model for items whose text fields are to be translated.

    ``text1`` and ``text2`` are reported as translatable through
    ``translated_fields``; ``untranslated_field`` deliberately is not.
    """

    id = models.AutoField(primary_key=True)
    text1 = models.TextField()
    text2 = models.TextField()
    untranslated_field = models.TextField()

    # Class-wide counter used to hand out sequential ids to new instances.
    next_id = 1

    def get_translated_fields(self):
        """Return the names of the fields that should be translated."""
        return ['text1', 'text2']

    # Read-only attribute view of get_translated_fields().
    translated_fields = property(get_translated_fields)

    def __init__(self, text1, language, text2=None):
        """Populate a new fixture instance.

        Args:
            text1: Content of the first translatable field.
            language: Language code the text is written in (e.g. ``'en'``).
            text2: Optional content of the second translatable field; an
                empty string is stored when it is omitted.
        """
        # NOTE(review): the base-class __init__ is not called here -- confirm
        # that is intentional for the Django version in use.
        cls = self.__class__
        self.id = cls.next_id
        cls.next_id = cls.next_id + 1
        self.text1 = text1
        self.untranslated_field = "This text will not be translated."
        self.language = language
        self.translation_id = None
        self.text2 = text2 if text2 is not None else ''

    def __unicode__(self):
        """Return a fixed human-readable label for this fixture."""
        return "Test TranslatedItem"
class autolex_test(TestCase):
    """Tests for the autolex translation helpers and language detection.

    Tests that need Google Translate return early (and therefore pass) when
    ``settings.ENABLE_GOOGLE_TRANSLATE`` is falsy; tests that only read stored
    translations do the same for ``settings.ENABLE_TRANSLATIONS``.
    """

    def setUp(self):
        """Create the shared fixtures: a user, one English item and one
        Spanish translation of that item's ``text1`` field."""
        # Patch needed settings; the previous value is restored in tearDown.
        self.old_common_identifier = settings.COMMON_IDENTIFIER
        settings.COMMON_IDENTIFIER = None

        self.user1 = User.objects.create_user('__test_user__', '')
        self.user1.first_name = '__test__'
        self.user1.last_name = '__user__'
        self.user1.save()

        self.object1 = TestTranslatedItem("This is some English example text.", 'en')
        self.object1.save()
        self.testtranslateditem_type = ContentType.objects.get_for_model(TestTranslatedItem)
        self.object1_translation = Translation.objects.create(
            translation="spanish foo", language='es', field='text1',
            content_type=self.testtranslateditem_type, object_id=self.object1.id)
        self.test_ip = "134.174.191.73"

    # Util function for comparing desired strings to output strings using regex
    def assert_re_count(self, str_, re_to_find, count, re_flags=None, msg=None):
        """Assert that ``re_to_find`` matches in ``str_`` exactly ``count`` times.

        Args:
            str_: The string to search.
            re_to_find: Regular expression pattern handed to ``re.findall``.
            count: Required number of matches.
            re_flags: Optional ``re`` flags; omitted from the call when None.
            msg: Optional extra text appended to the failure message.

        Raises:
            self.failureException: if the number of matches differs from
                ``count``.
        """
        if re_flags is None:
            matches = re.findall(re_to_find, str_)
        else:
            matches = re.findall(re_to_find, str_, re_flags)
        if len(matches) == count:
            return
        failure = "'%s' matches '%s': %s (and not %i times)" % \
            (str_, re_to_find, matches, count)
        if msg:
            failure = "%s (%s)" % (failure, msg)
        # Call form of raise: same behaviour as ``raise cls, msg`` on
        # Python 2 and also valid syntax on Python 3.
        raise self.failureException(failure)

    ############################################################
    ### Tests that begin with the break_into_chunks function ###
    ############################################################
    def test_break_into_chunks(self):
        """
        Tests that long strings are properly broken up via break_into_chunks.
        """
        # TO DO: make break_into_chunks more sophisticated.
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return

        # NOTE(review): a fresh ``chunks=[]`` is passed on every call --
        # presumably break_into_chunks accumulates into that list; confirm
        # against its definition.
        one_chunk = break_into_chunks("This is a single-chunk string.", chunks=[])
        self.assertEqual(len(one_chunk), 1)

        two_chunks = break_into_chunks(
            "This is a double-string chunk because it is longer than the character limit, which is "
            "given as the second argument of this function. It will be broken into two chunks "
            "because there are two sentences.", chunks=[], length_of_chunk=150)
        self.assertEqual(len(two_chunks), 2)

        no_periods = break_into_chunks(
            "This is a double-string chunk, but because the first sentence is longer than the "
            "character limit, the break will happen in the middle of the sentence, which will "
            "decrease translation quality. The second chunk contains both periods.",
            chunks=[], length_of_chunk=150)
        self.assertEqual(len(no_periods), 2)
        # Raw strings so the backslash is unambiguously part of the regex.
        self.assert_re_count(no_periods[0], r'\.', 0)
        self.assert_re_count(no_periods[1], r'\.', 2)

    ###################################################################
    ### Tests that begin with the fetch_google_translation function ###
    ###################################################################
    def test_fetch_google_translation(self):
        """
        Tests that fetch_google_translation retrieves and saves a proper translation
        """
        # 1. create an object to be translated.
        # 2. translate using fetch_google_translation
        # 3. verify that there is a new translation in the database corresponding with
        #    that language and object.
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        test_object = TestTranslatedItem("This is some basic text for the example object.", 'en')
        test_object.save()
        fetch_google_translation(test_object, 'text1', 'es', self.test_ip)
        new_translation = Translation.objects.filter(
            object_id=test_object.id, content_type=self.testtranslateditem_type,
            field='text1', language='es')
        self.assertEqual(new_translation.count(), 1)

    def test_too_long(self):
        """
        Tests that sending to Google a string of >5000 characters returns an error.
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        # 30 characters repeated 500 times: 15000 characters in total.
        really_long_string = "This is a really long string. " * 500
        really_long_string = really_long_string.encode('utf-8')
        try:
            communicate_with_google(self.object1, really_long_string, 'es', self.test_ip)
        except urllib2.URLError as e:
            self.assertEqual(e.reason, 400)
        else:
            self.fail("Translating a too-long string should have resulted in an error.")

    def test_google_error_registry(self):
        """
        Tests that check_google_translate_errors switches GOOGLE_TRANSLATE_ON
        off when there are too many errors in the GOOGLE_TRANSLATE_ERRORS
        registry, and leaves it on otherwise.
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return

        now = datetime.datetime.now()
        recent_error_entry = {'error': 400, 'time': now}
        older_error_entry = {'error': 400, 'time': now - datetime.timedelta(minutes=55)}
        oldest_error_entry = {'error': 400, 'time': now - datetime.timedelta(hours=23)}

        # (oldest, older, recent, expected GOOGLE_TRANSLATE_ON, description)
        scenarios = [
            (0, 0, 5, False, "too many errors in the last 5 minutes"),
            (0, 16, 4, False, "too many errors in the last hour"),
            (31, 15, 4, False, "too many errors in the last day"),
            (30, 15, 4, True, "acceptable number of errors"),
        ]

        # This test overwrites module-level state in autolex.utils. Remember
        # the original objects and put them back afterwards, even when an
        # assertion fails, so later tests are not left with translation
        # switched off or with a registry full of fake errors.
        saved_on = autolex_utils.GOOGLE_TRANSLATE_ON
        saved_errors = autolex_utils.GOOGLE_TRANSLATE_ERRORS
        try:
            self.assertEqual(autolex_utils.GOOGLE_TRANSLATE_ON, True)
            for n_oldest, n_older, n_recent, expected_on, description in scenarios:
                autolex_utils.GOOGLE_TRANSLATE_ON = True
                autolex_utils.GOOGLE_TRANSLATE_ERRORS = (
                    [oldest_error_entry] * n_oldest
                    + [older_error_entry] * n_older
                    + [recent_error_entry] * n_recent)
                check_google_translate_errors()
                self.assertEqual(autolex_utils.GOOGLE_TRANSLATE_ON, expected_on, description)
        finally:
            autolex_utils.GOOGLE_TRANSLATE_ON = saved_on
            autolex_utils.GOOGLE_TRANSLATE_ERRORS = saved_errors

    ############################################################
    ### Tests that begin with the make_translations function ###
    ############################################################
    def test_make_translations(self):
        """
        Tests that make_translations creates proper translations.
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        Translation.objects.all().delete()
        self.object1.text2 = "This is some more sample text."
        self.object1.save()
        make_translations([self.object1], self.test_ip, to_language="es")
        # One translation per translated field, both in the same set.
        self.assertEqual(Translation.active.count(), 2)
        self.assertEqual(Translation.active.all()[0].translation_set,
                         Translation.active.all()[1].translation_set)
        self.assertEqual(
            Translation.active.filter(object_id=self.object1.id,
                                      content_type=self.testtranslateditem_type,
                                      language='es').count(),
            2)

    def test_multiple_languages(self):
        """
        Tests that make_translations does not create translations for objects that
        were originally written in the desired language.
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        Translation.objects.all().delete()
        self.object_es = TestTranslatedItem("El texto de este objecto era escrito en espanol", 'es')
        self.object_es.save()
        make_translations([self.object1, self.object_es], self.test_ip, to_language="es")
        # Only the English object's text1 is translated.
        self.assertEqual(Translation.active.count(), 1)

        # also works when using a common identifier
        settings.COMMON_IDENTIFIER = 'id'
        Translation.objects.all().delete()
        make_translations([self.object1, self.object_es], self.test_ip, to_language="es")
        self.assertEqual(Translation.active.count(), 1)
        settings.COMMON_IDENTIFIER = None

    def test_translate_bad_object(self):
        """
        Tests that make_translations will not work with objects that are not TranslatedItems
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        # create an object that is not a TranslatedItem
        # attempt to translate one of that object's fields.
        # verify that the attempt raises AttributeError
        bad_object = TestRegularItem()
        bad_object.save()
        self.assertRaises(AttributeError, make_translations, [bad_object], self.test_ip, 'es')

    def test_translate_empty_field(self):
        """
        Tests that make_translations does not create a translation for an
        empty field: object1.text2 is '' here, so only text1 is translated.
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        Translation.objects.all().delete()
        make_translations([self.object1], self.test_ip, to_language='es')
        self.assertEqual(Translation.active.count(), 1)

    def test_translate_bad_ip(self):
        """
        Tests that make_translations does not call Google unless it has a proper IP address
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        Translation.objects.all().delete()
        make_translations([self.object1], "bad.ip", to_language='es')
        self.assertEqual(Translation.active.count(), 0)

    def test_common_identifier(self):
        """
        Tests that marking a field as the common identifier (using settings.COMMON_IDENTIFIER)
        does not disable translation.
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        settings.COMMON_IDENTIFIER = 'id'
        Translation.objects.all().delete()
        make_translations([self.object1], self.test_ip, to_language='es')
        self.assertEqual(Translation.active.count(), 1)
        settings.COMMON_IDENTIFIER = None

    def test_already_translated_field(self):
        """
        Tests that, for an object with two translated fields, one of which is already translated,
        make_translations only makes a translation for the untranslated field.
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        self.object1.text2 = "New sample text."
        self.object1.save()
        # Only the text1 translation created in setUp exists so far.
        self.assertEqual(Translation.active.count(), 1)
        make_translations([self.object1], self.test_ip, to_language='es')
        self.assertEqual(Translation.objects.count(), 2)
        # The pre-existing text1 translation must be left as it was.
        existing = Translation.objects.get(
            object_id=self.object1.id, content_type=self.testtranslateditem_type,
            language="es", field="text1")
        self.assertEqual(existing.translation, self.object1_translation.translation)

    def test_inactive_translation(self):
        """
        Tests that a new translation is made if an old translation exists but is marked inactive.
        """
        if not settings.ENABLE_GOOGLE_TRANSLATE:
            return
        self.object1.text2 = "This is some more sample text. A new translation will be made because the " \
                             "relevant translation (created below) is inactive."
        self.object1.save()
        self.object1_translation2 = Translation.objects.create(
            object_id=self.object1.id,
            content_type=self.testtranslateditem_type,
            field="text2", language="es",
            translation="spanish 2 foo",
            is_active=False)
        self.object1_translation2.save()
        self.assertEqual(Translation.objects.count(), 2)
        self.assertEqual(Translation.active.count(), 1)
        make_translations([self.object1], self.test_ip, to_language='es')
        self.assertEqual(Translation.objects.count(), 3)
        self.assertEqual(Translation.active.count(), 2)

    #################################################################
    ### Tests that begin with the get_translated_version function ###
    #################################################################
    def test_get_translated_version(self):
        """
        Tests that get_translated_version retrieves a proper translation.
        """
        if not settings.ENABLE_TRANSLATIONS:
            return
        translated_version = get_translated_version(self.object1, "text1", 'es')
        self.assertEqual(translated_version, self.object1_translation.translation)

    def test_get_no_translation(self):
        """
        Tests that get_translated_version falls back on the original text if
        there is no appropriate translation object in the database.
        """
        if not settings.ENABLE_TRANSLATIONS:
            return
        translated_version = get_translated_version(self.object1, "text1", 'fr')
        self.assertEqual(translated_version, self.object1.text1)

    def test_get_two_translations(self):
        """
        Tests that get_translated_version returns the most recent active translation.
        """
        if not settings.ENABLE_TRANSLATIONS:
            return
        second_translation = Translation.objects.create(
            object_id=self.object1.id,
            content_type=self.testtranslateditem_type,
            field="text1", language='es',
            translation="spanish bar")
        second_translation.save()
        translated_version = get_translated_version(self.object1, "text1", 'es')
        self.assertEqual(translated_version, second_translation.translation)

    def test_get_inactive_translation(self):
        """
        Tests that get_translated_version falls back on the original text if
        the matching translation is marked inactive.
        """
        if not settings.ENABLE_TRANSLATIONS:
            return
        self.object1.text2 = "This is some more sample text. It should not be translated because the " \
                             "relevant translation (created below) is inactive."
        self.object1.save()
        self.object1_translation2 = Translation.objects.create(
            object_id=self.object1.id,
            content_type=self.testtranslateditem_type,
            field="text2", language="es",
            translation="spanish 2 foo",
            is_active=False)
        self.object1_translation2.save()
        fallback_version = get_translated_version(self.object1, "text2", 'es')
        self.assertEqual(fallback_version, self.object1.text2)

    def test_get_common_identifier(self):
        """
        Tests that get_translated_version works correctly with a common identifier.
        """
        if not settings.ENABLE_TRANSLATIONS:
            return
        settings.COMMON_IDENTIFIER = 'id'
        translated_version = get_translated_version(self.object1, "text1", 'es')
        self.assertEqual(translated_version, self.object1_translation.translation)
        settings.COMMON_IDENTIFIER = None

    def test_get_bad_field(self):
        """
        Tests that using get_translated_version with a non-translated field raises an error.
        """
        if not settings.ENABLE_TRANSLATIONS:
            return
        self.assertRaises(ValueError, get_translated_version,
                          self.object1, "untranslated_field", 'es')

    def test_detection(self):
        """
        Tests that autolex.detection correctly detects a string's language.
        """
        if not settings.ENABLE_TRANSLATIONS:
            return
        # (sample text, expected language code). The Spanish and Russian
        # samples contain non-ASCII characters and rely on the file's
        # ``# coding: utf-8`` declaration.
        samples = [
            ("De snelle bruine vos springt over de luie hond", "nl"),
            ("The quick brown fox jumps over the lazy dog", "en"),
            ("Le renard brun rapide saute par-dessus le chien paresseux", "fr"),
            ("Der schnelle braune Fuchs springt den faulen Hund.", "de"),
            ("El rápido zorro marrón salta sobre el perro perezoso", "es"),
            ("В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!", "ru"),
        ]
        ld = LangDetect()
        for text, expected_language in samples:
            self.assertEqual(ld.detect(text), expected_language)

    def tearDown(self):
        """Restore the COMMON_IDENTIFIER setting that setUp replaced."""
        settings.COMMON_IDENTIFIER = self.old_common_identifier