Lowercase the words when building the dataset

Fantomas42 · Jan 14, 2015 · 5903965 · 5903965
1 parent 4ef2302
commit 5903965
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 3 deletions.
diff --git a/zinnia/comparison.py b/zinnia/comparison.py
@@ -73,6 +73,7 @@ def build_dataset(self):
         for instance, words in model_data.items():
             words_item_total = {}
             for word in words.split():
+                word = word.lower()
                 words_total.setdefault(word, 0)
                 words_item_total.setdefault(word, 0)
                 words_total[word] += 1

diff --git a/zinnia/tests/test_comparison.py b/zinnia/tests/test_comparison.py
@@ -51,8 +51,8 @@ def test_vector_builder(self):
         Entry.objects.create(**params)
         columns, dataset = vectors()
         self.assertEqual(sorted(columns), sorted(
-            ['content', 'This', 'my', 'is', '1',
+            ['content', 'this', 'is', '1',
              'second', '2', 'first']))
         self.assertEqual(sorted([sorted(row) for row in dataset.values()]),
-                         sorted([sorted([1, 1, 1, 1, 1, 0, 0, 1]),
-                                 sorted([0, 0, 0, 0, 0, 1, 1, 0])]))
+                         sorted([sorted([1, 1, 1, 1, 0, 0, 1]),
+                                 sorted([0, 0, 0, 0, 1, 1, 0])]))