Skip to content

Commit

Permalink
Lowercase the words when building the dataset
Browse files (browse the repository at this point in the history)
  • Loading branch information
Fantomas42 committed Jan 14, 2015
1 parent 4ef2302 commit 5903965
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
1 change: 1 addition & 0 deletions zinnia/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def build_dataset(self):
for instance, words in model_data.items():
words_item_total = {}
for word in words.split():
+word = word.lower()
words_total.setdefault(word, 0)
words_item_total.setdefault(word, 0)
words_total[word] += 1
Expand Down
6 changes: 3 additions & 3 deletions zinnia/tests/test_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def test_vector_builder(self):
Entry.objects.create(**params)
columns, dataset = vectors()
self.assertEqual(sorted(columns), sorted(
-['content', 'This', 'my', 'is', '1',
+['content', 'this', 'is', '1',
'second', '2', 'first']))
self.assertEqual(sorted([sorted(row) for row in dataset.values()]),
-sorted([sorted([1, 1, 1, 1, 1, 0, 0, 1]),
-sorted([0, 0, 0, 0, 0, 1, 1, 0])]))
+sorted([sorted([1, 1, 1, 1, 0, 0, 1]),
+sorted([0, 0, 0, 0, 1, 1, 0])]))

0 comments on commit 5903965

Please sign in to comment.