Permalink
Browse files

More helpful error message when the user tries to index an empty corpus

  • Loading branch information...
piskvorky committed Oct 30, 2012
1 parent 9477067 commit f1a87e092fef790e7417a779fcb983308af94c54
Showing 1 changed file with 3 additions and 1 deletion.
  1. +3 −1 gensim/similarities/docsim.py
@@ -485,7 +485,7 @@ class for description of the other parameters.
"""
if num_features is None:
- logger.info("scanning corpus to determine the number of features")
+ logger.warning("scanning corpus to determine the number of features (consider setting `num_features` explicitly)")
num_features = 1 + utils.get_max_id(corpus)
self.num_features = num_features
@@ -494,6 +494,8 @@ class for description of the other parameters.
self.chunksize = chunksize
if corpus is not None:
+ if self.num_features <= 0:
+ raise ValueError("cannot index a corpus with zero features (you must specify either `num_features` or a non-empty corpus in the constructor)")
logger.info("creating matrix for %s documents and %i features" %
(len(corpus), num_features))
self.index = numpy.empty(shape=(len(corpus), num_features), dtype=dtype)

0 comments on commit f1a87e0

Please sign in to comment.