Skip to content

Commit

Permalink
Faster simplification for strings
Browse files Browse the repository at this point in the history
Two important optimisations:

  1. Try each half of a list first (a binary-search-style chop) before doing
     individual element simplifications.
  2. Don't try literally every single Unicode character smaller than the
     current one to see if it works; only try the nearest 100 code points
     below it.
  • Loading branch information
DRMacIver committed Mar 21, 2015
1 parent b452a56 commit b3c406a
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
5 changes: 5 additions & 0 deletions src/hypothesis/searchstrategy/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ def simplify(self, x):

yield ()

if len(x) > 1:
mid = len(x) // 2
yield x[:mid]
yield x[mid:]

for i in hrange(0, len(x)):
if len(x) > 1:
y = list(x)
Expand Down
17 changes: 10 additions & 7 deletions src/hypothesis/searchstrategy/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
binary_type
from hypothesis.searchstrategy.strategies import BadData, SearchStrategy, \
MappedSearchStrategy, strategy, check_type, check_data_type
import hypothesis.internal.distributions as dist


class OneCharStringStrategy(SearchStrategy):
Expand All @@ -35,14 +36,16 @@ class OneCharStringStrategy(SearchStrategy):
)

def produce_parameter(self, random):
return random.random()
alphabet_size = 1 + dist.geometric(random, 0.1)
alphabet = []
while len(alphabet) < alphabet_size:
char = hunichr(random.randint(0, sys.maxunicode))
if unicodedata.category(char) != 'Cs':
alphabet.append(char)
return tuple(alphabet)

def produce_template(self, context, p):
random = context.random
while True:
result = hunichr(random.randint(0, sys.maxunicode))
if unicodedata.category(result) != 'Cs':
return result
return context.random.choice(p)

def simplify(self, x):
if x in self.ascii_characters:
Expand All @@ -53,7 +56,7 @@ def simplify(self, x):
for c in reversed(self.ascii_characters):
yield text_type(c)
yield hunichr(o // 2)
for t in hrange(o - 1, -1, -1):
for t in hrange(o - 1, max(o - 100, -1), -1):
yield hunichr(t)


Expand Down

0 comments on commit b3c406a

Please sign in to comment.