elastic · mikemccand · Apr 23, 2014 · Apr 24, 2014 · May 4, 2014 · May 4, 2014
diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
@@ -912,7 +912,7 @@ public final Set<IntsRef> toFiniteStrings(final TokenStreamToAutomaton ts2a, Tok
       // TODO: we could walk & add simultaneously, so we
       // don't have to alloc [possibly biggish]
       // intermediate HashSet in RAM:
-      return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
+      return XSpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
   }
 
   final Automaton toLookupAutomaton(final CharSequence key) throws IOException {

diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java
@@ -219,7 +219,7 @@ public TokenStreamToAutomaton getTokenStreamToAutomaton() {
     }
 
     Automaton toLevenshteinAutomata(Automaton automaton) {
-        final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
+        final Set<IntsRef> ref = XSpecialOperations.getFiniteStrings(automaton, -1);
         Automaton subs[] = new Automaton[ref.size()];
         int upto = 0;
         for (IntsRef path : ref) {

diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XSpecialOperations.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XSpecialOperations.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.suggest.analyzing;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.Set;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.State;
+import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.fst.Util;
+import org.elasticsearch.Version;
+
+class XSpecialOperations {
+
+  // TODO Lucene 4.9: remove this class once we upgrade (see LUCENE-5628);
+  // the assert below fails, when assertions are enabled, if Lucene is not 4.8.
+
+  static {
+    assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_48: "Remove this code once we upgrade to Lucene 4.9 where LUCENE-5628 is fixed";
+  }
+
+  private static class PathNode {
+    // A PathNode is one frame of the explicit traversal stack kept by getFiniteStrings.
+    /** Which state the path node ends on, whose
+     *  transitions we are enumerating. */
+    public State state;
+
+    /** Which state the current transition leads to. */
+    public State to;
+
+    /** Which transition we are on. */
+    public int transition;
+
+    /** Which label we are on, in the min-max range of the
+     *  current Transition */
+    public int label;
+
+    public void resetState(State state) {
+      assert state.numTransitions() != 0;  // a node is only ever pointed at a state that has transitions to enumerate
+      this.state = state;
+      transition = 0;
+      Transition t = state.transitionsArray[transition];
+      label = t.getMin();
+      to = t.getDest();
+    }
+
+    /** Returns next label of current transition, or
+     *  advances to next transition and returns its first
+     *  label, if current one is exhausted.  If there are
+     *  no more transitions, returns -1. */
+    public int nextLabel() {
+      if (label > state.transitionsArray[transition].getMax()) {
+        // We've exhausted the current transition's labels;
+        // move to the next transition:
+        transition++;
+        if (transition >= state.numTransitions()) {
+          // We're done iterating transitions leaving this state
+          return -1;
+        }
+        Transition t = state.transitionsArray[transition];
+        label = t.getMin();
+        to = t.getDest();
+      }
+      return label++;  // hand out the current label, then step past it for the next call
+    }
+  }
+
+  private static PathNode getNode(PathNode[] nodes, int index) {
+    assert index < nodes.length;  // the caller grows the array before asking for a new slot
+    if (nodes[index] == null) {  // allocate lazily; a slot freed by a pop is reused by later pushes
+      nodes[index] = new PathNode();
+    }
+    return nodes[index];
+  }
+
+  // TODO: this is a dangerous method ... Automaton could be
+  // huge ... and it's better in general for caller to
+  // enumerate & process in a single walk:
+
+  /** Returns the set of accepted strings, up to at most
+   *  <code>limit</code> strings; when more are accepted, the first
+   *  <code>limit</code> found are returned (<code>limit</code> == -1 means no limit).
+   *  Walks the automaton iteratively with an explicit stack rather than recursing.
+   *  @throws IllegalArgumentException if limit is neither -1 nor positive, or if a
+   *  cycle is found on the path being walked (with a limit set, the walk may
+   *  stop before reaching a cycle, so detection is not guaranteed). */
+  public static Set<IntsRef> getFiniteStrings(Automaton a, int limit) {
+    Set<IntsRef> results = new HashSet<>();
+
+    if (limit == -1 || limit > 0) {
+      // OK
+    } else {
+      throw new IllegalArgumentException("limit must be -1 (which means no limit), or > 0; got: " + limit);
+    }
+
+    if (a.getSingleton() != null) {
+      // Easy case: automaton accepts only 1 string
+      results.add(Util.toUTF32(a.getSingleton(), new IntsRef()));
+    } else {
+
+      if (a.getInitialState().isAccept()) {
+        // Special case the empty string, as usual:
+        results.add(new IntsRef());
+      }
+
+      if (a.getInitialState().numTransitions() > 0 && (limit == -1 || results.size() < limit)) {
+
+        // TODO: we could use state numbers here and just
+        // alloc array, but asking for states array can be
+        // costly (it's lazily computed):
+
+        // Tracks which states are in the current path, for
+        // cycle detection:
+        Set<State> pathStates = Collections.newSetFromMap(new IdentityHashMap<State,Boolean>());
+
+        // Stack to hold our current state in the
+        // recursion/iteration:
+        PathNode[] nodes = new PathNode[4];
+
+        pathStates.add(a.getInitialState());
+        PathNode root = getNode(nodes, 0);
+        root.resetState(a.getInitialState());
+        // Path labels so far (slot i was chosen at nodes[i]); string.length doubles as the stack depth:
+        IntsRef string = new IntsRef(1);
+        string.length = 1;
+
+        while (string.length > 0) {
+          // Resume the node on top of the stack:
+          PathNode node = nodes[string.length-1];
+
+          // Get next label leaving the current node:
+          int label = node.nextLabel();
+
+          if (label != -1) {
+            string.ints[string.length-1] = label;
+
+            if (node.to.isAccept()) {
+              // This transition leads to an accept state,
+              // so we save the current string:
+              results.add(IntsRef.deepCopyOf(string));
+              if (results.size() == limit) {
+                break;  // a limit of -1 never equals a size, so "no limit" never stops here
+              }
+            }
+
+            if (node.to.numTransitions() != 0) {
+              // Now recurse: the destination of this transition has
+              // outgoing transitions:
+              if (pathStates.contains(node.to)) {
+                throw new IllegalArgumentException("automaton has cycles");
+              }
+              pathStates.add(node.to);
+
+              // Push node onto stack:
+              if (nodes.length == string.length) {
+                PathNode[] newNodes = new PathNode[ArrayUtil.oversize(nodes.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+                System.arraycopy(nodes, 0, newNodes, 0, nodes.length);
+                nodes = newNodes;
+              }
+              getNode(nodes, string.length).resetState(node.to);
+              string.length++;
+              string.grow(string.length);
+            }
+          } else {
+            // No more transitions leaving this state,
+            // pop/return back to previous state:
+            assert pathStates.contains(node.state);
+            pathStates.remove(node.state);
+            string.length--;
+          }
+        }
+      }
+    }
+
+    return results;
+  }
+}
diff --git a/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java b/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java
@@ -126,13 +126,6 @@ public void testDuellCompletions() throws IOException, NoSuchFieldException, Sec
         final boolean usePayloads = getRandom().nextBoolean();
         final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;
 
-        // NOTE: remove once we fix getFiniteStrings to not
-        // recurse; this is just a stopgap to mute the test: 
-        // This test fails on Java8, I think because that
-        // version allocates less stack in the Jenkins envs
-        // where we run tests
-        assumeFalse(Constants.JRE_IS_MINIMUM_JAVA8);
-
         XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(TEST_VERSION_CURRENT), null, new StandardAnalyzer(
                 TEST_VERSION_CURRENT), options, 256, -1, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
         LineFileDocs docs = new LineFileDocs(getRandom());