Merge remote-tracking branch 'remotes/asf/trunk'

Heliosearch · Jan 24, 2014 · 9630709 · 9630709
2 parents 67ab571 + f66f5fd
commit 9630709
Show file tree

Hide file tree

Showing 67 changed files with 1,198 additions and 494 deletions.
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -57,6 +57,8 @@ API Changes
 * LUCENE-5388: Remove Reader from Tokenizer's constructor. 
   (Benson Margulies via Robert Muir - pull request #16)
 
+* LUCENE-5405: Make ShingleAnalyzerWrapper.getWrappedAnalyzer() public final (gsingers)
+
 Documentation
 
 * LUCENE-5392: Add/improve analysis package documentation to reflect
@@ -102,6 +104,10 @@ New Features
   using SortField.setMissingValue(SortField.STRING_FIRST), or last,
   using SortField.setMissingValue(SortField.STRING_LAST). (Rob Muir,
   Mike McCandless)
+
+* LUCENE-5099: QueryNode should have the ability to detach from its parent
+  node. Added QueryNode.removeFromParent(), which allows a node to be
+  detached from its parent node. (Adriano Crestani)
 
 
 Build
@@ -137,6 +143,9 @@ Build
 
 * LUCENE-5383: fix changes2html to link pull requests (Steve Rowe)
 
+* LUCENE-5411: Upgrade to released JFlex 1.5.0; stop requiring
+  a locally built JFlex snapshot jar. (Steve Rowe)
+
 Bug fixes
 
 * LUCENE-5285: Improved highlighting of multi-valued fields with
@@ -173,6 +182,10 @@ Bug fixes
   preventing wrong term positions for fields that use
   StringTokenStream. (Michael Busch)
 
+* LUCENE-5377: IndexWriter.addIndexes(Directory[]) would cause corruption
+  on Lucene 4.6 if any index segments were Lucene 4.0-4.5.
+  (Littlestar, Mike McCandless, Shai Erera, Robert Muir)
+
 API Changes
 
 * LUCENE-5339: The facet module was simplified/reworked to make the

diff --git a/lucene/analysis/common/build.xml b/lucene/analysis/common/build.xml
@@ -29,7 +29,7 @@
 
   <import file="../analysis-module-build.xml"/>
 
-  <target name="jflex" depends="jflex-check,clean-jflex,-gen-uax29-supp-macros,
+  <target name="jflex" depends="-install-jflex,clean-jflex,-gen-uax29-supp-macros,
                                 -jflex-StandardAnalyzer,-jflex-UAX29URLEmailTokenizer,
                                 -jflex-wiki-tokenizer,-jflex-HTMLStripCharFilter"/>
 
@@ -40,11 +40,7 @@
   </target>
 
   <target name="-jflex-HTMLStripCharFilter"
-          depends="init,jflex-check,generate-jflex-html-char-entities"
-          if="jflex.present">
-    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
-      <classpath refid="jflex.classpath"/>
-    </taskdef>
+          depends="init,generate-jflex-html-char-entities">
     <jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex"
            outdir="src/java/org/apache/lucene/analysis/charfilter"
            nobak="on" inputstreamctor="false"/>
@@ -65,25 +61,16 @@
     <fixcrlf file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" encoding="UTF-8"/>
   </target>
 
-  <target name="-jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
-    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
-      <classpath refid="jflex.classpath"/>
-    </taskdef>
+  <target name="-jflex-wiki-tokenizer" depends="init,-install-jflex">
     <run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
   </target>
 
-  <target name="-jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
-    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
-			<classpath refid="jflex.classpath"/>
-    </taskdef>
+  <target name="-jflex-StandardAnalyzer" depends="init,-install-jflex">
     <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
     <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
   </target>
 
-  <target name="-jflex-UAX29URLEmailTokenizer" depends="jflex-check" if="jflex.present">
-    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
-			<classpath refid="jflex.classpath"/>
-    </taskdef>
+  <target name="-jflex-UAX29URLEmailTokenizer" depends="init,-install-jflex">
     <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
   </target>
 

diff --git a/...e/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java b/...e/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapper.java
@@ -138,7 +138,7 @@ public boolean isOutputUnigramsIfNoShingles() {
   }
 
   @Override
-  protected Analyzer getWrappedAnalyzer(String fieldName) {
+  public final Analyzer getWrappedAnalyzer(String fieldName) {
     return delegate;
   }
 

diff --git a/lucene/common-build.xml b/lucene/common-build.xml
@@ -227,17 +227,6 @@
   <property name="m2.repository.id" value="local"/>
   <property name="m2.credentials.prompt" value="true"/>
 
-  <property name="jflex.home" location="${common.dir}"/>
-
-  <path id="jflex.classpath">
-    <fileset dir="${jflex.home}/">
-      <!-- for a JFlex trunk checkout: -->
-      <include name="jflex/target/*.jar"/>
-      <!-- for a JFlex distribution (not yet available): -->
-      <include name="lib/*.jar"/>
-    </fileset>
-  </path>
-
   <property name="backwards.dir" location="backwards"/>
   <property name="build.dir.backwards" location="${build.dir}/backwards"/>
 
@@ -472,31 +461,6 @@
     </sequential>
   </macrodef>
 
-  <target name="jflex-check">
-    <available property="jflex.present" classname="jflex.anttask.JFlexTask">
-      <classpath refid="jflex.classpath"/>
-    </available>
-    <fail unless="jflex.present">&#xA0;
-      ##################################################################
-      JFlex not found.
-      JFlex Home: ${jflex.home}
-
-      Please install the jFlex 1.5 version (currently not released)
-      from its SVN repository:
-
-       svn co -r 722 https://svn.code.sf.net/p/jflex/code/trunk jflex
-       cd jflex
-       mvn install
-
-      Then, create a build.properties file either in your home
-      directory, or within the Lucene directory and set the jflex.home
-      property to the path where the JFlex trunk checkout is located
-      (in the above example it's the directory called "jflex").
-
-      ##################################################################
-    </fail>
-  </target>
-
   <target name="compile-core" depends="init, clover"
           description="Compiles core classes">
     <compile
@@ -2197,7 +2161,15 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
       </scp>
     </sequential>
   </macrodef>
-
+
+  <!-- JFlex task -->
+  <target name="-install-jflex" unless="jflex.loaded" depends="ivy-availability-check,ivy-configure">
+    <ivy:cachepath organisation="de.jflex" module="jflex" revision="1.5.0"
+                   inline="true" conf="default" transitive="true" pathid="jflex.classpath"/>
+    <taskdef name="jflex" classname="jflex.anttask.JFlexTask" classpathref="jflex.classpath"/>
+    <property name="jflex.loaded" value="true"/>
+  </target>
+
   <!-- GROOVY scripting engine for ANT tasks -->
   <target name="resolve-groovy" unless="groovy.loaded" depends="ivy-availability-check,ivy-configure">
     <ivy:cachepath organisation="org.codehaus.groovy" module="groovy-all" revision="2.2.1"

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java
@@ -67,11 +67,12 @@
  * 
  * @see SegmentInfos
  * @lucene.experimental
- * @deprecated Only for reading old 4.0-4.5 segments
+ * @deprecated Only for reading old 4.0-4.5 segments, and supporting IndexWriter.addIndexes
  */
 @Deprecated
 public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
   private final SegmentInfoReader reader = new Lucene40SegmentInfoReader();
+  private final SegmentInfoWriter writer = new Lucene40SegmentInfoWriter();
 
   /** Sole constructor. */
   public Lucene40SegmentInfoFormat() {
@@ -82,9 +83,11 @@ public SegmentInfoReader getSegmentInfoReader() {
     return reader;
   }
 
+  // we must unfortunately support write, to allow addIndexes to write a new .si with rewritten filenames:
+  // see LUCENE-5377
   @Override
   public SegmentInfoWriter getSegmentInfoWriter() {
-    throw new UnsupportedOperationException("this codec can only be used for reading");
+    return writer;
   }
 
   /** File extension used to store {@link SegmentInfo}. */

diff --git a/...s/lucene40/Lucene40SegmentInfoWriter.java → ...s/lucene40/Lucene40SegmentInfoWriter.java b/...s/lucene40/Lucene40SegmentInfoWriter.java → ...s/lucene40/Lucene40SegmentInfoWriter.java
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -1112,7 +1112,11 @@ public synchronized int numDocs() {
   }
 
   /**
-   * Returns true if this index has deletions (including buffered deletions).
+   * Returns true if this index has deletions (including
+   * buffered deletions).  Note that this will return true
+   * if there are buffered Term/Query deletions, even if it
+   * turns out those buffered deletions don't match any
+   * documents.
    */
   public synchronized boolean hasDeletions() {
     ensureOpen();
@@ -2893,12 +2897,15 @@ public final void commit() throws IOException {
     commitInternal();
   }
 
-  /** Returns true if there are changes that have not been
-   *  committed.  Note that if a merge kicked off as a
-   *  result of flushing a new segment during {@link
-   *  #commit}, or a concurrent merged finished,
-   *  this method may return true right after you
-   *  had just called {@link #commit}. */
+  /** Returns true if there may be changes that have not been
+   *  committed.  There are cases where this may return true
+   *  when there are no actual "real" changes to the index,
+   *  for example if you've deleted by Term or Query but
+   *  that Term or Query does not match any documents.
+   *  Also, if a merge kicked off as a result of flushing a
+   *  new segment during {@link #commit}, or a concurrent
+   *  merge finished, this method may return true right
+   *  after you had just called {@link #commit}. */
   public final boolean hasUncommittedChanges() {
     return changeCount != lastCommitChangeCount || docWriter.anyChanges() || bufferedUpdatesStream.any();
   }

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java
@@ -19,12 +19,18 @@
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BaseDocValuesFormatTestCase;
+import org.junit.BeforeClass;
 
 /**
  * Tests Lucene40DocValuesFormat
  */
 public class TestLucene40DocValuesFormat extends BaseDocValuesFormatTestCase {
   private final Codec codec = new Lucene40RWCodec();
+
+  @BeforeClass
+  public static void beforeClass() {
+    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+  }
 
   @Override
   protected Codec getCodec() {

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsFormat.java
@@ -19,13 +19,19 @@
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.junit.BeforeClass;
 
 /**
  * Tests Lucene40PostingsFormat
  */
 public class TestLucene40PostingsFormat extends BasePostingsFormatTestCase {
   private final Codec codec = new Lucene40RWCodec();
 
+  @BeforeClass
+  public static void beforeClass() {
+    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+  }
+
   @Override
   protected Codec getCodec() {
     return codec;

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsReader.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40PostingsReader.java
@@ -34,6 +34,7 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
+import org.junit.BeforeClass;
 
 public class TestLucene40PostingsReader extends LuceneTestCase {
   static final String terms[] = new String[100];
@@ -42,6 +43,11 @@ public class TestLucene40PostingsReader extends LuceneTestCase {
       terms[i] = Integer.toString(i+1);
     }
   }
+
+  @BeforeClass
+  public static void beforeClass() {
+    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+  }
 
   /** tests terms with different probabilities of being in the document.
    *  depends heavily on term vectors cross-check at checkIndex

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40StoredFieldsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40StoredFieldsFormat.java
@@ -19,9 +19,15 @@
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
+import org.junit.BeforeClass;
 
 public class TestLucene40StoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
 
+  @BeforeClass
+  public static void beforeClass() {
+    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+  }
+
   @Override
   protected Codec getCodec() {
     return new Lucene40RWCodec();

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java
@@ -19,9 +19,15 @@
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BaseTermVectorsFormatTestCase;
+import org.junit.BeforeClass;
 
 public class TestLucene40TermVectorsFormat extends BaseTermVectorsFormatTestCase {
 
+  @BeforeClass
+  public static void beforeClass() {
+    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+  }
+
   @Override
   protected Codec getCodec() {
     return new Lucene40RWCodec();

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
@@ -37,10 +37,16 @@
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
+import org.junit.BeforeClass;
 
 // TODO: really this should be in BaseTestPF or somewhere else? useful test!
 public class TestReuseDocsEnum extends LuceneTestCase {
 
+  @BeforeClass
+  public static void beforeClass() {
+    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+  }
+
   public void testReuseDocsEnumNoReuse() throws IOException {
     Directory dir = newDirectory();
     Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41StoredFieldsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41StoredFieldsFormat.java
@@ -19,9 +19,15 @@
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
+import org.junit.BeforeClass;
 
 public class TestLucene41StoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
 
+  @BeforeClass
+  public static void beforeClass() {
+    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+  }
+
   @Override
   protected Codec getCodec() {
     return new Lucene41RWCodec();

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java
@@ -19,13 +19,19 @@
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase;
+import org.junit.BeforeClass;
 
 /**
  * Tests Lucene42DocValuesFormat
  */
 public class TestLucene42DocValuesFormat extends BaseCompressingDocValuesFormatTestCase {
   private final Codec codec = new Lucene42RWCodec();
 
+  @BeforeClass
+  public static void beforeClass() {
+    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; // explicitly instantiates ancient codec
+  }
+
   @Override
   protected Codec getCodec() {
     return codec;

diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -213,6 +213,7 @@ private static IndexUpgrader newIndexUpgrader(Directory dir) {
 
   @BeforeClass
   public static void beforeClass() throws Exception {
+    assertFalse("test infra is broken!", LuceneTestCase.OLD_FORMAT_IMPERSONATION_IS_ACTIVE);
     List<String> names = new ArrayList<String>(oldNames.length + oldSingleSegmentNames.length);
     names.addAll(Arrays.asList(oldNames));
     names.addAll(Arrays.asList(oldSingleSegmentNames));