<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array">
    <added>
      <filename>src/core/org/apache/hadoop/io/BloomMapFile.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/bloom/BloomFilter.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/bloom/CountingBloomFilter.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/bloom/DynamicBloomFilter.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/bloom/Filter.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/bloom/HashFunction.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/bloom/Key.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/bloom/RemoveScheme.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/hash/Hash.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/hash/JenkinsHash.java</filename>
    </added>
    <added>
      <filename>src/core/org/apache/hadoop/util/hash/MurmurHash.java</filename>
    </added>
    <added>
      <filename>src/test/org/apache/hadoop/io/TestBloomMapFile.java</filename>
    </added>
  </added>
  <modified type="array">
    <modified>
      <diff>@@ -86,6 +86,9 @@ Trunk (unreleased changes)
 
     HADOOP-4826. Introduce admin command saveNamespace. (shv)
 
+    HADOOP-3063. BloomMapFile - fail-fast version of MapFile for sparsely
+    populated key space. (Andrzej Bialecki via stack)
+
   IMPROVEMENTS
 
     HADOOP-4749. Added a new counter REDUCE_INPUT_BYTES. (Yongqiang He via </diff>
      <filename>CHANGES.txt</filename>
    </modified>
    <modified>
      <diff>@@ -200,3 +200,45 @@
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+
+
+APACHE HADOOP SUBCOMPONENTS:
+
+The Apache Hadoop project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for these
+subcomponents is subject to the terms and conditions of the following
+licenses. 
+
+For the org.apache.hadoop.util.bloom.* classes:
+
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract
+ * 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * &quot;AS IS&quot; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */</diff>
      <filename>LICENSE.txt</filename>
    </modified>
    <modified>
      <diff>@@ -1264,6 +1264,34 @@ creations/deletions), or &quot;all&quot;.&lt;/description&gt;
 &lt;/property&gt;
 
 &lt;property&gt;
+  &lt;name&gt;io.mapfile.bloom.size&lt;/name&gt;
+  &lt;value&gt;1048576&lt;/value&gt;
+  &lt;description&gt;The size of BloomFilter-s used in BloomMapFile. Each time this many
+  keys are appended the next BloomFilter will be created (inside a DynamicBloomFilter).
+  Larger values minimize the number of filters, which slightly increases the performance,
+  but may waste too much space if the total number of keys is usually much smaller
+  than this number.
+  &lt;/description&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+  &lt;name&gt;io.mapfile.bloom.error.rate&lt;/name&gt;
+  &lt;value&gt;0.005&lt;/value&gt;
+  &lt;description&gt;The rate of false positives in BloomFilter-s used in BloomMapFile.
+  As this value decreases, the size of BloomFilter-s increases exponentially. This
+  value is the probability of encountering false positives (default is 0.5%).
+  &lt;/description&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+  &lt;name&gt;hadoop.util.hash.type&lt;/name&gt;
+  &lt;value&gt;murmur&lt;/value&gt;
+  &lt;description&gt;The default implementation of Hash. Currently this can take one of the
+  two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash.
+  &lt;/description&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
   &lt;name&gt;map.sort.class&lt;/name&gt;
   &lt;value&gt;org.apache.hadoop.util.QuickSort&lt;/value&gt;
   &lt;description&gt;The default sort class for sorting keys.</diff>
      <filename>conf/hadoop-default.xml</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>450f6f26bb0de62cae8f189fbda09d1a9a036edf</id>
    </parent>
  </parents>
  <author>
    <name>Michael Stack</name>
    <email>stack@apache.org</email>
  </author>
  <url>http://github.com/yahoo/hadoop/commit/88f57aff394d5a3ee4054469b1647d782586e246</url>
  <id>88f57aff394d5a3ee4054469b1647d782586e246</id>
  <committed-date>2008-12-15T12:57:34-08:00</committed-date>
  <authored-date>2008-12-15T12:57:34-08:00</authored-date>
  <message>HADOOP-3063 BloomMapFile - fail-fast version of MapFile for sparsely populated key space

git-svn-id: https://svn.eu.apache.org/repos/asf/hadoop/core/trunk@726797 13f79535-47bb-0310-9956-ffa450edef68</message>
  <tree>0c641c493cdb3ccac638a62bf9555bde57741f3d</tree>
  <committer>
    <name>Michael Stack</name>
    <email>stack@apache.org</email>
  </committer>
</commit>
