From f6fd3773b5785a94fe2595dbb6d69278f9524c90 Mon Sep 17 00:00:00 2001 From: Dmytro Ivchenko Date: Mon, 25 Feb 2013 11:13:04 -0800 Subject: [PATCH] Added support for *RangeShift operations in BigNestedIntArray --- .../bobo/api/BoboIndexReader.java | 27 ++-- .../impl/DefaultFacetCountCollector.java | 83 ++--------- .../bobo/util/BigNestedIntArray.java | 131 ++++++++++++++++++ 3 files changed, 160 insertions(+), 81 deletions(-) diff --git a/bobo-browse/src/main/java/com/browseengine/bobo/api/BoboIndexReader.java b/bobo-browse/src/main/java/com/browseengine/bobo/api/BoboIndexReader.java index d556a945..ea35aaef 100644 --- a/bobo-browse/src/main/java/com/browseengine/bobo/api/BoboIndexReader.java +++ b/bobo-browse/src/main/java/com/browseengine/bobo/api/BoboIndexReader.java @@ -567,6 +567,17 @@ protected BoboIndexReader(IndexReader reader, boolean useSubReaders) throws IOException { super(useSubReaders ? new MultiReader(createSubReaders(reader, workArea), false) : reader); + _runtimeFacetHandlerFactories = facetHandlerFactories; + _runtimeFacetHandlerFactoryMap = new HashMap>(); + if (_runtimeFacetHandlerFactories!=null) + { + for(RuntimeFacetHandlerFactory factory : _runtimeFacetHandlerFactories) + { + _runtimeFacetHandlerFactoryMap.put(factory.getName(), factory); + } + } + _facetHandlers = facetHandlers; + _workArea = workArea; if(useSubReaders) { _dir = reader.directory(); @@ -581,23 +592,17 @@ protected BoboIndexReader(IndexReader reader, { _subReaders[i]._dir = _dir; if(facetHandlers != null) _subReaders[i].setFacetHandlers(facetHandlers); + if (facetHandlerFactories != null) + { + _subReaders[i]._runtimeFacetHandlerFactories = _runtimeFacetHandlerFactories; + _subReaders[i]._runtimeFacetHandlerFactoryMap = _runtimeFacetHandlerFactoryMap; + } _starts[i] = maxDoc; maxDoc += _subReaders[i].maxDoc(); } _starts[_subReaders.length] = maxDoc; } } - _runtimeFacetHandlerFactories = facetHandlerFactories; - _runtimeFacetHandlerFactoryMap = new HashMap>(); - if 
(_runtimeFacetHandlerFactories!=null) - { - for(RuntimeFacetHandlerFactory factory : _runtimeFacetHandlerFactories) - { - _runtimeFacetHandlerFactoryMap.put(factory.getName(), factory); - } - } - _facetHandlers = facetHandlers; - _workArea = workArea; } protected void facetInit() throws IOException diff --git a/bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFacetCountCollector.java b/bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFacetCountCollector.java index e8afe0b3..398093c7 100644 --- a/bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFacetCountCollector.java +++ b/bobo-browse/src/main/java/com/browseengine/bobo/facets/impl/DefaultFacetCountCollector.java @@ -19,17 +19,6 @@ package com.browseengine.bobo.facets.impl; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; - -import javax.management.MBeanServer; -import javax.management.ObjectName; -import javax.management.StandardMBean; - -import org.apache.log4j.Logger; - import com.browseengine.bobo.api.BrowseFacet; import com.browseengine.bobo.api.BrowseSelection; import com.browseengine.bobo.api.ComparatorFactory; @@ -45,62 +34,30 @@ import com.browseengine.bobo.facets.data.TermLongList; import com.browseengine.bobo.facets.data.TermShortList; import com.browseengine.bobo.facets.data.TermValueList; -import com.browseengine.bobo.jmx.JMXUtil; import com.browseengine.bobo.util.BigSegmentedArray; import com.browseengine.bobo.util.IntBoundedPriorityQueue; import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator; import com.browseengine.bobo.util.LazyBigIntArray; -import com.browseengine.bobo.util.MemoryManager; -import com.browseengine.bobo.util.MemoryManagerAdminMBean; +import org.apache.log4j.Logger; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; public abstract class DefaultFacetCountCollector implements FacetCountCollector { private static final 
Logger log = Logger.getLogger(DefaultFacetCountCollector.class.getName()); - protected final FacetSpec _ospec; + public BigSegmentedArray _count; - public int _countlength; - protected FacetDataCache _dataCache; - private final String _name; + + protected final FacetSpec _ospec; + protected final FacetDataCache _dataCache; protected final BrowseSelection _sel; protected final BigSegmentedArray _array; - private int _docBase; - protected final LinkedList intarraylist = new LinkedList(); - private Iterator _iterator; - private boolean _closed = false; - protected static MemoryManager intarraymgr = new MemoryManager(new MemoryManager.Initializer() - { - public void init(BigSegmentedArray buf) - { - buf.fill(0); - } - - public BigSegmentedArray newInstance(int size) - { - return new LazyBigIntArray(size); - } - - public int size(BigSegmentedArray buf) - { - assert buf != null; - return buf.size(); - } - - }); - - static{ - try{ - // register memory manager mbean - MBeanServer mbeanServer = java.lang.management.ManagementFactory.getPlatformMBeanServer(); - ObjectName mbeanName = new ObjectName(JMXUtil.JMX_DOMAIN,"name","DefaultFacetCountCollector-MemoryManager"); - StandardMBean mbean = new StandardMBean(intarraymgr.getAdminMBean(), MemoryManagerAdminMBean.class); - mbeanServer.registerMBean(mbean, mbeanName); - } - catch(Exception e){ - log.error(e.getMessage(),e); - } - } + private final String _name; + private boolean _closed = false; public DefaultFacetCountCollector(String name,FacetDataCache dataCache,int docBase, BrowseSelection sel,FacetSpec ospec) @@ -109,19 +66,9 @@ public DefaultFacetCountCollector(String name,FacetDataCache dataCache,int docBa _ospec = ospec; _name = name; _dataCache=dataCache; - _countlength = _dataCache.freqs.length; - - if (_dataCache.freqs.length <= 3096) - { - _count = new LazyBigIntArray(_countlength); - } else - { - _count = intarraymgr.get(_countlength); - intarraylist.add(_count); - } - + _countlength = _dataCache.valArray.size(); + 
_count = new LazyBigIntArray(_countlength);
     _array = _dataCache.orderArray;
-    _docBase = docBase;
   }
 
   public String getName()
@@ -270,10 +217,6 @@ public void close()
       return;
     }
     _closed = true;
-    while(!intarraylist.isEmpty())
-    {
-      intarraymgr.release(intarraylist.poll());
-    }
   }
 
   /**
diff --git a/bobo-browse/src/main/java/com/browseengine/bobo/util/BigNestedIntArray.java b/bobo-browse/src/main/java/com/browseengine/bobo/util/BigNestedIntArray.java
index a6fbb869..d166d162 100644
--- a/bobo-browse/src/main/java/com/browseengine/bobo/util/BigNestedIntArray.java
+++ b/bobo-browse/src/main/java/com/browseengine/bobo/util/BigNestedIntArray.java
@@ -679,6 +679,39 @@ else if(val != MISSING)
     }
     return false;
   }
+
+  /** True if some value v stored for id has (v >> shift) in [startValue, endValue) and, unless value < 0, (v & mask) == value. */
+  public final boolean containsValueInRangeShift(int id, int value, int startValue, int endValue, int shift, int mask)
+  {
+    final int[] page = _list[id >> PAGEID_SHIFT];
+    if(page == null) return false;
+    final int val = page[id & SLOTID_MASK];
+    if (val >= 0)
+    {
+      int shiftedVal = val >> shift;
+      if(shiftedVal >= startValue && shiftedVal < endValue)
+      {
+        if(value < 0 || (val & mask) == value)
+          return true;
+      }
+    }
+    else if(val != MISSING)
+    {
+      int idx = - (val >> VALIDX_SHIFT);// signed shift, remember this is a negative number
+      int end = idx + (val & COUNT_MASK);
+      while(idx < end)
+      {
+        final int item = page[idx++]; // test each stored value, not the negative index/count word held in val
+        int shiftedVal = item >> shift;
+        if(shiftedVal >= startValue && shiftedVal < endValue)
+        {
+          if(value < 0 || (item & mask) == value)
+            return true;
+        }
+      }
+    }
+    return false;
+  }
 
   public final boolean contains(int id, OpenBitSet values)
   {
@@ -861,6 +894,54 @@ else if(val != MISSING)
     return DocIdSetIterator.NO_MORE_DOCS;
   }
 
+  /** Scans ids from id up to maxID and returns the first one holding a value v with (v >> shift) in [startValue, endValue) and (value < 0 or (v & mask) == value); otherwise NO_MORE_DOCS. */
+  public final int findValueInRangeShift(int startValue, int endValue, int value, int shift, int mask, int id, int maxID)
+  {
+    int[] page = _list[id >> PAGEID_SHIFT];
+    if(page == null)
+      page = MISSING_PAGE;
+
+    while(true)
+    {
+      int val = page[id & SLOTID_MASK];
+      if (val >= 0)
+      {
+        int shiftedVal = val >> shift;
+        if(shiftedVal >= startValue && shiftedVal < endValue)
+        {
+          if(value < 0 || (val & mask) == value)
+            return id;
+        }
+      }
+      else if(val != MISSING)
+      {
+        int idx = - (val >> VALIDX_SHIFT);// signed shift, remember this is a negative number
+        int end = idx + (val & COUNT_MASK);
+        while(idx < end)
+        {
+          val = page[idx++];
+          int shiftedVal = val >> shift;
+          if(shiftedVal >= startValue && shiftedVal < endValue)
+          {
+            if(value < 0 || (val & mask) == value)
+              return id;
+          }
+        }
+      }
+      if(id >= maxID)
+        break;
+
+      if((++id & SLOTID_MASK) == 0)
+      {
+        page = _list[id >> PAGEID_SHIFT];
+        if(page == null)
+          page = MISSING_PAGE;
+      }
+    }
+
+    return DocIdSetIterator.NO_MORE_DOCS;
+  }
+
   public final int count(final int id, final int[] count)
   {
     final int[] page = _list[id >> PAGEID_SHIFT];
@@ -1058,6 +1139,56 @@ else if(val != MISSING)
     return;
   }
 
+  /** For values v of id with (v >> shift) in [startVal, endVal), bumps count[v & mask] once per run of equal masked values; bumps count[0] for a null page or MISSING slot. Note: mask precedes shift in this signature. */
+  public final void countNoReturnWithRangeShift(final int id, final BigSegmentedArray count,
+                                                int startVal, int endVal, int mask, int shift)
+  {
+    final int[] page = _list[id >> PAGEID_SHIFT];
+    if(page == null) {
+      count.add(0, count.get(0) + 1);
+      return;
+    }
+
+    int val = page[id & SLOTID_MASK];
+    if(val >= 0)
+    {
+      int shiftedVal = val >> shift;
+      if (shiftedVal >= startVal && shiftedVal < endVal)
+      {
+        val = val & mask;
+        count.add(val, count.get(val) + 1);
+      }
+      return;
+    }
+    else if(val != MISSING)
+    {
+      int idx = - (val >> VALIDX_SHIFT); // signed shift, remember val is a negative number
+      int cnt = (val & COUNT_MASK);
+      int end = idx + cnt;
+      if (idx < end)
+      {
+        int prev = -1;
+        while(idx < end)
+        {
+          int value = page[idx++];
+          int shiftedVal = value >> shift;
+          if (shiftedVal >= startVal && shiftedVal < endVal)
+          {
+            value = value & mask;
+            if (prev != -1 && value != prev)
+              count.add(prev, count.get(prev) + 1);
+            prev = value;
+          }
+        }
+        if (prev != -1)
+          count.add(prev, count.get(prev) + 1);
+      }
+      return;
+    }
+    count.add(0, count.get(0) + 1);
+    return;
+  }
+
   /**
    * returns the number data items for id
    * @param id