Skip to content

Commit

Permalink
ttools: add HEALPix presorting to CDS upload skymatch
Browse files Browse the repository at this point in the history
Sorting the rows of an uploaded table by HEALPix pixel index for a
multi-block upload match job should in theory improve performance.
Add a parameter presort to the cdsskymatch task to turn this on.
There are some disadvantages though, so set it false by default.

It also makes it harder to do progress monitoring, so for now do
not provide this option in topcat.  For the topcat xmatch progress
bar to work with this option would require (I think) a new optional
method getRowCount on ConeQuerySequenceFactory.
  • Loading branch information
mbtaylor authored and mmpcn committed Nov 27, 2014
1 parent e0f3a20 commit 2b8a2d5
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 6 deletions.
14 changes: 14 additions & 0 deletions table/src/main/uk/ac/starlink/table/Tables.java
Expand Up @@ -394,6 +394,20 @@ else if ( ival > Integer.MAX_VALUE ) {
}
}

/**
 * Narrows a long to an int, asserting that the value survives the
 * narrowing unchanged.  The check is an <code>assert</code> statement,
 * so it is only performed when assertions are enabled; otherwise an
 * out-of-range value is silently truncated.
 *
 * @param  lval  long value, asserted to be in the range
 *               Integer.MIN_VALUE..Integer.MAX_VALUE
 * @return  <code>lval</code> cast to an int
 */
public static int assertLongToInt( long lval ) {
    assert lval == (int) lval
         : "Long value " + lval + " unexpectedly out of int range";
    return (int) lval;
}

/**
* Returns an array of strings suitable as labels or label suffixes
* for elements of an array as returned by {@link ValueInfo#getShape}.
Expand Down
@@ -0,0 +1,109 @@
package uk.ac.starlink.ttools.cone;

import java.io.IOException;
import java.util.Arrays;
import java.util.logging.Logger;
import uk.ac.starlink.table.RowPermutedStarTable;
import uk.ac.starlink.table.StarTable;
import uk.ac.starlink.table.Tables;

/**
 * QuerySequenceFactory that presorts rows according to HEALPix pixel index.
 *
 * <p>The rows yielded by a base factory are read once in full to work out
 * their HEALPix pixel indices, and the query sequence eventually returned
 * presents the same rows reordered by ascending pixel index.
 * This requires a random-access input table.
 *
 * @author   Mark Taylor
 * @since    3 Jul 2014
 */
public class HealpixSortedQuerySequenceFactory implements QuerySequenceFactory {

    private final QuerySequenceFactory baseFact_;
    private final PixtoolsHealpix hpix_;
    private static final Logger logger_ =
        Logger.getLogger( "uk.ac.starlink.ttools.cone" );

    /**
     * HEALPix order used for the sort key.
     * HEALPix has 12*4^k pixels at order k, so the largest order for
     * which pixel indices fit into a (signed) int is 13
     * (12*4^13 = 805306368, while 12*4^14 exceeds Integer.MAX_VALUE).
     * The value used here is within that limit.
     */
    private static final int ORDER = 12;

    /** Mask selecting the least significant 32 bits of a long. */
    private static final long LOW_32_MASK = 0xFFFFFFFFL;

    /**
     * Constructor.
     *
     * @param  baseFact  query sequence factory on which this one is based
     */
    public HealpixSortedQuerySequenceFactory( QuerySequenceFactory baseFact ) {
        baseFact_ = baseFact;
        hpix_ = PixtoolsHealpix.getInstance();
    }

    /**
     * Returns a query sequence over the rows that the base factory yields
     * for <code>table</code>, ordered by HEALPix pixel index.
     * The base sequence is read through completely before this method
     * returns.  The <code>getIndex</code> method of the returned sequence
     * reports row indices in the supplied table, not in the sorted view.
     *
     * @param  table  input table, which must be random-access
     * @return   query sequence with rows ordered by HEALPix pixel index
     * @throws  IllegalArgumentException  if <code>table</code> is not
     *          random-access
     * @throws  IOException  if there is an error reading the table
     */
    public ConeQueryRowSequence createQuerySequence( StarTable table )
            throws IOException {
        if ( ! table.isRandom() ) {
            throw new IllegalArgumentException( "Non-random-access table" );
        }
        logger_.info( "Pre-sorting rows by HEALPix index, order " + ORDER );

        /* First store the HEALPix pixel index for each row alongside
         * a record of the row index in the underlying table to which it
         * corresponds.  We pack these into a single long (32 bits for each).
         * 32 bits each is enough: for the HEALPix index because we know
         * the order, and for the table because we bail out if it has more
         * than 2^31 rows. */
        int nrow = Tables.checkedLongToInt( table.getRowCount() );
        long[] codes = new long[ nrow ];
        ConeQueryRowSequence preSeq = baseFact_.createQuerySequence( table );
        int irow = 0;

        /* Make sure the preliminary sequence is closed even if reading
         * it fails part way through. */
        try {
            while ( preSeq.next() ) {
                double ra = preSeq.getRa();
                double dec = preSeq.getDec();
                long lndex = preSeq.getIndex();
                int hpixIndex =
                    Tables
                   .assertLongToInt( hpix_.ang2pix( ORDER, ra, dec ) );
                int rowIndex = Tables.assertLongToInt( lndex );
                codes[ irow ] = packHpixRow( hpixIndex, rowIndex );
                irow++;
            }
        }
        finally {
            preSeq.close();
        }

        /* Sort them.  Since the most significant 32 bits is composed of the
         * HEALPix index, they come out in the right order.  The other end
         * of the long is unimportant.  Only the first irow elements are
         * populated, since the base sequence may yield fewer rows than
         * the table contains. */
        logger_.config( "Sorting " + irow + " HEALPix indices" );
        Arrays.sort( codes, 0, irow );

        /* Turn the array of packed longs into an array of row indices,
         * by discarding the HEALPix index part, no longer needed. */
        final long[] rowMap = new long[ irow ];
        for ( int ir = 0; ir < irow; ir++ ) {
            rowMap[ ir ] = unpackRowIndex( codes[ ir ] );
        }

        /* Use this row index mapping array to provide a query sequence
         * with an adjusted row ordering. */
        StarTable sortedTable = new RowPermutedStarTable( table, rowMap );
        return new WrapperQuerySequence( baseFact_
                                        .createQuerySequence( sortedTable ) ) {

            /* The wrapped sequence reports indices into the sorted table;
             * translate them back to indices in the original table. */
            @Override
            public long getIndex() throws IOException {
                return rowMap[ Tables.assertLongToInt( super.getIndex() ) ];
            }
        };
    }

    /**
     * Packs two integers into a long.  The healpix part goes in the
     * most significant bits.
     *
     * @param  healpixIndex  healpix pixel identifier
     * @param  rowIndex   index in the underlying table of the row
     * @return  packed value; <code>healpixIndex</code> occupies the
     *          upper 32 bits and <code>rowIndex</code> the lower 32 bits
     */
    private static long packHpixRow( int healpixIndex, int rowIndex ) {

        /* Mask the row index so that sign extension of a negative value
         * could never overwrite the healpix bits. */
        return ( (long) healpixIndex << 32 )
             | ( (long) rowIndex & LOW_32_MASK );
    }

    /**
     * Returns the row index from a packed long.
     *
     * @param  packed  value produced by {@link #packHpixRow}
     * @return  value previously packed as <code>rowIndex</code>
     */
    private static int unpackRowIndex( long packed ) {
        return (int) packed;
    }
}
46 changes: 40 additions & 6 deletions ttools/src/main/uk/ac/starlink/ttools/task/CdsUploadSkyMatch.java
Expand Up @@ -23,6 +23,7 @@
import uk.ac.starlink.ttools.cone.CdsUploadMatcher;
import uk.ac.starlink.ttools.cone.Coverage;
import uk.ac.starlink.ttools.cone.CoverageQuerySequenceFactory;
import uk.ac.starlink.ttools.cone.HealpixSortedQuerySequenceFactory;
import uk.ac.starlink.ttools.cone.JELQuerySequenceFactory;
import uk.ac.starlink.ttools.cone.QuerySequenceFactory;
import uk.ac.starlink.ttools.cone.ServiceFindMode;
Expand All @@ -46,6 +47,7 @@ public class CdsUploadSkyMatch extends SingleMapperTask {
private final IntegerParameter maxrecParam_;
private final URLParameter urlParam_;
private final BooleanParameter usemocParam_;
private final BooleanParameter presortParam_;
private final JoinFixActionParameter fixcolsParam_;
private final Parameter insuffixParam_;
private final Parameter cdssuffixParam_;
Expand Down Expand Up @@ -201,6 +203,34 @@ public CdsUploadSkyMatch() {
usemocParam_.setDefault( Boolean.TRUE.toString() );
paramList.add( usemocParam_ );

presortParam_ = new BooleanParameter( "presort" );
presortParam_.setPrompt( "Pre-sort rows before uploading?" );
presortParam_.setDescription( new String[] {
"<p>If true, the rows are sorted by HEALPix index before",
"they are uploaded to the CDS X-Match service.",
"If the match is done in multiple blocks,",
"this may improve efficiency,",
"since when matching against a large remote catalogue",
"the X-Match service likes to process requests",
"in which sources are grouped into a small region",
"rather than scattered all over the sky.",
"</p>",
"<p>Note this will have a couple of other side effects that may",
"be undesirable:",
"it will read all the input rows into the task at once,",
"which may make it harder to assess progress,",
"and it will affect the order of the rows in the output table.",
"</p>",
"<p>It is <em>probably</em> only worth setting true for rather",
"large (multi-million-row?) multi-block matches,",
"where both local and remote catalogues are spread over",
"a significant fraction of the sky.",
"But feel free to experiment",
"</p>",
} );
presortParam_.setDefault( Boolean.FALSE.toString() );
paramList.add( presortParam_ );

fixcolsParam_ = new JoinFixActionParameter( "fixcols" );
insuffixParam_ =
fixcolsParam_.createSuffixParameter( "suffixin",
Expand Down Expand Up @@ -230,7 +260,7 @@ public TableProducer createProducer( Environment env )
"Bad value " + cdsName );
}
double srDeg = sr / 3600.;
final QuerySequenceFactory qsFact =
final QuerySequenceFactory qsFact0 =
new JELQuerySequenceFactory( raString, decString,
Double.toString( srDeg ) );
UserFindMode userMode = findParam_.objectValue( env );
Expand All @@ -242,6 +272,7 @@ public TableProducer createProducer( Environment env )
final Coverage coverage = usemocParam_.booleanValue( env )
? UrlMocCoverage.getVizierMoc( cdsName, -1 )
: null;
final boolean presort = presortParam_.booleanValue( env );
UploadMatcher umatcher =
new CdsUploadMatcher( url, cdsId, sr, serviceMode );
String tableName = "xmatch(" + cdsIdToTableName( cdsId ) + ")";
Expand Down Expand Up @@ -277,11 +308,14 @@ public StarTable getTable() throws IOException, TaskException {
else {
cov = null;
}
QuerySequenceFactory qsFact2 =
cov == null
? qsFact
: new CoverageQuerySequenceFactory( qsFact, cov );
return blocker.runMatch( inTable, qsFact2, storage );
QuerySequenceFactory qsFact1 = qsFact0;
if ( cov != null ) {
qsFact1 = new CoverageQuerySequenceFactory( qsFact1, cov );
}
if ( presort ) {
qsFact1 = new HealpixSortedQuerySequenceFactory( qsFact1 );
}
return blocker.runMatch( inTable, qsFact1, storage );
}
};
}
Expand Down

0 comments on commit 2b8a2d5

Please sign in to comment.