-
Notifications
You must be signed in to change notification settings - Fork 23
/
VotCopyHandler.java
616 lines (561 loc) · 21.9 KB
/
VotCopyHandler.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
package uk.ac.starlink.ttools.copy;
import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
import uk.ac.starlink.table.OnceRowPipe;
import uk.ac.starlink.table.RowStore;
import uk.ac.starlink.table.StarTable;
import uk.ac.starlink.table.StoragePolicy;
import uk.ac.starlink.table.UnrepeatableSequenceException;
import uk.ac.starlink.votable.DataFormat;
import uk.ac.starlink.votable.TableContentHandler;
import uk.ac.starlink.votable.TableHandler;
import uk.ac.starlink.votable.VOSerializer;
import uk.ac.starlink.votable.VOTableVersion;
/**
* SAX content handler which takes SAX events and converts them to
* an output stream in a VOTable-sensitive way. As far as is possible
* given the SAX model, each input SAX event is sent to the output
* unchanged apart from events within a DATA element, which are written
* in one of the VOTable encodings as selected by the user.
*
* <p>One exception to the rule is that, for implementation-specific
* reasons, FIELD elements with <tt>datatype="unsignedByte"</tt> are
* changed to have <tt>datatype="short"</tt> instead.
*
* @author Mark Taylor (Starlink)
* @since 18 Apr 2005
*/
public class VotCopyHandler
implements ContentHandler, LexicalHandler, TableHandler {
private final DataFormat format_;
private final VOTableVersion version_;
private final boolean inline_;
private final boolean squashMagic_;
private final String baseLoc_;
private final boolean strict_;
private final TableContentHandler votParser_;
private final SAXWriter saxWriter_;
private final ContentHandler discardHandler_;
private final HandlerStack handlerStack_;
private final TableHandler tableHandler_;
private ContentHandler handler_;
private BufferedWriter out_;
private Locator locator_;
private int iTable_;
private final static Logger logger_ =
Logger.getLogger( "uk.ac.starlink.ttools" );
/**
* Constructor. The copy can be done in either cached or streamed
* mode, determined by the <tt>cache</tt> parameter.
* In streamed mode, each row encountered in the input SAX stream
* is copied to the output stream as soon as it is encountered.
* In cached mode, the whole table is assembled first, and then
* written out at the end of the input. Streamed mode is more efficient,
* but may not be possible under some circumstances, e.g. for FITS
* output when the number of rows is not known in advance.
* If a streamed copy is attempted when it's not possible,
* it will fail with a
* {@link uk.ac.starlink.table.UnrepeatableSequenceException}
* (wrapped in a SAXException).
*
* @param strict whether to effect strict interpretation of the
* VOTable standard
* @param format encoding type for output DATA elements; may be null
* for DATA-less output
* @param version VOTable standard version for output; may be null for
* unknown or indeterminate, in which case input version
* will be copied as far as possible
* @param inline true for tables written inline, false for tables written
* to an href-referenced stream
* @param squashMagic if true, any VALUES/null attributes are not
* passed through
* @param base base table location; used to construct URIs for
* out-of-line table streams (only used if inline=false)
* @param cache whether tables will be cached prior to writing
* @param policy storage policy for cached tables
*/
public VotCopyHandler( boolean strict, DataFormat format,
VOTableVersion version, boolean inline,
boolean squashMagic,
String base, boolean cache, StoragePolicy policy ) {
if ( ! inline && base == null ) {
throw new IllegalArgumentException( "Must specify base location " +
"for out-of-line tables" );
}
format_ = format;
version_ = version;
inline_ = inline;
squashMagic_ = squashMagic;
baseLoc_ = base;
strict_ = strict;
votParser_ = new TableContentHandler( strict );
votParser_.setReadHrefTables( true );
votParser_.setTableHandler( this );
saxWriter_ = new SAXWriter();
discardHandler_ = new DefaultHandler();
handlerStack_ = new HandlerStack();
handler_ = saxWriter_;
setOutput( new OutputStreamWriter( System.out ) );
/* Set up a handler to which table events will be forwarded. */
if ( format_ == null ) {
tableHandler_ = new EmptyTableHandler();
}
else if ( cache ) {
tableHandler_ = new CacheTableHandler( policy );
}
else {
tableHandler_ = new StreamTableHandler();
}
}
/**
 * Sets the destination to which output is written.
 * The supplied writer is wrapped in a buffer, which is used both for
 * the copied SAX events and for the rewritten table data.
 * If this is not called, output goes to standard output using the
 * platform's default encoding.
 *
 * @param  out  output writer
 */
public void setOutput( Writer out ) {
    BufferedWriter buffered = new BufferedWriter( out );
    out_ = buffered;
    saxWriter_.setOutput( buffered );
}
/**
 * TableHandler callback marking the start of a table.
 * This object registers itself as the table handler of the VOTable
 * parser in the constructor; the event is simply forwarded to the
 * table handler selected at construction time.
 *
 * @param  meta  table object supplied with the event, passed on unchanged
 */
public void startTable( final StarTable meta ) throws SAXException {
/* Table events are only expected while normal SAX output is switched
 * off, as it is on entry to a DATA element. */
assert handler_ == discardHandler_;
tableHandler_.startTable( meta );
}
/**
 * TableHandler callback supplying one row of table data.
 * The row is forwarded to the table handler selected at
 * construction time.
 *
 * @param  row  cell values for the row, passed on unchanged
 */
public void rowData( Object[] row ) throws SAXException {
/* Rows are only expected while normal SAX output is switched off. */
assert handler_ == discardHandler_;
tableHandler_.rowData( row );
}
/**
 * TableHandler callback marking the end of a table.
 * The event is forwarded to the table handler selected at
 * construction time.
 */
public void endTable() throws SAXException {
/* The end of a table is only expected while normal SAX output is
 * switched off. */
assert handler_ == discardHandler_;
tableHandler_.endTable();
}
/**
 * Records the document locator and passes it on to both the VOTable
 * parser and the SAX writer.
 * The stored locator is used to position log messages and
 * parse exceptions generated by this handler.
 *
 * @param  locator  locator supplied by the SAX parser
 */
public void setDocumentLocator( Locator locator ) {
locator_ = locator;
votParser_.setDocumentLocator( locator );
saxWriter_.setDocumentLocator( locator );
}
/**
 * Passes the start-of-document event to the VOTable parser and the
 * SAX writer, and resets the table counter.
 */
public void startDocument() throws SAXException {
votParser_.startDocument();
saxWriter_.startDocument();
/* Restart table numbering; writeDataElement increments this counter
 * once per table written. */
iTable_ = 0;
}
/**
 * Passes the end-of-document event to the VOTable parser and then to
 * the SAX writer.
 */
public void endDocument() throws SAXException {
votParser_.endDocument();
saxWriter_.endDocument();
}
/**
 * Handles the start of an element.
 * The unmodified event is always passed to the VOTable parser.
 * The event passed to the current output handler may differ from the
 * input in the following ways:
 * <ul>
 * <li>VOTABLE: if an output version was specified, the
 *     <code>version</code>, <code>xmlns</code> and (if present)
 *     <code>xsi:schemaLocation</code> attributes are rewritten to
 *     match it</li>
 * <li>DATA: the current handler is saved and replaced by one which
 *     discards events</li>
 * <li>FIELD: <code>datatype="unsignedByte"</code> is changed to
 *     <code>"short"</code>, and a missing <code>arraysize</code> is
 *     supplied for character types</li>
 * <li>VALUES: if magic value squashing is on, the current handler is
 *     saved and replaced by one constructed to squash the
 *     <code>null</code> attribute</li>
 * </ul>
 *
 * @param  namespaceURI  element namespace URI
 * @param  localName   element local name; used to identify the element
 * @param  qName   element qualified name
 * @param  atts   element attributes as read from the input
 */
public void startElement( String namespaceURI, String localName,
                          String qName, Attributes atts )
        throws SAXException {

    /* The VOTable parser always sees the original event. */
    votParser_.startElement( namespaceURI, localName, qName, atts );

    /* Adjust version-related attributes to match the output version. */
    if ( "VOTABLE".equals( localName ) && version_ != null ) {
        AttributesImpl newAtts = new AttributesImpl( atts );
        fixAttribute( newAtts, "version", version_.getVersionNumber() );
        fixAttribute( newAtts, "xmlns", version_.getXmlNamespace() );
        int ixsl = newAtts
                  .getIndex( "http://www.w3.org/2001/XMLSchema-instance",
                             "schemaLocation" );
        if ( ixsl >= 0 ) {
            newAtts.setValue( ixsl, version_.getXmlNamespace() + " "
                                  + version_.getSchemaLocation() );
        }
        atts = newAtts;
    }

    /* Stop copying SAX events until the matching end tag; pending
     * copied output is flushed first. */
    else if ( "DATA".equals( localName ) ) {
        handlerStack_.push( handler_ );
        handler_ = discardHandler_;
        saxWriter_.flush();
    }

    else if ( "FIELD".equals( localName ) ) {
        String datatype = atts.getValue( "datatype" );

        /* Unfortunately we have to translate unsignedByte datatypes
         * to short ones here.  This is because the serializers in the
         * VOTable package all use short as an internal representation
         * for unsignedByte because of the difficulties of representing
         * an unsigned value in Java. */
        if ( "unsignedByte".equals( datatype ) ) {
            AttributesImpl newAtts = new AttributesImpl( atts );
            int itype = newAtts.getIndex( "datatype" );
            newAtts.setValue( itype, "short" );
            atts = newAtts;
            log( Level.WARNING, "FIELD datatype has been changed from " +
                                "unsignedByte to short" );
        }

        /* Supply an explicit arraysize for character columns which
         * lack one. */
        if ( ( "char".equals( datatype ) ||
               "unicodeChar".equals( datatype ) ) &&
             atts.getValue( "arraysize" ) == null ) {
            String arraysize;
            if ( strict_ ) {
                arraysize = "1";
                log( Level.INFO, "Inserted arraysize=\"1\" attribute " +
                                 "to reduce confusion" );
            }
            else {
                arraysize = "*";
                log( Level.WARNING, "Inserted assumed arraysize=\"*\" " +
                                    "attribute" );
            }
            AttributesImpl newAtts = new AttributesImpl( atts );
            newAtts.addAttribute( "", "arraysize", "arraysize", "CDATA",
                                  arraysize );
            atts = newAtts;
        }
    }

    /* Hand over to a squashing handler until the matching end tag;
     * pending copied output is flushed first, since the new handler
     * is given the underlying writer directly. */
    else if ( "VALUES".equals( localName ) && squashMagic_ ) {
        handlerStack_.push( handler_ );
        saxWriter_.flush();
        handler_ = new SquashAttributeHandler( out_, "null", true );
    }

    /* Pass the (possibly modified) event to whichever handler is now
     * current. */
    handler_.startElement( namespaceURI, localName, qName, atts );
}
/**
 * Handles the end of an element.
 * The event goes to the VOTable parser and to the current output
 * handler; after that, if the element was one for which the handler
 * was switched at its start tag, the previous handler is reinstated.
 *
 * @param  namespaceURI  element namespace URI
 * @param  localName   element local name; used to identify the element
 * @param  qName   element qualified name
 */
public void endElement( String namespaceURI, String localName,
                        String qName ) throws SAXException {
    votParser_.endElement( namespaceURI, localName, qName );
    handler_.endElement( namespaceURI, localName, qName );

    /* End of DATA: resume using the handler saved at its start. */
    if ( "DATA".equals( localName ) ) {
        handler_ = handlerStack_.pop();
        return;
    }

    /* End of a squashed VALUES element: flush the squashing handler
     * and resume using the handler saved at its start. */
    if ( handler_ instanceof SquashAttributeHandler &&
         "VALUES".equals( localName ) ) {
        SquashAttributeHandler squasher = (SquashAttributeHandler) handler_;
        squasher.flush();
        handler_ = handlerStack_.pop();
    }
}
/**
 * Passes a character data event first to the VOTable parser and then
 * to the current output handler.
 *
 * @param  ch  character buffer
 * @param  start  index of the first character to use
 * @param  length  number of characters to use
 */
public void characters( char[] ch, int start, int length )
throws SAXException {
votParser_.characters( ch, start, length );
handler_.characters( ch, start, length );
}
/**
 * Passes an ignorable whitespace event first to the VOTable parser and
 * then to the current output handler.
 *
 * @param  ch  character buffer
 * @param  start  index of the first character to use
 * @param  length  number of characters to use
 */
public void ignorableWhitespace( char[] ch, int start, int length )
throws SAXException {
votParser_.ignorableWhitespace( ch, start, length );
handler_.ignorableWhitespace( ch, start, length );
}
/**
 * Passes a start-of-prefix-mapping event first to the VOTable parser
 * and then to the current output handler.
 *
 * @param  prefix  namespace prefix being declared
 * @param  uri  namespace URI the prefix is mapped to
 */
public void startPrefixMapping( String prefix, String uri )
throws SAXException {
votParser_.startPrefixMapping( prefix, uri );
handler_.startPrefixMapping( prefix, uri );
}
/**
 * Passes an end-of-prefix-mapping event first to the VOTable parser
 * and then to the current output handler.
 *
 * @param  prefix  namespace prefix whose mapping has ended
 */
public void endPrefixMapping( String prefix ) throws SAXException {
votParser_.endPrefixMapping( prefix );
handler_.endPrefixMapping( prefix );
}
/**
 * Passes a skipped-entity event first to the VOTable parser and then
 * to the current output handler.
 *
 * @param  name  name of the skipped entity
 */
public void skippedEntity( String name ) throws SAXException {
votParser_.skippedEntity( name );
handler_.skippedEntity( name );
}
/**
 * Passes a processing instruction event first to the VOTable parser
 * and then to the current output handler.
 *
 * @param  target  processing instruction target
 * @param  data  processing instruction data
 */
public void processingInstruction( String target, String data )
throws SAXException {
votParser_.processingInstruction( target, data );
handler_.processingInstruction( target, data );
}
/**
 * Passes an XML comment to the current output handler, provided that
 * handler is capable of receiving lexical events; otherwise the
 * comment is dropped.  The VOTable parser is not notified.
 *
 * @param  ch  character buffer
 * @param  start  index of the first comment character
 * @param  length  number of comment characters
 */
public void comment( char[] ch, int start, int length )
        throws SAXException {
    if ( ! ( handler_ instanceof LexicalHandler ) ) {
        return;
    }
    LexicalHandler lexHandler = (LexicalHandler) handler_;
    lexHandler.comment( ch, start, length );
}
/**
 * Passes a start-of-CDATA event to the current output handler,
 * provided that handler is capable of receiving lexical events.
 */
public void startCDATA() throws SAXException {
    if ( ! ( handler_ instanceof LexicalHandler ) ) {
        return;
    }
    LexicalHandler lexHandler = (LexicalHandler) handler_;
    lexHandler.startCDATA();
}
/**
 * Passes an end-of-CDATA event to the current output handler,
 * provided that handler is capable of receiving lexical events.
 */
public void endCDATA() throws SAXException {
    if ( ! ( handler_ instanceof LexicalHandler ) ) {
        return;
    }
    LexicalHandler lexHandler = (LexicalHandler) handler_;
    lexHandler.endCDATA();
}
/**
 * Handles the start of a DTD declaration by switching off output.
 * The current handler is saved on the handler stack and replaced by
 * the discarding handler, so that events arriving before the matching
 * {@link #endDTD} call are not copied to the output.
 *
 * <p>The DTD event itself is not forwarded: the discarding handler is
 * a plain <code>DefaultHandler</code>, which does not implement
 * <code>LexicalHandler</code> and so could never receive it.
 *
 * @param  name  document type name (unused)
 * @param  publicId  declared public identifier (unused)
 * @param  systemId  declared system identifier (unused)
 */
public void startDTD( String name, String publicId, String systemId )
        throws SAXException {
    handlerStack_.push( handler_ );
    handler_ = discardHandler_;
}
/**
 * Handles the end of a DTD declaration.
 * The event is passed to the current handler if it can receive
 * lexical events, and then the handler which was saved by
 * {@link #startDTD} is reinstated.
 */
public void endDTD() throws SAXException {
    ContentHandler current = handler_;
    if ( current instanceof LexicalHandler ) {
        ((LexicalHandler) current).endDTD();
    }
    handler_ = handlerStack_.pop();
}
/**
 * Passes a start-of-entity event to the current output handler,
 * provided that handler is capable of receiving lexical events.
 *
 * @param  name  entity name
 */
public void startEntity( String name ) throws SAXException {
    if ( ! ( handler_ instanceof LexicalHandler ) ) {
        return;
    }
    LexicalHandler lexHandler = (LexicalHandler) handler_;
    lexHandler.startEntity( name );
}
/**
 * Passes an end-of-entity event to the current output handler,
 * provided that handler is capable of receiving lexical events.
 *
 * @param  name  entity name
 */
public void endEntity( String name ) throws SAXException {
    if ( ! ( handler_ instanceof LexicalHandler ) ) {
        return;
    }
    LexicalHandler lexHandler = (LexicalHandler) handler_;
    lexHandler.endEntity( name );
}
/**
 * Outputs a DATA element representing a table to the destination stream
 * according to the current settings.
 *
 * <p>Each call increments the table counter.  If out-of-line output was
 * requested and the output format is one of BINARY, BINARY2 or FITS,
 * the data is written to a new file named
 * <code>base-N.ext</code>, where <code>N</code> is the table counter,
 * and referenced from the DATA element by href.
 * In all other cases the data is written inline.
 *
 * @param   table  table to write
 * @throws  IOException  if the table data cannot be written
 */
public void writeDataElement( StarTable table ) throws IOException {
    iTable_++;

    /* Construct a serializer which can write the table data.
     * V13 is used if no output version was specified. */
    VOTableVersion serVers = version_ != null
                           ? version_
                           : VOTableVersion.V13;
    VOSerializer voser =
        VOSerializer.makeSerializer( format_, serVers, table );

    /* Work out the file extension for out-of-line output; null means
     * the format is only ever written inline. */
    final String ext;
    if ( format_ == DataFormat.BINARY ) {
        ext = ".bin";
    }
    else if ( format_ == DataFormat.BINARY2 ) {
        ext = ".bin2";
    }
    else if ( format_ == DataFormat.FITS ) {
        ext = ".fits";
    }
    else {
        ext = null;
    }

    /* If it's out-of-line, open a new file for output and write data
     * to it. */
    if ( ext != null && ! inline_ && baseLoc_ != null ) {
        File file = new File( baseLoc_ + "-" + iTable_ + ext );
        if ( file.exists() ) {
            log( Level.WARNING, "Overwriting file " + file + " for table " +
                                iTable_ + " data" );
        }
        else {
            log( Level.INFO, "Writing data for table " + iTable_ +
                             " in file " + file );
        }
        String href = file.toString();
        DataOutputStream datstrm = new DataOutputStream(
                                       new BufferedOutputStream(
                                           new FileOutputStream( file ) ) );

        /* Make sure the file is closed even if writing fails part way
         * through, so that the file handle is not leaked. */
        boolean written = false;
        try {
            voser.writeHrefDataElement( out_, href, datstrm );
            datstrm.flush();
            written = true;
        }
        finally {
            if ( written ) {
                datstrm.close();
            }
            else {
                try {
                    datstrm.close();
                }
                catch ( IOException e ) {
                    /* A write failure is already propagating;
                     * don't let this one replace it. */
                }
            }
        }
    }

    /* Otherwise, just write the data inline. */
    else {
        voser.writeInlineDataElement( out_ );
    }
}
/**
 * Writes a message through the log system.
 * If a document locator is available and reports a non-negative line
 * number, the message is prefixed by the current position in the
 * form "l.LINE: " or "l.LINE, c.COLUMN: ".
 *
 * @param   level  log level
 * @param   msg  message
 */
private void log( Level level, String msg ) {
    String position = "";
    if ( locator_ != null ) {
        int line = locator_.getLineNumber();
        int col = locator_.getColumnNumber();
        if ( line >= 0 ) {
            position = "l." + line;
            if ( col >= 0 ) {
                position = position + ", c." + col;
            }
            position = position + ": ";
        }
    }
    logger_.log( level, position + msg );
}
/**
 * Ensures that a named attribute has a given value.
 * An existing attribute with the given qualified name has its value
 * replaced; if there is none, a new attribute of type CDATA with an
 * empty namespace URI is appended, using the name as both its local
 * and qualified name.
 *
 * @param  atts  attribute set, modified in place
 * @param  name  qualified name of attribute
 * @param  value  new value of attribute
 */
private static void fixAttribute( AttributesImpl atts, String name,
                                  String value ) {
    final int index = atts.getIndex( name );
    if ( index < 0 ) {
        atts.addAttribute( "", name, name, "CDATA", value );
        return;
    }
    atts.setValue( index, value );
}
/**
 * Table handler implementation used for DATA-less output.
 * It writes a short XML comment in place of each table's data and
 * ignores the rows.
 */
private class EmptyTableHandler implements TableHandler {

    /**
     * Writes a placeholder comment to the output.
     *
     * @param  meta  table object supplied with the event (unused)
     */
    public void startTable( StarTable meta ) {
        try {
            out_.write( "<!-- no data -->" );
        }
        catch ( IOException ignored ) {
            /* Deliberately best-effort: the placeholder comment is
             * not essential to the output. */
        }
    }

    /**
     * Does nothing; row data is not written.
     *
     * @param  row  row cell values (unused)
     */
    public void rowData( Object[] row ) {
    }

    /**
     * Does nothing.
     */
    public void endTable() {
    }
}
/**
* Table handler implementation which copies table data from a stream
* as it comes in. This is only any good if the output can be written
* using a one-pass stream.
*/
private class StreamTableHandler implements TableHandler {
private Thread streamThread_;
private OnceRowPipe streamStore_;
private IOException error_;
public void startTable( final StarTable meta ) throws SAXException {
assert streamThread_ == null;
streamStore_ = new OnceRowPipe();
streamStore_.acceptMetadata( meta );
streamThread_ = new Thread( "Table Streamer" ) {
public void run() {
try {
writeDataElement( streamStore_.waitForStarTable() );
}
catch ( IOException e ) {
error_ = e;
}
}
};
streamThread_.start();
}
public void rowData( Object[] row ) throws SAXException {
try {
streamStore_.acceptRow( row );
}
catch ( IOException e ) {
throw (SAXException)
new SAXParseException( e.getMessage(), locator_ )
.initCause( e );
}
}
public void endTable() throws SAXException {
streamStore_.endRows();
try {
streamThread_.join();
}
catch ( InterruptedException e ) {
throw (SAXException)
new SAXParseException( "Interrupted", locator_ )
.initCause( e );
}
streamThread_ = null;
streamStore_ = null;
/* If an error was encountered during writing the table (at the
* other end of the stream), rethrow it here. */
if ( error_ != null ) {
String msg;
if ( error_ instanceof UnrepeatableSequenceException ) {
msg = "Can't stream, " +
"table requires multiple reads for metadata - " +
"try with caching";
}
else {
msg = error_.getMessage();
}
throw (SAXException) new SAXParseException( msg, locator_ )
.initCause( error_ );
}
}
}
/**
 * Table handler implementation which accumulates each table in a row
 * store obtained from a StoragePolicy, and writes it to the output
 * only once the last row has been received.
 */
private class CacheTableHandler implements TableHandler {

    private final StoragePolicy policy_;
    private RowStore rowStore_;

    /**
     * Constructor.
     *
     * @param  policy  storage policy used to create a row store
     *                 for each table
     */
    public CacheTableHandler( StoragePolicy policy ) {
        policy_ = policy;
    }

    /**
     * Creates a new row store and passes it the table metadata.
     *
     * @param  meta  table object supplied with the event
     * @throws  SAXException  wrapping any IOException from the store
     */
    public void startTable( StarTable meta ) throws SAXException {
        assert rowStore_ == null;
        rowStore_ = policy_.makeRowStore();
        try {
            rowStore_.acceptMetadata( meta );
        }
        catch ( IOException e ) {
            throw toSaxException( e );
        }
    }

    /**
     * Adds one row to the row store.
     *
     * @param  row  row cell values
     * @throws  SAXException  wrapping any IOException from the store
     */
    public void rowData( Object[] row ) throws SAXException {
        try {
            rowStore_.acceptRow( row );
        }
        catch ( IOException e ) {
            throw toSaxException( e );
        }
    }

    /**
     * Completes the row store and writes the stored table as a
     * DATA element.  The store is released only if that succeeds.
     *
     * @throws  SAXException  wrapping any IOException from the store
     *          or from writing
     */
    public void endTable() throws SAXException {
        try {
            rowStore_.endRows();
            StarTable cached = rowStore_.getStarTable();
            writeDataElement( cached );
            rowStore_ = null;
        }
        catch ( IOException e ) {
            throw toSaxException( e );
        }
    }

    /**
     * Wraps an IOException in a SAXParseException which carries the
     * same message and the current document position.
     *
     * @param  e  exception to wrap
     * @return  exception with <code>e</code> as its cause
     */
    private SAXException toSaxException( IOException e ) {
        return (SAXException)
               new SAXParseException( e.getMessage(), locator_ )
              .initCause( e );
    }
}
/**
 * Last-in-first-out store used to remember which ContentHandler was
 * in use before it was temporarily replaced.
 * This may be more than is needed: as the enclosing class currently
 * works, events are either passed to a SAX copier or diverted for the
 * duration of a single element, so the stack rarely holds more than
 * one entry.
 */
private static class HandlerStack {

    private final List stack_ = new ArrayList();
    private ContentHandler top_;

    /**
     * Pushes a new handler on the stack.
     *
     * @param  handler  new top handler
     */
    public void push( ContentHandler handler ) {
        top_ = handler;
        stack_.add( handler );
    }

    /**
     * Pops a handler off the stack.
     *
     * @return   newly-removed handler
     */
    public ContentHandler pop() {
        ContentHandler removed =
            (ContentHandler) stack_.remove( stack_.size() - 1 );

        /* Keep track of whichever handler is now uppermost, if any. */
        top_ = stack_.isEmpty()
             ? null
             : (ContentHandler) stack_.get( stack_.size() - 1 );
        return removed;
    }
}
}