/
DownloadInstanceWriter.java
465 lines (393 loc) · 25.4 KB
/
DownloadInstanceWriter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package edu.harvard.iq.dataverse.api;
import java.lang.reflect.Type;
import java.lang.annotation.Annotation;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.Response;
import javax.ws.rs.ext.MessageBodyWriter;
import javax.ws.rs.ext.Provider;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.dataaccess.*;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.CreateGuestbookResponseCommand;
import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean;
import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry;
import java.io.File;
import java.io.FileInputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.inject.Inject;
import javax.ws.rs.NotFoundException;
import javax.ws.rs.RedirectionException;
import javax.ws.rs.ServiceUnavailableException;
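/**
* JAX-RS {@link MessageBodyWriter} that serializes a {@link DownloadInstance}
* by streaming the bytes of the data file it refers to (optionally converted,
* subset, replaced by an auxiliary file, or redirected to S3) to the HTTP
* response.
*
* @author Leonid Andreev
*/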
@Provider
public class DownloadInstanceWriter implements MessageBodyWriter<DownloadInstance> {
@Inject
MakeDataCountLoggingServiceBean mdcLogService;
private static final Logger logger = Logger.getLogger(DownloadInstanceWriter.class.getCanonicalName());
/**
 * Reports whether this writer can serialize the given entity class.
 * Only {@link DownloadInstance} itself is accepted; the generic type,
 * annotations and media type are not consulted.
 *
 * @return {@code true} only when {@code clazz} is exactly {@code DownloadInstance.class}
 */
@Override
public boolean isWriteable(Class<?> clazz, Type type, Annotation[] annotation, MediaType mediaType) {
    final boolean isDownloadInstance = DownloadInstance.class == clazz;
    return isDownloadInstance;
}
/**
 * Always answers -1: the response length is not computed here. When the
 * size is known, {@code writeTo} adds the Content-Length header itself.
 *
 * @return -1 for every input
 */
@Override
public long getSize(DownloadInstance di, Class<?> clazz, Type type, Annotation[] annotation, MediaType mediaType) {
    final long sizeNotComputed = -1L;
    return sizeNotComputed;
}
/**
 * Writes the file described by the supplied {@link DownloadInstance} to the
 * response entity stream.
 * <p>
 * The physical file is opened through its {@code StorageIO} driver. Then
 * exactly one of the following is applied, in this order of precedence:
 * a requested conversion (image thumbnail, or the tabular "noVarHeader",
 * "format" and "subset" services), an auxiliary file lookup, or an S3
 * download redirect. The resulting stream is copied to {@code outstream}
 * after the Content-disposition, Content-Type and (when the size is known)
 * Content-Length headers have been added.
 *
 * @param di          the download request
 * @param clazz       not used
 * @param type        not used
 * @param annotation  not used
 * @param mediaType   not used
 * @param httpHeaders mutable response headers; populated before any byte is written
 * @param outstream   response entity stream; closed by this method after a successful write
 * @throws NotFoundException           if the file cannot be opened, a requested
 *                                     conversion cannot be performed, or there is
 *                                     nothing to stream
 * @throws ServiceUnavailableException if an S3 redirect was called for but no
 *                                     redirect URL could be produced
 * @throws RedirectionException        (303 See Other) when the download is
 *                                     redirected to S3
 * @throws IOException                 on failures reading the file or writing the response
 */
@Override
public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[] annotation, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream outstream) throws IOException, WebApplicationException {
    if (di.getDownloadInfo() != null && di.getDownloadInfo().getDataFile() != null) {
        DataAccessRequest daReq = new DataAccessRequest();
        DataFile dataFile = di.getDownloadInfo().getDataFile();
        StorageIO<DataFile> storageIO = DataAccess.getStorageIO(dataFile, daReq);

        if (storageIO != null) {
            try {
                storageIO.open();
            } catch (IOException ioex) {
                logger.log(Level.INFO, "Datafile {0}: Failed to locate and/or open physical file. Error message: {1}", new Object[]{dataFile.getId(), ioex.getLocalizedMessage()});
                throw new NotFoundException("Datafile "+dataFile.getId()+": Failed to locate and/or open physical file.");
            }

            if (di.getConversionParam() != null) {
                // Image Thumbnail and Tabular data conversion:
                // NOTE: only supported on local files, as of 4.0.2!
                // NOTE: should be supported on all files for which StorageIO drivers
                // are available (but not on harvested files) -- L.A. 4.6.2
                storageIO = applyConversion(di, dataFile, storageIO);

                if (storageIO == null) {
                    // 404/not found may be a better return code option here
                    // (similarly to what the Access API returns when a thumbnail is requested on a text file, etc.)
                    throw new NotFoundException("datafile access error: requested optional service (image scaling, format conversion, etc.) could not be performed on this datafile.");
                }
            } else if (di.getAuxiliaryFile() != null) {
                // Make sure to close the InputStream for the main datafile:
                closeInputStreamQuietly(storageIO);
                String auxTag = di.getAuxiliaryFile().getFormatTag();
                String auxVersion = di.getAuxiliaryFile().getFormatVersion();
                if (auxVersion != null) {
                    auxTag = auxTag + "_" + auxVersion;
                }
                long auxFileSize = di.getAuxiliaryFile().getFileSize();
                InputStreamIO auxStreamIO = new InputStreamIO(storageIO.getAuxFileAsInputStream(auxTag), auxFileSize);
                auxStreamIO.setFileName(storageIO.getFileName() + "." + auxTag);
                auxStreamIO.setMimeType(di.getAuxiliaryFile().getContentType());
                storageIO = auxStreamIO;
            } else if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && ((S3AccessIO) storageIO).downloadRedirectEnabled()) {
                // Never falls through: either redirects or reports 503.
                throw createS3Redirect(di, storageIO);
            }

            InputStream instream = storageIO.getInputStream();
            if (instream != null) {
                // try-with-resources: the input stream must be released even when
                // the copy fails half-way (e.g. the client disconnects), otherwise
                // S3-backed streams stay checked out of the connection pool.
                try (InputStream content = instream) {
                    // headers:
                    String fileName = storageIO.getFileName();
                    String mimeType = storageIO.getMimeType();

                    // Provide both the "Content-disposition" and "Content-Type" headers,
                    // to satisfy the widest selection of browsers out there.
                    // Encode the filename as UTF-8, then deal with spaces. "encode" changes
                    // a space to + so we change it to %20.
                    String finalFileName = URLEncoder.encode(fileName, "UTF-8").replaceAll("\\+", "%20");
                    httpHeaders.add("Content-disposition", "attachment; filename=\"" + finalFileName + "\"");
                    httpHeaders.add("Content-Type", mimeType + "; name=\"" + finalFileName + "\"");

                    // (the httpHeaders map must be modified *before* writing any
                    // data in the output stream!)
                    long contentSize = getContentSize(storageIO);
                    if (contentSize > 0) {
                        logger.fine("Content size (retrieved from the AccessObject): "+contentSize);
                        httpHeaders.add("Content-Length", contentSize);
                    }

                    // before writing out any bytes from the input stream, flush
                    // any extra content, such as the variable header for the
                    // subsettable files. The default charset is kept on purpose:
                    // getContentSize() measures the header the same way, so the
                    // Content-Length stays consistent with the bytes written.
                    String varHeader = storageIO.getVarHeader();
                    if (varHeader != null) {
                        byte[] varHeaderBytes = varHeader.getBytes();
                        if (varHeaderBytes.length > 0) {
                            outstream.write(varHeaderBytes);
                        }
                    }

                    byte[] buffer = new byte[4*8192];
                    int bytesRead;
                    while ((bytesRead = content.read(buffer)) != -1) {
                        outstream.write(buffer, 0, bytesRead);
                    }

                    logger.fine("di conversion param: "+di.getConversionParam()+", value: "+di.getConversionParamValue());

                    // Downloads of thumbnail images (scaled down, low-res versions of graphic image files) and
                    // "preprocessed metadata" records for tabular data files are NOT considered "real" downloads,
                    // so these should not produce guestbook entries:
                    if (di.getGbr() != null && !(isThumbnailDownload(di) || isPreprocessedMetadataDownload(di))) {
                        recordGuestbookResponse(di, "writing guestbook response.");
                    } else {
                        logger.fine("not writing guestbook response");
                    }
                }
                outstream.close();
                return;
            }
        }
    }

    throw new NotFoundException();
}

/**
 * Applies the conversion named by {@code di.getConversionParam()} (which the
 * caller has verified to be non-null) and returns the StorageIO to stream.
 *
 * @return the converted StorageIO, the unchanged {@code storageIO} when the
 *         parameter does not apply to this file, or {@code null} when the
 *         requested service could not be performed
 */
private StorageIO<DataFile> applyConversion(DownloadInstance di, DataFile dataFile, StorageIO<DataFile> storageIO) throws IOException {
    String conversionParam = di.getConversionParam();
    String conversionValue = di.getConversionParamValue();

    if (conversionParam.equals("imageThumb") && !dataFile.isHarvested()) {
        if ("".equals(conversionValue)) {
            storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE);
        } else {
            try {
                // Integer.parseInt replaces the deprecated Integer(String) constructor;
                // both throw NumberFormatException on bad input.
                int size = Integer.parseInt(conversionValue);
                if (size > 0) {
                    storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, size);
                }
            } catch (NumberFormatException ex) {
                storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE);
            }
        }
        // and, since we now have tabular data files that can
        // have thumbnail previews... obviously, we don't want to
        // add the variable header to the image stream!
        // (The null check lets a failed conversion reach the caller's 404
        // instead of failing here with a NullPointerException.)
        if (storageIO != null) {
            storageIO.setNoVarHeader(Boolean.TRUE);
            storageIO.setVarHeader(null);
        }
        return storageIO;
    }

    if (!dataFile.isTabularData()) {
        return storageIO;
    }

    logger.fine("request for tabular data download;");
    // We can now generate thumbnails for some tabular data files (specifically,
    // tab files tagged as "geospatial"). We are going to assume that you can
    // do only ONE thing at a time - request the thumbnail for the file, or
    // request any tabular-specific services.

    if (conversionParam.equals("noVarHeader")) {
        logger.fine("tabular data with no var header requested");
        storageIO.setNoVarHeader(Boolean.TRUE);
        storageIO.setVarHeader(null);
        return storageIO;
    }

    if (conversionParam.equals("format")) {
        // Conversions, and downloads of "stored originals" are
        // now supported on all DataFiles for which StorageIO
        // access drivers are available.
        if ("original".equals(conversionValue)) {
            logger.fine("stored original of an ingested file requested");
            return StoredOriginalFile.retreive(storageIO);
        }
        // Other format conversions:
        logger.fine("format conversion on a tabular file requested ("+conversionValue+")");
        String requestedMimeType = di.getServiceFormatType(conversionParam, conversionValue);
        if (requestedMimeType == null) {
            // default mime type, in case real type is unknown;
            // (this shouldn't happen in real life - but just in case):
            requestedMimeType = "application/octet-stream";
        }
        return DataConverter.performFormatConversion(dataFile, storageIO, conversionValue, requestedMimeType);
    }

    if (conversionParam.equals("subset")) {
        return generateSubset(di, dataFile, storageIO);
    }

    return storageIO;
}

/**
 * Builds a column subset of a tabular file from the variables listed in
 * {@code di.getExtraArguments()}.
 *
 * @return a StorageIO over the generated subset; the unchanged
 *         {@code storageIO} when no usable variables were supplied; or
 *         {@code null} when subset generation failed
 */
private StorageIO<DataFile> generateSubset(DownloadInstance di, DataFile dataFile, StorageIO<DataFile> storageIO) {
    logger.fine("processing subset request.");

    // TODO:
    // If there are parameters on the list that are
    // not valid variable ids, or if they do not belong to
    // the datafile referenced - they are simply skipped;
    // perhaps an invalid argument exception should be thrown
    // instead.
    if (di.getExtraArguments() == null || di.getExtraArguments().size() == 0) {
        logger.fine("empty list of extra arguments.");
        return storageIO;
    }

    logger.fine("processing extra arguments list of length "+di.getExtraArguments().size());
    List<Integer> variablePositionIndex = new ArrayList<>();
    StringBuilder subsetVariableHeader = new StringBuilder();

    for (int i = 0; i < di.getExtraArguments().size(); i++) {
        DataVariable variable = (DataVariable)di.getExtraArguments().get(i);
        if (variable == null) {
            continue;
        }
        if (!variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) {
            logger.warning("variable does not belong to this data file.");
            continue;
        }
        logger.fine("adding variable id "+variable.getId()+" to the list.");
        variablePositionIndex.add(variable.getFileOrder());
        // tab-separate every name after the first one
        if (variablePositionIndex.size() > 1) {
            subsetVariableHeader.append("\t");
        }
        subsetVariableHeader.append(variable.getName());
    }

    if (variablePositionIndex.isEmpty()) {
        return storageIO;
    }

    File tempSubsetFile = null;
    try {
        tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp");
        TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator();
        tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t");

        if (!tempSubsetFile.exists()) {
            return null;
        }

        final File subsetFile = tempSubsetFile;
        // Delete the temporary file as soon as its stream is closed, so that
        // subset downloads do not accumulate files in the temp directory.
        FileInputStream subsetStream = new FileInputStream(subsetFile) {
            @Override
            public void close() throws IOException {
                try {
                    super.close();
                } finally {
                    subsetFile.delete();
                }
            }
        };
        long subsetSize = subsetFile.length();

        InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize);
        logger.fine("successfully created subset output stream.");
        subsetVariableHeader.append("\n");
        subsetStreamIO.setVarHeader(subsetVariableHeader.toString());

        String tabularFileName = storageIO.getFileName();
        if (tabularFileName != null && tabularFileName.endsWith(".tab")) {
            tabularFileName = tabularFileName.replaceAll("\\.tab$", "-subset.tab");
        } else if (tabularFileName != null && !"".equals(tabularFileName)) {
            tabularFileName = tabularFileName.concat("-subset.tab");
        } else {
            tabularFileName = "subset.tab";
        }
        subsetStreamIO.setFileName(tabularFileName);
        subsetStreamIO.setMimeType(storageIO.getMimeType());
        return subsetStreamIO;
    } catch (IOException ioex) {
        logger.log(Level.WARNING, "Datafile {0}: failed to generate tabular subset. Error message: {1}", new Object[]{dataFile.getId(), ioex.getLocalizedMessage()});
        if (tempSubsetFile != null) {
            tempSubsetFile.delete();
        }
        return null;
    }
}

/**
 * Prepares the redirect of a download to a temporary S3 URL; the guestbook
 * response (if any) is recorded before the redirect is returned.
 *
 * @return the 303 See Other exception for the caller to throw
 * @throws ServiceUnavailableException if no valid redirect URL could be produced
 */
private RedirectionException createS3Redirect(DownloadInstance di, StorageIO<DataFile> storageIO) {
    // definitely close the (still open) S3 input stream,
    // since we are not going to use it. The S3 documentation
    // emphasizes that it is very important not to leave these
    // lying around un-closed, since they are going to fill
    // up the S3 connection pool!
    closeInputStreamQuietly(storageIO);

    // [attempt to] redirect:
    String redirectUrl;
    try {
        redirectUrl = ((S3AccessIO)storageIO).generateTemporaryS3Url();
    } catch (IOException ioex) {
        logger.log(Level.WARNING, "Data Access API: failed to generate temporary S3 url: {0}", ioex.getLocalizedMessage());
        redirectUrl = null;
    }

    if (redirectUrl == null) {
        throw new ServiceUnavailableException();
    }

    logger.fine("Data Access API: direct S3 url: "+redirectUrl);

    URI redirectUri;
    try {
        redirectUri = new URI(redirectUrl);
    } catch (URISyntaxException ex) {
        logger.info("Data Access API: failed to create S3 redirect url ("+redirectUrl+")");
        throw new ServiceUnavailableException();
    }

    // increment the download count, if necessary:
    if (di.getGbr() != null) {
        recordGuestbookResponse(di, "writing guestbook response, for an S3 download redirect.");
    }

    // finally, issue the redirect:
    Response response = Response.seeOther(redirectUri).build();
    logger.fine("Issuing redirect to the file location on S3.");
    return new RedirectionException(response);
}

/**
 * Submits the guestbook response command for this download and logs the
 * Make Data Count entry. Best-effort: a failing command is logged and does
 * not interrupt the download. The caller must have checked that
 * {@code di.getGbr()} is non-null.
 */
private void recordGuestbookResponse(DownloadInstance di, String logMessage) {
    try {
        logger.fine(logMessage);
        Command<?> cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner());
        di.getCommand().submit(cmd);
        MakeDataCountEntry entry = new MakeDataCountEntry(di.getRequestUriInfo(), di.getRequestHttpHeaders(), di.getDataverseRequestService(), di.getGbr().getDataFile());
        mdcLogService.logEntry(entry);
    } catch (CommandException e) {
        logger.log(Level.WARNING, "Failed to write guestbook response for a datafile download.", e);
    }
}

/**
 * Closes the input stream currently held by {@code storageIO}, if any.
 * Failures are logged at FINE level and otherwise ignored, since the
 * stream is being discarded anyway.
 */
private void closeInputStreamQuietly(StorageIO<DataFile> storageIO) {
    try {
        InputStream unused = storageIO.getInputStream();
        if (unused != null) {
            unused.close();
        }
    } catch (IOException ioex) {
        logger.log(Level.FINE, "Failed to close unused datafile input stream: {0}", ioex.getLocalizedMessage());
    }
}
/**
 * Tells whether the request is for an image thumbnail, i.e. its conversion
 * parameter is "imageThumb". A null instance or a null parameter yields false.
 */
private boolean isThumbnailDownload(DownloadInstance downloadInstance) {
    if (downloadInstance == null) {
        return false;
    }
    final String requestedConversion = downloadInstance.getConversionParam();
    return "imageThumb".equals(requestedConversion);
}
/**
 * Tells whether the request is for the "preprocessed metadata" record of a
 * file, i.e. conversion parameter "format" with value "prep". A null
 * instance, parameter or value yields false.
 */
private boolean isPreprocessedMetadataDownload(DownloadInstance downloadInstance) {
    if (downloadInstance == null) {
        return false;
    }
    final String requestedConversion = downloadInstance.getConversionParam();
    if (requestedConversion == null) {
        return false;
    }
    final String requestedValue = downloadInstance.getConversionParamValue();
    if (requestedValue == null) {
        return false;
    }
    return "format".equals(requestedConversion) && "prep".equals(requestedValue);
}
/**
 * Computes the number of bytes that will be sent for the given access
 * object: its reported size plus the byte length of its variable header,
 * when one is set.
 *
 * @return the total size, or -1 when the access object reports no size
 *         (a value below zero)
 */
private long getContentSize(StorageIO<?> accessObject) {
    if (!(accessObject.getSize() > -1)) {
        return -1;
    }

    long total = accessObject.getSize();
    final String header = accessObject.getVarHeader();
    if (header != null) {
        // an empty header contributes zero bytes
        total += header.getBytes().length;
    }
    return total;
}
/**
 * Convenience overload: the size of the requested file with no extra
 * header taken into account.
 */
private long getFileSize(DownloadInstance di) {
    final String noExtraHeader = null;
    return getFileSize(di, noExtraHeader);
}
/**
 * Works out the download size from the DataFile record, for plain
 * downloads only (no conversion parameter, or an empty one).
 * <p>
 * For non-tabular files this is the stored file size. For tabular files
 * the byte length of {@code extraHeader} (the variable name header), when
 * supplied, is added on top. Conversions, thumbnails and subsets are not
 * handled here.
 *
 * @param di          the download request
 * @param extraHeader optional header sent ahead of a tabular file; may be null
 * @return the size in bytes, or -1 when it cannot be determined this way
 */
private long getFileSize(DownloadInstance di, String extraHeader) {
    if (di.getDownloadInfo() == null || di.getDownloadInfo().getDataFile() == null) {
        return -1;
    }

    final DataFile df = di.getDownloadInfo().getDataFile();
    final boolean tabular = df.isTabularData();
    final boolean plainDownload = di.getConversionParam() == null || "".equals(di.getConversionParam());

    // Anything other than a straight file download is not sized here.
    if (!plainDownload) {
        return -1;
    }

    long knownSize = df.getFilesize();
    if (knownSize <= 0) {
        return -1;
    }

    // Only tabular files are sent with the additional variable header.
    if (tabular && extraHeader != null) {
        knownSize += extraHeader.getBytes().length;
    }
    return knownSize;
}
}