/
Curator.java
603 lines (556 loc) · 19.9 KB
/
Curator.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.curate;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.Site;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CommunityService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.factory.CoreServiceFactory;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
/**
* Curator orchestrates and manages the application of one or more curation
* tasks to a DSpace object. It provides common services and a runtime
* environment to the tasks.
*
* @author richardrodgers
*/
public class Curator {
// status code values
/**
* Curator unable to find requested task.
* Returned by getStatus(String) when no runner is registered under the given name.
*/
public static final int CURATE_NOTASK = -3;
/**
* No assigned status code - typically because task not yet performed.
* This is the initial status of every TaskRunner.
*/
public static final int CURATE_UNSET = -2;
/**
* Task encountered an error in processing.
*/
public static final int CURATE_ERROR = -1;
/**
* Task completed successfully.
*/
public static final int CURATE_SUCCESS = 0;
/**
* Task failed.
*/
public static final int CURATE_FAIL = 1;
/**
* Task was not applicable to passed object.
*/
public static final int CURATE_SKIP = 2;
// invocation modes - used by Suspendable tasks
/**
* Invocation modes. A task's own mode is compared with the curator's mode
* (see setInvoked) when deciding whether a status code suspends further
* processing; a task mode of ANY matches whatever mode the curator is in.
*/
public static enum Invoked {
INTERACTIVE, BATCH, ANY
}
// transaction scopes
/**
* Transaction scopes. OBJECT makes visit() call dispatchEvents() on the
* curation context after each performance, CURATION makes
* curate(Context, String) call complete() on the context once all tasks
* have run, and OPEN (the default) triggers neither.
*/
public static enum TxScope {
OBJECT, CURATION, OPEN
}
private static final Logger log = LogManager.getLogger();
// Per-thread Context: set by the curate(Context, ...) overloads and created on demand by curationContext().
protected static final ThreadLocal<Context> curationCtx = new ThreadLocal<>();
// Name/value run parameters, filled by addParameter(s) and read by getRunParameter.
protected final Map<String, String> runParameters = new HashMap<>();
// Task name -> runner wrapping the resolved task.
protected Map<String, TaskRunner> trMap = new HashMap<>();
// Task names in the order they were added; the curate methods iterate this list.
protected List<String> perfList = new ArrayList<>();
// Looked up from the plugin service on the first call to queue().
protected TaskQueue taskQ = null;
// Sink that report(String) appends to; null until setReporter is called.
protected Appendable reporter = null;
// Invocation mode of this curator; null until setInvoked is called.
protected Invoked iMode = null;
// Resolves logical task names to ResolvedTask instances in addTask.
protected TaskResolver resolver = new TaskResolver();
// Transaction scope applied during curation; see TxScope.
protected TxScope txScope = TxScope.OPEN;
// Services assigned in the constructor from their factories.
protected CommunityService communityService;
protected ItemService itemService;
protected HandleService handleService;
/**
 * Creates a curator with no tasks configured. The community, item and
 * handle services are obtained from their respective service factories.
 */
public Curator() {
    this.communityService = ContentServiceFactory.getInstance().getCommunityService();
    this.itemService = ContentServiceFactory.getInstance().getItemService();
    this.handleService = HandleServiceFactory.getInstance().getHandleService();
    // The field initializer already created a resolver; it is replaced by a fresh one here.
    this.resolver = new TaskResolver();
}
/**
 * Stores a single run parameter on this Curator instance, replacing any
 * value previously stored under the same name. Stored values can be read
 * back with {@link #getRunParameter(String)}.
 *
 * @param name  the parameter's name.
 * @param value the parameter's value.
 */
public void addParameter(String name, String value) {
    this.runParameters.put(name, value);
}
/**
 * Stores every entry of the given map as a run parameter on this Curator
 * instance. Entries whose names are already present overwrite the
 * existing values.
 *
 * @param parameters parameter name/value pairs.
 */
public void addParameters(Map<String, String> parameters) {
    this.runParameters.putAll(parameters);
}
/**
 * Retrieves a run parameter previously stored on this Curator instance.
 *
 * @param name the name of the desired parameter.
 * @return the value stored under that name, or {@code null} if none is stored.
 */
public String getRunParameter(String name) {
    final String value = this.runParameters.get(name);
    return value;
}
/**
 * Resolves the named task, initializes it against this curator and
 * registers it for performance. A name that does not resolve, or a task
 * whose initialization throws an {@link IOException}, is reported on
 * standard out and otherwise ignored. Caller should make no assumptions
 * on execution ordering.
 *
 * @param taskName - logical name of task
 * @return this curator - to support concatenating invocation style
 */
public Curator addTask(String taskName) {
    ResolvedTask resolved = resolver.resolveTask(taskName);
    if (resolved == null) {
        System.out.println("Task: '" + taskName + "' does not resolve");
        return this;
    }
    try {
        resolved.init(this);
        trMap.put(taskName, new TaskRunner(resolved));
        // performance order currently FIFO - to be revisited
        perfList.add(taskName);
    } catch (IOException initFailure) {
        System.out.println("Task: '" + taskName + "' initialization failure: " + initFailure.getMessage());
    }
    return this;
}
/**
 * Tells whether the named task is currently in this curator's list of
 * tasks to perform.
 *
 * @param taskName - logical name of the task
 * @return true if task has been configured, else false
 */
public boolean hasTask(String taskName) {
    boolean configured = perfList.contains(taskName);
    return configured;
}
/**
 * Removes a task from the set to be performed. Every occurrence of the
 * name is dropped from the performance list, so a task that was added
 * more than once is removed completely. Removing a name that was never
 * added is a no-op.
 *
 * @param taskName - logical name of the task
 * @return this curator - to support concatenating invocation style
 */
public Curator removeTask(String taskName) {
    trMap.remove(taskName);
    // List.remove(Object) drops only the first match. Repeat until nothing is
    // left, otherwise a name added twice would stay in perfList without a
    // runner in trMap and the next curate() would dereference null.
    boolean removed;
    do {
        removed = perfList.remove(taskName);
    } while (removed);
    return this;
}
/**
 * Sets the invocation mode of this curator. The mode is compared with each
 * task's own mode when deciding whether a status code suspends processing.
 *
 * @param mode one of INTERACTIVE, BATCH, ANY
 * @return the Curator instance.
 */
public Curator setInvoked(Invoked mode) {
    this.iMode = mode;
    return this;
}
/**
 * Sets the destination that {@link #report(String)} appends messages to.
 *
 * @param reporter the {@link Appendable} receiving report messages.
 * @return return self (Curator instance) with reporter set
 */
public Curator setReporter(Appendable reporter) {
    Curator self = this;
    self.reporter = reporter;
    return self;
}
/**
 * Defines the transactional scope of curator executions.
 * The default is 'open', meaning the framework takes no transactional
 * action during curation. With a scope of 'curation',
 * {@link #curate(Context, String)} calls {@code complete()} on the
 * curation context once after the entire performance. With a scope of
 * 'object', {@code dispatchEvents()} is called on the curation context
 * after each object (e.g. item) a task is performed on.
 *
 * @param scope transactional scope
 * @return return self (Curator instance) with given scope set
 */
public Curator setTransactionScope(TxScope scope) {
    this.txScope = scope;
    return this;
}
/**
 * Performs all configured tasks upon object identified by id. If the id
 * can be resolved as a handle, the resolved DSO is the target object;
 * otherwise each task is run against the raw identifier.
 *
 * <p>
 * The given context is bound to the current thread for the duration of the
 * call and the binding is removed again before returning, whether or not
 * curation succeeded. When the transaction scope is CURATION, the bound
 * context is completed after all tasks have run.
 *
 * @param c  a DSpace context
 * @param id an object identifier
 * @throws IOException if id is null, on IO error, or wrapping an SQLException
 */
public void curate(Context c, String id) throws IOException {
    if (id == null) {
        throw new IOException("Cannot perform curation task(s) on a null object identifier!");
    }
    try {
        // Save the context on current execution thread
        curationCtx.set(c);
        DSpaceObject target = handleService.resolveToObject(c, id);
        if (target == null) {
            // Not resolvable to an object: hand the identifier itself to every task
            for (String name : perfList) {
                TaskRunner runner = trMap.get(name);
                runner.run(c, id);
            }
        } else {
            curate(target);
        }
        // if curation scoped, commit transaction
        Context bound = txScope.equals(TxScope.CURATION) ? curationCtx.get() : null;
        if (bound != null) {
            bound.complete();
        }
    } catch (SQLException dbFailure) {
        throw new IOException(dbFailure.getMessage(), dbFailure);
    } finally {
        curationCtx.remove();
    }
}
/**
 * Performs all configured tasks upon DSpace object
 * (Site, Community, Collection or Item). Items, and any object handed to
 * a distributive task, are passed to the task directly; for other tasks
 * this curator walks the container's contents itself.
 *
 * @param dso the DSpace object
 * @throws IOException if dso is null or on IO error
 */
public void curate(DSpaceObject dso) throws IOException {
    if (dso == null) {
        throw new IOException("Cannot perform curation task(s) on a null DSpaceObject!");
    }
    final int dsoType = dso.getType();
    for (String name : perfList) {
        TaskRunner runner = trMap.get(name);
        // do we need to iterate over the object ?
        boolean runDirectly = dsoType == Constants.ITEM || runner.task.isDistributive();
        if (runDirectly) {
            runner.run(dso);
            continue;
        }
        if (dsoType == Constants.COLLECTION) {
            doCollection(runner, (Collection) dso);
        } else if (dsoType == Constants.COMMUNITY) {
            doCommunity(runner, (Community) dso);
        } else if (dsoType == Constants.SITE) {
            doSite(runner, (Site) dso);
        }
    }
}
/**
 * Binds the given context to the current thread and then performs all
 * configured tasks upon the DSpace object (Community, Collection or Item).
 *
 * <p>
 * Note: the context binding made here is not removed by this method; it is
 * still in place on return.
 *
 * @param c   session context in which curation takes place.
 * @param dso the single object to be curated.
 * @throws java.io.IOException passed through.
 */
public void curate(Context c, DSpaceObject dso)
    throws IOException {
    curationCtx.set(c);
    this.curate(dso);
}
/**
 * Places a curation request for the object identified by id on a
 * managed queue named by the queueId. The request records the name of the
 * context's current user, the current time and the configured task names.
 * The queue implementation is looked up from the plugin service on first
 * use; if none is available a message is printed to standard out.
 *
 * @param c       A DSpace context
 * @param id      an object Id
 * @param queueId name of a queue. If queue does not exist, it will
 *                be created automatically.
 * @throws IOException if IO error
 */
public void queue(Context c, String id, String queueId) throws IOException {
    if (taskQ == null) {
        taskQ = (TaskQueue) CoreServiceFactory.getInstance().getPluginService().getSinglePlugin(TaskQueue.class);
    }
    if (taskQ == null) {
        System.out.println("curate - no TaskQueue implemented");
        return;
    }
    String submitter = c.getCurrentUser().getName();
    long submittedAt = System.currentTimeMillis();
    TaskQueueEntry entry = new TaskQueueEntry(submitter, submittedAt, perfList, id);
    taskQ.enqueue(queueId, entry);
}
/**
 * Forgets every configured task: empties both the performance list and
 * the map of task runners.
 */
public void clear() {
    this.perfList.clear();
    this.trMap.clear();
}
/**
 * Adds a message to the configured reporting stream. If no reporter has
 * been configured (see {@link #setReporter(Appendable)}) the message is
 * discarded instead of failing with a NullPointerException. An
 * {@link IOException} raised by the reporter is printed to standard out
 * and not propagated.
 *
 * @param message the message to output to the reporting stream.
 */
public void report(String message) {
    if (reporter == null) {
        // The reporter field defaults to null and setting one is optional:
        // with nowhere to write, drop the message rather than abort the task.
        return;
    }
    try {
        reporter.append(message);
    } catch (IOException ex) {
        System.out.println("Task reporting failure: " + ex);
    }
}
/**
 * Returns the status code for the latest performance of the named task.
 *
 * @param taskName the task name
 * @return the status code - one of CURATE_ values; CURATE_NOTASK if no
 *         task is registered under that name
 */
public int getStatus(String taskName) {
    TaskRunner runner = trMap.get(taskName);
    if (runner == null) {
        return CURATE_NOTASK;
    }
    return runner.statusCode;
}
/**
 * Returns the result string for the latest performance of the named task.
 *
 * @param taskName the task name
 * @return the result string, or <code>null</code> if the task has not set
 *         it or no task is registered under that name.
 */
public String getResult(String taskName) {
    TaskRunner runner = trMap.get(taskName);
    if (runner == null) {
        return null;
    }
    return runner.result;
}
/**
 * Assigns a result to the performance of the named task. Does nothing when
 * no task is registered under that name.
 *
 * @param taskName the task name
 * @param result   a string indicating results of performing task.
 */
public void setResult(String taskName, String result) {
    TaskRunner runner = trMap.get(taskName);
    if (runner == null) {
        return;
    }
    runner.setResult(result);
}
/**
 * Returns the context object used in the current curation thread.
 * This is primarily a utility method to allow tasks access to the context when necessary.
 * <p>
 * If no context is bound to the thread, or the bound one is no longer
 * valid, a brand new Context (representing an Anonymous User) is created,
 * bound to the thread and returned.
 *
 * @return curation thread's Context object (or a new, anonymous Context if no curation Context exists)
 * @throws SQLException An exception that provides information on a database access error or other errors.
 */
public static Context curationContext() throws SQLException {
    Context existing = curationCtx.get();
    if (existing != null && existing.isValid()) {
        return existing;
    }
    // Create a new context (represents an Anonymous User)
    Context fresh = new Context();
    // Save it to current execution thread
    curationCtx.set(fresh);
    return fresh;
}
/**
 * Tells whether a given DSO is a 'container', i.e. its type is either
 * community or collection.
 *
 * @param dso a DSpace object
 * @return true if a container, false otherwise
 */
public static boolean isContainer(DSpaceObject dso) {
    if (dso.getType() == Constants.COMMUNITY) {
        return true;
    }
    return dso.getType() == Constants.COLLECTION;
}
/**
 * Run task for entire Site (including all Communities, Collections and Items).
 * The task is first run on the Site object itself and then on every
 * top-level community in turn; processing stops at the first run that
 * asks to suspend.
 *
 * @param tr   TaskRunner
 * @param site DSpace Site object
 * @return true if successful, false otherwise
 * @throws IOException if IO error, or wrapping an SQLException
 */
protected boolean doSite(TaskRunner tr, Site site) throws IOException {
    try {
        // get access to the curation thread's current context
        Context siteCtx = curationContext();
        // Site-wide Tasks really should have an EPerson performer associated with them,
        // otherwise they are run as an "anonymous" user with limited access rights.
        boolean anonymous = siteCtx.getCurrentUser() == null && !siteCtx.ignoreAuthorization();
        if (anonymous) {
            log.warn("You are running one or more Site-Wide curation tasks in ANONYMOUS USER mode," +
                " as there is no EPerson 'performer' associated with this task. To associate an EPerson " +
                "'performer' " +
                " you should ensure tasks are called via the Curator.curate(Context, ID) method.");
        }
        // Run task for the Site object itself
        if (!tr.run(site)) {
            return false;
        }
        // Then, perform this task for all Top-Level Communities in the Site
        // (this will recursively perform task for all objects in DSpace)
        for (Community topLevel : communityService.findAllTop(siteCtx)) {
            boolean proceed = doCommunity(tr, topLevel);
            if (!proceed) {
                return false;
            }
        }
        return true;
    } catch (SQLException sqlE) {
        throw new IOException(sqlE);
    }
}
/**
 * Run task for Community along with all sub-communities and collections.
 * The community itself comes first, then its sub-communities
 * (recursively), then its collections; processing stops at the first run
 * that asks to suspend.
 *
 * @param tr   TaskRunner
 * @param comm Community
 * @return true if successful, false otherwise
 * @throws IOException if IO error
 */
protected boolean doCommunity(TaskRunner tr, Community comm) throws IOException {
    boolean keepGoing = tr.run(comm);
    if (keepGoing) {
        for (Community child : comm.getSubcommunities()) {
            if (!doCommunity(tr, child)) {
                return false;
            }
        }
        for (Collection member : comm.getCollections()) {
            if (!doCollection(tr, member)) {
                return false;
            }
        }
    }
    return keepGoing;
}
/**
 * Run task for Collection along with all Items in that collection.
 * The collection itself comes first, then each of its items; every item
 * is uncached from the context after its run, and processing stops at the
 * first run that asks to suspend.
 *
 * @param tr   TaskRunner
 * @param coll Collection
 * @return true if successful, false otherwise
 * @throws IOException if IO error, or wrapping an SQLException
 */
protected boolean doCollection(TaskRunner tr, Collection coll) throws IOException {
    try {
        if (!tr.run(coll)) {
            return false;
        }
        Context ctx = curationContext();
        for (Iterator<Item> items = itemService.findByCollection(ctx, coll); items.hasNext(); ) {
            Item current = items.next();
            boolean proceed = tr.run(current);
            // Uncache the item before acting on the outcome, so it happens either way
            ctx.uncacheEntity(current);
            if (!proceed) {
                return false;
            }
        }
        return true;
    } catch (SQLException sqlE) {
        throw new IOException(sqlE.getMessage(), sqlE);
    }
}
/**
 * Record a 'visit' to a DSpace object and enforce any policies set
 * on this curator. Currently this means: when a context is bound to the
 * thread and the transaction scope is OBJECT, the context's pending events
 * are dispatched. The object itself is not inspected.
 *
 * @param dso the DSpace object
 * @throws IOException A general class of exceptions produced by failed or interrupted I/O operations.
 */
protected void visit(DSpaceObject dso) throws IOException {
    Context bound = curationCtx.get();
    if (bound == null) {
        return;
    }
    if (txScope.equals(TxScope.OBJECT)) {
        bound.dispatchEvents();
    }
}
/**
 * Pairs one resolved task with the status code and result string of its
 * most recent performance, and runs that task against a single target.
 */
protected class TaskRunner {
    ResolvedTask task = null;
    int statusCode = CURATE_UNSET;
    String result = null;

    /**
     * Wraps the given task.
     *
     * @param task the resolved task this runner performs
     */
    public TaskRunner(ResolvedTask task) {
        this.task = task;
    }

    /**
     * Performs the task on a DSpace object, records the status code, logs
     * the outcome and registers the visit with the curator.
     *
     * @param dso the object to perform the task on
     * @return false if the resulting status code asks to suspend, true otherwise
     * @throws IOException if dso is null or the task fails with an IO error
     */
    public boolean run(DSpaceObject dso) throws IOException {
        try {
            if (dso == null) {
                throw new IOException("DSpaceObject is null");
            }
            statusCode = task.perform(dso);
            // Objects without a handle are labelled by their ID instead
            final String label;
            if (dso.getHandle() == null) {
                label = "workflow item: " + dso.getID();
            } else {
                label = dso.getHandle();
            }
            log.info(logMessage(label));
            visit(dso);
            boolean suspended = suspend(statusCode);
            return !suspended;
        } catch (IOException failure) {
            // log error & pass exception upwards
            System.out.println("Error executing curation task '" + task.getName() + "'; " + failure);
            throw failure;
        }
    }

    /**
     * Performs the task on an identifier, records the status code, logs
     * the outcome and registers the visit with the curator.
     *
     * @param c  the context to perform the task in
     * @param id the identifier to perform the task on
     * @return false if the resulting status code asks to suspend, true otherwise
     * @throws IOException if c or id is null or the task fails with an IO error
     */
    public boolean run(Context c, String id) throws IOException {
        try {
            if (c == null || id == null) {
                throw new IOException("Context or identifier is null");
            }
            statusCode = task.perform(c, id);
            log.info(logMessage(id));
            visit(null);
            boolean suspended = suspend(statusCode);
            return !suspended;
        } catch (IOException failure) {
            // log error & pass exception upwards
            System.out.println("Error executing curation task '" + task.getName() + "'; " + failure);
            throw failure;
        }
    }

    /**
     * Stores the result string of the latest performance.
     *
     * @param result a string indicating results of performing task
     */
    public void setResult(String result) {
        this.result = result;
    }

    /**
     * Decides whether a status code should suspend further processing.
     * That is the case when the task declares a mode that is ANY or equals
     * the curator's invocation mode, and the code is among the task's codes.
     *
     * @param code the status code to test
     * @return true if processing should be suspended, false otherwise
     */
    protected boolean suspend(int code) {
        Invoked taskMode = task.getMode();
        if (taskMode == null) {
            return false;
        }
        boolean modeApplies = taskMode.equals(Invoked.ANY) || taskMode.equals(iMode);
        if (!modeApplies) {
            return false;
        }
        for (int suspendCode : task.getCodes()) {
            if (suspendCode == code) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds a useful log message for a curation task.
     *
     * @param id ID of DSpace Object
     * @return log message text
     */
    protected String logMessage(String id) {
        String text = "Curation task: " + task.getName()
            + " performed on: " + id
            + " with status: " + statusCode;
        if (result != null) {
            text += ". Result: '" + result + "'";
        }
        return text;
    }
}
}