-
Notifications
You must be signed in to change notification settings - Fork 478
/
HarvestingClientsIT.java
289 lines (229 loc) · 13.3 KB
/
HarvestingClientsIT.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
package edu.harvard.iq.dataverse.api;
import java.util.logging.Logger;
import io.restassured.RestAssured;
import static io.restassured.RestAssured.given;
import io.restassured.path.json.JsonPath;
import io.restassured.response.Response;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import static jakarta.ws.rs.core.Response.Status.CREATED;
import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED;
import static jakarta.ws.rs.core.Response.Status.ACCEPTED;
import static jakarta.ws.rs.core.Response.Status.OK;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.jupiter.api.Assertions.*;
/**
 * This class tests Harvesting Client functionality.
 * Note that these methods test BOTH the proprietary Dataverse REST API for
 * creating and managing harvesting clients, AND the underlying OAI-PMH
 * harvesting functionality itself. I.e., we will use the Dataverse
 * /api/harvest/clients/ api to run an actual harvest of a control set and
 * then validate the resulting harvested content.
 */
public class HarvestingClientsIT {

    private static final Logger logger = Logger.getLogger(HarvestingClientsIT.class.getCanonicalName());

    private static final String HARVEST_CLIENTS_API = "/api/harvest/clients/";
    private static final String ROOT_COLLECTION = "root";
    private static final String HARVEST_URL = "https://demo.dataverse.org/oai";
    private static final String ARCHIVE_URL = "https://demo.dataverse.org";
    private static final String HARVEST_METADATA_FORMAT = "oai_dc";
    private static final String ARCHIVE_DESCRIPTION = "RestAssured harvesting client test";
    private static final String CONTROL_OAI_SET = "controlTestSet2";
    private static final int DATASETS_IN_CONTROL_SET = 8;

    private static String normalUserAPIKey;
    private static String adminUserAPIKey;
    private static String harvestCollectionAlias;

    /**
     * API path of the harvesting client created by the currently running
     * test, or {@code null} when there is nothing to clean up. Read and reset
     * by {@link #cleanup()} after every test.
     */
    String clientApiPath = null;

    /**
     * One-time setup: points RestAssured at the server under test, then
     * creates the test users and the collection to harvest into.
     */
    @BeforeAll
    public static void setUpClass() {
        RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();

        // Create the users, an admin and a non-admin:
        setupUsers();

        // Create a collection that we will use to harvest remote content into:
        setupCollection();
    }

    /**
     * Deletes, as the admin user, the harvesting client registered in
     * {@link #clientApiPath} (if any), so that a test that fails half-way
     * through does not leave its client behind. The status code of the delete
     * call is only logged, not asserted.
     */
    @AfterEach
    public void cleanup() {
        if (clientApiPath != null) {
            Response deleteResponse = given()
                    .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                    .delete(clientApiPath);
            clientApiPath = null;
            logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode());
        }
    }

    /**
     * Creates two random users and promotes the second one to superuser.
     * Stores their API tokens in {@link #normalUserAPIKey} and
     * {@link #adminUserAPIKey}. Every step is asserted, so that a broken setup
     * is reported here rather than as a confusing failure inside a test.
     */
    private static void setupUsers() {
        Response createNormalUser = UtilIT.createRandomUser();
        assertEquals(OK.getStatusCode(), createNormalUser.getStatusCode(), "Failed to create the non-admin test user");
        normalUserAPIKey = UtilIT.getApiTokenFromResponse(createNormalUser);

        Response createAdminUser = UtilIT.createRandomUser();
        assertEquals(OK.getStatusCode(), createAdminUser.getStatusCode(), "Failed to create the admin test user");
        String adminUsername = UtilIT.getUsernameFromResponse(createAdminUser);

        Response makeSuperUser = UtilIT.makeSuperUser(adminUsername);
        assertEquals(OK.getStatusCode(), makeSuperUser.getStatusCode(), "Failed to make the admin test user a superuser");
        adminUserAPIKey = UtilIT.getApiTokenFromResponse(createAdminUser);
    }

    /**
     * Creates and publishes a random collection, as the admin user, and
     * stores its alias in {@link #harvestCollectionAlias}.
     */
    private static void setupCollection() {
        Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey);
        createDataverseResponse.prettyPrint();
        assertEquals(CREATED.getStatusCode(), createDataverseResponse.getStatusCode());

        harvestCollectionAlias = UtilIT.getAliasFromResponse(createDataverseResponse);

        // publish dataverse:
        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(harvestCollectionAlias, adminUserAPIKey);
        assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode());
    }

    /**
     * Builds the API path of the harvesting client with the given nickname.
     */
    private static String clientPath(String nickName) {
        return HARVEST_CLIENTS_API + nickName;
    }

    /**
     * Exercises the native harvesting client API: create (rejected for the
     * normal user, accepted for the admin), update, retrieve and validate,
     * delete (rejected for the normal user, accepted for the admin).
     */
    @Test
    public void testCreateEditDeleteClient() throws InterruptedException {
        // This method focuses on testing the native Dataverse harvesting client
        // API.

        String nickName = "h" + UtilIT.getRandomString(6);
        String apiPath = clientPath(nickName);
        String clientJson = String.format("{\"dataverseAlias\":\"%s\","
                + "\"type\":\"oai\","
                + "\"harvestUrl\":\"%s\","
                + "\"archiveUrl\":\"%s\","
                + "\"metadataFormat\":\"%s\"}",
                ROOT_COLLECTION, HARVEST_URL, ARCHIVE_URL, HARVEST_METADATA_FORMAT);

        // Try to create a client as normal user, should fail:
        Response rCreate = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey)
                .body(clientJson)
                .post(apiPath);
        assertEquals(UNAUTHORIZED.getStatusCode(), rCreate.getStatusCode());

        // From this point on the client may exist on the server; register its
        // path so that cleanup() removes it even if an assertion below fails.
        clientApiPath = apiPath;

        // Try to create the same as admin user, should succeed:
        rCreate = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                .body(clientJson)
                .post(apiPath);
        assertEquals(CREATED.getStatusCode(), rCreate.getStatusCode());

        // Try to update the client we have just created:
        String updateJson = String.format("{\"archiveDescription\":\"%s\"}", ARCHIVE_DESCRIPTION);
        Response rUpdate = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                .body(updateJson)
                .put(apiPath);
        assertEquals(OK.getStatusCode(), rUpdate.getStatusCode());

        // Now let's retrieve the client we've just created and edited:
        Response getClientResponse = given()
                .get(apiPath);
        logger.info("getClient.getStatusCode(): " + getClientResponse.getStatusCode());
        logger.info("getClient printresponse: " + getClientResponse.prettyPrint());
        assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode());

        // ... and validate the values:
        getClientResponse.then().assertThat()
                .body("status", equalTo(ApiConstants.STATUS_OK))
                .body("data.type", equalTo("oai"))
                .body("data.nickName", equalTo(nickName))
                .body("data.archiveDescription", equalTo(ARCHIVE_DESCRIPTION))
                .body("data.dataverseAlias", equalTo(ROOT_COLLECTION))
                .body("data.harvestUrl", equalTo(HARVEST_URL))
                .body("data.archiveUrl", equalTo(ARCHIVE_URL))
                .body("data.metadataFormat", equalTo(HARVEST_METADATA_FORMAT));

        // Try to delete the client as normal user should fail:
        Response rDelete = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey)
                .delete(apiPath);
        logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode());
        assertEquals(UNAUTHORIZED.getStatusCode(), rDelete.getStatusCode());

        // Try to delete as admin user should work:
        rDelete = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                .delete(apiPath);
        logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode());
        assertEquals(OK.getStatusCode(), rDelete.getStatusCode());

        // The client is gone; nothing left for cleanup() to delete.
        clientApiPath = null;
    }

    /** Runs the harvest test with {@code allowHarvestingMissingCVV} set to false. */
    @Test
    public void testHarvestingClientRun_AllowHarvestingMissingCVV_False() throws InterruptedException {
        harvestingClientRun(false);
    }

    /** Runs the harvest test with {@code allowHarvestingMissingCVV} set to true. */
    @Test
    public void testHarvestingClientRun_AllowHarvestingMissingCVV_True() throws InterruptedException {
        harvestingClientRun(true);
    }

    /**
     * Creates a harvesting client for the control OAI set, runs an actual
     * harvest, waits for it to finish and validates the reported results and
     * the number of harvested datasets found by search.
     *
     * @param allowHarvestingMissingCVV value of the client option of the same
     *        name; when false, one dataset fewer than the full control set is
     *        expected to be harvested (presumably the control set contains one
     *        dataset with a missing controlled vocabulary value - confirm
     *        against the control set)
     * @throws InterruptedException if interrupted while waiting between polls
     */
    private void harvestingClientRun(boolean allowHarvestingMissingCVV) throws InterruptedException {
        int expectedDatasetsHarvested = allowHarvestingMissingCVV ? DATASETS_IN_CONTROL_SET : DATASETS_IN_CONTROL_SET - 1;

        // This test will create a client and attempt to perform an actual
        // harvest and validate the resulting harvested content.

        // Setup: create the client via native API
        // since this API is tested somewhat extensively in the previous
        // method, we don't need to pay too much attention to this method, aside
        // from confirming the expected HTTP status code.

        String nickName = "h" + UtilIT.getRandomString(6);
        // Stored in the field so that cleanup() deletes the client afterwards:
        clientApiPath = clientPath(nickName);
        createHarvestClient(clientApiPath, allowHarvestingMissingCVV);

        // API TEST 1. Run the harvest using the configuration (client) we have
        // just created

        String runHarvestApiPath = clientPath(nickName) + "/run";

        // TODO? - verify that a non-admin user cannot perform this operation (401)

        Response runResponse = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                .post(runHarvestApiPath);
        assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode());

        // API TEST 2. As indicated by the ACCEPTED status code above, harvesting
        // is an asynchronous operation that will be performed in the background.
        // Verify that this "in progress" status is properly reported while it's
        // running, and that it completes in some reasonable amount of time.

        int maxWait = 20; // a very conservative interval; this harvest has no business taking this long
        int waited = waitForHarvestAndValidate(clientApiPath, expectedDatasetsHarvested, maxWait);

        logger.info("Waited " + waited + " seconds for the harvest to complete.");

        // Fail if it hasn't completed in maxWait seconds. This is checked
        // before searching, so that a timeout is reported as a timeout and not
        // as a mismatch in the number of search results.
        assertTrue(waited < maxWait, "Harvest did not complete within " + maxWait + " seconds");

        Response searchHarvestedDatasets = UtilIT.search("metadataSource:" + nickName, normalUserAPIKey);
        searchHarvestedDatasets.prettyPrint();
        searchHarvestedDatasets.then().assertThat()
                .statusCode(OK.getStatusCode())
                .body("data.total_count", equalTo(expectedDatasetsHarvested));

        // TODO(?) use the native Dataverses/Datasets apis to verify that the expected
        // datasets have been harvested. This may or may not be necessary, seeing
        // how we have already confirmed the number of successfully harvested
        // datasets from the control set; somewhat hard to imagine a practical
        // situation where that would not be enough (?).
    }

    /**
     * Creates, as the admin user, an OAI harvesting client that harvests the
     * control set into the test collection, and asserts the CREATED status.
     */
    private static void createHarvestClient(String apiPath, boolean allowHarvestingMissingCVV) {
        String clientJson = String.format("{\"dataverseAlias\":\"%s\","
                + "\"type\":\"oai\","
                + "\"harvestUrl\":\"%s\","
                + "\"archiveUrl\":\"%s\","
                + "\"set\":\"%s\","
                + "\"allowHarvestingMissingCVV\":%s,"
                + "\"metadataFormat\":\"%s\"}",
                harvestCollectionAlias, HARVEST_URL, ARCHIVE_URL, CONTROL_OAI_SET, allowHarvestingMissingCVV, HARVEST_METADATA_FORMAT);

        Response createResponse = given()
                .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
                .body(clientJson)
                .post(apiPath);
        assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode());
    }

    /**
     * Polls the client once a second until it no longer reports a harvest in
     * progress, then validates the reported outcome. Gives up polling after
     * {@code maxWait} "in progress" answers.
     *
     * @return the number of "in progress" answers seen; a value equal to
     *         {@code maxWait} means the harvest did not complete in time and
     *         nothing has been validated
     */
    private static int waitForHarvestAndValidate(String apiPath, int expectedDatasetsHarvested, int maxWait)
            throws InterruptedException {
        int i = 0;
        do {
            // Give it an initial 1 sec. delay, to make sure the client state
            // has been updated in the database, which can take some appreciable
            // amount of time on a heavily-loaded server running a full suite of
            // tests:
            Thread.sleep(1000L);

            // keep checking the status of the client with the GET api:
            Response getClientResponse = given()
                    .get(apiPath);
            assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode());

            JsonPath responseJsonPath = getClientResponse.body().jsonPath();
            assertNotNull(responseJsonPath, "Invalid JSON in GET client response");
            assertEquals(ApiConstants.STATUS_OK, responseJsonPath.getString("status"));

            String clientStatus = responseJsonPath.getString("data.status");
            assertNotNull(clientStatus);

            if ("inProgress".equals(clientStatus) || "IN PROGRESS".equals(responseJsonPath.getString("data.lastResult"))) {
                // we'll sleep for another second
                i++;
            } else {
                logger.info("getClientResponse.prettyPrint: "
                        + getClientResponse.prettyPrint());
                // Check the values in the response:
                // a) Confirm that the harvest has completed:
                assertEquals("inActive", clientStatus, "Unexpected client status: " + clientStatus);

                // b) Confirm that it has actually succeeded:
                assertEquals("SUCCESS", responseJsonPath.getString("data.lastResult"), "Last harvest not reported a success (took " + i + " seconds)");
                String harvestTimeStamp = responseJsonPath.getString("data.lastHarvest");
                assertNotNull(harvestTimeStamp);

                // c) Confirm that the other timestamps match:
                assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastSuccessful"));
                assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastNonEmpty"));

                // d) Confirm that the correct number of datasets have been harvested:
                assertEquals(expectedDatasetsHarvested, responseJsonPath.getInt("data.lastDatasetsHarvested"));

                // ok, it looks like the harvest has completed successfully.
                break;
            }
        } while (i < maxWait);
        return i;
    }
}