forked from alxp/islandora
/
fedora_utils.inc
373 lines (337 loc) · 11.4 KB
/
fedora_utils.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
<?php
/**
* @file
* Base utilities used by the Islandora fedora module.
*/
if (!function_exists('str_getcsv')) {
/**
* Functions that emulate php5.3 functionality for backwards compatiablity
*
* @param type $input
* @param type $delimiter
* @param type $enclosure
* @param type $escape
* @param type $eol
*
* @return type
*/
function str_getcsv($input, $delimiter = ',', $enclosure = '"', $escape = NULL, $eol = NULL) {
$temp = fopen("php://memory", "rw");
fwrite($temp, $input);
fseek($temp, 0);
$r = fgetcsv($temp, 4096, $delimiter, $enclosure);
fclose($temp);
return $r;
}
}
/**
*
* Utility method to get data from a url location using curl_exec and return
*
* This method takes identical parameters to do_curl_ext and passes them
* directly to do_curl_ext. The output from do_curl_ext is processed into
* either a TRUE, FALSE, or NULL value.
*
* This method exists for historical reasons as existing code makes use of
* do_curl directly and expects a simple return value and not an array.
*
* @param $url
* URL to be accessed by the function
* @param $return_to_variable
* Indicates whether the resource accessed by the curl call (HTML page,
* XML page, etc.) is returned directly from the function to the calling
* program or if it is sent directly to output.
* @param $number_of_post_vars
* Number of variable sot be posted
* @param $post
* Whether the curl_exec is done as a "get" or a "post"
*
* @return mixed
* TRUE, FALSE, NULL, or the data returned from accessing the URL
*/
function do_curl($url, $return_to_variable = 1, $number_of_post_vars = 0, $post = NULL) {
$return_array = do_curl_ext($url, $return_to_variable, $number_of_post_vars, $post);
return ($return_array != NULL) ? $return_array[0] : NULL;
}
/**
* Utility method to get data from a url location using curl_exec
*
* This method takes a URL and three associated parameters and initializes the
* structure required to make a call to curl_exec. As a part of the
* initialization the permissions associated with the (global) user are added
* to the call. This ensures access to any potentially user restricted URLs.
*
* Various defaults are used for the parameters required by curl_exec including
* the user agent and timeout. These are hard-coded.
*
* @param string $url
* URL to be accessed by the function
* @param $return_to_variable
* Indicates whether the resource accessed by the curl call (HTML page,
* XML page, etc.) is returned directly from the function to the calling
* program or if it is sent directly to output.
* @param $number_of_post_vars
* Number of variable sot be posted
* @param $post
* Whether the curl_exec is done as a "get" or a "post"
*
* @return mixed
* an array that consists of three value or NULL if curl is not suported:
* - element 0:
* The value returned from the curl_exec function call.
* This is either a boolean value or the actual data returned from
* accessing the URL.
* - element 1:
* The error code reslting from attempting to access the URL with curl_exec
* - element 2:
* A string representing a textual representation of the error code that
* resulted from attempting to access the URL with curl_exec
*/
function do_curl_ext($url, $return_to_variable = TRUE, $number_of_post_vars = 0, $post = NULL) {
global $user;
// Check if we are inside Drupal and there is a valid user.
// If the user is not valid for a Fedora call curl will throw an exception.
if ((!isset($user)) || $user->uid == 0) {
$fedora_user = 'anonymous';
$fedora_pass = 'anonymous';
}
else {
$fedora_user = $user->name;
$fedora_pass = $user->pass;
}
if (function_exists("curl_init")) {
$ch = curl_init();
$user_agent = "Mozilla/4.0 pp(compatible; MSIE 5.01; Windows NT 5.0)";
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
// Fail on errors.
curl_setopt($ch, CURLOPT_FAILONERROR, TRUE);
// Allow redirects.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
// Times out after 90s.
curl_setopt($ch, CURLOPT_TIMEOUT, 90);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
// Return into a variable.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, $return_to_variable);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_USERPWD, "$fedora_user:$fedora_pass");
//curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
if ($number_of_post_vars > 0 && $post) {
curl_setopt($ch, CURLOPT_POST, $number_of_post_vars);
curl_setopt($ch, CURLOPT_POSTFIELDS, "$post");
}
$ret_val = curl_exec($ch);
$error_code = curl_errno($ch);
$error_string = curl_error($ch);
return array($ret_val, $error_code, $error_string);
}
else {
if (function_exists(drupal_set_message)) {
drupal_set_message(t('No curl support.'), 'error');
}
return NULL;
}
}
/**
* Fedora available
*
* @return mixed
*/
function fedora_available() {
$response = do_curl_ext(variable_get('fedora_base_url', 'http://localhost:8080/fedora') . '/describe');
return ($response != NULL) ? ($response[1] == 0) : FALSE;
}
/**
* Resource index search available
*
* @return mixed
*/
function risearch_available() {
$response = do_curl_ext(variable_get('fedora_repository_url', 'http://localhost:8080/fedora/risearch'));
return ($response != NULL) ? ($response[1] == 0) : FALSE;
}
/**
* Returns a UTF-8-encoded transcripiton of the string given in $in_str.
*
* @param string $in_str
*
* @return string A UTF-8 encoded string.
*/
function fix_encoding($in_str) {
$cur_encoding = mb_detect_encoding($in_str);
if ($cur_encoding == "UTF-8" && mb_check_encoding($in_str, "UTF-8")) {
return $in_str;
}
else {
return utf8_encode($in_str);
}
}
/**
* valid pid ??
*
* @param type $pid
*
* @return boolean
*/
function valid_pid($pid) {
$valid = FALSE;
if (strlen(trim($pid)) <= 64 && preg_match('/^([A-Za-z0-9]|-|\.)+:(([A-Za-z0-9])|-|\.|~|_|(%[0-9A-F]{2}))+$/', trim($pid))) {
$valid = TRUE;
}
return $valid;
}
/**
* Valid Dsid ??
*
* @param type $dsid
*
* @return boolean
*/
function valid_dsid($dsid) {
$valid = FALSE;
if (strlen(trim($dsid)) <= 64 && preg_match('/^[a-zA-Z0-9\_\-\.]+$/', trim($dsid))) {
$valid = TRUE;
}
return $valid;
}
/**
* fixDsid ??
*
* @param type $dsid
*
* @return string
*/
function fix_dsid($dsid) {
$new_dsid = trim($dsid);
$find = '/[^a-zA-Z0-9\.\_\-]/';
$replace = '';
$new_dsid = preg_replace($find, $replace, $new_dsid);
if (strlen($new_dsid) > 63) {
$new_dsid = substr($new_dsid, -63);
}
if (preg_match('/^[^a-zA-Z]/', $dsid)) {
$new_dsid = 'x' . $new_dsid;
}
if (strlen($new_dsid) == 0) {
$new_dsid = 'item' . rand(1, 100);
}
return $new_dsid;
}
/**
* Function: get_collections_as_option_array
*
* @return array
* Returns an associative array of all collection objects in Fedora instance
*/
function get_collections_as_option_array() {
module_load_include('inc', 'fedora_repository', 'api/fedora_utils');
$restricted = variable_get('fedora_namespace_restriction_enforced', TRUE);
$allowed_string = variable_get('fedora_pids_allowed', 'default: demo: changeme: islandora:');
$namespaces = explode(':', $allowed_string);
$query = 'select $object $title from <#ri>
where ($object <fedora-model:label> $title
and $object <info:fedora/fedora-system:def/model#hasModel> <info:fedora/islandora:collectionCModel>
and $object <fedora-model:state> <info:fedora/fedora-system:def/model#Active>)
order by $title';
$url = variable_get('fedora_repository_url', 'http://localhost:8080/fedora/risearch');
$options = array(
'type' => 'tuples',
'flush' => 'TRUE',
'format' => 'csv',
'limit' => '1000',
'lang' => 'itql',
'stream' => 'on',
'query' => $query,
);
// The url function will take care of URL encoding.
$content = do_curl(url($url, array('query' => $options)));
$list = explode("\n", $content);
array_shift($list);
$list = preg_replace('/info:fedora\//', '', $list);
foreach ($namespaces as $namespace) {
$trimmed_names[] = trim($namespace);
}
$options = array();
// Removes blanks.
foreach ($list as $item) {
if ($item) {
$parts = explode(',', $item);
$namespace = explode(':', $parts[0]);
$namespace = trim($namespace[0]);
$bool = in_array($namespace, $trimmed_names);
if (!$restricted || in_array($namespace, $trimmed_names)) {
$options[$parts[0]] = $parts[1];
}
}
}
unset($options['islandora:ContentModelCollection']);
return $options;
}
/**
* Function: get_content_models_as_option_array
*
* @return array
* associative array of all available content models in Fedora instance
*/
function get_content_models_as_option_array() {
module_load_include('inc', 'fedora_repository', 'api/fedora_item');
module_load_include('inc', 'fedora_repository', 'api/fedora_utils');
$restricted = variable_get('fedora_namespace_restriction_enforced', TRUE);
$allowed_string = variable_get('fedora_pids_allowed', 'default: demo: changeme: islandora:');
$namespaces = explode(':', $allowed_string);
foreach ($namespaces as $namespace) {
if ($namespace) {
$allowed[] = trim($namespace);
}
}
$query = 'select $object $title from <#ri>
where ($object <fedora-model:label> $title
and ($object <fedora-model:hasModel> <info:fedora/fedora-system:ContentModel-3.0>
or $object <fedora-rels-ext:isMemberOfCollection> <info:fedora/islandora:ContentModelsCollection>)
and $object <fedora-model:state> <info:fedora/fedora-system:def/model#Active>)
order by $title';
$url = variable_get('fedora_repository_url', 'http://localhost:8080/fedora/risearch');
$url .= "?type=tuples&flush=TRUE&format=csv&limit=1000&lang=itql&stream=on&query=";
$content = do_curl($url . htmlentities(urlencode($query)));
$list = explode("\n", $content);
array_shift($list);
$list = preg_replace('/info:fedora\//', '', $list);
// Removes blanks.
foreach ($list as $item) {
if ($item) {
$parts = explode(',', $item);
$nameparts = explode(':', $parts[0]);
if (!$restricted || in_array($nameparts[0], $allowed)) {
if (!preg_match('/fedora-system/', $nameparts[0])) {
$options[$parts[0]] = $parts[1] . ' ~ ' . $parts[0];
}
}
}
}
return $options;
}
/**
* This function will retrieve the collections that the given PID belongs to.
*
* @param string $PID
* The PID to find the parents of.
*
* @return array
* $object_PIDs the list of PIDs of the collections that the
* indicated object is a member of.
*/
function get_parent_collections_from_pid($PID) {
$query_string = 'select $parent from <#ri>
where ($object <fedora-rels-ext:isMemberOf> $parent
or $object <fedora-rels-ext:isMemberOfCollection> $parent)
and $object <dc:identifier> \'' . $PID . '\'
order by $object';
$query_string = htmlentities(urlencode($query_string));
$url = variable_get('fedora_repository_url', 'http://localhost:8080/fedora/risearch');
$url .= '?type=tuples&flush=true&format=csv&limit=13000&lang=itql&stream=on&query=' . $query_string;
$content = do_curl($url, TRUE);
$results = explode("\n", $content);
$object_PIDs = preg_replace('/^info:fedora\/|"parent"| /', '', $results);
$object_PIDs = array_values(array_filter($object_PIDs));
return $object_PIDs;
}