-
Notifications
You must be signed in to change notification settings - Fork 0
/
RequestObject.py
458 lines (396 loc) · 18.9 KB
/
RequestObject.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
# RequestObject.py
# -*- coding: utf-8 -*-
"""
Source code for the RequestObject class
"""
##########################################################################
# Copyright (c) 2014 Katherine Deibel
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
##########################################################################
import bidict
import codecs
import re
from random import shuffle
try:
from lxml import etree
except ImportError: # No lxml installed
import xml.etree.cElementTree as etree
class RequestObject(object):
    """
    Class object containing the necessary information for making a request
    to download data from an analytic using the RESTful API.

    Attributes:
        Simple          Boolean indicating if the Request is simple (<=65001
                        rows, does not need uniqueID, no sortedBy*, no
                        multiprocessing, etc.).
        URL             The resource URL for the Alma API.
        Paths           A list of file paths to the analytic to download
                        from. To better support parallel requests, it is
                        recommended you create copies of the analytic and
                        have separate threads access different copies.
        Keys            A list of apikeys for authentication with the Alma
                        API. As with Paths, parallelism is best supported by
                        using separate REST apikeys for the Alma API.
        ColumnMap       A named bidirectional dictionary (bidict) for mapping
                        the column order from the analytic XML to preferred
                        XML names (e.g., <Column1></Column1> -->
                        <Title></Title>). The two mappings can be accessed
                        directly as ColumnMap.columns and ColumnMap.names,
                        respectively.
        NamesOrder      A list containing the keys in ColumnMap.names in the
                        order they should appear in the output.
        uniqueID        The XML name from ColumnMap.names that acts as a
                        unique identifier for each entry in the analytic's
                        results table.
        sortedBy        The XML name from ColumnMap.names that is used for
                        sorting the entries in the analytic's results table.
        sortedByType    The OBIEE datatype (decimal, string, date, etc.) of
                        sortedBy.
        sortedByOBIEE   The name of the OBIEE field.subfield that contains
                        the data for sortedBy.
        jobCount        Number of jobs listed. Defaults to 1 for simple
                        requests.
        JobBounds       A list of values used to distinguish the different
                        jobs that parallel code will perform by creating
                        filters on the sortedBy field/column in the analytic.
                        Each job is defined by two consecutive bounds,
                        meaning that for N jobs, there will be N+1 total
                        bounds. For a single job, the bounds x and y will
                        make the Agent find only results such that:
                            { x <= row.sortedBy < y }
                        A None at the beginning or end of the bounds list
                        means the lower (or upper) limit is unbounded. For
                        example, if JobBounds = [ None, 'H', 'P' ], the two
                        jobs will be:
                            { row.sortedBy < 'H' } and
                            { 'H' <= row.sortedBy < 'P' }
    """

    # Pattern every analytic column key must match (checked by validate).
    _COLUMN_RE = re.compile(r'^Column[0-9]+$')

    # Input keywords that carry exactly one value, may appear only once, and
    # are skipped for simple requests. Each keyword is stored in the
    # attribute of the same name; the tuple holds
    # (message for a repeated keyword, message for a missing value).
    _COMPLEX_ONLY_KEYWORDS = {
        'uniqueID': ('Only one uniqueID allowed',
                     'Problem extracting unique ID'),
        'sortedBy': ('Only one sortedBy is allowed',
                     'Problem extracting sortedBy name.'),
        'sortedByType': ('Only one sortedByType is allowed',
                         'Problem extracting sortedByType data.'),
        'sortedByOBIEE': ('Only one sortedByOBIEE is allowed',
                          'Problem extracting sortedByOBIEE data.'),
    }

    def __init__(self):
        """
        Basic constructor for initializing the RequestObject.
        """
        self.Simple = None
        self.URL = None
        self.Paths = []
        self.Keys = []
        self.uniqueID = None
        self.sortedBy = None
        self.sortedByType = None
        self.sortedByOBIEE = None
        self.jobCount = 1
        self.JobBounds = [None, None]
        self.NamesOrder = []
        self.ColumnMap = bidict.namedbidict('biMap', 'columns', 'names')({})
    # end __init__

    @classmethod
    def fromFilename(cls, filename, simpleRequest=False):
        """Helper class method for creating a valid RequestObject from
        a file path.

        See external documentation for specifications on the input format
        and what distinguishes simple versus complex request objects.

        Parameters:
            filename        A path to a UTF-8 encoded file containing the
                            request input
            simpleRequest   Boolean to indicate if the Request is to be
                            Simple or Complex
        Returns:
            RequestObject
        Throws:
            Exceptions upon malformed or invalid input
        """
        ro = cls()
        # The with-block closes the file even when parsing raises.
        with codecs.open(filename, 'r', encoding='utf-8') as reader:
            ro._parse_input(reader, simpleRequest=simpleRequest)
        return ro

    @classmethod
    def fromFilestream(cls, filestream, simpleRequest=False):
        """Helper class method for creating a valid RequestObject from
        a filestream.

        See external documentation for specifications on the input format
        and what distinguishes simple versus complex request objects.

        Parameters:
            filestream      A file object (with the read property activated).
                            It is iterated line by line and is not closed
                            here.
            simpleRequest   Boolean to indicate if the Request is to be
                            Simple or Complex
        Returns:
            RequestObject
        Throws:
            Exceptions upon malformed or invalid input
        """
        ro = cls()
        ro._parse_input(filestream, simpleRequest=simpleRequest)
        return ro

    def reset(self):
        """Helper method for emptying all data contained in the RequestObject.
        This RequestObject is now equivalent to a newly constructed one.
        """
        # Blank every instance attribute first (including any added after
        # construction), then restore the non-None defaults.
        for name in list(self.__dict__):
            setattr(self, name, None)
        self.Paths = []
        self.Keys = []
        self.jobCount = 1
        self.JobBounds = [None, None]
        self.NamesOrder = []
        self.ColumnMap = bidict.namedbidict('biMap', 'columns', 'names')({})

    def _parse_input(self, reader, simpleRequest=False):
        """Private method that places data in the attributes from the file
        object parameter 'reader.' Once all data is loaded, a validation
        check is performed.

        Each non-blank line that does not start with '#' is split on tabs;
        the first token is the keyword and the second its value. Keyword
        lines take precedence over NamesToColumns entries, so a preferred
        name equal to a keyword is parsed as that keyword.

        May raise an exception if there are any syntax errors in the input
        data or if the resulting RequestObject is not valid.

        Parameters:
            reader          A filestream (read) from which the RequestObject
                            will attempt to load its data
            simpleRequest   Boolean to indicate if the Request is to be
                            Simple or Complex
        Throws:
            Exceptions upon malformed input or validation failure
        """
        self.reset()
        self.Simple = simpleRequest
        inColumnMap = False

        # Line numbers are 1-based and count comment and blank lines too.
        for lineNo, line in enumerate(reader, 1):
            line = line.lstrip()
            if line.startswith('#') or not line.strip():
                continue
            tokens = line.split('\t')
            keyword = tokens[0].strip()

            if keyword == 'url':
                if self.URL is not None:
                    self._parse_error(reader, lineNo,
                                      'Only one url allowed in a request')
                self.URL = self._token_value(reader, lineNo, tokens,
                                             'Problem extracting URL')
            elif keyword == 'path':
                self.Paths.append(
                    self._token_value(reader, lineNo, tokens,
                                      'Problem extracting path'))
            elif keyword == 'apikey':
                self.Keys.append(
                    self._token_value(reader, lineNo, tokens,
                                      'Problem extracting apikey.'))
            elif keyword == 'jobBounds':
                if simpleRequest:
                    continue
                # Bounds are converted according to sortedByType, so the
                # type has to be known before the bounds are read.
                if self.sortedByType is None:
                    self._parse_error(reader, lineNo, 'job_bounds cannot appear before sortedBy in the input file')
                self.JobBounds = self._parse_bounds(reader, lineNo, line)
            elif keyword in self._COMPLEX_ONLY_KEYWORDS:
                if simpleRequest:
                    continue
                duplicateMsg, missingMsg = self._COMPLEX_ONLY_KEYWORDS[keyword]
                if getattr(self, keyword) is not None:
                    self._parse_error(reader, lineNo, duplicateMsg)
                setattr(self, keyword,
                        self._token_value(reader, lineNo, tokens, missingMsg))
            elif keyword == 'Begin NamesToColumns':
                if len(self.ColumnMap) > 0 or inColumnMap:
                    self._parse_error(reader, lineNo, 'Begin NamesToColumns can appear only once')
                inColumnMap = True
            elif keyword == 'End NamesToColumns':
                if not inColumnMap:
                    self._parse_error(reader, lineNo,
                                      'Unmatched End NamesToColumns')
                inColumnMap = False
            elif inColumnMap:
                # Entry format: <preferred name> TAB <column>. The map is
                # keyed by the capitalized column and valued by the name.
                column = self._token_value(
                    reader, lineNo, tokens,
                    'Problems reading column to name data')
                self.NamesOrder.append(keyword)
                try:
                    self.ColumnMap[column.capitalize()] = keyword
                except Exception:
                    self._parse_error(reader, lineNo,
                                      'Problems reading column to name data')
            else:
                self._parse_error(reader, lineNo, 'Unrecognized error')
        # end for line in reader

        # if jobbounds was empty, make it none,none
        if self.JobBounds is None:
            self.JobBounds = [None, None]

        # set jobCount accordingly
        if simpleRequest:
            self.jobCount = 1
        else:
            self.jobCount = max(len(self.JobBounds) - 1, 0)

        # shuffle the Paths and Keys for the heck of it
        shuffle(self.Paths)
        shuffle(self.Keys)

        errors = []
        if not self.validate(log=errors, simpleRequest=simpleRequest):
            msg = u"Error(s) found in this RequestObject's data:\n"
            msg = msg + u''.join(u'{0}\n'.format(e) for e in errors)
            raise Exception(msg)
    # end parse

    def _token_value(self, reader, lineNo, tokens, msg):
        """
        Private helper returning the stripped value (second tab-separated
        token) of an input line.

        Parameters:
            reader  Filestream being used by _parse_input(...)
            lineNo  The line number being parsed
            tokens  The line split on tab characters
            msg     Error message to report if the line has no value token
        Returns:
            tokens[1] with surrounding whitespace removed
        Throws:
            An exception (via _parse_error) if there is no second token
        """
        if len(tokens) < 2:
            self._parse_error(reader, lineNo, msg)
        return tokens[1].strip()

    def _parse_bounds(self, reader, lineNo, line):
        """
        Private helper converting a jobBounds line into a list of bounds.

        Everything after the first tab is split on commas. An empty entry
        becomes None, entries are converted with float() when sortedByType
        is 'decimal', capitalized when it is 'string', and kept as the
        stripped text for any other type.

        Parameters:
            reader  Filestream being used by _parse_input(...)
            lineNo  The line number being parsed
            line    The full (left-stripped) jobBounds line
        Returns:
            A new list of bounds
        Throws:
            An exception (via _parse_error) if the line has no value part
            or a decimal bound cannot be converted
        """
        bounds = []
        try:
            for bound in line.split('\t', 1)[1].split(','):
                bound = bound.strip()
                if bound == '':
                    bounds.append(None)
                elif self.sortedByType == 'decimal':
                    bounds.append(float(bound))
                elif self.sortedByType == 'string':
                    bounds.append(bound.capitalize())
                else:
                    bounds.append(bound)
        except (IndexError, ValueError):
            self._parse_error(reader, lineNo, 'Problem with reading bounds or type conversion')
        return bounds

    def _parse_error(self, reader, lineNo, msg):
        """
        Private helper function for raising exceptions and giving
        feedback when a parsing error occurs.

        Parameters:
            reader  Filestream being used by _parse_input(...). Its 'name'
                    attribute labels the message; '<stream>' is used when
                    the stream has no name.
            lineNo  The line number the error was encountered on
            msg     A string message describing the error
        Throws:
            Always throws an exception that includes the passed
            in information
        """
        source = getattr(reader, 'name', '<stream>')
        raise Exception(u'{0}, line {1}: {2}'.format(source, lineNo, msg))

    def validate(self, log=None, simpleRequest=None):
        """Helper method for checking that sufficient data is found within
        this RequestObject for performing a RESTful query as either a
        simple or complex request. For each error found, a text statement
        is added to log to support debugging. As such, validity is
        equivalent to (len(log) == 0).

        Side effects: for complex requests a JobBounds of None is replaced
        by [None, None], and jobCount is recomputed before returning
        (except when the simple/complex mode cannot be determined).

        Parameters:
            log             An empty list object into which debugging
                            messages will be appended. A fresh list is used
                            when omitted.
                            NOTE: If log is not initially empty, validate
                            will return False
            simpleRequest   Boolean to indicate if the Request is to be
                            considered as Simple or Complex for validation
                            purposes. If left blank, this object's Simple
                            attribute is used instead.
        Returns:
            True or False depending on if any errors were found (equivalent
            to len(log) == 0).
        """
        if log is None:
            log = []

        if simpleRequest is None:
            if self.Simple is None:
                log.append('RequestObject.Simple is None. Must be set to a boolean value.')
                return False
            simpleRequest = self.Simple

        # must be a URL
        if self.URL is None or len(self.URL) == 0:
            log.append('Resource url (url) not found or is empty')

        # must be at least one path that is non-zero
        if len(self.Paths) == 0:
            log.append('No analytic paths (path)')
        for p in self.Paths:
            if len(p) == 0:
                log.append('Empty analytic path (path)')

        # must be at least one key that is non-zero
        if len(self.Keys) == 0:
            log.append('No api keys (apikeys)')
        for k in self.Keys:
            if len(k) == 0:
                log.append('Empty apikey (apikey)')

        # columns to names
        if not simpleRequest and len(self.ColumnMap) == 0:
            log.append('Table is not described. NamesToColumns is empty')
        for col in self.ColumnMap.columns.keys():
            if len(col) == 0:
                log.append('Column names cannot be zero length')
            if self._COLUMN_RE.match(col) is None:
                log.append('Column name ' + col + ' is not of the form Column[0-9]+')
            xid = self.ColumnMap.columns[col]
            if len(xid) == 0:
                log.append('Preferred name for a column cannot be zero length')
            try:  # see if xid will be a valid xml element name
                etree.Element(xid)
            except Exception:
                log.append(xid + ': is not a valid xml element name')

        # The remaining checks only apply to complex requests.
        if not simpleRequest:
            # uniqueID
            if self.uniqueID is None or len(self.uniqueID) == 0:
                log.append('uniqueID not found or is empty')
            elif self.uniqueID not in self.ColumnMap.names:
                log.append('uniqueID was not found in NamesToColumns')

            # sortedBy
            if self.sortedBy is None or len(self.sortedBy) == 0:
                log.append('sortedBy not found or is empty')
            elif self.sortedBy not in self.ColumnMap.names:
                log.append('sortedBy was not found in NamesToColumns')

            # sortedByType
            if self.sortedByType is None or len(self.sortedByType) == 0:
                log.append('The type for sortedBy was not found or is empty')
            elif self.sortedByType not in ['decimal', 'string']:
                log.append(self.sortedByType + ' is not a currently supported type')

            # sortedByOBIEE
            if self.sortedByOBIEE is None or len(self.sortedByOBIEE) == 0:
                log.append('sortedByOBIEE was not found or is empty')
            elif len(self.sortedByOBIEE.split('.')) != 2:
                log.append(self.sortedByOBIEE + ' is not of the form Field.Subfield\n or has extra periods in it.')

            # JobBounds: if exists, None only at start and end and
            # JB[i] < JB[i+1]
            if self.JobBounds is None:
                self.JobBounds = [None, None]
            elif len(self.JobBounds) < 2:
                log.append('job_bounds must contain at least 2 bounds.')
            else:
                for i in range(len(self.JobBounds) - 1):
                    lower = self.JobBounds[i]
                    upper = self.JobBounds[i + 1]
                    if lower is None:
                        # None is tested before comparing so an unbounded
                        # limit is never ordered against a real value.
                        if i != 0:
                            log.append('An infinity bound (empty bound) can occur only ' +
                                       'at the start or end of the list')
                    elif upper is not None and lower >= upper:
                        log.append(
                            u'JobBounds[{0}] is greater than or equal to '
                            u'JobBounds[{1}]: {2} >= {3}'.format(
                                i, i + 1, lower, upper))

        # force jobCount to be correct
        if simpleRequest:
            self.jobCount = 1
        else:
            self.jobCount = max(len(self.JobBounds) - 1, 0)

        return (len(log) == 0)
    # end validate
# end class RequestObject