/
engfmt.py
375 lines (345 loc) · 12.7 KB
/
engfmt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
"""Engineering Notation
Provides support for producing, reading, and translating numbers into and out of
engineering format.
Examples:
Numbers in engineering notation:
1MHz
2.4G
1uF
Numbers in traditional notation:
1e6
2.4e9
1e-6
"""
import re as RE
# Compiled patterns used to recognise a string that consists of exactly one
# number (every pattern is anchored with \A ... \Z).  A number may carry an SI
# scale factor or an exponent, and its units either trail it (letters and
# underscores) or lead it (only '$' is accepted as a leading unit).

# mantissa, optionally followed by a scale factor and trailing units
_NumWithScaleFactorAndTrailingUnits = RE.compile(
    r'\A([-+]?[0-9]*\.?[0-9]+)(([YZEPTGMKk_munpfazy])([a-zA-Z_]*))?\Z'
)

# mantissa with exponent, optionally followed by trailing units
_NumWithExpAndTrailingUnits = RE.compile(
    r'\A([-+]?[0-9]*\.?[0-9]+[eE][-+]?[0-9]+)([a-zA-Z_]*)\Z'
)

# '$', mantissa, optional scale factor
_NumWithScaleFactorAndLeadingUnits = RE.compile(
    r'\A(\$)([-+]?[0-9]*\.?[0-9]+)([YZEPTGMKk_munpfazy])?\Z'
)

# '$', mantissa with exponent
_NumWithExpAndLeadingUnits = RE.compile(
    r'\A(\$)([-+]?[0-9]*\.?[0-9]+[eE][-+]?[0-9]+)\Z'
)

# inf or nan (any case), optionally followed by one space and trailing units
_NanWithTrailingUnits = RE.compile(
    r'\A([-+]?(inf|nan))\b ?([a-zA-Z_]*)\Z', RE.IGNORECASE
)

# '$' followed by inf or nan (any case)
_NanWithLeadingUnits = RE.compile(
    r'\A(\$)([-+]?(inf|nan))\Z', RE.IGNORECASE
)
# Maps each recognised scale-factor character to a pair:
#   [0] the numeric multiplier,
#   [1] the exponent text that replaces the scale factor in traditional
#       notation.
# '_' and '' both stand for "no scaling".
_ScaleFactors = {
    'Y': (1e24, 'e24'),
    'Z': (1e21, 'e21'),
    'E': (1e18, 'e18'),
    'P': (1e15, 'e15'),
    'T': (1e12, 'e12'),
    'G': (1e9, 'e9'),
    'M': (1e6, 'e6'),
    'K': (1e3, 'e3'),
    'k': (1e3, 'e3'),
    '_': (1, ''),
    '': (1, ''),
    'm': (1e-3, 'e-3'),
    'u': (1e-6, 'e-6'),
    'n': (1e-9, 'e-9'),
    'p': (1e-12, 'e-12'),
    'f': (1e-15, 'e-15'),
    'a': (1e-18, 'e-18'),
    'z': (1e-21, 'e-21'),
    'y': (1e-24, 'e-24'),
}
# Precision used by toEngFmt() when the caller does not supply one.
_DefaultPrecision = 4


def setDefaultPrecision(prec):
    """Set the default precision used by toEngFmt().

    The precision is the number of digits that follow the leading digit, so
    it must be non-negative; a precision of zero corresponds to one
    significant figure.

    Raises:
        ValueError: if prec is negative.  The previous default is kept.
    """
    global _DefaultPrecision
    # Reject bad values here rather than letting every later toEngFmt() call
    # fail on them.
    if prec < 0:
        raise ValueError("precision must be non-negative, got %r" % (prec,))
    _DefaultPrecision = prec
# Text that toEngFmt() inserts between a number and its units.
_Spacer = ''


def setSpacer(spacer=''):
    """Choose the text toEngFmt() places between a number and its units.

    The default is the empty string.  A space may be used instead, but
    numbers produced by toEngFmt() with a space as the spacer are not
    recognized by fromEngFmt().
    """
    global _Spacer
    _Spacer = spacer
def isNaN(val):
    """Test whether val is not-a-number.

    Numeric values are checked with math.isnan(), which also recognises NaNs
    whose textual form is not 'nan' (for example decimal.Decimal('NaN')).
    Values that math.isnan() cannot handle fall back to the original textual
    comparison against str(float('nan')), so results for such values are
    unchanged.

    Returns:
        True if val is a NaN, False otherwise.
    """
    import math

    try:
        return math.isnan(val)
    except (TypeError, ValueError, OverflowError):
        # Not convertible to a float (non-numeric object, signaling NaN,
        # integer too large): keep the legacy string-based test.
        return str(val) == str(float('nan'))
def toEngFmt(num, units="", prec=-1):
    """Convert a real number with optional units into an easily read string.

    The number is rounded to prec digits after the leading digit and its
    exponent is made a multiple of three.  Exponents covered by the scale
    factors K, M, G, T and m, u, n, p, f, a are replaced by that scale
    factor; other non-zero exponents are written as 'e<exponent>'.

    Args:
        num: the number to format.  Infinities and NaN are returned as
            str(num), separated from any units by a single space.
        units: text appended to the result.  The special value "$" is placed
            in front of the number instead.
        prec: digits after the leading digit; a negative value selects the
            default set with setDefaultPrecision().

    Returns:
        The formatted string.

    Raises:
        ValueError: if the effective precision is negative.
    """
    # a negative precision requests the module-wide default
    if prec < 0:
        prec = _DefaultPrecision
    # raise explicitly: an assert would be stripped when running with -O
    if prec < 0:
        raise ValueError("precision must be non-negative, got %r" % (prec,))

    # infinities and NaN have no mantissa or exponent to rescale
    if num == float('inf') or num == float('-inf') or isNaN(num):
        text = str(num)
        if not units:
            return text
        if units == "$":
            return units + text
        # cannot use the spacer here, because it is allowed to be empty
        return text + ' ' + units

    # convert into scientific notation with the requested precision
    sMant, sExp = ("%.*e" % (prec, num)).split("e")
    exp = int(sExp)

    # scale factors in use (the ones nobody recognizes, PEZY and zy, are
    # deliberately left out)
    big = "KMGT"
    small = "munpfa"

    # index selects the scale factor, shift is how far the decimal point
    # must move so that the exponent becomes a multiple of three
    index, shift = divmod(exp, 3)
    if index == 0:
        sf = ""
    elif 0 < index <= len(big):
        sf = big[index - 1]
    elif 0 < -index <= len(small):
        sf = small[-index - 1]
    else:
        # out of range of the scale factors: fall back to an exponent
        sf = "e%d" % (exp - shift)

    # move the decimal point as needed (multiply by 1, 10 or 100)
    mantissa = float(sMant) * 10 ** shift
    # shifting consumes fractional digits; never ask for a negative count
    sMant = "%.*f" % (max(prec - shift, 0), mantissa)

    # remove trailing zeros and a dangling decimal point, but leave integers
    # such as "100" alone
    if '.' in sMant:
        sMant = sMant.rstrip("0").rstrip(".")

    if units == "$":
        return units + sMant + sf
    if not units:
        return sMant + sf
    if len(sf) <= 1:
        # single-letter (or absent) scale factor stays attached to the units
        return sMant + _Spacer + sf + units
    # exponent form stays attached to the number
    return sMant + sf + _Spacer + units
def toNumber(str):
    """Convert a string in engineering format into a real number and units.

    Returns:
        A tuple (number, units) where number is a float and units is a
        string (empty when the input has no units), or None if the argument
        is not recognized as a number.
    """
    def scaled(digits, prefix):
        # apply the multiplier of the scale factor, if one was given
        value = float(digits)
        if prefix:
            value = value * _ScaleFactors[prefix][0]
        return value

    # with scale factor and trailing units
    found = _NumWithScaleFactorAndTrailingUnits.match(str)
    if found:
        digits, _, prefix, units = found.groups()
        return scaled(digits, prefix), units or ''

    # with exponent and trailing units
    found = _NumWithExpAndTrailingUnits.match(str)
    if found:
        digits, units = found.groups()
        return float(digits), units or ''

    # with scale factor and leading units ($)
    found = _NumWithScaleFactorAndLeadingUnits.match(str)
    if found:
        units, digits, prefix = found.groups()
        return scaled(digits, prefix), units or ''

    # with exponent and leading units ($)
    found = _NumWithExpAndLeadingUnits.match(str)
    if found:
        units, digits = found.groups()
        return float(digits), units or ''

    # inf or nan with trailing units
    found = _NanWithTrailingUnits.match(str)
    if found:
        digits, _, units = found.groups()
        return float(digits), units or ''

    # inf or nan with leading units ($)
    found = _NanWithLeadingUnits.match(str)
    if found:
        units, digits, _ = found.groups()
        return float(digits), units or ''

    return None
def fromEngFmt(str, stripUnits=True):
    """Convert a number in engineering format into traditional format.

    The argument is a string that contains a number in engineering format.
    The number returned is a string that keeps all the precision of the one
    provided; the scale factor is simply replaced by the equivalent
    exponent.

    Args:
        str: the string to convert; it must contain only the number.
        stripUnits: if true, only the number is returned; otherwise a tuple
            (number, units) is returned.

    Returns:
        The number as a string, or the tuple (number, units), or None if the
        argument is not recognized as a number.  units is None when the
        input carries neither a scale factor nor units.
    """
    def split(text):
        # Return (number, scaleFactor, units) from the first pattern that
        # matches, or None if none does.
        match = _NumWithScaleFactorAndTrailingUnits.match(text)
        if match:
            number, _, scaleFactor, units = match.groups()
            return number, scaleFactor, units
        # with exponent and trailing units
        match = _NumWithExpAndTrailingUnits.match(text)
        if match:
            number, units = match.groups()
            return number, None, units
        # with scale factor and leading units ($)
        match = _NumWithScaleFactorAndLeadingUnits.match(text)
        if match:
            units, number, scaleFactor = match.groups()
            return number, scaleFactor, units
        # with exponent and leading units ($)
        match = _NumWithExpAndLeadingUnits.match(text)
        if match:
            units, number = match.groups()
            return number, None, units
        # inf or nan with trailing units
        match = _NanWithTrailingUnits.match(text)
        if match:
            number, _, units = match.groups()
            return number, None, units
        # inf or nan with leading units ($)
        match = _NanWithLeadingUnits.match(text)
        if match:
            units, number, _ = match.groups()
            return number, None, units
        return None

    parts = split(str)
    if parts is None:
        return None

    number, scaleFactor, units = parts
    if scaleFactor:
        # swap the scale factor for its exponent text; the digits are
        # untouched, so no precision is lost
        number = number + _ScaleFactors[scaleFactor][1]

    if stripUnits:
        return number
    return (number, units)
def isNumber(str):
    """Report whether the string is a number in traditional or engineering format.

    Returns True if the whole string matches any of the single-number
    patterns, False otherwise.
    """
    recognizers = (
        _NumWithScaleFactorAndTrailingUnits,
        _NumWithExpAndTrailingUnits,
        _NumWithScaleFactorAndLeadingUnits,
        _NumWithExpAndLeadingUnits,
        _NanWithTrailingUnits,
        _NanWithLeadingUnits,
    )
    return any(pattern.match(str) for pattern in recognizers)
def stripUnits(str):
    """Return the number as given, with any units removed.

    A scale factor is kept, except for '_', which is dropped.  Returns None
    if the string is not recognized as a number.
    """
    def withScaleFactor(digits, prefix):
        # '_' only separates the number from its units; it is not kept
        if not prefix or prefix == '_':
            return digits
        return digits + prefix

    # with scale factor and trailing units
    found = _NumWithScaleFactorAndTrailingUnits.match(str)
    if found:
        return withScaleFactor(found.group(1), found.group(3))

    # with exponent and trailing units
    found = _NumWithExpAndTrailingUnits.match(str)
    if found:
        return found.group(1)

    # with scale factor and leading units ($)
    found = _NumWithScaleFactorAndLeadingUnits.match(str)
    if found:
        return withScaleFactor(found.group(2), found.group(3))

    # with exponent and leading units ($)
    found = _NumWithExpAndLeadingUnits.match(str)
    if found:
        return found.group(2)

    # inf or nan with trailing units
    found = _NanWithTrailingUnits.match(str)
    if found:
        return found.group(1)

    # inf or nan with leading units ($)
    found = _NanWithLeadingUnits.match(str)
    if found:
        return found.group(2)

    return None
#
# The two patterns below support allToEngFmt() and allFromEngFmt(), which
# convert numbers found in heterogeneous strings (numbers embedded within a
# larger block of text).  The functions defined earlier assume that the
# string contains only a number and no other text.
#
# This is a challenging problem and the solution is somewhat dubious.  A
# string such as +1.034e-029Hz is ambiguous: it can be read as 1.034e-029
# with units of Hz, or as 1.034 - 29 where 1.034 has units of e.  To
# sidestep the problem, _EmbeddedNumberWithScaleFactor requires a scale
# factor.  That is sufficient for allFromEngFmt(), which only needs to map
# the numbers that have scale factors.
#
# An earlier version of the pattern also refused to match when the number
# was followed by a letter or a period:
#_EmbeddedNumberWithScaleFactor = RE.compile(r'([-+]?[0-9]*\.?[0-9]+)([YZEPTGMKk_munpfazy])([a-zA-Z_]*)([^-+0-9a-zA-Z.]|\Z)')
# It was replaced because it did not recognize "1mA." (1mA at the end of a
# sentence).  The [-+0-9] exclusion must remain to avoid matches with 1E2,
# 1E-2 and 1E+2.
_EmbeddedNumberWithScaleFactor = RE.compile(
    r'([-+]?[0-9]*\.?[0-9]+)([YZEPTGMKk_munpfazy])([a-zA-Z_]*)([^-+0-9]|\Z)'
)

# A number with an exponent and optional trailing units.  It must start the
# string or follow a character that is not a letter, digit or period.
_EmbeddedNumberWithoutScaleFactor = RE.compile(
    r'(?:\A|(?<=[^a-zA-Z.0-9]))(([0-9]*\.?[0-9]+)[eE][-+]?[0-9]+)([a-zA-Z_]*)\b'
)
def allToEngFmt(str):
    """Rewrite every number with an exponent found in a string into
    engineering format.

    Text that is not part of a recognized number is copied unchanged.
    """
    pieces = []
    cursor = 0
    for found in _EmbeddedNumberWithoutScaleFactor.finditer(str):
        literal, _, units = found.groups()
        try:
            replacement = toEngFmt(float(literal), units)
        except ValueError:
            # something unexpected went wrong, but this is unessential, so
            # keep the original text and move on.
            replacement = literal + units
            if __debug__:
                print('EXCEPTION: mis-translation of number')
        # copy the text since the previous number, then the converted number
        pieces.append(str[cursor:found.start(1)])
        pieces.append(replacement)
        cursor = found.end(3)
    pieces.append(str[cursor:])
    return ''.join(pieces)
def allFromEngFmt(str):
    """Rewrite every number with a scale factor found in a string into
    traditional format.

    Text that is not part of a recognized number is copied unchanged.
    """
    pieces = []
    cursor = 0
    for found in _EmbeddedNumberWithScaleFactor.finditer(str):
        mantissa, prefix, units, _ = found.groups()
        if float(mantissa) == 0:
            # zero is written without an exponent
            converted = '0'
        else:
            converted = mantissa + _ScaleFactors[prefix][1]
        # copy the text since the previous number, then the converted number
        pieces.extend((str[cursor:found.start(1)], converted, units))
        # resume after the units; the character matched after them (group 4)
        # is not consumed here
        cursor = found.end(3)
    pieces.append(str[cursor:])
    return ''.join(pieces)