diff --git a/python/tidy.py b/python/tidy.py index c65425de01a4..6050efc456c1 100644 --- a/python/tidy.py +++ b/python/tidy.py @@ -12,9 +12,10 @@ import itertools import re import sys +import toml from licenseck import licenses -filetypes_to_check = [".rs", ".rc", ".cpp", ".c", ".h", ".py"] +filetypes_to_check = [".rs", ".rc", ".cpp", ".c", ".h", ".py", ".toml"] reftest_directories = ["tests/ref"] reftest_filetype = ".list" python_dependencies = [ @@ -145,14 +146,25 @@ def check_flake8(file_paths): return num_errors +def check_toml(contents): + contents = contents.splitlines(True) + for idx, line in enumerate(contents): + if line.find("*") != -1: + yield (idx + 1, "found asterisk instead of minimum version number") + + def collect_errors_for_files(files_to_check, checking_functions): for file_name in files_to_check: with open(file_name, "r") as fp: contents = fp.read() - for check in checking_functions: - for error in check(contents): - # filename, line, message + if file_name.endswith(".toml"): + for error in check_toml(contents): yield (file_name, error[0], error[1]) + else: + for check in checking_functions: + for error in check(contents): + # filename, line, message + yield (file_name, error[0], error[1]) def check_reftest_order(files_to_check): diff --git a/python/toml/toml.py b/python/toml/toml.py index aa43ecae9414..8ab0d3e260c6 100644 --- a/python/toml/toml.py +++ b/python/toml/toml.py @@ -1,5 +1,18 @@ import datetime, decimal, re +class TomlTz(datetime.tzinfo): + + def __new__(self, toml_offset): + self._raw_offset = toml_offset + self._hours = int(toml_offset[:3]) + self._minutes = int(toml_offset[4:6]) + + def tzname(self, dt): + return "UTC"+self._raw_offset + + def utcoffset(self, dt): + return datetime.timedelta(hours=self._hours, minutes=self._minutes) + try: _range = xrange except NameError: @@ -8,31 +21,31 @@ basestring = str unichr = chr -def load(f): +def load(f, _dict=dict): """Returns a dictionary containing the named file parsed as toml.""" if isinstance(f, basestring): with open(f) as ffile: - return loads(ffile.read()) + return loads(ffile.read(), _dict) elif isinstance(f, list): for l in f: if not isinstance(l, basestring): raise Exception("Load expects a list to contain filenames only") - d = [] + d = _dict() for l in f: d.append(load(l)) - r = {} + r = _dict() for l in d: toml_merge_dict(r, l) return r elif f.read: - return loads(f.read()) + return loads(f.read(), _dict) else: raise Exception("You can only load a file descriptor, filename or list") -def loads(s): +def loads(s, _dict=dict): """Returns a dictionary containing s, a string, parsed as toml.""" implicitgroups = [] - retval = {} + retval = _dict() currentlevel = retval if isinstance(s, basestring): try: @@ -42,36 +55,93 @@ def loads(s): sl = list(s) openarr = 0 openstring = False + openstrchar = "" + multilinestr = False arrayoftables = False beginline = True keygroup = False + keyname = 0 delnum = 1 for i in range(len(sl)): - if sl[i] == '"': + if sl[i] == '\r' and sl[i+1] == '\n': + sl[i] = ' ' + continue + if keyname: + if sl[i] == '\n': + raise Exception("Key name found without value. Reached end of line.") + if openstring: + if sl[i] == openstrchar: + keyname = 2 + openstring = False + openstrchar = "" + continue + elif keyname == 1: + if sl[i].isspace(): + keyname = 2 + continue + elif sl[i].isalnum() or sl[i] == '_' or sl[i] == '-': + continue + elif keyname == 2 and sl[i].isspace(): + continue + if sl[i] == '=': + keyname = 0 + else: + raise Exception("Found invalid character in key name: '"+sl[i]+"'. Try quoting the key name.") + if sl[i] == "'" and openstrchar != '"': + k = 1 + try: + while sl[i-k] == "'": + k += 1 + if k == 3: + break + except IndexError: + pass + if k == 3: + multilinestr = not multilinestr + openstring = multilinestr + else: + openstring = not openstring + if openstring: + openstrchar = "'" + else: + openstrchar = "" + if sl[i] == '"' and openstrchar != "'": oddbackslash = False + k = 1 + tripquote = False try: - k = 1 - j = sl[i-k] - oddbackslash = False - while j == '\\': + while sl[i-k] == '"': + k += 1 + if k == 3: + tripquote = True + break + while sl[i-k] == '\\': oddbackslash = not oddbackslash k += 1 - j = sl[i-k] except IndexError: pass if not oddbackslash: - openstring = not openstring - if keygroup and (sl[i] == ' ' or sl[i] == '\t'): - keygroup = False - if arrayoftables and (sl[i] == ' ' or sl[i] == '\t'): - arrayoftables = False - if sl[i] == '#' and not openstring and not keygroup and not arrayoftables: + if tripquote: + multilinestr = not multilinestr + openstring = multilinestr + else: + openstring = not openstring + if openstring: + openstrchar = '"' + else: + openstrchar = "" + if sl[i] == '#' and not openstring and not keygroup and \ + not arrayoftables: j = i - while sl[j] != '\n': - sl.insert(j, ' ') - sl.pop(j+1) - j += 1 - if sl[i] == '[' and not openstring and not keygroup and not arrayoftables: + try: + while sl[j] != '\n': + sl.insert(j, ' ') + sl.pop(j+1) + j += 1 + except IndexError: + break + if sl[i] == '[' and not openstring and not keygroup and \ + not arrayoftables: if beginline: if sl[i+1] == '[': arrayoftables = True @@ -88,22 +158,58 @@ def loads(s): else: openarr -= 1 if sl[i] == '\n': - if openstring: - raise Exception("Unbalanced quotes") - if openarr: + if openstring or multilinestr: + if not multilinestr: + raise Exception("Unbalanced quotes") + if sl[i-1] == "'" or sl[i-1] == '"': + sl.insert(i, sl[i-1]) + sl.pop(i+1) + sl[i-3] = ' ' + elif openarr: sl.insert(i, ' ') sl.pop(i+1) else: beginline = True elif beginline and sl[i] != ' ' and sl[i] != '\t': beginline = False - keygroup = True + if not keygroup and not arrayoftables: + if sl[i] == '=': + raise Exception("Found empty keyname. ") + keyname = 1 s = ''.join(sl) s = s.split('\n') else: raise Exception("What exactly are you trying to pull?") + multikey = None + multilinestr = "" + multibackslash = False for line in s: line = line.strip() + if multikey: + if multibackslash: + strippedline = line.lstrip(' \t\n') + if strippedline == '': + continue + multilinestr += strippedline + else: + multilinestr += line + multibackslash = False + if len(line) > 2 and line[-1] == multilinestr[0] and \ + line[-2] == multilinestr[0] and line[-3] == multilinestr[0]: + value, vtype = load_value(multilinestr) + currentlevel[multikey] = value + multikey = None + multilinestr = "" + else: + k = len(multilinestr) -1 + while k > -1 and multilinestr[k] == '\\': + multibackslash = not multibackslash + k -= 1 + if multibackslash: + multilinestr = multilinestr[:-1] + else: + multilinestr += "\n" + continue if line == "": continue if line[0] == '[': @@ -115,12 +221,25 @@ def loads(s): line = line[1:].split(']', 1) if line[1].strip() != "": raise Exception("Key group not on a line by itself.") - line = line[0] - if '[' in line: - raise Exception("Key group name cannot contain '['") - if ']' in line: - raise Exception("Key group name cannot contain']'") - groups = line.split('.') + groups = line[0].split('.') + i = 0 + while i < len(groups): + groups[i] = groups[i].strip() + if groups[i][0] == '"' or groups[i][0] == "'": + groupstr = groups[i] + j = i+1 + while not groupstr[0] == groupstr[-1]: + j += 1 + groupstr = '.'.join(groups[i:j]) + groups[i] = groupstr[1:-1] + j -= 1 + while j > i: + groups.pop(j) + j -= 1 + else: + if not re.match(r'^[A-Za-z0-9_-]+$', groups[i]): + raise Exception("Invalid group name '"+groups[i]+"'. Try quoting it.") + i += 1 currentlevel = retval for i in range(len(groups)): group = groups[i] @@ -134,23 +253,25 @@ def loads(s): if arrayoftables: raise Exception("An implicitly defined table can't be an array") elif arrayoftables: - currentlevel[group].append({}) + currentlevel[group].append(_dict()) else: raise Exception("What? "+group+" already exists?"+str(currentlevel)) except TypeError: if i != len(groups) - 1: implicitgroups.append(group) - currentlevel = currentlevel[0] - if arrayoftables: - currentlevel[group] = [{}] - else: - currentlevel[group] = {} + currentlevel = currentlevel[-1] + try: + currentlevel[group] + except KeyError: + currentlevel[group] = _dict() + if i == len(groups) - 1 and arrayoftables: + currentlevel[group] = [_dict()] except KeyError: if i != len(groups) - 1: implicitgroups.append(group) - currentlevel[group] = {} + currentlevel[group] = _dict() if i == len(groups) - 1 and arrayoftables: - currentlevel[group] = [{}] + currentlevel[group] = [_dict()] currentlevel = currentlevel[group] if arrayoftables: try: @@ -160,32 +281,112 @@ def loads(s): elif "=" in line: i = 1 pair = line.split('=', i) + if re.match(r'^[0-9]', pair[-1]): + pair[-1] = re.sub(r'([0-9])_(?=[0-9])', r'\1', pair[-1]) l = len(line) - while pair[-1][0] != ' ' and pair[-1][0] != '\t' and pair[-1][0] != '"' and pair[-1][0] != '[' and pair[-1] != 'true' and pair[-1] != 'false': + while pair[-1][0] != ' ' and pair[-1][0] != '\t' and \ + pair[-1][0] != "'" and pair[-1][0] != '"' and \ + pair[-1][0] != '[' and pair[-1] != 'true' and \ + pair[-1] != 'false': try: float(pair[-1]) break except ValueError: - try: - datetime.datetime.strptime(pair[-1], "%Y-%m-%dT%H:%M:%SZ") - break - except ValueError: - i += 1 - pair = line.split('=', i) + pass + if load_date(pair[-1]) != None: + break + i += 1 + prev_val = pair[-1] + pair = line.split('=', i) + if re.match(r'^[0-9]', pair[-1]): + pair[-1] = re.sub(r'([0-9])_(?=[0-9])', r'\1', pair[-1]) + if prev_val == pair[-1]: + raise Exception("Invalid date or number") newpair = [] newpair.append('='.join(pair[:-1])) newpair.append(pair[-1]) pair = newpair pair[0] = pair[0].strip() + if (pair[0][0] == '"' or pair[0][0] == "'") and \ + (pair[0][-1] == '"' or pair[0][-1] == "'"): + pair[0] = pair[0][1:-1] pair[1] = pair[1].strip() - value, vtype = load_value(pair[1]) + if len(pair[1]) > 2 and (pair[1][0] == '"' or pair[1][0] == "'") \ + and pair[1][1] == pair[1][0] and pair[1][2] == pair[1][0] \ + and not (len(pair[1]) > 5 and pair[1][-1] == pair[1][0] \ + and pair[1][-2] == pair[1][0] and \ + pair[1][-3] == pair[1][0]): + k = len(pair[1]) -1 + while k > -1 and pair[1][k] == '\\': + multibackslash = not multibackslash + k -= 1 + if multibackslash: + multilinestr = pair[1][:-1] + else: + multilinestr = pair[1] + "\n" + multikey = pair[0] + else: + value, vtype = load_value(pair[1]) try: currentlevel[pair[0]] raise Exception("Duplicate keys!") except KeyError: - currentlevel[pair[0]] = value + if multikey: + continue + else: + currentlevel[pair[0]] = value return retval +def load_date(val): + microsecond = 0 + tz = None + if len(val) > 19 and val[19] == '.': + microsecond = int(val[20:26]) + if len(val) > 26: + tz = TomlTz(val[26:31]) + elif len(val) > 20: + tz = TomlTz(val[19:24]) + try: + d = datetime.datetime(int(val[:4]), int(val[5:7]), int(val[8:10]), int(val[11:13]), int(val[14:16]), int(val[17:19]), microsecond, tz) + except ValueError: + return None + return d + +def load_unicode_escapes(v, hexbytes, prefix): + hexchars = ['0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'] + skip = False + i = len(v) - 1 + while i > -1 and v[i] == '\\': + skip = not skip + i -= 1 + for hx in hexbytes: + if skip: + skip = False + i = len(hx) - 1 + while i > -1 and hx[i] == '\\': + skip = not skip + i -= 1 + v += prefix + v += hx + continue + hxb = "" + i = 0 + hxblen = 4 + if prefix == "\\U": + hxblen = 8 + while i < hxblen: + try: + if not hx[i].lower() in hexchars: + raise IndexError("This is a hack") + except IndexError: + raise Exception("Invalid escape sequence") + hxb += hx[i].lower() + i += 1 + v += unichr(int(hxb, 16)) + v += unicode(hx[len(hxb):]) + return v + def load_value(v): if v == 'true': return (True, "bool") @@ -193,6 +394,8 @@ def load_value(v): return (False, "bool") elif v[0] == '"': testv = v[1:].split('"') + if testv[0] == '' and testv[1] == '': + testv = testv[2:-2] closed = False for tv in testv: if tv == '': @@ -213,76 +416,60 @@ def load_value(v): raise Exception("Stuff after closed string. WTF?") else: closed = True - escapes = ['0', 'b', 'f', '/', 'n', 'r', 't', '"', '\\'] - escapedchars = ['\0', '\b', '\f', '/', '\n', '\r', '\t', '\"', '\\'] + escapes = ['0', 'b', 'f', 'n', 'r', 't', '"', '\\'] + escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"', '\\'] escapeseqs = v.split('\\')[1:] backslash = False for i in escapeseqs: if i == '': backslash = not backslash else: - if i[0] not in escapes and i[0] != 'u' and not backslash: + if i[0] not in escapes and i[0] != 'u' and i[0] != 'U' and \ + not backslash: raise Exception("Reserved escape sequence used") if backslash: backslash = False - if "\\u" in v: - hexchars = ['0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'] - hexbytes = v.split('\\u') - newv = hexbytes[0] - hexbytes = hexbytes[1:] - for hx in hexbytes: - hxb = "" - try: - if hx[0].lower() in hexchars: - hxb += hx[0].lower() - if hx[1].lower() in hexchars: - hxb += hx[1].lower() - if hx[2].lower() in hexchars: - hxb += hx[2].lower() - if hx[3].lower() in hexchars: - hxb += hx[3].lower() - except IndexError: - if len(hxb) != 2: - raise Exception("Invalid escape sequence") - if len(hxb) != 4 and len(hxb) != 2: - raise Exception("Invalid escape sequence") - newv += unichr(int(hxb, 16)) - newv += unicode(hx[len(hxb):]) - v = newv + for prefix in ["\\u", "\\U"]: + if prefix in v: + hexbytes = v.split(prefix) + v = load_unicode_escapes(hexbytes[0], hexbytes[1:], prefix) for i in range(len(escapes)): if escapes[i] == '\\': v = v.replace("\\"+escapes[i], escapedchars[i]) else: v = re.sub("([^\\\\](\\\\\\\\)*)\\\\"+escapes[i], "\\1"+escapedchars[i], v) + if v[1] == '"': + v = v[2:-2] + return (v[1:-1], "str") + elif v[0] == "'": + if v[1] == "'": + v = v[2:-2] return (v[1:-1], "str") elif v[0] == '[': return (load_array(v), "array") - elif len(v) == 20 and v[-1] == 'Z': - if v[10] == 'T': - return (datetime.datetime.strptime(v, "%Y-%m-%dT%H:%M:%SZ"), "date") - else: - raise Exception("Wait, what?") else: - itype = "int" - digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] - neg = False - if v[0] == '-': - neg = True - v = v[1:] - if '.' in v: - if v.split('.', 1)[1] == '': - raise Exception("This float is missing digits after the point") - if v[0] not in digits: - raise Exception("This float doesn't have a leading digit") - v = float(v) - itype = "float" + parsed_date = load_date(v) + if parsed_date != None: + return (parsed_date, "date") else: - v = int(v) - if neg: - return (0 - v, itype) - return (v, itype) - + itype = "int" + digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + neg = False + if v[0] == '-': + neg = True + v = v[1:] + if '.' in v or 'e' in v: + if v.split('.', 1)[1] == '': + raise Exception("This float is missing digits after the point") + if v[0] not in digits: + raise Exception("This float doesn't have a leading digit") + v = float(v) + itype = "float" + else: + v = int(v) + if neg: + return (0 - v, itype) + return (v, itype) def load_array(a): atype = None @@ -363,6 +550,12 @@ def dump_sections(o, sup): retdict = {} arraystr = "" for section in o: + qsection = section + if not re.match(r'^[A-Za-z0-9_-]+$', section): + if '"' in section: + qsection = "'" + section + "'" + else: + qsection = '"' + section + '"' if not isinstance(o[section], dict): arrayoftables = False if isinstance(o[section], list): @@ -372,8 +565,8 @@ def dump_sections(o, sup): if arrayoftables: for a in o[section]: arraytabstr = "" - arraystr += "[["+sup+section+"]]\n" - s, d = dump_sections(a, sup+section) + arraystr += "[["+sup+qsection+"]]\n" + s, d = dump_sections(a, sup+qsection) if s: if s[0] == "[": arraytabstr += s @@ -382,18 +575,20 @@ def dump_sections(o, sup): while d != {}: newd = {} for dsec in d: - s1, d1 = dump_sections(d[dsec], sup+section+dsec) + s1, d1 = dump_sections(d[dsec], sup+qsection+"."+dsec) if s1: - arraytabstr += "["+sup+section+"."+dsec+"]\n" + arraytabstr += "["+sup+qsection+"."+dsec+"]\n" arraytabstr += s1 for s1 in d1: newd[dsec+"."+s1] = d1[s1] d = newd arraystr += arraytabstr else: - retstr += section + " = " + str(dump_value(o[section])) + '\n' + if o[section] is not None: + retstr += (qsection + " = " + + str(dump_value(o[section])) + '\n') else: - retdict[section] = o[section] + retdict[qsection] = o[section] retstr += arraystr return (retstr, retdict) @@ -415,17 +610,22 @@ def dump_value(v): retval += "]" return retval if isinstance(v, (str, unicode)): - escapes = ['\\', '0', 'b', 'f', '/', 'n', 'r', 't', '"'] - escapedchars = ['\\', '\0', '\b', '\f', '/', '\n', '\r', '\t', '\"'] - for i in range(len(escapes)): - v = v.replace(escapedchars[i], "\\"+escapes[i]) + v = "%r" % v + if v[0] == 'u': + v = v[1:] + singlequote = v[0] == "'" + v = v[1:-1] + if singlequote: + v = v.replace("\\'", "'") + v = v.replace('"', '\\"') + v = v.replace("\\x", "\\u00") return str('"'+v+'"') if isinstance(v, bool): return str(v).lower() if isinstance(v, datetime.datetime): return v.isoformat()[:19]+'Z' if isinstance(v, float): - return '{0:f}'.format(decimal.Decimal(str(v))) + return str(v) return v def toml_merge_dict(a, b): @@ -440,4 +640,4 @@ def toml_merge_dict(a, b): else: raise Exception("Can't merge dict and nondict in toml object") a.update(b) - return a + return a \ No newline at end of file