From eed66a9eb67da63e4d27aa7588ef5717f9dd3eff Mon Sep 17 00:00:00 2001
From: Timothy Crosley <timothy.crosley@gmail.com>
Date: Sun, 2 Aug 2020 03:41:52 -0700
Subject: [PATCH] Initial work toward adding experimental support for sorting
 literals #1358

---
 isort/core.py         | 358 ++++++++++++++++++++++++++++++++++++++++++
 isort/literal.py      |  87 ++++++++++
 tests/test_literal.py |  33 ++++
 3 files changed, 478 insertions(+)
 create mode 100644 isort/core.py
 create mode 100644 isort/literal.py
 create mode 100644 tests/test_literal.py

diff --git a/isort/core.py b/isort/core.py
new file mode 100644
index 000000000..4dc8018a5
--- /dev/null
+++ b/isort/core.py
@@ -0,0 +1,358 @@
+import textwrap
+from io import StringIO
+from itertools import chain
+from typing import List, TextIO, Union
+
+import isort.literal
+from isort.settings import DEFAULT_CONFIG, Config
+
+from . import output, parse
+from .exceptions import FileSkipComment
+from .format import format_natural, remove_whitespace
+from .settings import FILE_SKIP_COMMENTS
+
+CIMPORT_IDENTIFIERS = ("cimport ", "cimport*", "from.cimport")  # prefixes of cimport statements
+IMPORT_START_IDENTIFIERS = ("from ", "from.import", "import ", "import*") + CIMPORT_IDENTIFIERS
+COMMENT_INDICATORS = ('"""', "'''", "'", '"', "#")  # quote and comment openers
+CODE_SORT_COMMENTS = (  # comments that turn on sorting of the code that follows them
+    "# isort: list",
+    "# isort: dict",
+    "# isort: set",
+    "# isort: unique-list",
+    "# isort: tuple",
+    "# isort: unique-tuple",
+)
+
+
+def process(
+    input_stream: TextIO,
+    output_stream: TextIO,
+    extension: str = "py",
+    config: Config = DEFAULT_CONFIG,
+) -> bool:
+    """Parses stream identifying sections of contiguous imports and sorting them
+
+    Code with unsorted imports is read from the provided `input_stream`, sorted and then
+    outputted to the specified `output_stream`. Literal assignments that follow one of the
+    `CODE_SORT_COMMENTS` are sorted as well, using `isort.literal.assignment`.
+
+    - `input_stream`: Text stream with unsorted import sections.
+    - `output_stream`: Text stream to output sorted imports into.
+    - `extension`: The file extension or file extension rules that should be used.
+        - *Default*: `"py"`.
+        - *Choices*: `["py", "pyi", "pyx"]`.
+    - `config`: Config settings to use when sorting. *Default*: `isort.settings.DEFAULT_CONFIG`.
+
+    Returns `True` if there were changes that needed to be made (errors present) from what
+    was provided in the input_stream, otherwise `False`.
+    """
+    line_separator: str = config.line_ending
+    add_imports: List[str] = [format_natural(addition) for addition in config.add_imports]
+    import_section: str = ""
+    next_import_section: str = ""
+    next_cimports: bool = False
+    in_quote: str = ""
+    first_comment_index_start: int = -1
+    first_comment_index_end: int = -1
+    contains_imports: bool = False
+    in_top_comment: bool = False
+    first_import_section: bool = True
+    section_comments = [f"# {heading}" for heading in config.import_headings.values()]
+    indent: str = ""
+    isort_off: bool = False
+    code_sorting: Union[bool, str] = False
+    code_sorting_section: str = ""
+    cimports: bool = False
+    made_changes: bool = False
+
+    if config.float_to_top:
+        new_input = ""
+        current = ""
+        isort_off = False
+        for line in chain(input_stream, (None,)):  # trailing None flushes the last chunk
+            if isort_off and line is not None:
+                if line == "# isort: on\n":
+                    isort_off = False
+                new_input += line
+            elif line in ("# isort: split\n", "# isort: off\n", None) or str(line).endswith(
+                "# isort: split\n"
+            ):
+                if line == "# isort: off\n":
+                    isort_off = True
+                if current:
+                    parsed = parse.file_contents(current, config=config)
+                    extra_space = ""
+                    while current and current[-1] == "\n":  # guard: chunk may be all newlines
+                        extra_space += "\n"
+                        current = current[:-1]
+                    extra_space = extra_space.replace("\n", "", 1)
+                    sorted_output = output.sorted_imports(
+                        parsed, config, extension, import_type="import"
+                    )
+                    if sorted_output.strip() != current.strip():
+                        made_changes = True
+                    new_input += sorted_output
+                    new_input += extra_space
+                    current = ""
+                new_input += line or ""
+            else:
+                current += line or ""
+
+        input_stream = StringIO(new_input)
+
+    for index, line in enumerate(chain(input_stream, (None,))):  # None marks end of input
+        if line is None:
+            if index == 0 and not config.force_adds:
+                return False
+
+            not_imports = True
+            line = ""
+            line_separator = line_separator or "\n"
+            if code_sorting and code_sorting_section:
+                output_stream.write(
+                    isort.literal.assignment(
+                        code_sorting_section, str(code_sorting), extension, config
+                    )
+                )
+        else:
+            stripped_line = line.strip()
+            if stripped_line and not line_separator:
+                line_separator = line[len(line.rstrip()) :].replace(" ", "").replace("\t", "")
+
+            for file_skip_comment in FILE_SKIP_COMMENTS:
+                if file_skip_comment in line:
+                    raise FileSkipComment("Passed in content")
+
+            if (
+                (index == 0 or (index in (1, 2) and not contains_imports))
+                and stripped_line.startswith("#")
+                and stripped_line not in section_comments
+            ):
+                in_top_comment = True
+            elif in_top_comment:
+                if not line.startswith("#") or stripped_line in section_comments:
+                    in_top_comment = False
+                    first_comment_index_end = index - 1
+
+            if (not stripped_line.startswith("#") or in_quote) and '"' in line or "'" in line:
+                char_index = 0
+                if first_comment_index_start == -1 and (
+                    line.startswith('"') or line.startswith("'")
+                ):
+                    first_comment_index_start = index
+                while char_index < len(line):
+                    if line[char_index] == "\\":
+                        char_index += 1
+                    elif in_quote:
+                        if line[char_index : char_index + len(in_quote)] == in_quote:
+                            in_quote = ""
+                            if first_comment_index_end < first_comment_index_start:
+                                first_comment_index_end = index
+                    elif line[char_index] in ("'", '"'):
+                        long_quote = line[char_index : char_index + 3]
+                        if long_quote in ('"""', "'''"):
+                            in_quote = long_quote
+                            char_index += 2
+                        else:
+                            in_quote = line[char_index]
+                    elif line[char_index] == "#":
+                        break
+                    char_index += 1
+
+            not_imports = bool(in_quote) or in_top_comment or isort_off
+            if not (in_quote or in_top_comment):
+                stripped_line = line.strip()
+                if isort_off:
+                    if stripped_line == "# isort: on":
+                        isort_off = False
+                elif stripped_line == "# isort: off":
+                    not_imports = True
+                    isort_off = True
+                elif stripped_line.endswith("# isort: split"):
+                    not_imports = True
+                elif stripped_line in CODE_SORT_COMMENTS:
+                    code_sorting = stripped_line.split("isort: ")[1].strip()  # e.g. "list"
+                    not_imports = True
+                elif code_sorting:
+                    if not stripped_line:
+                        output_stream.write(
+                            isort.literal.assignment(
+                                code_sorting_section, str(code_sorting), extension, config
+                            )
+                        )
+                        not_imports = True
+                        code_sorting = False
+                        code_sorting_section = ""
+                    else:
+                        code_sorting_section += line
+                        line = ""  # buffered until the literal ends
+                elif stripped_line in config.section_comments and not import_section:
+                    import_section += line
+                    indent = line[: -len(line.lstrip())]
+                elif not (stripped_line or contains_imports):
+                    if add_imports and not indent and not config.append_only:
+                        if not import_section:
+                            output_stream.write(line)
+                            line = ""
+                        import_section += line_separator.join(add_imports) + line_separator
+                        contains_imports = True
+                        add_imports = []
+                    else:
+                        not_imports = True
+                elif (
+                    not stripped_line
+                    or stripped_line.startswith("#")
+                    and (not indent or indent + line.lstrip() == line)
+                    and not config.treat_all_comments_as_code
+                    and stripped_line not in config.treat_comments_as_code
+                ):
+                    import_section += line
+                elif stripped_line.startswith(IMPORT_START_IDENTIFIERS):
+                    contains_imports = True
+
+                    new_indent = line[: -len(line.lstrip())]
+                    import_statement = line
+                    stripped_line = line.strip().split("#")[0]
+                    while stripped_line.endswith("\\") or (
+                        "(" in stripped_line and ")" not in stripped_line
+                    ):
+                        if stripped_line.endswith("\\"):
+                            while stripped_line and stripped_line.endswith("\\"):
+                                line = input_stream.readline()
+                                stripped_line = line.strip().split("#")[0]
+                                import_statement += line
+                        else:
+                            while ")" not in stripped_line:
+                                line = input_stream.readline()
+                                stripped_line = line.strip().split("#")[0]
+                                import_statement += line
+
+                    cimport_statement: bool = False
+                    if (
+                        import_statement.lstrip().startswith(CIMPORT_IDENTIFIERS)
+                        or " cimport " in import_statement
+                        or " cimport*" in import_statement
+                        or " cimport(" in import_statement
+                        or ".cimport" in import_statement
+                    ):
+                        cimport_statement = True
+
+                    if cimport_statement != cimports or (new_indent != indent and import_section):
+                        if import_section:
+                            next_cimports = cimport_statement
+                            next_import_section = import_statement
+                            import_statement = ""
+                            not_imports = True
+                            line = ""
+                        else:
+                            cimports = cimport_statement
+
+                    indent = new_indent
+                    import_section += import_statement
+                else:
+                    not_imports = True
+
+        if not_imports:
+            raw_import_section: str = import_section
+            if (
+                add_imports
+                and not config.append_only
+                and not in_top_comment
+                and not in_quote
+                and not import_section
+                and not line.lstrip().startswith(COMMENT_INDICATORS)
+            ):
+                import_section = line_separator.join(add_imports) + line_separator
+                contains_imports = True
+                add_imports = []
+
+            if next_import_section and not import_section:  # pragma: no cover
+                raw_import_section = import_section = next_import_section
+                next_import_section = ""
+
+            if import_section:
+                if add_imports and not indent:
+                    import_section = (
+                        line_separator.join(add_imports) + line_separator + import_section
+                    )
+                    contains_imports = True
+                    add_imports = []
+
+                if not indent:
+                    import_section += line
+                    raw_import_section += line
+                if not contains_imports:
+                    output_stream.write(import_section)
+                else:
+                    leading_whitespace = import_section[: -len(import_section.lstrip())]
+                    trailing_whitespace = import_section[len(import_section.rstrip()) :]
+                    if first_import_section and not import_section.lstrip(
+                        line_separator
+                    ).startswith(COMMENT_INDICATORS):
+                        import_section = import_section.lstrip(line_separator)
+                        raw_import_section = raw_import_section.lstrip(line_separator)
+                        first_import_section = False
+
+                    if indent:
+                        import_section = "".join(
+                            line[len(indent) :] for line in import_section.splitlines(keepends=True)
+                        )
+                        out_config = Config(
+                            config=config,
+                            line_length=max(config.line_length - len(indent), 0),
+                            wrap_length=max(config.wrap_length - len(indent), 0),
+                            lines_after_imports=1,
+                        )
+                    else:
+                        out_config = config
+
+                    sorted_import_section = output.sorted_imports(
+                        parse.file_contents(import_section, config=config),
+                        out_config,
+                        extension,
+                        import_type="cimport" if cimports else "import",
+                    )
+                    if not (import_section.strip() and not sorted_import_section):
+                        if indent:
+                            sorted_import_section = (
+                                leading_whitespace
+                                + textwrap.indent(sorted_import_section, indent).strip()
+                                + trailing_whitespace
+                            )
+
+                        if not made_changes:
+                            if config.ignore_whitespace:
+                                compare_in = remove_whitespace(
+                                    raw_import_section, line_separator=line_separator
+                                ).strip()
+                                compare_out = remove_whitespace(
+                                    sorted_import_section, line_separator=line_separator
+                                ).strip()
+                            else:
+                                compare_in = raw_import_section.strip()
+                                compare_out = sorted_import_section.strip()
+
+                            if compare_out != compare_in:
+                                made_changes = True
+
+                        output_stream.write(sorted_import_section)
+                        if not line and not indent and next_import_section:
+                            output_stream.write(line_separator)
+
+                if indent:
+                    output_stream.write(line)
+                    if not next_import_section:
+                        indent = ""
+
+                if next_import_section:
+                    cimports = next_cimports
+                    contains_imports = True
+                else:
+                    contains_imports = False
+                import_section = next_import_section
+                next_import_section = ""
+            else:
+                output_stream.write(line)
+                not_imports = False
+
+    return made_changes
diff --git a/isort/literal.py b/isort/literal.py
new file mode 100644
index 000000000..c4b101903
--- /dev/null
+++ b/isort/literal.py
@@ -0,0 +1,87 @@
+import ast
+from pprint import PrettyPrinter
+from typing import Any, Callable, Dict, List, Set, Tuple
+
+from isort.exceptions import LiteralParsingFailure, LiteralSortTypeMismatch
+from isort.settings import DEFAULT_CONFIG, Config
+
+
+class ISortPrettyPrinter(PrettyPrinter):
+    """An isort customized pretty printer, used to render sorted literals as source code."""
+
+    def __init__(self, config: Config):
+        super().__init__(width=config.line_length, compact=True)  # wrap at isort's line length
+
+
+type_mapping: Dict[str, Tuple[type, Callable[[Any, ISortPrettyPrinter], str]]] = {}
+
+
+def assignment(code: str, sort_type: str, extension: str, config: Config = DEFAULT_CONFIG) -> str:
+    """Sorts the literal assigned in `code` according to `sort_type`, returning the new source.
+    Raises `ValueError`, `LiteralParsingFailure` or `LiteralSortTypeMismatch` on bad input.
+    """
+    if sort_type not in type_mapping:
+        raise ValueError(
+            "Trying to sort using an undefined sort_type. "
+            f"Defined sort types are {', '.join(type_mapping)}."
+        )
+
+    variable_name, literal = code.split(" = ", 1)  # only the first " = " is the assignment
+    try:
+        value = ast.literal_eval(literal)
+    except Exception as error:
+        raise LiteralParsingFailure(code, error) from error
+
+    expected_type, sort_function = type_mapping[sort_type]
+    if type(value) is not expected_type:  # exact match: subclasses are not accepted
+        raise LiteralSortTypeMismatch(type(value), expected_type)
+
+    printer = ISortPrettyPrinter(config)
+    sorted_value_code = sort_function(value, printer)
+    if config.formatting_function:
+        sorted_value_code = config.formatting_function(
+            sorted_value_code, extension, config
+        ).rstrip()
+
+    sorted_value_code += code[len(code.rstrip()) :]  # keep the input's trailing whitespace
+    return f"{variable_name} = {sorted_value_code}"
+
+
+def register_type(name: str, kind: type):
+    """Returns a decorator registering a literal sort function for `kind` under `name`."""
+
+    def decorator(sort_function):
+        type_mapping[name] = (kind, sort_function)
+        return sort_function
+
+    return decorator
+
+
+@register_type("dict", dict)  # NOTE(review): pformat may re-sort by key — confirm
+def _dict(value: Dict[Any, Any], printer: ISortPrettyPrinter) -> str:
+    return printer.pformat(dict(sorted(value.items(), key=lambda item: item[1])))  # by value
+
+
+@register_type("list", list)
+def _list(value: List[Any], printer: ISortPrettyPrinter) -> str:
+    return printer.pformat(sorted(value))  # ascending; duplicates are kept
+
+
+@register_type("unique-list", list)
+def _unique_list(value: List[Any], printer: ISortPrettyPrinter) -> str:
+    return printer.pformat(sorted(set(value)))  # set() drops duplicates; sorted() gives a list
+
+
+@register_type("set", set)
+def _set(value: Set[Any], printer: ISortPrettyPrinter) -> str:  # an empty set renders as set()
+    return "{" + printer.pformat(tuple(sorted(value)))[1:-1] + "}" if value else "set()"
+
+
+@register_type("tuple", tuple)
+def _tuple(value: Tuple[Any, ...], printer: ISortPrettyPrinter) -> str:
+    return printer.pformat(tuple(sorted(value)))  # ascending; duplicates are kept
+
+
+@register_type("unique-tuple", tuple)
+def _unique_tuple(value: Tuple[Any, ...], printer: ISortPrettyPrinter) -> str:
+    return printer.pformat(tuple(sorted(set(value))))  # set() drops duplicates before sorting
diff --git a/tests/test_literal.py b/tests/test_literal.py
new file mode 100644
index 000000000..d94dd0812
--- /dev/null
+++ b/tests/test_literal.py
@@ -0,0 +1,33 @@
+import pytest
+
+import isort.literal
+from isort import exceptions
+from isort.settings import Config
+
+
+def test_value_mismatch():
+    with pytest.raises(exceptions.LiteralSortTypeMismatch):
+        isort.literal.assignment("x = [1, 2, 3]", "set", "py")  # a list literal is not a set
+
+
+def test_invalid_syntax():
+    with pytest.raises(exceptions.LiteralParsingFailure):
+        isort.literal.assignment("x = [1, 2, 3", "list", "py")  # closing bracket is missing
+
+
+def test_invalid_sort_type():
+    with pytest.raises(ValueError):  # the sort type is checked before the literal is parsed
+        isort.literal.assignment("x = [1, 2, 3", "tuple-list-not-exist", "py")
+
+
+def test_invalid_sort_type_with_valid_literal():
+    with pytest.raises(ValueError):  # a well-formed literal does not rescue an unknown type
+        isort.literal.assignment("x = [1, 2, 3]", "tuple-list-not-exist", "py")
+
+
+def test_value_assignment():
+    assert isort.literal.assignment("x = ['b', 'a']", "list", "py") == "x = ['a', 'b']"
+    assert (  # NOTE(review): presumably needs a formatter plugin named "example" — confirm
+        isort.literal.assignment("x = ['b', 'a']", "list", "py", Config(formatter="example"))
+        == 'x = ["a", "b"]'
+    )