From 9d1d1ddeba21abd2094079a4b3ba77320dd8080b Mon Sep 17 00:00:00 2001 From: Toilal Date: Wed, 23 Dec 2020 21:47:44 +0100 Subject: [PATCH] feat: Add python 3.8/3.9 support, drop python 2.7/3.4 support Releases are now automated with python-semantic-release BREAKING CHANGE: Python 2.7 and 3.4 support have been dropped --- .github/workflows/ci.yml | 100 +++++++ .travis.yml | 42 --- CHANGELOG.md | 4 + README.md | 564 ++++++++++++++++++++++++++++++++++++ README.rst | 521 --------------------------------- rebulk/__version__.py | 2 +- rebulk/builder.py | 7 +- rebulk/chain.py | 4 +- rebulk/introspector.py | 4 +- rebulk/match.py | 22 +- rebulk/pattern.py | 16 +- rebulk/rebulk.py | 2 +- rebulk/rules.py | 12 +- rebulk/test/test_match.py | 24 +- rebulk/test/test_pattern.py | 2 +- rebulk/toposort.py | 2 +- setup.cfg | 7 +- setup.py | 19 +- tox.ini | 12 +- 19 files changed, 720 insertions(+), 646 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml create mode 100644 CHANGELOG.md create mode 100644 README.md delete mode 100644 README.rst diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6fa3785 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,100 @@ +name: ci +on: + push: ~ + pull_request: ~ +jobs: + build: + if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: [ 3.5, 3.6, 3.7, 3.8, 3.9, pypy-3.6, pypy-3.7 ] + python-regex: [ True, False ] + + steps: + - name: Setup python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Checkout + uses: actions/checkout@v2 + + - name: Git User config + run: | + git config --global user.email "action@github.com" + git config --global user.name "github-actions" + + - name: Install Dependencies + run: | + pip install -e .[dev,test] + pip install 
coveralls + + - name: Install regex + run: | + pip install regex + if: ${{ matrix.python-regex }} + + - run: pylint rebulk + if: matrix.python-version != '3.9' + + - run: coverage run --source=rebulk setup.py test + + - run: python setup.py build + + - name: Coveralls + run: coveralls + env: + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + + release: + if: ${{ github.ref == 'refs/heads/master' && github.event_name == 'push' }} + needs: build + + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: [ 3.8 ] + + steps: + - name: Setup python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Git User config + run: | + git config --global user.email "action@github.com" + git config --global user.name "github-actions" + + - name: Install Dependencies + run: pip install -e .[dev,test] + + - name: Install python-semantic-release + run: pip install python-semantic-release + + - name: Publish release + run: semantic-release -v DEBUG publish + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + + - name: Merge master to develop + # uses: robotology/gh-action-nightly-merge@v1.3.1 # Wait PR merge https://github.com/robotology/gh-action-nightly-merge/pull/5 + uses: Toilal/gh-action-nightly-merge@master + with: + stable_branch: 'master' + development_branch: 'develop' + allow_ff: true + user_name: github-actions + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 47c8a7c..0000000 --- a/.travis.yml +++ /dev/null @@ -1,42 +0,0 @@ -language: python -python: - - 2.7 - - 3.5 - - 3.6 - - pypy -arch: - - amd64 - - ppc64le -matrix: - include: - - python: 3.7 - dist: xenial - sudo: true - - python: 3.8-dev - dist: xenial - sudo: true - - arch: ppc64le - python: 3.7 - 
dist: xenial - sudo: true - - arch: ppc64le - python: 3.8-dev - dist: xenial - sudo: true - exclude: # Disable pypy version for power support - - arch: ppc64le - python: pypy -env: - matrix: - - PYTHON_REGEX=false - - PYTHON_REGEX=true -install: - - pip install pip --upgrade - - pip install -e .[dev,test] - - if [[ "$PYTHON_REGEX" == "true" ]]; then pip install regex; fi; - - pip install coveralls -script: - - coverage run --source=rebulk setup.py test - - python setup.py build -after_success: - - coveralls diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..0be02a9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,4 @@ +Changelog +========= + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..e2998c1 --- /dev/null +++ b/README.md @@ -0,0 +1,564 @@ +--- +title: ReBulk +--- + +[![Latest Version](http://img.shields.io/pypi/v/rebulk.svg)](https://pypi.python.org/pypi/rebulk) +[![MIT License](http://img.shields.io/badge/license-MIT-blue.svg)](https://pypi.python.org/pypi/rebulk) +[![Build Status](https://img.shields.io/github/workflow/status/Toilal/rebulk/ci)](https://github.com/Toilal/rebulk/actions?query=workflow%3Aci) +[![Coveralls](http://img.shields.io/coveralls/Toilal/rebulk.svg)](https://coveralls.io/r/Toilal/rebulk?branch=master) +[![semantic-release](https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg)](https://github.com/relekang/python-semantic-release) + +ReBulk is a python library that performs advanced searches in strings +that would be hard to implement using [re +module](https://docs.python.org/3/library/re.html) or [String +methods](https://docs.python.org/3/library/stdtypes.html#str) only. + +It includes some features like `Patterns`, `Match`, `Rule` that allows +developers to build a custom and complex string matcher using a readable +and extendable API. 
+ +This project is hosted on GitHub: <https://github.com/Toilal/rebulk> + +Install +======= + +```sh +$ pip install rebulk +``` + +Usage +===== + +Regular expression, string and function based patterns are declared in a +`Rebulk` object. It uses a fluent API to chain `string`, `regex`, and +`functional` methods to define various pattern types. + +```python +>>> from rebulk import Rebulk +>>> bulk = Rebulk().string('brown').regex(r'qu\w+').functional(lambda s: (20, 25)) +``` + +When `Rebulk` object is fully configured, you can call `matches` method +with an input string to retrieve all `Match` objects found by registered +pattern. + +```python +>>> bulk.matches("The quick brown fox jumps over the lazy dog") +[<brown:(10, 15)>, <quick:(4, 9)>, <jumps:(20, 25)>] +``` + +If multiple `Match` objects are found at the same position, only the +longer one is kept. + +```python +>>> bulk = Rebulk().string('lakers').string('la') +>>> bulk.matches("the lakers are from la") +[<lakers:(4, 10)>, <la:(20, 22)>] +``` + +String Patterns +=============== + +String patterns are based on +[str.find](https://docs.python.org/3/library/stdtypes.html#str.find) +method to find matches, but returns all matches in the string. +`ignore_case` can be enabled to ignore case. + +```python +>>> Rebulk().string('la').matches("lalalilala") +[<la:(0, 2)>, <la:(2, 4)>, <la:(6, 8)>, <la:(8, 10)>] + +>>> Rebulk().string('la').matches("LalAlilAla") +[<la:(8, 10)>] + +>>> Rebulk().string('la', ignore_case=True).matches("LalAlilAla") +[<La:(0, 2)>, <lA:(2, 4)>, <lA:(6, 8)>, <la:(8, 10)>] +``` + +You can define several patterns with a single `string` method call. + +```python +>>> Rebulk().string('Winter', 'coming').matches("Winter is coming...") +[<Winter:(0, 6)>, <coming:(10, 16)>] +``` + +Regular Expression Patterns +=========================== + +Regular Expression patterns are based on a compiled regular expression. +[re.finditer](https://docs.python.org/3/library/re.html#re.finditer) +method is used to find matches. + +If [regex module](https://pypi.python.org/pypi/regex) is available, it +will be used by rebulk instead of default [re +module](https://docs.python.org/3/library/re.html). 
+ +```python +>>> Rebulk().regex(r'l\w').matches("lolita") +[, ] +``` + +You can define several patterns with a single `regex` method call. + +```python +>>> Rebulk().regex(r'Wint\wr', r'com\w{3}').matches("Winter is coming...") +[, ] +``` + +All keyword arguments from +[re.compile](https://docs.python.org/3/library/re.html#re.compile) are +supported. + +```python +>>> import re # import required for flags constant +>>> Rebulk().regex('L[A-Z]KERS', flags=re.IGNORECASE) \ +... .matches("The LaKeRs are from La") +[] + +>>> Rebulk().regex('L[A-Z]', 'L[A-Z]KERS', flags=re.IGNORECASE) \ +... .matches("The LaKeRs are from La") +[, ] + +>>> Rebulk().regex(('L[A-Z]', re.IGNORECASE), ('L[a-z]KeRs')) \ +... .matches("The LaKeRs are from La") +[, ] +``` + +If [regex module](https://pypi.python.org/pypi/regex) is available, it +automatically supports repeated captures. + +```python +>>> # If regex module is available, repeated_captures is True by default. +>>> matches = Rebulk().regex(r'(\d+)(?:-(\d+))+').matches("01-02-03-04") +>>> matches[0].children # doctest:+SKIP +[<01:(0, 2)>, <02:(3, 5)>, <03:(6, 8)>, <04:(9, 11)>] + +>>> # If regex module is not available, or if repeated_captures is forced to False. +>>> matches = Rebulk().regex(r'(\d+)(?:-(\d+))+', repeated_captures=False) \ +... .matches("01-02-03-04") +>>> matches[0].children +[<01:(0, 2)+initiator=01-02-03-04>, <04:(9, 11)+initiator=01-02-03-04>] +``` + +- `abbreviations` + + Defined as a list of 2-tuple, each tuple is an abbreviation. It + simply replace `tuple[0]` with `tuple[1]` in the expression. + + \>\>\> Rebulk().regex(r\'Custom-separators\', + abbreviations=\[(\"-\", r\"\[W\_\]+\")\])\... + .matches(\"Custom\_separators using-abbreviations\") + \[\\] + +Functional Patterns +=================== + +Functional Patterns are based on the evaluation of a function. 
+ +The function should have the same parameters as `Rebulk.matches` method, +that is the input string, and must return at least start index and end +index of the `Match` object. + +```python +>>> def func(string): +... index = string.find('?') +... if index > -1: +... return 0, index - 11 +>>> Rebulk().functional(func).matches("Why do simple ? Forget about it ...") +[<Why:(0, 3)>] +``` + +You can also return a dict of keyword arguments for `Match` object. + +You can define several patterns with a single `functional` method call, +and function used can return multiple matches. + +Chain Patterns +============== + +Chain Patterns are ordered composition of string, functional and regex +patterns. Repeater can be set to define repetition on chain part. + +```python +>>> r = Rebulk().regex_defaults(flags=re.IGNORECASE)\ +... .defaults(children=True, formatter={'episode': int, 'version': int})\ +... .chain()\ +... .regex(r'e(?P<episode>\d{1,4})').repeater(1)\ +... .regex(r'v(?P<version>\d+)').repeater('?')\ +... .regex(r'[ex-](?P<episode>\d{1,4})').repeater('*')\ +... .close() # .repeater(1) could be omitted as it's the default behavior +>>> r.matches("This is E14v2-15-16-17").to_dict() # converts matches to dict +MatchesDict([('episode', [14, 15, 16, 17]), ('version', 2)]) +``` + +Patterns parameters +=================== + +All patterns have options that can be given as keyword arguments. + +- `validator` + + Function to validate `Match` value given by the pattern. Can also be + a `dict`, to use `validator` with pattern named with key. + + ```python + >>> def check_leap_year(match): + ... return int(match.value) in [1980, 1984, 1988] + >>> matches = Rebulk().regex(r'\d{4}', validator=check_leap_year) \ + ... .matches("In year 1982 ...") + >>> len(matches) + 0 + >>> matches = Rebulk().regex(r'\d{4}', validator=check_leap_year) \ + ... .matches("In year 1984 ...") + >>> len(matches) + 1 + ``` + +Some base validator functions are available in `rebulk.validators` +module. 
Most of those functions have to be configured using +`functools.partial` to map them to a function accepting a single `match` +argument. + +- `formatter` + + Function to convert `Match` value given by the pattern. Can also be + a `dict`, to use `formatter` with matches named with key. + + ```python + >>> def year_formatter(value): + ... return int(value) + >>> matches = Rebulk().regex(r'\d{4}', formatter=year_formatter) \ + ... .matches("In year 1982 ...") + >>> isinstance(matches[0].value, int) + True + ``` + +- `pre_match_processor` / `post_match_processor` + + Function to mutate or invalidate a match generated by a pattern. + + Function has a single parameter which is the Match object. If + function returns False, it will be considered as an invalid match. + If function returns a match instance, it will replace the original + match with this instance in the process. + +- `post_processor` + + Function to change the default output of the pattern. Function + parameters are Matches list and Pattern object. + +- `name` + + The name of the pattern. It is automatically passed to `Match` + objects generated by this pattern. + +- `tags` + + A list of strings that qualify this pattern. + +- `value` + + Override value property for generated `Match` objects. Can also be a + `dict`, to use `value` with pattern named with key. + +- `validate_all` + + By default, validator is called for returned `Match` objects only. + Enable this option to validate them all, parent and children + included. + +- `format_all` + + By default, formatter is called for returned `Match` values only. + Enable this option to format them all, parent and children included. + +- `disabled` + + A `function(context)` to disable the pattern if returning `True`. + +- `children` + + If `True`, all children `Match` objects will be retrieved instead of + a single parent `Match` object. + +- `private` + + If `True`, `Match` objects generated from this pattern are available + internally only. 
They will be removed at the end of `Rebulk.matches` + method call. + +- `private_parent` + + Force parent matches to be returned and flag them as private. + +- `private_children` + + Force children matches to be returned and flag them as private. + +- `private_names` + + Matches names that will be declared as private + +- `ignore_names` + + Matches names that will be ignored from the pattern output, after + validation. + +- `marker` + + If `true`, `Match` objects generated from this pattern will be + markers matches instead of standard matches. They won\'t be included + in `Matches` sequence, but will be available in `Matches.markers` + sequence (see `Markers` section). + +Match +===== + +A `Match` object is the result created by a registered pattern. + +It has a `value` property defined, and position indices are available +through `start`, `end` and `span` properties. + +In some cases, it contains children `Match` objects in `children` +property, and each child `Match` object references its parent in `parent` +property. Also, a `name` property can be defined for the match. + +If groups are defined in a Regular Expression pattern, each group match +will be converted to a single `Match` object. If a group has a name +defined (`(?P<name>group)`), it is set as `name` property in a child +`Match` object. The whole regexp match (`re.group(0)`) will be converted +to the main `Match` object, and all subgroups (1, 2, \... n) will be +converted to `children` matches of the main `Match` object. + +```python +>>> matches = Rebulk() \ +... .regex(r"One, (?P<one>\w+), Two, (?P<two>\w+), Three, (?P<three>\w+)") \ +... .matches("Zero, 0, One, 1, Two, 2, Three, 3, Four, 4") +>>> matches +[<One, 1, Two, 2, Three, 3:(9, 33)>] +>>> for child in matches[0].children: +... '%s = %s' % (child.name, child.value) +'one = 1' +'two = 2' +'three = 3' +``` + +It\'s possible to retrieve only children by using `children` parameters. +You can also customize the way structure is generated with `every`, +`private_parent` and `private_children` parameters. 
+ +```python +>>> matches = Rebulk() \ +... .regex(r"One, (?P<one>\w+), Two, (?P<two>\w+), Three, (?P<three>\w+)", children=True) \ +... .matches("Zero, 0, One, 1, Two, 2, Three, 3, Four, 4") +>>> matches +[<1:(14, 15)+name=one+initiator=One, 1, Two, 2, Three, 3>, <2:(22, 23)+name=two+initiator=One, 1, Two, 2, Three, 3>, <3:(32, 33)+name=three+initiator=One, 1, Two, 2, Three, 3>] +``` + +Match object has the following properties that can be given to Pattern +objects + +- `formatter` + + Function to convert `Match` value given by the pattern. Can also be + a `dict`, to use `formatter` with matches named with key. + + ```python + >>> def year_formatter(value): + ... return int(value) + >>> matches = Rebulk().regex(r'\d{4}', formatter=year_formatter) \ + ... .matches("In year 1982 ...") + >>> isinstance(matches[0].value, int) + True + ``` + +- `format_all` + + By default, formatter is called for returned `Match` values only. + Enable this option to format them all, parent and children included. + +- `conflict_solver` + + A `function(match, conflicting_match)` used to solve conflict. + Returned object will be removed from matches by `ConflictSolver` + default rule. If `__default__` string is returned, it will fall back + to default behavior keeping longer match. + +Matches +======= + +A `Matches` object holds the result of `Rebulk.matches` method call. +It\'s a sequence of `Match` objects and it behaves like a list. + +All methods accept a `predicate` function to filter `Match` objects +using a callable, and an `index` int to retrieve a single element from +default returned matches. + +It has the following additional methods and properties on it. + +- `starting(index, predicate=None, index=None)` + + Retrieves a list of `Match` objects that start at given index. + +- `ending(index, predicate=None, index=None)` + + Retrieves a list of `Match` objects that end at given index. 
+ +- `previous(match, predicate=None, index=None)` + + Retrieves a list of `Match` objects that are previous and nearest to + match. + +- `next(match, predicate=None, index=None)` + + Retrieves a list of `Match` objects that are next and nearest to + match. + +- `tagged(tag, predicate=None, index=None)` + + Retrieves a list of `Match` objects that have the given tag defined. + +- `named(name, predicate=None, index=None)` + + Retrieves a list of `Match` objects that have the given name. + +- `range(start=0, end=None, predicate=None, index=None)` + + Retrieves a list of `Match` objects for given range, sorted from + start to end. + +- `holes(start=0, end=None, formatter=None, ignore=None, predicate=None, index=None)` + + Retrieves a list of *hole* `Match` objects for given range. A hole + match is created for each range where no match is available. + +- `conflicting(match, predicate=None, index=None)` + + Retrieves a list of `Match` objects that conflicts with given match. + +- `chain_before(self, position, seps, start=0, predicate=None, index=None)`: + + Retrieves a list of chained matches, before position, matching + predicate and separated by characters from seps only. + +- `chain_after(self, position, seps, end=None, predicate=None, index=None)`: + + Retrieves a list of chained matches, after position, matching + predicate and separated by characters from seps only. + +- `at_match(match, predicate=None, index=None)` + + Retrieves a list of `Match` objects at the same position as match. + +- `at_span(span, predicate=None, index=None)` + + Retrieves a list of `Match` objects from given (start, end) tuple. + +- `at_index(pos, predicate=None, index=None)` + + Retrieves a list of `Match` objects from given position. + +- `names` + + Retrieves a sequence of all `Match.name` properties. + +- `tags` + + Retrieves a sequence of all `Match.tags` properties. 
+ +- `to_dict(details=False, first_value=False, enforce_list=False)` + + Convert to an ordered dict, with `Match.name` as key and + `Match.value` as value. + + It\'s a subclass of + [OrderedDict](https://docs.python.org/2/library/collections.html#collections.OrderedDict), + that contains a `matches` property which is a dict with `Match.name` + as key and list of `Match` objects as value. + + If `first_value` is `True` and distinct values are found for the + same name, value will be wrapped to a list. If `False`, first value + only will be kept and values lists can be retrieved with + `values_list` which is a dict with `Match.name` as key and list of + `Match.value` as value. + + if `enforce_list` is `True`, all values will be wrapped to a list, + even if a single value is found. + + If `details` is True, `Match.value` objects are replaced with + complete `Match` object. + +- `markers` + + A custom `Matches` sequences specialized for `markers` matches (see + below) + +Markers +======= + +If you have defined some patterns with `markers` property, then +`Matches.markers` points to a special `Matches` sequence that contains +only `markers` matches. This sequence supports all methods from +`Matches`. + +Markers matches are not intended to be used in final result, but can be +used to implement a `Rule`. + +Rules +===== + +Rules are a convenient and readable way to implement advanced +conditional logic involving several `Match` objects. When a rule is +triggered, it can perform an action on `Matches` object, like filtering +out, adding additional tags or renaming. + +Rules are implemented by extending the abstract `Rule` class. They are +registered using `Rebulk.rule` method by giving either a `Rule` +instance, a `Rule` class or a module containing `Rule classes` only. + +For a rule to be triggered, `Rule.when` method must return `True`, or a +non empty list of `Match` objects, or any other truthy object. 
When +triggered, `Rule.then` method is called to perform the action with +`when_response` parameter defined as the response of `Rule.when` call. + +Instead of implementing `Rule.then` method, you can define `consequence` +class property with a Consequence class or instance, like +`RemoveMatch`, `RenameMatch` or `AppendMatch`. You can also use a list +of consequences when required: `when_response` must then be iterable, +and elements of this iterable will be given to each consequence in the +same order. + +When many rules are registered, it can be useful to set `priority` class +variable to define a priority integer between all rule executions +(higher priorities will be executed first). You can also define +`dependency` to declare another Rule class as dependency for the current +rule, meaning that it will be executed before. + +For all rules with the same `priority` value, `when` is called before, +and `then` is called after all. + +```python +>>> from rebulk import Rule, RemoveMatch + +>>> class FirstOnlyRule(Rule): +... consequence = RemoveMatch +... +... def when(self, matches, context): +... grabbed = matches.named("grabbed", 0) +... if grabbed and matches.previous(grabbed): +... return grabbed + +>>> rebulk = Rebulk() + +>>> rebulk.regex("This match(.*?)grabbed", name="grabbed") +<...Rebulk object ...> +>>> rebulk.regex("if it's(.*?)first match", private=True) +<...Rebulk object at ...> +>>> rebulk.rules(FirstOnlyRule) +<...Rebulk object at ...> + +>>> rebulk.matches("This match is grabbed only if it's the first match") +[] +>>> rebulk.matches("if it's NOT the first match, This match is NOT grabbed") +[] +``` diff --git a/README.rst b/README.rst deleted file mode 100644 index ae43caa..0000000 --- a/README.rst +++ /dev/null @@ -1,521 +0,0 @@ -ReBulk -======= - -.. image:: http://img.shields.io/pypi/v/rebulk.svg - :target: https://pypi.python.org/pypi/rebulk - :alt: Latest Version - -.. 
image:: http://img.shields.io/badge/license-MIT-blue.svg - :target: https://pypi.python.org/pypi/rebulk - :alt: MIT License - -.. image:: http://img.shields.io/travis/Toilal/rebulk.svg - :target: http://travis-ci.org/Toilal/rebulk?branch=master - :alt: Build Status - -.. image:: http://img.shields.io/coveralls/Toilal/rebulk.svg - :target: https://coveralls.io/r/Toilal/rebulk?branch=master - :alt: Coveralls - -ReBulk is a python library that performs advanced searches in strings that would be hard to implement using -`re module`_ or `String methods`_ only. - -It includes some features like ``Patterns``, ``Match``, ``Rule`` that allows developers to build a -custom and complex string matcher using a readable and extendable API. - -This project is hosted on GitHub: ``_ - -Install -------- -.. code-block:: sh - - $ pip install rebulk - -Usage ------- -Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It use a fluent API to -chain ``string``, ``regex``, and ``functional`` methods to define various patterns types. - -.. code-block:: python - - >>> from rebulk import Rebulk - >>> bulk = Rebulk().string('brown').regex(r'qu\w+').functional(lambda s: (20, 25)) - -When ``Rebulk`` object is fully configured, you can call ``matches`` method with an input string to retrieve all -``Match`` objects found by registered pattern. - -.. code-block:: python - - >>> bulk.matches("The quick brown fox jumps over the lazy dog") - [, , ] - -If multiple ``Match`` objects are found at the same position, only the longer one is kept. - -.. code-block:: python - - >>> bulk = Rebulk().string('lakers').string('la') - >>> bulk.matches("the lakers are from la") - [, ] - -String Patterns ---------------- -String patterns are based on `str.find`_ method to find matches, but returns all matches in the string. ``ignore_case`` -can be enabled to ignore case. - -.. 
code-block:: python - - >>> Rebulk().string('la').matches("lalalilala") - [, , , ] - - >>> Rebulk().string('la').matches("LalAlilAla") - [] - - >>> Rebulk().string('la', ignore_case=True).matches("LalAlilAla") - [, , , ] - -You can define several patterns with a single ``string`` method call. - -.. code-block:: python - - >>> Rebulk().string('Winter', 'coming').matches("Winter is coming...") - [, ] - -Regular Expression Patterns ---------------------------- -Regular Expression patterns are based on a compiled regular expression. -`re.finditer`_ method is used to find matches. - -If `regex module`_ is available, it will be used by rebulk instead of default `re module`_. - -.. code-block:: python - - >>> Rebulk().regex(r'l\w').matches("lolita") - [, ] - -You can define several patterns with a single ``regex`` method call. - -.. code-block:: python - - >>> Rebulk().regex(r'Wint\wr', r'com\w{3}').matches("Winter is coming...") - [, ] - -All keyword arguments from `re.compile`_ are supported. - -.. code-block:: python - - >>> import re # import required for flags constant - >>> Rebulk().regex('L[A-Z]KERS', flags=re.IGNORECASE) \ - ... .matches("The LaKeRs are from La") - [] - - >>> Rebulk().regex('L[A-Z]', 'L[A-Z]KERS', flags=re.IGNORECASE) \ - ... .matches("The LaKeRs are from La") - [, ] - - >>> Rebulk().regex(('L[A-Z]', re.IGNORECASE), ('L[a-z]KeRs')) \ - ... .matches("The LaKeRs are from La") - [, ] - -If `regex module`_ is available, it automatically supports repeated captures. - -.. code-block:: python - - >>> # If regex module is available, repeated_captures is True by default. - >>> matches = Rebulk().regex(r'(\d+)(?:-(\d+))+').matches("01-02-03-04") - >>> matches[0].children # doctest:+SKIP - [<01:(0, 2)>, <02:(3, 5)>, <03:(6, 8)>, <04:(9, 11)>] - - >>> # If regex module is not available, or if repeated_captures is forced to False. - >>> matches = Rebulk().regex(r'(\d+)(?:-(\d+))+', repeated_captures=False) \ - ... 
.matches("01-02-03-04") - >>> matches[0].children - [<01:(0, 2)+initiator=01-02-03-04>, <04:(9, 11)+initiator=01-02-03-04>] - -- ``abbreviations`` - - Defined as a list of 2-tuple, each tuple is an abbreviation. It simply replace ``tuple[0]`` with ``tuple[1]`` in the - expression. - - >>> Rebulk().regex(r'Custom-separators', abbreviations=[("-", r"[\W_]+")])\ - ... .matches("Custom_separators using-abbreviations") - [] - - -Functional Patterns -------------------- -Functional Patterns are based on the evaluation of a function. - -The function should have the same parameters as ``Rebulk.matches`` method, that is the input string, -and must return at least start index and end index of the ``Match`` object. - -.. code-block:: python - - >>> def func(string): - ... index = string.find('?') - ... if index > -1: - ... return 0, index - 11 - >>> Rebulk().functional(func).matches("Why do simple ? Forget about it ...") - [] - -You can also return a dict of keywords arguments for ``Match`` object. - -You can define several patterns with a single ``functional`` method call, and function used can return multiple -matches. - -Chain Patterns --------------- -Chain Patterns are ordered composition of string, functional and regex patterns. Repeater can be set to define -repetition on chain part. - -.. code-block:: python - - >>> r = Rebulk().regex_defaults(flags=re.IGNORECASE)\ - ... .defaults(children=True, formatter={'episode': int, 'version': int})\ - ... .chain()\ - ... .regex(r'e(?P\d{1,4})').repeater(1)\ - ... .regex(r'v(?P\d+)').repeater('?')\ - ... .regex(r'[ex-](?P\d{1,4})').repeater('*')\ - ... .close() # .repeater(1) could be omitted as it's the default behavior - >>> r.matches("This is E14v2-15-16-17").to_dict() # converts matches to dict - MatchesDict([('episode', [14, 15, 16, 17]), ('version', 2)]) - -Patterns parameters -------------------- - -All patterns have options that can be given as keyword arguments. 
- -- ``validator`` - - Function to validate ``Match`` value given by the pattern. Can also be a ``dict``, to use ``validator`` with pattern - named with key. - - .. code-block:: python - - >>> def check_leap_year(match): - ... return int(match.value) in [1980, 1984, 1988] - >>> matches = Rebulk().regex(r'\d{4}', validator=check_leap_year) \ - ... .matches("In year 1982 ...") - >>> len(matches) - 0 - >>> matches = Rebulk().regex(r'\d{4}', validator=check_leap_year) \ - ... .matches("In year 1984 ...") - >>> len(matches) - 1 - -Some base validator functions are available in ``rebulk.validators`` module. Most of those functions have to be -configured using ``functools.partial`` to map them to function accepting a single ``match`` argument. - -- ``formatter`` - - Function to convert ``Match`` value given by the pattern. Can also be a ``dict``, to use ``formatter`` with matches - named with key. - - .. code-block:: python - - >>> def year_formatter(value): - ... return int(value) - >>> matches = Rebulk().regex(r'\d{4}', formatter=year_formatter) \ - ... .matches("In year 1982 ...") - >>> isinstance(matches[0].value, int) - True - -- ``pre_match_processor`` / ``post_match_processor`` - - Function to mutagen or invalidate a match generated by a pattern. - - Function has a single parameter which is the Match object. If function returns False, it will be considered as an - invalid match. If function returns a match instance, it will replace the original match with this instance in the - process. - -- ``post_processor`` - - Function to change the default output of the pattern. Function parameters are Matches list and Pattern object. - -- ``name`` - - The name of the pattern. It is automatically passed to ``Match`` objects generated by this pattern. - -- ``tags`` - - A list of string that qualifies this pattern. - -- ``value`` - - Override value property for generated ``Match`` objects. Can also be a ``dict``, to use ``value`` with pattern - named with key. 
- -- ``validate_all`` - - By default, validator is called for returned ``Match`` objects only. Enable this option to validate them all, parent - and children included. - -- ``format_all`` - - By default, formatter is called for returned ``Match`` values only. Enable this option to format them all, parent and - children included. - -- ``disabled`` - - A ``function(context)`` to disable the pattern if returning ``True``. - -- ``children`` - - If ``True``, all children ``Match`` objects will be retrieved instead of a single parent ``Match`` object. - -- ``private`` - - If ``True``, ``Match`` objects generated from this pattern are available internally only. They will be removed at - the end of ``Rebulk.matches`` method call. - -- ``private_parent`` - - Force parent matches to be returned and flag them as private. - -- ``private_children`` - - Force children matches to be returned and flag them as private. - -- ``private_names`` - - Matches names that will be declared as private - -- ``ignore_names`` - - Matches names that will be ignored from the pattern output, after validation. - -- ``marker`` - - If ``true``, ``Match`` objects generated from this pattern will be markers matches instead of standard matches. - They won't be included in ``Matches`` sequence, but will be available in ``Matches.markers`` sequence (see - ``Markers`` section). - - -Match ------ - -A ``Match`` object is the result created by a registered pattern. - -It has a ``value`` property defined, and position indices are available through ``start``, ``end`` and ``span`` -properties. - -In some case, it contains children ``Match`` objects in ``children`` property, and each child ``Match`` object -reference its parent in ``parent`` property. Also, a ``name`` property can be defined for the match. - -If groups are defined in a Regular Expression pattern, each group match will be converted to a -single ``Match`` object. 
If a group has a name defined (``(?P<name>group)``), it is set as ``name`` property in a child -``Match`` object. The whole regexp match (``re.group(0)``) will be converted to the main ``Match`` object, -and all subgroups (1, 2, ... n) will be converted to ``children`` matches of the main ``Match`` object. - -.. code-block:: python - - >>> matches = Rebulk() \ - ... .regex(r"One, (?P<one>\w+), Two, (?P<two>\w+), Three, (?P<three>\w+)") \ - ... .matches("Zero, 0, One, 1, Two, 2, Three, 3, Four, 4") - >>> matches - [<One, 1, Two, 2, Three, 3:(9, 33)>] - >>> for child in matches[0].children: - ... '%s = %s' % (child.name, child.value) - 'one = 1' - 'two = 2' - 'three = 3' - -It's possible to retrieve only children by using ``children`` parameters. You can also customize the way structure -is generated with ``every``, ``private_parent`` and ``private_children`` parameters. - -.. code-block:: python - - >>> matches = Rebulk() \ - ... .regex(r"One, (?P<one>\w+), Two, (?P<two>\w+), Three, (?P<three>\w+)", children=True) \ - ... .matches("Zero, 0, One, 1, Two, 2, Three, 3, Four, 4") - >>> matches - [<1:(14, 15)+name=one+initiator=One, 1, Two, 2, Three, 3>, <2:(22, 23)+name=two+initiator=One, 1, Two, 2, Three, 3>, <3:(32, 33)+name=three+initiator=One, 1, Two, 2, Three, 3>] - -Match object has the following properties that can be given to Pattern objects - -- ``formatter`` - - Function to convert ``Match`` value given by the pattern. Can also be a ``dict``, to use ``formatter`` with matches - named with key. - - .. code-block:: python - - >>> def year_formatter(value): - ... return int(value) - >>> matches = Rebulk().regex(r'\d{4}', formatter=year_formatter) \ - ... .matches("In year 1982 ...") - >>> isinstance(matches[0].value, int) - True - -- ``format_all`` - - By default, formatter is called for returned ``Match`` values only. Enable this option to format them all, parent and - children included. - -- ``conflict_solver`` - - A ``function(match, conflicting_match)`` used to solve conflict. 
Returned object will be removed from matches by - ``ConflictSolver`` default rule. If ``__default__`` string is returned, it will fallback to default behavior - keeping longer match. - - -Matches -------- - -A ``Matches`` object holds the result of ``Rebulk.matches`` method call. It's a sequence of ``Match`` objects and -it behaves like a list. - -All methods accepts a ``predicate`` function to filter ``Match`` objects using a callable, and an ``index`` int to -retrieve a single element from default returned matches. - -It has the following additional methods and properties on it. - -- ``starting(index, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects that starts at given index. - -- ``ending(index, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects that ends at given index. - -- ``previous(match, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects that are previous and nearest to match. - -- ``next(match, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects that are next and nearest to match. - -- ``tagged(tag, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects that have the given tag defined. - -- ``named(name, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects that have the given name. - -- ``range(start=0, end=None, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects for given range, sorted from start to end. - -- ``holes(start=0, end=None, formatter=None, ignore=None, predicate=None, index=None)`` - - Retrieves a list of *hole* ``Match`` objects for given range. A hole match is created for each range where no match - is available. - -- ``conflicting(match, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects that conflicts with given match. 
- -- ``chain_before(self, position, seps, start=0, predicate=None, index=None)``: - - Retrieves a list of chained matches, before position, matching predicate and separated by characters from seps only. - -- ``chain_after(self, position, seps, end=None, predicate=None, index=None)``: - - Retrieves a list of chained matches, after position, matching predicate and separated by characters from seps only. - -- ``at_match(match, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects at the same position as match. - -- ``at_span(span, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects from given (start, end) tuple. - -- ``at_index(pos, predicate=None, index=None)`` - - Retrieves a list of ``Match`` objects from given position. - -- ``names`` - - Retrieves a sequence of all ``Match.name`` properties. - -- ``tags`` - - Retrieves a sequence of all ``Match.tags`` properties. - -- ``to_dict(details=False, first_value=False, enforce_list=False)`` - - Convert to an ordered dict, with ``Match.name`` as key and ``Match.value`` as value. - - It's a subclass of `OrderedDict`_, that contains a ``matches`` property which is a dict with ``Match.name`` as key - and list of ``Match`` objects as value. - - If ``first_value`` is ``True`` and distinct values are found for the same name, value will be wrapped to a list. - If ``False``, first value only will be kept and values lists can be retrieved with ``values_list`` which is a dict - with ``Match.name`` as key and list of ``Match.value`` as value. - - if ``enforce_list`` is ``True``, all values will be wrapped to a list, even if a single value is found. - - If ``details`` is True, ``Match.value`` objects are replaced with complete ``Match`` object. 
- -- ``markers`` - - A custom ``Matches`` sequences specialized for ``markers`` matches (see below) - -Markers -------- - -If you have defined some patterns with ``markers`` property, then ``Matches.markers`` points to a special ``Matches`` -sequence that contains only ``markers`` matches. This sequence supports all methods from ``Matches``. - -Markers matches are not intended to be used in final result, but can be used to implement a ``Rule``. - -Rules ------ -Rules are a convenient and readable way to implement advanced conditional logic involving several ``Match`` objects. -When a rule is triggered, it can perform an action on ``Matches`` object, like filtering out, adding additional tags or -renaming. - -Rules are implemented by extending the abstract ``Rule`` class. They are registered using ``Rebulk.rule`` method by -giving either a ``Rule`` instance, a ``Rule`` class or a module containing ``Rule classes`` only. - -For a rule to be triggered, ``Rule.when`` method must return ``True``, or a non empty list of ``Match`` -objects, or any other truthy object. When triggered, ``Rule.then`` method is called to perform the action with -``when_response`` parameter defined as the response of ``Rule.when`` call. - -Instead of implementing ``Rule.then`` method, you can define ``consequence`` class property with a Consequence classe -or instance, like ``RemoveMatch``, ``RenameMatch`` or ``AppendMatch``. You can also use a list of consequence when -required : ``when_response`` must then be iterable, and elements of this iterable will be given to each consequence in -the same order. - -When many rules are registered, it can be useful to set ``priority`` class variable to define a priority integer -between all rule executions (higher priorities will be executed first). You can also define ``dependency`` to declare -another Rule class as dependency for the current rule, meaning that it will be executed before. 
- -For all rules with the same ``priority`` value, ``when`` is called before, and ``then`` is called after all. - -.. code-block:: python - - >>> from rebulk import Rule, RemoveMatch - - >>> class FirstOnlyRule(Rule): - ... consequence = RemoveMatch - ... - ... def when(self, matches, context): - ... grabbed = matches.named("grabbed", 0) - ... if grabbed and matches.previous(grabbed): - ... return grabbed - - >>> rebulk = Rebulk() - - >>> rebulk.regex("This match(.*?)grabbed", name="grabbed") - <...Rebulk object ...> - >>> rebulk.regex("if it's(.*?)first match", private=True) - <...Rebulk object at ...> - >>> rebulk.rules(FirstOnlyRule) - <...Rebulk object at ...> - - >>> rebulk.matches("This match is grabbed only if it's the first match") - [<This match is grabbed:(0, 21)+name=grabbed>] - >>> rebulk.matches("if it's NOT the first match, This match is NOT grabbed") - [] - -.. _re module: https://docs.python.org/3/library/re.html -.. _regex module: https://pypi.python.org/pypi/regex -.. _String methods: https://docs.python.org/3/library/stdtypes.html#str -.. _str.find: https://docs.python.org/3/library/stdtypes.html#str.find -.. _re.finditer: https://docs.python.org/3/library/re.html#re.finditer -.. _re.compile: https://docs.python.org/3/library/re.html#re.compile -.. 
_OrderedDict: https://docs.python.org/2/library/collections.html#collections.OrderedDict - diff --git a/rebulk/__version__.py b/rebulk/__version__.py index 6d7f2ad..939c554 100644 --- a/rebulk/__version__.py +++ b/rebulk/__version__.py @@ -4,4 +4,4 @@ Version module """ # pragma: no cover -__version__ = '2.0.2.dev0' +__version__ = '2.0.1' diff --git a/rebulk/builder.py b/rebulk/builder.py index c91420a..c0053f2 100644 --- a/rebulk/builder.py +++ b/rebulk/builder.py @@ -7,16 +7,13 @@ from copy import deepcopy from logging import getLogger -from six import add_metaclass - from .loose import set_defaults from .pattern import RePattern, StringPattern, FunctionalPattern log = getLogger(__name__).log -@add_metaclass(ABCMeta) -class Builder(object): +class Builder(metaclass=ABCMeta): """ Base builder class for patterns """ @@ -147,7 +144,7 @@ def build_chain(self, **kwargs): :return: :rtype: """ - from .chain import Chain + from .chain import Chain # pylint:disable=import-outside-toplevel set_defaults(self._chain_defaults, kwargs) set_defaults(self._defaults, kwargs) chain = Chain(self, **kwargs) diff --git a/rebulk/chain.py b/rebulk/chain.py index ba31ec9..f2ed66c 100644 --- a/rebulk/chain.py +++ b/rebulk/chain.py @@ -125,7 +125,7 @@ def _process_match(self, match, match_index, child=False): :rtype: """ # pylint: disable=too-many-locals - ret = super(Chain, self)._process_match(match, match_index, child=child) + ret = super()._process_match(match, match_index, child=child) if ret: return True @@ -144,7 +144,7 @@ def _process_match(self, match, match_index, child=False): for last_match in last_matches: match.children.remove(last_match) match.end = match.children[-1].end if match.children else match.start - ret = super(Chain, self)._process_match(match, match_index, child=child) + ret = super()._process_match(match, match_index, child=child) if ret: return True diff --git a/rebulk/introspector.py b/rebulk/introspector.py index bfefcb7..f4a4f70 100644 --- 
a/rebulk/introspector.py +++ b/rebulk/introspector.py @@ -6,13 +6,11 @@ from abc import ABCMeta, abstractmethod from collections import defaultdict -import six from .pattern import StringPattern, RePattern, FunctionalPattern from .utils import extend_safe -@six.add_metaclass(ABCMeta) -class Description(object): +class Description(metaclass=ABCMeta): """ Abstract class for a description. """ diff --git a/rebulk/match.py b/rebulk/match.py index d8e72df..b3f84c4 100644 --- a/rebulk/match.py +++ b/rebulk/match.py @@ -15,7 +15,6 @@ from collections import OrderedDict # pylint:disable=ungrouped-imports except ImportError: # pragma: no cover from ordereddict import OrderedDict # pylint:disable=import-error -import six from .loose import ensure_list, filter_index from .utils import is_iterable @@ -28,7 +27,7 @@ class MatchesDict(OrderedDict): """ def __init__(self): - super(MatchesDict, self).__init__() + super().__init__() self.matches = defaultdict(list) self.values_list = defaultdict(list) @@ -67,7 +66,7 @@ def _name_dict(self): def _start_dict(self): if self.__start_dict is None: self.__start_dict = defaultdict(_BaseMatches._base) - for start, values in itertools.groupby([m for m in self._delegate], lambda item: item.start): + for start, values in itertools.groupby(list(self._delegate), lambda item: item.start): _BaseMatches._base_extend(self.__start_dict[start], values) return self.__start_dict @@ -76,7 +75,7 @@ def _start_dict(self): def _end_dict(self): if self.__end_dict is None: self.__end_dict = defaultdict(_BaseMatches._base) - for start, values in itertools.groupby([m for m in self._delegate], lambda item: item.end): + for start, values in itertools.groupby(list(self._delegate), lambda item: item.end): _BaseMatches._base_extend(self.__end_dict[start], values) return self.__end_dict @@ -534,13 +533,6 @@ def to_dict(self, details=False, first_value=False, enforce_list=False): ret[match.name] = value return ret - if six.PY2: # pragma: no cover - def clear(self): - 
""" - Python 3 backport - """ - del self[:] - def __len__(self): return len(self._delegate) @@ -583,11 +575,11 @@ class Matches(_BaseMatches): def __init__(self, matches=None, input_string=None): self.markers = Markers(input_string=input_string) - super(Matches, self).__init__(matches=matches, input_string=input_string) + super().__init__(matches=matches, input_string=input_string) def _add_match(self, match): assert not match.marker, "A marker match should not be added to object" - super(Matches, self)._add_match(match) + super()._add_match(match) class Markers(_BaseMatches): @@ -596,11 +588,11 @@ class Markers(_BaseMatches): """ def __init__(self, matches=None, input_string=None): - super(Markers, self).__init__(matches=None, input_string=input_string) + super().__init__(matches=None, input_string=input_string) def _add_match(self, match): assert match.marker, "A non-marker match should not be added to object" - super(Markers, self)._add_match(match) + super()._add_match(match) class Match(object): diff --git a/rebulk/pattern.py b/rebulk/pattern.py index beb8b27..d7629d1 100644 --- a/rebulk/pattern.py +++ b/rebulk/pattern.py @@ -7,8 +7,6 @@ from abc import ABCMeta, abstractmethod, abstractproperty -import six - from . import debug from .formatters import default_formatter from .loose import call, ensure_list, ensure_dict @@ -18,8 +16,7 @@ from .validators import allways_true -@six.add_metaclass(ABCMeta) -class BasePattern(object): +class BasePattern(metaclass=ABCMeta): """ Base class for Pattern like objects """ @@ -41,8 +38,7 @@ def matches(self, input_string, context=None, with_raw_matches=False): pass -@six.add_metaclass(ABCMeta) -class Pattern(BasePattern): +class Pattern(BasePattern, metaclass=ABCMeta): """ Definition of a particular pattern to search for. 
""" @@ -396,7 +392,7 @@ class StringPattern(Pattern): """ def __init__(self, *patterns, **kwargs): - super(StringPattern, self).__init__(**kwargs) + super().__init__(**kwargs) self._patterns = patterns self._kwargs = kwargs self._match_kwargs = filter_match_kwargs(kwargs) @@ -422,7 +418,7 @@ class RePattern(Pattern): """ def __init__(self, *patterns, **kwargs): - super(RePattern, self).__init__(**kwargs) + super().__init__(**kwargs) self.repeated_captures = REGEX_AVAILABLE if 'repeated_captures' in kwargs: self.repeated_captures = kwargs.get('repeated_captures') @@ -434,7 +430,7 @@ def __init__(self, *patterns, **kwargs): self._children_match_kwargs = filter_match_kwargs(kwargs, children=True) self._patterns = [] for pattern in patterns: - if isinstance(pattern, six.string_types): + if isinstance(pattern, str): if self.abbreviations and pattern: for key, replacement in self.abbreviations: pattern = pattern.replace(key, replacement) @@ -494,7 +490,7 @@ class FunctionalPattern(Pattern): """ def __init__(self, *patterns, **kwargs): - super(FunctionalPattern, self).__init__(**kwargs) + super().__init__(**kwargs) self._patterns = patterns self._kwargs = kwargs self._match_kwargs = filter_match_kwargs(kwargs) diff --git a/rebulk/rebulk.py b/rebulk/rebulk.py index a6a0fd2..972b92a 100644 --- a/rebulk/rebulk.py +++ b/rebulk/rebulk.py @@ -53,7 +53,7 @@ def __init__(self, disabled=lambda context: False, default_rules=True): :return: :rtype: """ - super(Rebulk, self).__init__() + super().__init__() if not callable(disabled): self.disabled = lambda context: disabled else: diff --git a/rebulk/rules.py b/rebulk/rules.py index 2514904..264792a 100644 --- a/rebulk/rules.py +++ b/rebulk/rules.py @@ -8,7 +8,6 @@ from itertools import groupby from logging import getLogger -import six from .utils import is_iterable from .toposort import toposort @@ -18,8 +17,7 @@ log = getLogger(__name__).log -@six.add_metaclass(ABCMeta) -class Consequence(object): +class 
Consequence(metaclass=ABCMeta): """ Definition of a consequence to apply. """ @@ -40,8 +38,7 @@ def then(self, matches, when_response, context): # pragma: no cover pass -@six.add_metaclass(ABCMeta) -class Condition(object): +class Condition(metaclass=ABCMeta): """ Definition of a condition to check. """ @@ -60,8 +57,7 @@ def when(self, matches, context): # pragma: no cover pass -@six.add_metaclass(ABCMeta) -class CustomRule(Condition, Consequence): +class CustomRule(Condition, Consequence, metaclass=ABCMeta): """ Definition of a rule to apply """ @@ -243,7 +239,7 @@ class Rules(list): """ def __init__(self, *rules): - super(Rules, self).__init__() + super().__init__() self.load(*rules) def load(self, *rules): diff --git a/rebulk/test/test_match.py b/rebulk/test/test_match.py index 8750733..2359149 100644 --- a/rebulk/test/test_match.py +++ b/rebulk/test/test_match.py @@ -3,7 +3,6 @@ # pylint: disable=no-self-use, pointless-statement, missing-docstring, unneeded-not, len-as-condition import pytest -import six from ..match import Match, Matches from ..pattern import StringPattern, RePattern @@ -72,23 +71,18 @@ def test_compare(self): assert match2 > match1 assert match2 >= match1 - if six.PY3: - with pytest.raises(TypeError): - match1 < other + with pytest.raises(TypeError): + match1 < other - with pytest.raises(TypeError): - match1 <= other + with pytest.raises(TypeError): + match1 <= other - with pytest.raises(TypeError): - match1 > other + with pytest.raises(TypeError): + match1 > other + + with pytest.raises(TypeError): + match1 >= other - with pytest.raises(TypeError): - match1 >= other - else: - assert match1 < other - assert match1 <= other - assert not match1 > other - assert not match1 >= other def test_value(self): match1 = Match(1, 3) diff --git a/rebulk/test/test_pattern.py b/rebulk/test/test_pattern.py index beee170..e94b670 100644 --- a/rebulk/test/test_pattern.py +++ b/rebulk/test/test_pattern.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: 
utf-8 -*- -# pylint: disable=no-self-use, pointless-statement, missing-docstring, unbalanced-tuple-unpacking, len-as-condition +# pylint: disable=no-self-use, pointless-statement, missing-docstring, unbalanced-tuple-unpacking, len-as-condition, no-member import re import pytest diff --git a/rebulk/toposort.py b/rebulk/toposort.py index 2bcba9a..9f3a731 100644 --- a/rebulk/toposort.py +++ b/rebulk/toposort.py @@ -22,7 +22,7 @@ class CyclicDependency(ValueError): def __init__(self, cyclic): s = 'Cyclic dependencies exist among these items: {0}'.format(', '.join(repr(x) for x in cyclic.items())) - super(CyclicDependency, self).__init__(s) + super().__init__(s) self.cyclic = cyclic diff --git a/setup.cfg b/setup.cfg index f8bdf00..8986ccc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,8 @@ -[zest.releaser] -python-file-with-version = rebulk/__version__.py +[semantic_release] +version_variable = rebulk/__version__.py:__version__ +commit_subject = chore(release): Release v{version} +commit_author = github-actions +upload_to_pypi_glob_patterns = *.tar.gz,*.whl [aliases] test=pytest diff --git a/setup.py b/setup.py index 94522d8..4aaabb8 100644 --- a/setup.py +++ b/setup.py @@ -5,18 +5,21 @@ import re from setuptools import setup, find_packages -with io.open('README.rst', 'r', encoding='utf-8') as f: +with io.open('CHANGELOG.md', encoding='utf-8') as f: + changelog = f.read() + +with io.open('README.md', 'r', encoding='utf-8') as f: readme = f.read() -install_requires = ['six'] +install_requires = [] native_requires = ['regex'] setup_requires = ['pytest-runner'] -dev_require = ['pytest', 'zest.releaser[recommended]', 'pylint', 'tox'] +dev_require = ['pytest', 'pylint', 'tox'] -tests_require = ['pytest'] +tests_require = ['pytest', 'pylint'] with io.open('rebulk/__version__.py', 'r') as f: version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]$', f.read(), re.MULTILINE).group(1) @@ -24,19 +27,19 @@ args = dict(name='rebulk', version=version, description='Rebulk - 
Define simple search patterns in bulk to perform advanced matching on any string.', - long_description=readme, + long_description=readme + '\n\n' + changelog, + long_description_content_type='text/markdown', # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=['Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Intended Audience :: Developers', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Topic :: Software Development :: Libraries :: Python Modules' ], keywords='re regexp regular expression search pattern string match', diff --git a/tox.ini b/tox.ini index 483438e..2fa9d3e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,15 +1,5 @@ [tox] -envlist = py27,py34,py35,py36,py37,py38,pypy - -[testenv:py26] -commands = - {envbindir}/pip install -e .[dev] - {envpython} setup.py test - -[testenv:py38] -commands = - {envbindir}/pip install -e .[dev] - {envpython} setup.py test +envlist = py35,py36,py37,py38,py39,pypy3 [testenv] commands =