In [115]:
from IPython.display import display

# pip install ipymarkup
from ipymarkup import show_span_ascii_markup as show_markup

from yargy import (
    Parser,
    or_, rule,  and_
)
from yargy.pipelines import morph_pipeline
from yargy.predicates import (
    eq, in_, dictionary, gte, lte, normalized,
    type, gram
)
from yargy.tokenizer import MorphTokenizer
from yargy import interpretation as interp
from yargy.interpretation import fact, attribute


def show_matches(rule, *lines):
    parser = Parser(rule)
    for line in lines:
        matches = parser.findall(line)
        matches = sorted(matches, key=lambda _: _.span)
        spans = [_.span for _ in matches]
        show_markup(line, spans)
        if matches:
            facts = [_.fact for _ in matches]
            if len(facts) == 1:
                facts = facts[0]
            return facts.as_json


INT = type('INT')
NOUN = gram('NOUN')
ADJF = gram('ADJF')
PRTF = gram('PRTF')
GENT = gram('gent')
DOT = eq('.')

TOKENIZER = MorphTokenizer()

In [116]:
Year = fact(
    'Year',
    ['year']
)

YEAR_VAL = and_(
    gte(1000),
    lte(2100)
)

YEAR_WORD = or_(
    rule('г', eq('.').optional()),
    rule(normalized('год'))
)

YEAR = rule(
    YEAR_VAL.interpretation(
        Year.year.custom(int)
    ),
    YEAR_WORD.optional(),
).interpretation(
    Year
)

Year_range = fact(
    'Year_range',
    ['start', 'stop']
)

YEAR_DELIMETOR_BEFORE = morph_pipeline([
    'с',
    'от'
])

YEAR_DELIMETOR_UNDER = morph_pipeline([
    '-',
    'по',
    'до'
])


YEAR_RANGE = rule(
    YEAR_DELIMETOR_BEFORE.optional(),
    YEAR.interpretation(
        Year_range.start
    ),
    YEAR_DELIMETOR_UNDER.optional(),
    YEAR.optional()
        .interpretation(
            Year_range.stop
    )

).interpretation(
    Year_range
)

js = show_matches(
    YEAR_RANGE,
    '2015-2016',
    '2014 года',
    '2020 год'
)
js

2015-2016
─────────


OrderedDict([('start', OrderedDict([('year', 2015)])),
             ('stop', OrderedDict([('year', 2016)]))])