In [1]:
import spacy

# Load the spaCy pipeline once; `nlp` is reused by every example cell to parse
# a sentence into a Doc that is then passed to the adverb-placement checker.
nlp = spacy.load("en_core_web_sm")
# displacy is used further down to render dependency trees of problem sentences.
from spacy import displacy


### Код алгоритма, описанного в курсовой работе

In [135]:
# Adverbs that the rules below expect BEFORE the verb (or between the verb and
# its direct object) instead of after it.
_INTENSIFIERS = frozenset({'just', 'even', 'also', 'only', 'simply', 'really'})
_INDEFINITE_TIME = frozenset(
    {'already', 'often', 'still', 'always', 'usually', 'sometimes'}
)
_PREVERBAL_ADVERBS = _INTENSIFIERS | _INDEFINITE_TIME


def _verb_adverb_feedback(adverb, verb, preverbal):
    """Return the feedback message for an adverb attached to a verb.

    Args:
        adverb: token whose ``dep_`` is ``'advmod'``.
        verb: head of ``adverb`` (its ``pos_`` is ``'VERB'``).
        preverbal: True when the adverb is an intensifier or an
            indefinite-time adverb.
    """
    auxiliaries = []
    direct_object = None
    for child in verb.children:
        if child.dep_ == 'aux':
            auxiliaries.append(child)
        elif child.dep_ == 'dobj':
            # If several children are labelled dobj, the last one wins.
            direct_object = child

    if auxiliaries:
        # The adverb must stand after an auxiliary and before the main verb.
        # Compare against the auxiliaries themselves, not against whichever
        # child of the verb happened to be visited last.
        after_an_auxiliary = any(aux.i < adverb.i for aux in auxiliaries)
        if after_an_auxiliary and adverb.i < verb.i:
            return "Sentence is correct"
        return "You should put your adverbial between auxiliary and main verb"

    if direct_object is not None:
        if preverbal:
            if verb.i < adverb.i < direct_object.i:
                return "Sentence is correct"
            return "Adverbial should go before direct object and after the verb"
        # Ordinary adverbs must immediately follow the direct object.
        if adverb.i - direct_object.i == 1:
            return "Sentence is correct"
        return "Direct object should go right before adverbial"

    # Bare verb: pre-verbal adverbs go directly before it, all others
    # directly after it.
    adverb_follows_verb = verb.i < adverb.i
    if preverbal and adverb_follows_verb:
        return "Adverbial should go before verb"
    if not preverbal and not adverb_follows_verb:
        return "Adverbial should go after verb"
    if abs(verb.i - adverb.i) > 1:
        return "There should be no words between adverbial and verb"
    return "Sentence is correct"


def _adjective_adverb_feedback(adverb, adjective, word):
    """Return the feedback message for an adverb attached to an adjective.

    ``word`` is the lower-cased text of ``adverb``. 'enough' must directly
    follow the adjective; every other adverb must directly precede it.
    """
    if word == 'enough':
        if adverb.i - adjective.i == 1:
            return "Sentence is correct"
        return "Adverbial should go right after adjective"
    if adjective.i - adverb.i == 1:
        return "Sentence is correct"
    return "Adverbial should go right before adjective"


def mistakes_in_adv_dealer(parser):
    """Print a word-order verdict for every adverbial modifier in a parse.

    For each token whose dependency label is ``advmod`` and whose head is a
    verb or an adjective, one line is printed: either "Sentence is correct"
    or a hint describing where the adverbial should stand. Adverbials with
    any other kind of head are skipped silently.

    Args:
        parser: iterable of spaCy tokens (for example the ``Doc`` returned by
            ``nlp(text)``). Each token must expose ``dep_``, ``pos_``,
            ``head``, ``children``, ``text`` and ``i``.

    Returns:
        None. The messages are written to standard output.
    """
    for token in parser:
        if token.dep_ != 'advmod':
            continue
        head = token.head
        # Lower-case so that a sentence-initial "Always" or "Still" is
        # recognised as the same adverb as "always" or "still".
        word = token.text.lower()
        if head.pos_ == 'VERB':
            print(_verb_adverb_feedback(token, head, word in _PREVERBAL_ADVERBS))
        elif head.pos_ == 'ADJ':
            print(_adjective_adverb_feedback(token, head, word))

### Примеры работы

Пример работы с предложением с вспомогательным глаголом:

In [102]:
# Example with an auxiliary verb: 'also' is written before the auxiliary 'are'.
sentence1 = nlp("Chinese and Britain's printed books also are going to show a slight decrease in sells, both fo $0,6 billion")
mistakes_in_adv_dealer(sentence1)

You should put your adverbial between auxiliary and main verb


Пример работы с предложением, где есть модальный глагол:

In [113]:
# Example with a modal verb: 'always' is written before 'should'.
sentence2 = nlp("Health always should play important role in our life.  ")
mistakes_in_adv_dealer(sentence2)


You should put your adverbial between auxiliary and main verb


Пример реализации правила о том, что наречия неопределенного времени (здесь - still) должны идти прямо перед глаголом:

In [115]:
# Example with an indefinite-time adverb ('still') that is separated from the verb.
sentence3 = nlp("There are both advantages and disadvantages in different methods and systems, still, I believe that the Scandinaxian one, described above, can give people the result that they expect.")
mistakes_in_adv_dealer(sentence3)


There should be no words between adverbial and verb
Sentence is correct


Пример обработки предложения с ошибкой в позиции адвербиала относительно одиночного глагола (в первом случае) и составной формы (во втором случае):

In [116]:
# Example with two adverbs: 'steadily' before a single verb ('rose') and
# 'rapidly' after a compound form ('have rose').
sentence4 = nlp("During these sixty years the proportion of elderly population steadily rose, but since 2000 proportion in Sweden have rose rapidly and reached 20% near 2010. ")
mistakes_in_adv_dealer(sentence4)

Adverbial should go after verb
You should put your adverbial between auxiliary and main verb


In [119]:
# Example with an adverb ('dramatically') written before a single verb ('decreased').
sentence5 = nlp("According to the chart, the number of girls without acess to primary education dramatically decreased from 2000 to 2012 in Sout Asia.  ")
mistakes_in_adv_dealer(sentence5)

Adverbial should go after verb


Пример работы алгоритма с конструкциями с прямым объектом:

In [120]:
# Example with a direct object: 'differently' stands between the verb and 'the happiness'.
sentence6 = nlp("As a result, people define differently the happiness. ")
mistakes_in_adv_dealer(sentence6)

Direct object should go right before adverbial


Пример работы алгоритма с enough, модифицирующим прилагательное:

In [132]:
# Example with 'enough' written before the adjective it modifies ('enough right').
sentence7 = nlp("To tell the truth, the first way seems enough right because it provides the opportunity of revenge for the victims of criminals, while the second way may seem too soft and not effective. ")
mistakes_in_adv_dealer(sentence7)


Adverbial should go right after adjective
Sentence is correct


### Примеры некорректной работы

Модификаторы прилагательных SpaCy не всегда воспринимает таковыми:

In [146]:
# Known-problem example: 'rather' follows the adjective 'popular'.
# The dependency tree is rendered so the parse can be inspected.
sentence8 = nlp("In distinguish LinkedIn is popular rather. ")
mistakes_in_adv_dealer(sentence8)
displacy.render(sentence8, style='dep')


Неправильная обработка предложения с пропуском (алгоритм не находит ошибку):

In [148]:
# Known-problem example: the main verb is missing after 'should'.
# The dependency tree is rendered so the parse can be inspected.
sentence9 = nlp("Health always should important role in our life.  ")
mistakes_in_adv_dealer(sentence9)
displacy.render(sentence9, style='dep')


Не учитывает случаи возможной инверсии вспомогательного глагола и лексического (но в данном случае также изначально неверно установлена синтаксическая связь между вспомогательным глаголом и главным). 

In [150]:
# Known-problem example: a long sentence with several adverbs ('rapidly',
# 'nearely', 'surprisingly'). The dependency tree is rendered for inspection.
sentence10 = nlp("The number of people aged 65 and over started to increase rapidly, nearely reaching 25 percent in 2040, which is, surprisingly, the lowest percentage of all three countries at given period. ")

mistakes_in_adv_dealer(sentence10)
displacy.render(sentence10, style='dep')


You should put your adverbial between auxiliary and main verb
Direct object should go right before adverbial


Неверно распознается тип зависимости в случае первого употребления адвербиала, поэтому дается неверное объяснение ошибки (хотя в данном случае оно ведет к правильному исправлению: эксперт также исправил на "slightly increasing"):

In [151]:
# Known-problem example: 'slightly' follows 'increasing', and the sentence also
# contains 'approximately' and 'around' before numerals.
sentence11 = nlp("Then the level of students started increasing slightly: from approximately 30% (at 30 -39) to around 40% at the age of over 49.")
mistakes_in_adv_dealer(sentence11)


Adverbial should go after verb
Sentence is correct
