In [15]:
import spacy
from spacy import displacy

#Загрузка английской NLP-модели
nlp = spacy.load('en_core_web_sm')

#Текст для анализа
text = "Alina Khartanovich, a fourth-year student of the Belarusian State Technological University, won first place at the Eurasian Programming Olympiad in Tokyo and after that she will visit Canada to participate in the international Olympiad."

#Парсинг текста с помощью spaCy. Эта команда запускает целый конвейер
doc = nlp(text)

print(doc.text)

#текст токена, начальная форма, часть речи, является ли стоп-словом
for token in doc:
    print(token.text, token.lemma_, token.pos_, token.is_stop)

Alina Khartanovich, a fourth-year student of the Belarusian State Technological University, won first place at the Eurasian Programming Olympiad in Tokyo and after that she will visit Canada to participate in the international Olympiad.
Alina Alina PROPN False
Khartanovich Khartanovich PROPN False
, , PUNCT False
a a DET True
fourth fourth ADJ False
- - PUNCT False
year year NOUN False
student student NOUN False
of of ADP True
the the DET True
Belarusian Belarusian PROPN False
State State PROPN False
Technological Technological PROPN False
University University PROPN False
, , PUNCT False
won win VERB False
first first ADJ True
place place NOUN False
at at ADP True
the the DET True
Eurasian eurasian ADJ False
Programming Programming PROPN False
Olympiad Olympiad PROPN False
in in ADP True
Tokyo Tokyo PROPN False
and and CCONJ True
after after ADP True
that that PRON True
she she PRON True
will will AUX True
visit visit VERB False
Canada Canada PROPN False
to to PART True
participate pa

In [16]:
#построение дерева зависимостей
#(текст токена (тип зависимости,(согласно Universal Dependency), корневое слово) 
for token in doc:
    print(token.text, token.dep_, token.head)

Alina compound Khartanovich
Khartanovich nsubj won
, punct Khartanovich
a det student
fourth amod year
- punct year
year compound student
student appos Khartanovich
of prep student
the det University
Belarusian compound University
State compound University
Technological compound University
University pobj of
, punct Khartanovich
won ROOT won
first amod place
place dobj won
at prep won
the det Olympiad
Eurasian amod Olympiad
Programming compound Olympiad
Olympiad pobj at
in prep Olympiad
Tokyo pobj in
and cc won
after prep visit
that pobj after
she nsubj visit
will aux visit
visit conj won
Canada dobj visit
to aux participate
participate xcomp visit
in prep participate
the det Olympiad
international amod Olympiad
Olympiad pobj in
. punct won


In [17]:
#визуализация дерева зависимостей, а также распознавание зависимостей
from spacy import displacy
displacy.render(doc, style='dep', jupyter=True) 

In [19]:
#для расшифровки названий тегов можно воспользоваться функцией explain
print(spacy.explain("PROPN")) #имя собственное
print(spacy.explain("DET")) #определитель
print(spacy.explain("ADJ")) #имя прилагательное
print(spacy.explain("NOUN")) #существительное
print(spacy.explain("ADP")) #сближение
print(spacy.explain("VERB")) #глагол
print(spacy.explain("CCONJ")) #координирующее соединение
print(spacy.explain("PRON")) #местоимение
print(spacy.explain("AUX")) #вспомогательный
print(spacy.explain("PART")) #частица

proper noun
determiner
adjective
noun
adposition
verb
coordinating conjunction
pronoun
auxiliary
particle


In [20]:
#Распознавание именованных сущностей 
doc2 = nlp("Alina Khartanovich, a fourth-year student of the Belarusian State Technological University, won first place at the Eurasian Programming Olympiad in Tokyo and after that she will visit Canada to participate in the international Olympiad.")

for ent in doc2.ents:
    print(ent.text, ent.label_)
displacy.render(doc2, style='ent', jupyter=True)    

Alina Khartanovich PERSON
fourth-year DATE
the Belarusian State Technological University ORG
first ORDINAL
the Eurasian Programming Olympiad ORG
Tokyo GPE
Canada GPE
Olympiad ORG


In [21]:
print(spacy.explain("PERSON"))
print(spacy.explain("DATE"))
print(spacy.explain("ORG"))
print(spacy.explain("ORDINAL"))
print(spacy.explain("GPE"))

People, including fictional
Absolute or relative dates or periods
Companies, agencies, institutions, etc.
"first", "second", etc.
Countries, cities, states
