In [8]:
import spacy
from math import sqrt
import pytextrank

In [2]:
# Example text: a short abstract that the cells below parse with spaCy
# (`doc = nlp(text)`) and then summarize. The line breaks inside the literal
# are part of the string.
text = """Compatibility of systems of linear constraints over the set of natural numbers.
Criteria of compatibility of a system of linear Diophantine equations, strict inequations,
and nonstrict inequations are considered. Upper bounds for components of a minimal set of
solutions and algorithms of construction of minimal generating sets of solutions for all types
of systems are given. These criteria and the corresponding algorithms for constructing a minimal
supporting set of solutions can be used in solving all the considered types systems and systems of mixed types."""

In [4]:
# Load the "en_core_web_sm" spaCy model.
nlp = spacy.load("en_core_web_sm")

# Append the "textrank" component as the last step of the pipeline.
# NOTE(review): presumably the "textrank" factory is registered as a side
# effect of the `import pytextrank` at the top of the notebook -- confirm.
nlp.add_pipe("textrank", last=True)
# Run the pipeline over the example text; later cells read `doc.sents` and
# `doc._.phrases` from this parsed document.
doc = nlp(text)

In [5]:
# One mutable record per sentence: [sentence.start, sentence.end, phrase ids].
# The set starts empty; the phrase loop in a later cell adds the ids of the
# phrases whose chunks fall inside the sentence.
sent_bounds = [[sentence.start, sentence.end, set()] for sentence in doc.sents]

In [6]:
# Number of phrases taken from `doc._.phrases` when building the vectors.
limit_phrases = 4

phrase_id = 0
unit_vector = []

for phrase in doc._.phrases:
    print(phrase_id, phrase.text, phrase.rank)

    # Position `phrase_id` of `unit_vector` holds this phrase's rank.
    unit_vector.append(phrase.rank)

    for chunk in phrase.chunks:
        print(chunk.start, chunk.end)

        # Record the phrase on the first sentence whose bounds fully contain
        # this chunk, then stop looking.
        for sent_start, sent_end, sent_vector in sent_bounds:
            if chunk.start < sent_start or chunk.end > sent_end:
                continue

            print(sent_start, chunk.start, chunk.end, sent_end)
            sent_vector.add(phrase_id)
            break

    phrase_id += 1

    # Checked after the increment, so exactly `limit_phrases` phrases are
    # processed when at least that many are available.
    if phrase_id == limit_phrases:
        break

0 mixed types 0.18224422086397363
93 95
65 93 95 96
1 systems 0.177894013810666
2 3
0 2 3 14
61 62
35 61 62 65
91 92
65 91 92 96
2 minimal generating sets 0.15012796482367466
51 54
35 51 54 65
3 nonstrict inequations 0.14619431119413756
30 32
14 30 32 35


In [7]:
# Rescale the collected phrase ranks so that together they sum to 1.
sum_ranks = sum(unit_vector)

unit_vector = list(map(lambda rank: rank / sum_ranks, unit_vector))
unit_vector

[0.2776164261148589,
 0.27098966489700754,
 0.2286930628398587,
 0.22270084614827468]

In [9]:
# Score each sentence by the square root of the summed squares of the
# normalized weights of the phrases it does NOT contain. A sentence that
# covers more (or heavier) phrases therefore gets a smaller value.
sent_rank = {}
sent_id = 0

for sent_start, sent_end, sent_vector in sent_bounds:
    print(sent_vector)
    sum_sq = 0.0

    for phrase_id, weight in enumerate(unit_vector):
        print(phrase_id, weight)

        # Phrases present in the sentence contribute nothing to its score.
        if phrase_id in sent_vector:
            continue

        sum_sq += weight**2.0

    sent_rank[sent_id] = sqrt(sum_sq)
    sent_id += 1


{1}
0 0.2776164261148589
1 0.27098966489700754
2 0.2286930628398587
3 0.22270084614827468
{3}
0 0.2776164261148589
1 0.27098966489700754
2 0.2286930628398587
3 0.22270084614827468
{1, 2}
0 0.2776164261148589
1 0.27098966489700754
2 0.2286930628398587
3 0.22270084614827468
{0, 1}
0 0.2776164261148589
1 0.27098966489700754
2 0.2286930628398587
3 0.22270084614827468


In [10]:
from operator import itemgetter

# Show the (sentence id, score) pairs ordered by ascending score.
# `itemgetter` is also used by the final cell of the notebook.
by_score = itemgetter(1)
sorted(sent_rank.items(), key=by_score)

[(3, 0.31921181661434944),
 (2, 0.35590244017700196),
 (0, 0.42304499041475485),
 (1, 0.45034075489661707)]

In [11]:
# Number of sentences to print as the summary.
limit_sentences = 2

# Map each sentence's position in `doc.sents` to its text.
sent_text = {}

for sent_id, sent in enumerate(doc.sents):
    sent_text[sent_id] = sent.text

# Walk the sentences from lowest to highest score and print them until
# `limit_sentences` have been emitted.
ranked_sentences = sorted(sent_rank.items(), key=itemgetter(1))
num_sent = 0

for sent_id, rank in ranked_sentences:
    print(sent_id, sent_text[sent_id])
    num_sent += 1

    if num_sent == limit_sentences:
        break


3 These criteria and the corresponding algorithms for constructing a minimal
supporting set of solutions can be used in solving all the considered types systems and systems of mixed types.
2 Upper bounds for components of a minimal set of
solutions and algorithms of construction of minimal generating sets of solutions for all types
of systems are given.
