In [None]:
# Import NLTK and make sure the 'punkt' tokenizer data is available locally.
# NOTE(review): presumably required by sent_tokenize / word_tokenize used
# further down in this notebook — confirm against the installed NLTK version.
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
#importing other libraries
import re

import numpy as np
from nltk import sent_tokenize, word_tokenize

from nltk.cluster.util import cosine_distance

MULTIPLE_WHITESPACE_PATTERN = re.compile(r"\s+", re.UNICODE)

In [None]:
def normalize_whitespace(text):
    """
    Collapse every run of whitespace in ``text`` into one character.

    Each run matched by ``MULTIPLE_WHITESPACE_PATTERN`` is handed to
    ``_replace_whitespace``, which decides whether the run becomes a
    single LF (Unix new line) or a single space.
    """
    collapsed = MULTIPLE_WHITESPACE_PATTERN.sub(_replace_whitespace, text)
    return collapsed


def _replace_whitespace(match):
    text = match.group()

    if "\n" in text or "\r" in text:
        return "\n"
    else:
        return " "


def is_blank(string):
    """
    Tell whether ``string`` has no visible content.

    Returns `True` for a falsy value (such as the empty string) and for
    a string made up solely of white-space characters; returns `False`
    otherwise.
    """
    if not string:
        return True
    return string.isspace()


def get_symmetric_matrix(matrix):
    """
    Combine a matrix with its transpose, counting the diagonal once.

    The result is ``matrix + matrix.T`` with one copy of the main
    diagonal subtracted, so diagonal entries are not doubled.

    :param matrix: square array exposing ``.T`` and ``.diagonal()``
    :return: matrix
    """
    transposed = matrix.T
    diagonal_only = np.diag(matrix.diagonal())
    return matrix + transposed - diagonal_only


def core_cosine_similarity(vector1, vector2):
    """
    Measure the cosine similarity between two vectors.

    The value is computed directly with numpy so that the vector norms
    are available for the zero-vector check below.

    :param vector1: sequence of numbers
    :param vector2: sequence of numbers with the same length as vector1
    :return: cosine of the angle between the vectors as a float.
        For non-negative vectors (e.g. word counts) the value lies in
        [0, 1]. If either vector has zero length the similarity is
        undefined (0/0); 0.0 is returned instead of NaN.
    """
    u = np.asarray(vector1, dtype=float)
    v = np.asarray(vector2, dtype=float)

    norm_product = np.linalg.norm(u) * np.linalg.norm(v)
    if norm_product == 0:
        # An all-zero (or empty) vector shares nothing with any other vector.
        return 0.0

    return float(np.dot(u, v) / norm_product)


'''
Note: This is not a summarization algorithm. This algorithm picks the top sentences irrespective of the order in which they appeared.
'''

'\nNote: This is not a summarization algorithm. This Algorithm pics top sentences irrespective of the order they appeared.\n'

In [None]:
class TextRank4Sentences():
    """
    Rank the sentences of a text with a PageRank-style iteration.

    Call ``analyze(text)`` first, then ``get_top_sentences(n)``. The
    sentences come back ordered by score, not by their position in the
    text.
    """

    def __init__(self):
        self.damping = 0.85  # damping coefficient, usually is .85
        self.min_diff = 1e-5  # convergence threshold
        self.steps = 100  # iteration steps
        self.text_str = None  # raw text passed to analyze()
        self.sentences = None  # sentences produced by sent_tokenize
        self.pr_vector = None  # one score per sentence, set by analyze()

    def _sentence_similarity(self, sent1, sent2, stopwords=None):
        """
        Cosine similarity of the word-count vectors of two sentences.

        :param sent1: list of word tokens
        :param sent2: list of word tokens
        :param stopwords: optional container of lower-case words to ignore
        :return: similarity value; 0.0 when either sentence has no
            countable words left (avoids a NaN from a zero vector)
        """
        if stopwords is None:
            stopwords = ()

        sent1 = [w.lower() for w in sent1]
        sent2 = [w.lower() for w in sent2]

        # Map each distinct word to a vector position. A dict gives O(1)
        # lookups (instead of list.index) and a deterministic, first-seen
        # ordering (instead of set ordering).
        positions = {word: i for i, word in enumerate(dict.fromkeys(sent1 + sent2))}

        vector1 = [0] * len(positions)
        vector2 = [0] * len(positions)

        # build the count vector of each sentence, skipping stopwords
        for vector, words in ((vector1, sent1), (vector2, sent2)):
            for w in words:
                if w not in stopwords:
                    vector[positions[w]] += 1

        # A sentence made only of stopwords (or with no tokens) yields a
        # zero vector, for which the cosine is undefined.
        if not any(vector1) or not any(vector2):
            return 0.0

        return core_cosine_similarity(vector1, vector2)

    def _build_similarity_matrix(self, sentences, stopwords=None):
        """
        Build the column-normalized sentence similarity matrix.

        :param sentences: list of tokenized sentences (lists of words)
        :param stopwords: optional iterable of lower-case words to ignore
        :return: square array whose non-empty columns each sum to 1;
            columns of sentences similar to nothing are all zeros
        """
        if stopwords is not None:
            # Convert once so every pairwise comparison gets O(1) lookups.
            stopwords = frozenset(stopwords)

        size = len(sentences)

        # create an empty similarity matrix
        sm = np.zeros([size, size])

        # The similarity is symmetric, so only the upper triangle is
        # computed; the diagonal stays 0.
        for idx1 in range(size):
            for idx2 in range(idx1 + 1, size):
                sm[idx1][idx2] = self._sentence_similarity(
                    sentences[idx1], sentences[idx2], stopwords=stopwords)

        # Mirror the upper triangle into the lower one
        sm = get_symmetric_matrix(sm)

        # Normalize matrix by column. `out` is required together with
        # `where`: without it the entries of columns whose sum is 0 would
        # be left uninitialized instead of 0.
        norm = np.sum(sm, axis=0)
        sm_norm = np.divide(sm, norm, out=np.zeros_like(sm), where=norm != 0)

        return sm_norm

    def _run_page_rank(self, similarity_matrix):
        """
        Iterate the damped rank update on ``similarity_matrix``.

        Stops after ``self.steps`` iterations, or earlier once the total
        score changes by less than ``self.min_diff`` between iterations.

        :param similarity_matrix: square matrix from _build_similarity_matrix
        :return: array with one score per row of the matrix
        """
        pr_vector = np.ones(len(similarity_matrix))

        # Iteration
        previous_pr = 0
        for _ in range(self.steps):
            pr_vector = (1 - self.damping) + self.damping * np.matmul(similarity_matrix, pr_vector)
            current_pr = pr_vector.sum()
            if abs(previous_pr - current_pr) < self.min_diff:
                break
            previous_pr = current_pr

        return pr_vector

    def _get_sentence(self, index):
        """Return the sentence at ``index``, or "" if the index is out of range."""
        try:
            return self.sentences[index]
        except IndexError:
            return ""

    def get_top_sentences(self, number=5):
        """
        Return the highest-scoring sentences, best first.

        :param number: maximum number of sentences to return; when it
            exceeds the number of sentences, all of them are returned
        :return: list of whitespace-normalized sentences; empty when
            analyze() has not been called or ``number`` is not positive
        """
        if self.pr_vector is None:
            return []

        # argsort is ascending, so reverse it to get best-first indices
        ranked = np.argsort(self.pr_vector)[::-1]

        # Slicing (rather than indexing `number` times) cannot run past
        # the end of the ranking; max() keeps a negative `number` empty.
        return [normalize_whitespace(self.sentences[idx])
                for idx in ranked[:max(number, 0)]]

    def analyze(self, text, stop_words=None):
        """
        Split ``text`` into sentences and score each one.

        Stores the text in ``self.text_str``, the sentences in
        ``self.sentences`` and the scores in ``self.pr_vector``.

        :param text: text to analyze
        :param stop_words: optional iterable of lower-case words to ignore
        """
        self.text_str = text
        self.sentences = sent_tokenize(self.text_str)

        tokenized_sentences = [word_tokenize(sent) for sent in self.sentences]

        similarity_matrix = self._build_similarity_matrix(tokenized_sentences, stop_words)

        self.pr_vector = self._run_page_rank(similarity_matrix)

In [None]:
# Sample text to analyze; below it is tagged token by token with spaCy
text_str = '''
    Mr. President, I offer you our congratulations on your election as the President of the current session of the General Assembly.
You represent Norway, a country which can take pride in its reputation as peaceful, just and progressive.
Your personal qualifications and your family's dedication to international effort are well known.
I should also like to express our appreciation of the services of your distinguished predecessor, Mrs. Angie Brooks Randolph.
I would also repeat our admiration for U Thant, whose skill and dedication have won him our respect.

41.	
Today is Mahatma Gandhi's one-hundred-first birthday, and we in India will take a fresh pledge to dedicate ourselves once again to the ideals for which the Mahatma lived and died, peace and nonviolence being the foremost among them.
We may not fully succeed in living up to his ideals but we must continue to try.

42.	
There are many developments in India which give us satisfaction.
Our people are expecting a better life through our development plans.
We have had a sizable increase in agricultural and industrial production.
Our trade is also showing signs of improvement.
India has once again demonstrated its faith in full-fledged democracy.
Alongside this there is a growing desire of the common man to share more equitably in the distribution of national wealth.

43.	
twenty-two days ago a great conference ended at Lusaka, and in 22 days from now we shall be celebrating the signing of the Charter of the United Nations.
The Conference of Lusaka owes much of its success to the efforts and organization undertaken by the Government and people of Zambia, and once again we should like to thank them.
The final declarations and resolution s of that Conference are being circulated as United Nations documents.
They represent the consensus of 53 Members of the United Nations, representing about half the human race.
I would urge that everyone read them.

44.	
The Conference at Lusaka highlighted several key points.
These are: international peace and security, peaceful coexistence and friendly relations, solution of international problems by negotiations, the value of the United Nations as a universal forum, decolonization, development, disarmament and the pursuit of the principles of nonalignment.
In order to fulfill the objectives we subscribed to at Lusaka, we seek the widest support and cooperation of the Members of the United Nations.

45.	
In assessing the achievements of the United Nations over the past 25 years, the major factor that strikes us is that, while another world war has been avoided, insecurity still prevails and that, while tension between the great Powers has eased and negotiations between them in certain fields have begun developments which are welcome armed conflicts are still raging in many parts of the world.
This situation requires attention from the world community.
A positive step in that direction that has been taken is the adoption by the Sixth Committee last week [1184th meeting] of the Declaration on Principles of international law concerning Friendly Relations and Cooperation among States in Accordance with the Charter of the United Nations
[A 18082, para.
8].
A further step to strengthen the edifice of peace that this Assembly could take is to adopt a d

    '''
import spacy

# Load the small English pipeline and run it over the sample text.
nlp = spacy.load('en_core_web_sm')
doc = nlp(text_str)

# Show every token next to its part-of-speech tag.
for token in doc:
    print(f"{token.text} -> {token.pos_}")


     -> SPACE
Mr. -> PROPN
President -> PROPN
, -> PUNCT
I -> PRON
offer -> VERB
you -> PRON
our -> DET
congratulations -> NOUN
on -> ADP
your -> DET
election -> NOUN
as -> SCONJ
the -> DET
President -> PROPN
of -> ADP
the -> DET
current -> ADJ
session -> NOUN
of -> ADP
the -> DET
General -> PROPN
Assembly -> PROPN
. -> PUNCT

 -> SPACE
You -> PRON
represent -> VERB
Norway -> PROPN
, -> PUNCT
a -> DET
country -> NOUN
which -> DET
can -> VERB
take -> VERB
pride -> NOUN
in -> ADP
its -> DET
reputation -> NOUN
as -> ADV
peaceful -> ADJ
, -> PUNCT
just -> ADV
and -> CCONJ
progressive -> ADJ
. -> PUNCT

 -> SPACE
Your -> DET
personal -> ADJ
qualifications -> NOUN
and -> CCONJ
your -> DET
family -> NOUN
's -> PART
dedication -> NOUN
to -> ADP
international -> ADJ
effort -> NOUN
are -> AUX
well -> ADV
known -> ADJ
. -> PUNCT

 -> SPACE
I -> PRON
should -> VERB
also -> ADV
like -> VERB
to -> PART
express -> VERB
our -> DET
appreciation -> NOUN
of -> ADP
the -> DET
services -> NOUN
of -> ADP
y

In [None]:
from spacy import displacy

# Draw the dependency parse of `doc` inline in the notebook.
displacy.render(doc, jupyter=True, style='dep')

In [None]:
# Score the sentences of the sample text and show the five highest-ranked ones.
tr4sh = TextRank4Sentences()
tr4sh.analyze(text_str)
top_sentences = tr4sh.get_top_sentences(number=5)
print(top_sentences)


['In order to fulfill the objectives we subscribed to at Lusaka, we seek the widest support and cooperation of the Members of the United Nations.', 'The Conference of Lusaka owes much of its success to the efforts and organization undertaken by the Government and people of Zambia, and once again we should like to thank them.', '43.\ntwenty-two days ago a great conference ended at Lusaka, and in 22 days from now we shall be celebrating the signing of the Charter of the United Nations.', 'A positive step in that direction that has been taken is the adoption by the Sixth Committee last week [1184th meeting] of the Declaration on Principles of international law concerning Friendly Relations and Cooperation among States in Accordance with the Charter of the United Nations\n[A 18082, para.', 'These are: international peace and security, peaceful coexistence and friendly relations, solution of international problems by negotiations, the value of the United Nations as a universal forum, decolo