In [3]:
from nltk import corpus, tokenize, stem


__all__ = ['LanguageProcessor']


class LanguageProcessor(object):
    def __init__(self, language):
        self.language = language
        self.stopwords = corpus.stopwords.words(language)
        self.stemmer = stem.SnowballStemmer(language)

    def split_sentences(self, text):
        return tokenize.sent_tokenize(text, self.language)

    def extract_significant_words(self, sentence):
        return set(
            word for word in tokenize.word_tokenize(sentence)
            if word.isalnum() and word not in self.stopwords
        )

    def stem(self, word):
        return self.stemmer.stem(word)

In [16]:
from itertools import combinations
from operator import itemgetter

from distance import jaccard
from networkx import Graph, pagerank


def summarize_graph(text, sentence_count=5, lang='no'):
    languages = {
            'da': 'danish',
            'nl': 'dutch',
            'en': 'english',
            'fi': 'finnish',
            'fr': 'french',
            'de': 'german',
            'hu': 'hungarian',
            'it': 'italian',
            'no': 'norwegian',
            'po': 'portuguese',
            'ro': 'romanian',
            'ru': 'russian',
            'es': 'spanish',
            'sv': 'swedish'}
    if lang not in languages:
        print('Language is not supported')
        return
    
    language=languages[lang]
    
    processor = LanguageProcessor(language)

    sentence_list = processor.split_sentences(text)
    wordset_list = map(processor.extract_significant_words, sentence_list)
    stemsets = [
        {processor.stem(word) for word in wordset}
        for wordset in wordset_list
    ]

    graph = Graph()
    pairs = combinations(enumerate(stemsets), 2)
    for (index_a, stems_a), (index_b, stems_b) in pairs:
        if stems_a and stems_b:
            similarity = 1 - jaccard(stems_a, stems_b)
            if similarity > 0:
                graph.add_edge(index_a, index_b, weight=similarity)

    ranked_sentence_indexes = list(pagerank(graph).items())
    if ranked_sentence_indexes:
        sentences_by_rank = sorted(
            ranked_sentence_indexes, key=itemgetter(1), reverse=True)
        best_sentences = map(itemgetter(0), sentences_by_rank[:sentence_count])
        best_sentences_in_order = sorted(best_sentences)
    else:
        best_sentences_in_order = range(min(sentence_count, len(sentence_list)))

    return ' '.join(sentence_list[index] for index in best_sentences_in_order)

In [17]:
txt='''Chicken Little likes to walk in the woods. She likes to look at the trees. She likes to smell the flowers. She likes to listen to the birds singing.

One day while she is walking an acorn falls from a tree, and hits the top of her little head.

- My, oh, my, the sky is falling. I must run and tell the lion about it, - says Chicken Little and begins to run.

She runs and runs. By and by she meets the hen.

- Where are you going? - asks the hen.

- Oh, Henny Penny, the sky is falling and I am going to the lion to tell him about it.

- How do you know it? - asks Henny Penny.

- It hit me on the head, so I know it must be so, - says Chicken Little.

- Let me go with you! - says Henny Penny. - Run, run.

So the two run and run until they meet Ducky Lucky.

- The sky is falling, - says Henny Penny. - We are going to the lion to tell him about it.

- How do you know that? - asks Ducky Lucky.

- It hit Chicken Little on the head, - says Henny Penny.

- May I come with you? - asks Ducky Lucky.

- Come, - says Henny Penny.

So all three of them run on and on until they meet Foxey Loxey.

- Where are you going? - asks Foxey Loxey.

- The sky is falling and we are going to the lion to tell him about it, - says Ducky Lucky.

- Do you know where he lives? - asks the fox.

- I don't, - says Chicken Little.

- I don't, - says Henny Penny.

- I don't, - says Ducky Lucky.

- I do, - says Foxey Loxey. - Come with me and I can show you the way.

He walks on and on until he comes to his den.

- Come right in, - says Foxey Loxey.

They all go in, but they never, never come out again.'''

In [18]:
summarize_graph(txt)

"I must run and tell the lion about it, - says Chicken Little and begins to run. - The sky is falling, - says Henny Penny. - It hit Chicken Little on the head, - says Henny Penny. - The sky is falling and we are going to the lion to tell him about it, - says Ducky Lucky. - I don't, - says Henny Penny."