## Imports

In [5]:
import numpy as np
import networkx 
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance

## Split the text into sentences

In [6]:
def sentence_text(file_name):
    """Read a text file and split it into tokenised sentences.

    The whole file is read (not only its first line), runs of whitespace
    including line breaks are collapsed to single spaces, and the text is
    split into sentences on ``". "``. Each sentence is then split into
    words on whitespace. Punctuation other than the sentence-ending period
    is kept, so the words can be joined back into readable text.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list of sentences, each sentence being a list of word strings.
        An empty or whitespace-only file yields an empty list.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(file_name, "r") as file:
        # Normalise newlines/tabs/repeated blanks so that a sentence break
        # falling at a line end is still seen as ". ".
        text = " ".join(file.read().split())

    # Splitting on ". " removes the period of every sentence but the last;
    # drop that final period too so all sentences are treated alike.
    if text.endswith("."):
        text = text[:-1]

    sentences = []
    for sentence in text.split(". "):
        words = sentence.split()
        if words:  # skip empty fragments (e.g. from an empty file)
            sentences.append(words)
    return sentences

## Compute the similarity between two sentences

In [7]:
def sentence_similarity(sentence1, sentence2, stopwords=None):
    """Return the cosine similarity of two tokenised sentences.

    Both sentences are lower-cased and turned into word-count vectors over
    their combined vocabulary; words listed in ``stopwords`` are not
    counted. The result is ``1 - cosine_distance(vector1, vector2)``.

    Args:
        sentence1: First sentence, as a list of word strings.
        sentence2: Second sentence, as a list of word strings.
        stopwords: Optional iterable of words to ignore. ``None`` means
            no word is ignored.

    Returns:
        The similarity of the two count vectors, or ``0.0`` when either
        sentence has no counted word (empty, or made only of stop words).
    """
    # A set gives constant-time membership tests inside the loops below.
    ignored = set(stopwords) if stopwords is not None else set()
    sentence1 = [word.lower() for word in sentence1]
    sentence2 = [word.lower() for word in sentence2]

    # Assign every distinct word one vector position, looked up through a
    # dict instead of a linear list.index() scan per word.
    positions = {
        word: index for index, word in enumerate(set(sentence1 + sentence2))
    }
    vector1 = [0] * len(positions)
    vector2 = [0] * len(positions)

    for word in sentence1:
        if word not in ignored:
            vector1[positions[word]] += 1

    for word in sentence2:
        if word not in ignored:
            vector2[positions[word]] += 1

    # An all-zero vector has no direction, so its cosine is undefined
    # (0/0); report "no similarity" instead of propagating NaN.
    if not any(vector1) or not any(vector2):
        return 0.0

    return 1 - cosine_distance(vector1, vector2)

## Build similarity matrix

In [8]:
def build_similarity_matrix(sentences, stop_words):
    """Build the square matrix of pairwise sentence similarities.

    Args:
        sentences: List of tokenised sentences (lists of words).
        stop_words: Words passed on to ``sentence_similarity`` to ignore.

    Returns:
        A ``len(sentences) x len(sentences)`` NumPy array whose entry
        ``[i][j]`` is the similarity of sentence ``i`` and sentence ``j``;
        the diagonal is left at zero.
    """
    size = len(sentences)
    similarity_matrix = np.zeros((size, size))

    for row, first in enumerate(sentences):
        for col, second in enumerate(sentences):
            # A sentence is never compared with itself.
            if row != col:
                similarity_matrix[row, col] = sentence_similarity(first, second, stop_words)

    return similarity_matrix

## Rank the sentences from the similarity matrix and choose the N best for the abstract

In [9]:
def generate_abstract(file_name, nb_sentence):
    """Print an extractive summary of a text file.

    The file is split into sentences, a pairwise similarity matrix is
    built (ignoring NLTK's English stop words), the matrix is turned into
    a graph, and the sentences are ranked by their PageRank score. The
    best-ranked sentences are printed as a list of strings.

    Args:
        file_name: Path of the text file to summarise.
        nb_sentence: Maximum number of sentences to keep. When the text
            has fewer sentences, all of them are printed; a value of zero
            or less prints an empty list.

    Returns:
        None. The summary is written to standard output.
    """
    stop_words = stopwords.words('english')

    sentences = sentence_text(file_name)
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)
    sentence_similarity_graph = networkx.from_numpy_array(sentence_similarity_matrix)
    scores = networkx.pagerank(sentence_similarity_graph)

    # Highest score first; equal scores fall back to comparing the word lists.
    ranked_sentence = sorted(
        ((scores[i], s) for i, s in enumerate(sentences)), reverse=True
    )

    # Slicing (rather than indexing 0..nb_sentence-1) cannot raise
    # IndexError when fewer sentences are available than requested.
    best = ranked_sentence[:max(nb_sentence, 0)]
    resume_text = [" ".join(words) for _, words in best]
    print(resume_text)

# Summarise test.txt with its three best-ranked sentences.
generate_abstract(file_name="test.txt", nb_sentence=3)

['Alfred is at a loss, and Matuschek avoids any explanation, finally telling Alfred that it would be best if he left', 'Alfred and Klara are getting ready to leave, and she has another date with her mystery pen pal, but Alfred delays her with a few questions', 'Later, as she is resting at home, Alfred pays her a visit, and while he is there her aunt brings her another letter from her secret pen pal that explains his not being at the meeting because he saw her there with Alfred']
