In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
 

In [4]:
def read_article(file_name):
    file = open(file_name, "r")
    filedata = file.readlines()
    article = filedata[0].split(". ")
    sentences = []
    for sentence in article:
        sentences.append(sentence.replace("[a-zA-Z]", "").split(" "))
    sentences.pop()
    return sentences



In [5]:
def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
    
    sent1 = [w.lower() for w in sent1] 
    sent2 = [w.lower() for w in sent2]
    all_words = list(set(sent1 + sent2))
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] +=1
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] +=1
    
    return 1 - cosine_distance(vector1,vector2)


In [9]:
def build_similarity_matrix(sentences, stop_words):
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2:
                continue
            similarity_matrix[idx1][idx2]= sentence_similarity(sentences[idx1], sentences[idx2], stop_words)
    return similarity_matrix


In [13]:
def generate_summary(file_name, top_n=5):
    nltk.download("stopwords")
    stop_words = stopwords.words("english")
    summarize_text = []
    sentences = read_article(file_name)
    print(sentences)
    similarity_matrix = build_similarity_matrix(sentences,stop_words)
    sentence_similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(sentence_similarity_graph)
    ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
    print("Indexes of top ranked_sentence order are ", ranked_sentences)
    for i in range(top_n):
        print(" ".join(ranked_sentences[i][1]))
    


In [15]:
generate_summary("trump.txt", 2)


WASHINGTON - The Trump administration has ordered the military to start withdrawing roughly 7,000 troops from Afghanistan in the coming months, two defense officials said Thursday, an abrupt shift in the 17-year-old war there and a decision that stunned Afghan officials, who said they had not been briefed on the plans.President Trump made the decision to pull the troops - about half the number the United States has in Afghanistan now - at the same time he decided to pull American forces out of Syria, one official said.The announcement came hours after Jim Mattis, the secretary of defense, said that he would resign from his position at the end of February after disagreeing with the president over his approach to policy in the Middle East.The whirlwind of troop withdrawals and the resignation of Mr
Mattis leave a murky picture for what is next in the United States’ longest war, and they come as Afghanistan has been troubled by spasms of violence afflicting the capital, Kabul, and other i

[nltk_data] Downloading package stopwords to C:\Users\Aazhmeer
[nltk_data]     Chhapra\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
