## Import needed modules

In [1]:
from nltk.corpus import stopwords #you can remove stop words for speed
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

## Open file and split into sentences

In [2]:
file = open("C://Users//123ch//OneDrive//Desktop//Text4.txt", "r")
#This file contains one paragraph of multiple sentences
filedata = file.readlines()
article = filedata[0].split(". ") #Just do the first paragraph

sentences = []
for sentence in article:
    print(sentence)
    sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))

Imagine there's no heaven
It's easy if you try
No hell below us
Above us, only sky
Imagine all the people livin' for today
Imagine there's no countries
It isn't hard to do
Nothing to kill or die for and no religion, too
Imagine all the people livin' life in peace
You may say I'm a dreamer but I'm not the only one
I hope someday you'll join us and the world will be as one
Imagine no possessions
I wonder if you can
No need for greed or hunger
A brotherhood of man
Imagine all the people sharing all the world




## Our data: a list of sentences

In [3]:
print("Sentences are ", sentences)

Sentences are  [['Imagine', "there's", 'no', 'heaven'], ["It's", 'easy', 'if', 'you', 'try'], ['No', 'hell', 'below', 'us'], ['Above', 'us,', 'only', 'sky'], ['Imagine', 'all', 'the', 'people', "livin'", 'for', 'today'], ['Imagine', "there's", 'no', 'countries'], ['It', "isn't", 'hard', 'to', 'do'], ['Nothing', 'to', 'kill', 'or', 'die', 'for', 'and', 'no', 'religion,', 'too'], ['Imagine', 'all', 'the', 'people', "livin'", 'life', 'in', 'peace'], ['You', 'may', 'say', "I'm", 'a', 'dreamer', 'but', "I'm", 'not', 'the', 'only', 'one'], ['I', 'hope', 'someday', "you'll", 'join', 'us', 'and', 'the', 'world', 'will', 'be', 'as', 'one'], ['Imagine', 'no', 'possessions'], ['I', 'wonder', 'if', 'you', 'can'], ['No', 'need', 'for', 'greed', 'or', 'hunger'], ['A', 'brotherhood', 'of', 'man'], ['Imagine', 'all', 'the', 'people', 'sharing', 'all', 'the', 'world'], ['\n']]


## Function to calculate similarity

In [4]:
def sentence_similarity(sent1, sent2 ):
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
    all_words = list(set(sent1 + sent2))
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
     # build the vector for the first sentence
    for w in sent1:
          vector1[all_words.index(w)] += 1
     # build the vector for the second sentence
    for w in sent2:
          vector2[all_words.index(w)] += 1
    return 1 - cosine_distance(vector1, vector2)

## Create the similarity matrix 

In [5]:
similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
             if idx1 == idx2: #ignore if both are same sentences
                continue 
             similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2])

print("Smilarity matrix \n", similarity_matrix)

Smilarity matrix 
 [[0.         0.         0.25       0.         0.18898224 0.75
  0.         0.15811388 0.1767767  0.         0.         0.57735027
  0.         0.20412415 0.         0.14433757 0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.11952286 0.         0.
  0.4        0.         0.         0.         0.        ]
 [0.25       0.         0.         0.         0.         0.25
  0.         0.15811388 0.         0.         0.13867505 0.28867513
  0.         0.20412415 0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.13363062 0.         0.
  0.         0.         0.         0.         0.        ]
 [0.18898224 0.         0.         0.         0.         0.18898224
  0.         0.11952286 0.6681531  0.10101525 0.10482848 0.21821789
  0.         0.15430335 0.         0.65465367 0.        ]
 [0.75       0.         0.25       0.         0.1889822

## Get the pagerank scores 

In [6]:
# Step 3 - Rank sentences in similarity martix
sentence_similarity_graph = nx.from_numpy_array(similarity_matrix)
scores = nx.pagerank(sentence_similarity_graph)
print("scores", scores)

scores {0: 0.09163334044231106, 1: 0.044085621995863154, 2: 0.05432094163511733, 3: 0.017795160457262718, 4: 0.09575693097837698, 5: 0.09163334044231106, 6: 0.014924507861294584, 7: 0.06531247594766292, 8: 0.0818694769377972, 9: 0.07522194125679457, 10: 0.05101030185038316, 11: 0.09222556696979167, 12: 0.05146059071771967, 13: 0.05778838761945696, 14: 0.017795160457262718, 15: 0.08787832873399998, 16: 0.009287925696594429}


## Sort sentences by pagerank

In [7]:
# Step 4 - Sort the rank and pick top sentences
ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    
print("Indexes of top ranked_sentence order are \n\n", ranked_sentence)

Indexes of top ranked_sentence order are 

 [(0.09575693097837698, ['Imagine', 'all', 'the', 'people', "livin'", 'for', 'today']), (0.09222556696979167, ['Imagine', 'no', 'possessions']), (0.09163334044231106, ['Imagine', "there's", 'no', 'heaven']), (0.09163334044231106, ['Imagine', "there's", 'no', 'countries']), (0.08787832873399998, ['Imagine', 'all', 'the', 'people', 'sharing', 'all', 'the', 'world']), (0.0818694769377972, ['Imagine', 'all', 'the', 'people', "livin'", 'life', 'in', 'peace']), (0.07522194125679457, ['You', 'may', 'say', "I'm", 'a', 'dreamer', 'but', "I'm", 'not', 'the', 'only', 'one']), (0.06531247594766292, ['Nothing', 'to', 'kill', 'or', 'die', 'for', 'and', 'no', 'religion,', 'too']), (0.05778838761945696, ['No', 'need', 'for', 'greed', 'or', 'hunger']), (0.05432094163511733, ['No', 'hell', 'below', 'us']), (0.05146059071771967, ['I', 'wonder', 'if', 'you', 'can']), (0.05101030185038316, ['I', 'hope', 'someday', "you'll", 'join', 'us', 'and', 'the', 'world', 'wi

## Pick the top “n” sentences

In [8]:
#Step 5 - How many sentences to pick
n = int(input("How many sentences do you want in the summary? "))
#n=2
summarize_text = []
for i in range(n):
      summarize_text.append(" ".join(ranked_sentence[i][1]))

How many sentences do you want in the summary? 5


## Finish off by printing summary

In [9]:
# Step 6 - Offcourse, output the summarize text
print("Summarize Text: \n", ". ".join(summarize_text))

Summarize Text: 
 Imagine all the people livin' for today. Imagine no possessions. Imagine there's no heaven. Imagine there's no countries. Imagine all the people sharing all the world
