In [3]:
from nltk.cluster.util import cosine_distance
from nltk.corpus import stopwords
import numpy as np
import pandas as pd
import networkx as nx
from string import punctuation
import nltk
nltk.download('stopwords')
import io

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
def data_summary(data,top_n):
    stop_words = set(stopwords.words("english"))
    summary=[]
    for X in data:

        #----------------------------------------------------
        #reading sentence

        sentences = nltk.tokenize.sent_tokenize(X)
        #print(sentences)
        #----------------------------------------------------
        #sentence similarity

        def sentence_similarity(sent1, sent2, stopwords=None):
            if stopwords is None:
                stopwords = []

            sent1 = [w.lower() for w in sent1]
            sent2 = [w.lower() for w in sent2]
            #print(sent1,sent2)
            all_words = list(set(sent1 + sent2))

            vector1 = [0] * len(all_words)
            vector2 = [0] * len(all_words)

            # build the vector for the first sentence
            for w in sent1:
                if w in stopwords:
                    continue
                vector1[all_words.index(w)] += 1

            # build the vector for the second sentence
            for w in sent2:
                if w in stopwords:
                    continue
                vector2[all_words.index(w)] += 1

            return 1 - cosine_distance(vector1, vector2)

        #print("sentence_similarity :",sentence_similarity,'\n','-'*50)

        #-----------------------------------------------------------------------------
        #similarity matrix

        similarity_matrix = np.zeros((len(sentences), len(sentences)))
        for idx1 in range(len(sentences)):
                for idx2 in range(len(sentences)):
                    if idx1 == idx2: #ignore if both are same sentences
                        continue 
                    similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)


        #print("similarity matrix :",similarity_matrix,'\n','-'*50)

        #------------------------------------------------------------------------------
        #ranking sentence 

        sentence_similarity_graph = nx.from_numpy_array(similarity_matrix)
        scores = nx.pagerank(sentence_similarity_graph)
        ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True) 
        #print("ranked_sentence :",ranked_sentence,'\n','-'*50)

        #--------------------------------------------------------------------------------
        #summarize text
        summarize_text = []


        #top_n = 15
        for i in range(top_n):
            if X != '-' :
                summarize_text.append("".join(ranked_sentence[i][1]))
            else :
                summarize_text.append('-')
        #print("Summarize Text: \n", ". ".join(summarize_text))
        #print('-'*110)

        summary.append(". ".join(summarize_text))
    
    return summary

### ----------------------------------------------------------------------------------------------------------------------------------------------------------------

## Testing with real news data

In [9]:
news1 = '''Seven policemen in Kanpur were quarantined and their samples were sent for examination on Friday after a
22-year-old man arrested on charges of cow slaughter tested positive for the coronavirus disease (Covid-19). Overall,
23 policemen in the district have tested positive till date.On Thursday, the police raided a house in Deputy Padaho
locality, and claimed to have recovered 16 kg meat and weapons allegedly used for slaughtering. The 22-year-old was
held with three others. The arrest was registered only on Friday. The police said the meat would be sent to a laboratory
for testing since it was suspected to be beef. “While the formalities were being done, we got information that samples
of one of the four were collected randomly a few days back in Anwarganj. We checked with officials and found his report
was positive,” said Chamanganj Station House Officer Raj Bahadur Singh.'''


given_headline_news1 = '''Seven Kanpur cops quarantined after accused tests positive. '''

In [10]:
news1_summary = data_summary(pd.Series([news1]),3)
news1_summary

['“While the formalities were being done, we got information that samples\nof one of the four were collected randomly a few days back in Anwarganj.. Seven policemen in Kanpur were quarantined and their samples were sent for examination on Friday after a\n22-year-old man arrested on charges of cow slaughter tested positive for the coronavirus disease (Covid-19).. Overall,\n23 policemen in the district have tested positive till date.On Thursday, the police raided a house in Deputy Padaho\nlocality, and claimed to have recovered 16 kg meat and weapons allegedly used for slaughtering.']

In [11]:
news2 = '''Chief Minister Mamata Banerjee on Saturday announced that her government had decided to bear the entire cost
of the return journey of migrant workers from West Bengal stranded in other states, even as the administration said an
international flight carrying 160 citizens from Dhaka would land in the city on Monday. Earlier this month, the Centre
had said 85 per cent of the cost of train tickets would be borne by the Ministry of Railways, while the rest would have
to be paid by the states. Banerjee’s announcement came a day after Railway Minister Piyush Goyal accused a number of
states, including West Bengal, of not granting permission to receive the trains. The states dismissed the minister’s
claim. “Saluting the toil faced by our migrant brethren, I am pleased to announce the decision of GoWB to bear the entire
cost of movement for our migrant workers by special trains from other states to West Bengal. No migrant will be charged.
Letter to Railway Board attached,” tweeted Banerjee, attaching Chief Secretary Rajiva Sinha’s letter to Railway Board 
Chairperson VK Yadav. In his letter, Sinha wrote, “I would like to confirm that the entire cost of the movement by
special trains to West Bengal, of the migrants of the state stranded in various parts of the country, shall be borne 
by the Government of West Bengal.” Later in the afternoon, Home Secretary Alapan Bandyopadhyay briefed reporters about
this decision at state secretariat Nabanna. “These trains are only for stranded people who are under distress, and not 
for normal movements. All migrant labourers, students, and pilgrims can come back to West Bengal without any tickets. 
The state government will bear the cost,” he added. Giving an update about the 105 additional special trains that Mamata
Banerjee had announced on Thursday, the Home Secretary said: “Already seven trains have entered the state. Two more are 
coming. Total 105 trains are coming. Out of which, 28 are coming from Kerala, 18 from Maharashtra, 10 from Tamil Nadu, 
seven from Uttar Pradesh, six each from Delhi, Haryana and Rajasthan, five each from Gujarat, Karnataka and Telangana,
three from Andhra Pradesh, two from Punjab, and one each from Himachal Pradesh, Jammu and Kashmir, Madhya Pradesh and 
Uttarakhand.” Bandyopadhyay said the effort to bring back those stranded was a “huge and herculean exercise”.'''

given_headline_news2 = '''West Bengal will cover cost of migrants’ train journey home: Mamata Banerjee'''

In [12]:
news2_summary = data_summary(pd.Series([news2]),2)
news2_summary

['In his letter, Sinha wrote, “I would like to confirm that the entire cost of the movement by\nspecial trains to West Bengal, of the migrants of the state stranded in various parts of the country, shall be borne \nby the Government of West Bengal.” Later in the afternoon, Home Secretary Alapan Bandyopadhyay briefed reporters about\nthis decision at state secretariat Nabanna.. “Saluting the toil faced by our migrant brethren, I am pleased to announce the decision of GoWB to bear the entire\ncost of movement for our migrant workers by special trains from other states to West Bengal.']

In [13]:
news3 = '''In a direct snub to Bihar Chief Minister and JD(U) leader Nitish Kumar, Lok Jan Shakti Party (LJP) 
president Chirag Paswan asserted that only the BJP can decide who would lead the National Democratic Alliance (NDA) 
in Assembly elections slated for end of this year. His statement comes ahead of Home Minister Amit Shah’s digital rally
addressing the State voters on Sunday. Mr. Paswan’s statement comes at a time when voices within the BJP have been 
picking faults with Mr. Kumar’s handling of the COVID-19 crisis. Many have been demanding that Mr. Kumar hand over 
the Chief Minister’s position to the BJP. Mr Paswan’s statement is convenient for the BJP, which so far officially 
maintains that Mr. Kumar will continue to be the face of the NDA. “Who will be the face of the NDA and lead us into 
the Assembly elections will be decided by the BJP, which is the largest constituent of the NDA in the State. Whatever 
decision the BJP will take in the interest of Bihar and the coalition, the Lok Jan Shakti Party will fully support it,” 
Mr Paswan tweeted on Saturday. He has openly slammed Mr. Kumar, for what he called delayed response in getting stranded 
students and migrant workers back during the extended lockdown. The LJP, meanwhile, is struggling to remain politically 
relevant in a State where at present it has not a single MLA. In 2015, it fought in alliance with the BJP, the Hindustan 
Awam Morcha and the Rashtriya Lok Samata Party. Out of the 243 seats, they got 42 in the alliance but could only win two. 
Even the two MLAs it managed to send to the Assembly shifted loyalties later on. The JD(U), in 2015 was with the 
‘grand alliance’ with the RJD and the Congress. The LJP is worried that it may not get 42 seats, like it got in the 
last elections, with the JD(U) back in the NDA. Concentrating on an online campaign now, Mr. Paswan has come up with 
a slogan “Bihar 1st, Bihari 1st” for the elections. In November last, the party had begun a membership drive aiming to 
connect 50 lakh new members. But so far, according to their own estimates, only 31 lakh members have joined.'''

given_headline_news3 = '''BJP will decide face of NDA in Bihar Assembly polls: LJP'''

In [14]:
news3_summary = data_summary(pd.Series([news3]),3)
news3_summary

['In a direct snub to Bihar Chief Minister and JD(U) leader Nitish Kumar, Lok Jan Shakti Party (LJP) \npresident Chirag Paswan asserted that only the BJP can decide who would lead the National Democratic Alliance (NDA) \nin Assembly elections slated for end of this year.. Whatever \ndecision the BJP will take in the interest of Bihar and the coalition, the Lok Jan Shakti Party will fully support it,” \nMr Paswan tweeted on Saturday.. But so far, according to their own estimates, only 31 lakh members have joined.']

In [15]:
news4 = '''Two suspended Buffalo, New York, police officers pleaded not guilty Saturday to assaulting a 75-year-old man during a protest against racism and police brutality.
Both were charged with one count of assault in the second degree and have been released on their own recognizance without bail.
They were arraigned via video conference and had the same attorney.
The officers, who were with the Police Department's Emergency Response Team, are scheduled to appear in court again July 20.
Video from a demonstration Thursday shows two officers pushing Martin Gugino back. He falls to the sidewalk, where his head bleeds. The officers and others then walk by him, some looking down at him as they pass.
Gugino was hospitalized with a head injury.
Gov. Andrew Cuomo said Friday the officers should be fired and prosecutors should move "fairly but quickly."
"When I saw the video, I got sick to my stomach," Cuomo said. "I would encourage the district attorney not to do what happened in Minneapolis, which the delay itself caused issues. People don't want vaguery. They are upset and want answers."
Mayor Byron Brown told CNN's Chris Cuomo on Friday night that he was told that emergency response team officers are trained to keep moving forward.
"Embedded with them are medics, officers with first-aid training," he said. "The medics were just behind the first line unit that continued to move forward, and within seconds, the medics rendered first-aid assistance."
Also Friday, 57 officers resigned from the Emergency Response Team. Brown said that's because they felt pressured by the police union.'''

In [16]:
news4_summary = data_summary(pd.Series([news4]),3)
news4_summary

['Both were charged with one count of assault in the second degree and have been released on their own recognizance without bail.. "I would encourage the district attorney not to do what happened in Minneapolis, which the delay itself caused issues.. "The medics were just behind the first line unit that continued to move forward, and within seconds, the medics rendered first-aid assistance."']