In [None]:
# Mount Google Drive at /content/drive so the dataset path under that
# directory can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize,sent_tokenize
from collections import Counter

In [None]:
# Location of the CSV file on the mounted Drive; it is read into `df`.
data='/content/drive/MyDrive/Datasets/True.csv'

In [None]:
# Load the CSV located at `data` into a DataFrame.
df=pd.read_csv(data)

In [None]:
# Keep only the first 500 rows. The explicit .copy() makes `df` an independent
# frame, so the column assignments in later cells write to it directly rather
# than to a slice of the full dataset (which can raise SettingWithCopyWarning).
df = df.iloc[:500, :].copy()


In [None]:
# Sanity check: (rows, columns) of the frame after keeping the first 500 rows.
df.shape

(500, 4)

# ***Extractive Method (I)-'Frequency'***

In [None]:
# Lowercase every article with the vectorized string accessor. Unlike a Python
# lambda calling x.lower(), it leaves missing values as NaN instead of raising
# AttributeError on them.
df['text'] = df['text'].str.lower()

In [None]:
# Strip the leading wire-service dateline, e.g. "washington (reuters) - ".
# The location part may be slash-separated or span several words
# (e.g. "seattle/washington (reuters) - "), so accept any short run of
# non-parenthesis characters before the "(agency)" marker instead of a single
# \w+ word. The 80-character cap keeps the match confined to the very start
# of the article.
DATELINE_RE = re.compile(r'^[^()]{1,80}\(\w+\)\s+-\s+')
df['text'] = df['text'].apply(lambda x: DATELINE_RE.sub('', x))

In [None]:
# Show the text column after lowercasing and dateline removal.
df.loc[:, 'text']

Unnamed: 0,text
0,the head of a conservative republican faction ...
1,transgender people will be allowed for the fir...
2,the special counsel investigation of links bet...
3,trump campaign adviser george papadopoulos tol...
4,seattle/washington (reuters) - president donal...
...,...
495,the following statements were posted to the ve...
496,"leandra english, who is in a legal battle with..."
497,u.s. senator elizabeth warren said on tuesday ...
498,the top two republicans in the u.s. congress s...


In [None]:
# Fetch the NLTK resources this notebook needs: the punkt_tab tokenizer data
# and the stopwords corpus.
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
# Build the stop-word lookup once. Evaluating stopwords.words('english') inside
# the token filter fetches the whole list again for every single token and then
# scans it linearly; a frozenset gives constant-time membership tests.
ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))


def preprocess_text(text):
    """Return `text` with English stop words removed.

    Args:
        text: One document as a single string. Matching against the stop-word
            list is case-sensitive, so the text should already be lowercased.

    Returns:
        The remaining tokens joined by single spaces. Punctuation tokens are
        not stop words and are therefore kept.
    """
    tokens = word_tokenize(text)
    return " ".join(token for token in tokens if token not in ENGLISH_STOP_WORDS)


df["cleaned_text"] = df["text"].apply(preprocess_text)

In [None]:
# Show the stop-word-filtered text for every article.
df.loc[:, "cleaned_text"]

Unnamed: 0,cleaned_text
0,head conservative republican faction u.s. cong...
1,transgender people allowed first time enlist u...
2,special counsel investigation links russia pre...
3,trump campaign adviser george papadopoulos tol...
4,seattle/washington ( reuters ) - president don...
...,...
495,following statements posted verified twitter a...
496,"leandra english , legal battle trump administr..."
497,u.s. senator elizabeth warren said tuesday “ d...
498,top two republicans u.s. congress said leading...


In [None]:
# Inspect the filtered text of the article with index label 1.
df.loc[1, 'cleaned_text']

'transgender people allowed first time enlist u.s. military starting monday ordered federal courts , pentagon said friday , president donald trump ’ administration decided appeal rulings blocked transgender ban . two federal appeals courts , one washington one virginia , last week rejected administration ’ request put hold orders lower court judges requiring military begin accepting transgender recruits jan. 1. justice department official said administration challenge rulings . “ department defense announced releasing independent study issues coming weeks . rather litigate interim appeal occurs , administration decided wait dod ’ study continue defend president ’ lawful authority district court meantime , ” official said , speaking condition anonymity . september , pentagon said created panel senior officials study implement directive trump prohibit transgender individuals serving . defense department feb. 21 submit plan trump . lawyers representing currently-serving transgender servic

In [None]:
# Turn the filtered text into a token list per article: lowercase it, replace
# every non-word character with a space, then tokenize. Punctuation-only tokens
# disappear, and tokens containing punctuation are split apart
# (e.g. "u.s." ends up as the two tokens "u" and "s").
df["tokens"] = df["cleaned_text"].map(
    lambda doc: word_tokenize(re.sub(r'\W', ' ', doc.lower()))
)

In [None]:
# Count how often each token occurs within its own article.
df["word_freq"] = df["tokens"].apply(Counter)

In [None]:
# Show the per-article frequency tables as a one-column frame.
df.loc[:, ["word_freq"]]

Unnamed: 0,word_freq
0,"{'head': 1, 'conservative': 3, 'republican': 6..."
1,"{'transgender': 15, 'people': 2, 'allowed': 1,..."
2,"{'special': 2, 'counsel': 2, 'investigation': ..."
3,"{'trump': 10, 'campaign': 5, 'adviser': 1, 'ge..."
4,"{'seattle': 2, 'washington': 3, 'reuters': 1, ..."
...,...
495,"{'following': 1, 'statements': 2, 'posted': 1,..."
496,"{'leandra': 1, 'english': 1, 'legal': 1, 'batt..."
497,"{'u': 4, 's': 4, 'senator': 1, 'elizabeth': 1,..."
498,"{'top': 1, 'two': 1, 'republicans': 2, 'u': 1,..."


In [None]:
# Inspect the frequency table of the article with index label 1.
df.loc[1, "word_freq"]

Counter({'transgender': 15,
         'people': 2,
         'allowed': 1,
         'first': 1,
         'time': 2,
         'enlist': 2,
         'u': 2,
         's': 2,
         'military': 7,
         'starting': 1,
         'monday': 1,
         'ordered': 1,
         'federal': 4,
         'courts': 2,
         'pentagon': 4,
         'said': 11,
         'friday': 1,
         'president': 5,
         'donald': 1,
         'trump': 10,
         'administration': 10,
         'decided': 2,
         'appeal': 4,
         'rulings': 4,
         'blocked': 1,
         'ban': 5,
         'two': 1,
         'appeals': 1,
         'one': 2,
         'washington': 2,
         'virginia': 1,
         'last': 1,
         'week': 1,
         'rejected': 1,
         'request': 1,
         'put': 2,
         'hold': 1,
         'orders': 1,
         'lower': 1,
         'court': 4,
         'judges': 3,
         'requiring': 1,
         'begin': 3,
         'accepting': 3,
         'recruits': 

In [None]:
# Split each article into a list of sentences, then show the new column.
# NOTE(review): `text` is already lowercased at this point; the sample output
# contains sentences that were not split (e.g. "... jan. 1. a justice
# department ..."). Possibly the tokenizer relies on capitalization - confirm.
df["sentences"] = df["text"].apply(sent_tokenize)
df.loc[:, ["sentences"]]

Unnamed: 0,sentences
0,[the head of a conservative republican faction...
1,[transgender people will be allowed for the fi...
2,[the special counsel investigation of links be...
3,[trump campaign adviser george papadopoulos to...
4,[seattle/washington (reuters) - president dona...
...,...
495,[the following statements were posted to the v...
496,"[leandra english, who is in a legal battle wit..."
497,[u.s. senator elizabeth warren said on tuesday...
498,[the top two republicans in the u.s. congress ...


In [None]:
# Inspect the sentence list of the article with index label 1.
df.loc[1, "sentences"]

['transgender people will be allowed for the first time to enlist in the u.s. military starting on monday as ordered by federal courts, the pentagon said on friday, after president donald trump’s administration decided not to appeal rulings that blocked his transgender ban.',
 'two federal appeals courts, one in washington and one in virginia, last week rejected the administration’s request to put on hold orders by lower court judges requiring the military to begin accepting transgender recruits on jan. 1. a justice department official said the administration will not challenge those rulings.',
 '“the department of defense has announced that it will be releasing an independent study of these issues in the coming weeks.',
 'so rather than litigate this interim appeal before that occurs, the administration has decided to wait for dod’s study and will continue to defend the president’s lawful authority in district court in the meantime,” the official said, speaking on condition of anonymi

In [None]:
# Score every sentence by summing the article-level frequency of its tokens.
sentence_scores_list = []

# Walk the two columns in lockstep. Looking the table up as df["word_freq"][i]
# with an enumerate() counter is a *label* lookup, which only lines up with the
# row position while the index is exactly 0..n-1; it silently misaligns or
# raises KeyError once the frame has been filtered, sampled or re-sliced.
for sentences, word_freq in zip(df["sentences"], df["word_freq"]):
    # Maps sentence text -> score; identical sentences share a single entry.
    sentence_scores = {}

    for sentence in sentences:
        words = word_tokenize(sentence.lower())
        # Tokens missing from the table (stop words, punctuation, and tokens
        # that still contain punctuation such as "u.s.") add nothing.
        score = sum(word_freq[word] for word in words if word in word_freq)
        sentence_scores[sentence] = score

    sentence_scores_list.append(sentence_scores)

df["sentence_scores"] = sentence_scores_list
df[["sentence_scores"]]

Unnamed: 0,sentence_scores
0,{'the head of a conservative republican factio...
1,{'transgender people will be allowed for the f...
2,{'the special counsel investigation of links b...
3,{'trump campaign adviser george papadopoulos t...
4,{'seattle/washington (reuters) - president don...
...,...
495,{'the following statements were posted to the ...
496,"{'leandra english, who is in a legal battle wi..."
497,{'u.s. senator elizabeth warren said on tuesda...
498,{'the top two republicans in the u.s. congress...


In [None]:
# Inspect the frequency table of the last article by reading it from the frame,
# rather than depending on the `word_freq` variable left behind by the scoring
# loop (which only exists if that cell was the most recent one to run).
df["word_freq"].iloc[-1]

Counter({'reuters': 1,
         'u': 4,
         's': 4,
         'congress': 6,
         'careening': 1,
         'toward': 1,
         'major': 1,
         'deadlines': 1,
         'republican': 5,
         'tax': 7,
         'bill': 6,
         'budget': 2,
         'policies': 1,
         'outlook': 1,
         'promises': 1,
         'sprint': 1,
         'end': 3,
         '2017': 3,
         'tuesday': 3,
         'nov': 3,
         '28': 1,
         'senate': 9,
         'committee': 1,
         'voted': 1,
         'send': 1,
         'cut': 1,
         'legislation': 3,
         'floor': 2,
         'vote': 3,
         'possibly': 1,
         'soon': 1,
         'thursday': 3,
         '51': 1,
         'votes': 1,
         'needed': 2,
         'passage': 1,
         '30': 2,
         'friday': 4,
         'dec': 6,
         '1': 2,
         'possible': 3,
         'final': 1,
         'although': 1,
         'delay': 1,
         'ahead': 1,
         'several': 1,
         '

In [None]:
# Inspect the sentence -> score mapping of the article with index label 1.
df.loc[1, "sentence_scores"]

{'transgender people will be allowed for the first time to enlist in the u.s. military starting on monday as ordered by federal courts, the pentagon said on friday, after president donald trump’s administration decided not to appeal rulings that blocked his transgender ban.': 114,
 'two federal appeals courts, one in washington and one in virginia, last week rejected the administration’s request to put on hold orders by lower court judges requiring the military to begin accepting transgender recruits on jan. 1. a justice department official said the administration will not challenge those rulings.': 108,
 '“the department of defense has announced that it will be releasing an independent study of these issues in the coming weeks.': 18,
 'so rather than litigate this interim appeal before that occurs, the administration has decided to wait for dod’s study and will continue to defend the president’s lawful authority in district court in the meantime,” the official said, speaking on condit

In [None]:
# For each article, take the two highest-scoring sentences (best first) and
# join them with a space to form the summary.
summaries = [
    " ".join(sorted(scores, key=scores.get, reverse=True)[:2])
    for scores in df["sentence_scores"]
]

df["extractive_summary"] = summaries

In [None]:
# Print the full article and its summary, separated by one blank line.
print(df.loc[1, 'text'], df.loc[1, 'extractive_summary'], sep="\n\n")

transgender people will be allowed for the first time to enlist in the u.s. military starting on monday as ordered by federal courts, the pentagon said on friday, after president donald trump’s administration decided not to appeal rulings that blocked his transgender ban. two federal appeals courts, one in washington and one in virginia, last week rejected the administration’s request to put on hold orders by lower court judges requiring the military to begin accepting transgender recruits on jan. 1. a justice department official said the administration will not challenge those rulings. “the department of defense has announced that it will be releasing an independent study of these issues in the coming weeks. so rather than litigate this interim appeal before that occurs, the administration has decided to wait for dod’s study and will continue to defend the president’s lawful authority in district court in the meantime,” the official said, speaking on condition of anonymity. in septemb

# ***Extractive Method (II)-'Weighted Frequency'***

In [None]:
# Score every sentence by summing its tokens' frequencies, each divided by the
# highest token frequency in the same article.
# NOTE(review): max_freq is constant within an article, so this only rescales
# the raw-frequency score; the ranking can differ from the unweighted one only
# through floating-point rounding.
sentence_scores_list_1 = []

# Walk the two columns in lockstep. Looking the table up as df["word_freq"][i]
# with an enumerate() counter is a *label* lookup, which only lines up with the
# row position while the index is exactly 0..n-1.
for sentences, word_freq in zip(df["sentences"], df["word_freq"]):
    # default=1 keeps an article with an empty frequency table from raising
    # ValueError; with no counted tokens every sentence scores 0 anyway.
    max_freq = max(word_freq.values(), default=1)
    # Maps sentence text -> score; identical sentences share a single entry.
    sentence_scores_1 = {}

    for sentence in sentences:
        words = word_tokenize(sentence.lower())
        # Tokens missing from the table add nothing to the score.
        score = sum(word_freq[word] / max_freq for word in words if word in word_freq)
        sentence_scores_1[sentence] = score

    sentence_scores_list_1.append(sentence_scores_1)

df["sentence_scores_1"] = sentence_scores_list_1
df[["sentence_scores_1"]]

Unnamed: 0,sentence_scores_1
0,{'the head of a conservative republican factio...
1,{'transgender people will be allowed for the f...
2,{'the special counsel investigation of links b...
3,{'trump campaign adviser george papadopoulos t...
4,{'seattle/washington (reuters) - president don...
...,...
495,{'the following statements were posted to the ...
496,"{'leandra english, who is in a legal battle wi..."
497,{'u.s. senator elizabeth warren said on tuesda...
498,{'the top two republicans in the u.s. congress...


In [None]:
# Inspect the weighted sentence -> score mapping of the article with index label 1.
df.loc[1, "sentence_scores_1"]

{'transgender people will be allowed for the first time to enlist in the u.s. military starting on monday as ordered by federal courts, the pentagon said on friday, after president donald trump’s administration decided not to appeal rulings that blocked his transgender ban.': 7.600000000000001,
 'two federal appeals courts, one in washington and one in virginia, last week rejected the administration’s request to put on hold orders by lower court judges requiring the military to begin accepting transgender recruits on jan. 1. a justice department official said the administration will not challenge those rulings.': 7.200000000000002,
 '“the department of defense has announced that it will be releasing an independent study of these issues in the coming weeks.': 1.2,
 'so rather than litigate this interim appeal before that occurs, the administration has decided to wait for dod’s study and will continue to defend the president’s lawful authority in district court in the meantime,” the offi

In [None]:
# For each article, take the two sentences with the highest weighted score
# (best first) and join them with a space to form the summary.
summeries_1 = [
    ' '.join(sorted(scores, key=scores.get, reverse=True)[:2])
    for scores in df['sentence_scores_1']
]
df['executive_summary_1'] = summeries_1

In [None]:
# Inspect the weighted-frequency summary of the article with index label 1.
df.loc[1, 'executive_summary_1']

'pentagon spokeswoman heather babb said in a statement: “as mandated by court order, the department of defense is prepared to begin accessing transgender applicants for military service jan. 1. all applicants must meet all accession standards.” jennifer levi, a lawyer with gay, lesbian and transgender advocacy group glad, called the decision not to appeal “great news.” “i’m hoping it means the government has come to see that there is no way to justify a ban and that it’s not good for the military or our country,” levi said. the trump administration previously said in legal papers that the armed forces were not prepared to train thousands of personnel on the medical standards needed to process transgender applicants and might have to accept “some individuals who are not medically fit for service.” the obama administration had set a deadline of july 1, 2017, to begin accepting transgender recruits.'

In [None]:
# Print both summaries of the article with index label 2, separated by one
# blank line: raw-frequency version first, weighted version second.
print(df.loc[2, 'extractive_summary'], df.loc[2, 'executive_summary_1'], sep="\n\n")

the special counsel investigation of links between russia and president trump’s 2016 election campaign should continue without interference in 2018, despite calls from some trump administration allies and republican lawmakers to shut it down, a prominent republican senator said on sunday. trump himself seemed to undercut his supporters in an interview last week with the new york times in which he said he expected mueller was “going to be fair.”    russia’s role in the election and the question of possible links to the trump campaign are the focus of multiple inquiries in washington.

the special counsel investigation of links between russia and president trump’s 2016 election campaign should continue without interference in 2018, despite calls from some trump administration allies and republican lawmakers to shut it down, a prominent republican senator said on sunday. graham said he still wants an examination of the fbi’s use of a dossier on links between trump and russia that was comp