In [1]:
#pip install transformers==2.2.0
#pip install bert-extractive-summarizer

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import numpy as np
import pandas as pd
from transformers import BartTokenizer, BartForConditionalGeneration

In [5]:
def bert_text_summarizer(text):
    # Load pre-trained BART model and tokenizer
    model_name = 'facebook/bart-large-cnn'
    tokenizer = BartTokenizer.from_pretrained(model_name)
    model = BartForConditionalGeneration.from_pretrained(model_name)
    
    # Tokenize the text
    inputs = tokenizer.encode_plus(text, truncation=True, padding='longest', return_tensors='pt')
    
    # Generate the summary using BART
    summary_ids = model.generate(inputs['input_ids'], num_beams=4, length_penalty=2.0, max_length=142, min_length=56, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    
    return summary

In [6]:
df_1 = pd.read_csv("articles1.csv")

In [7]:
df = df_1.iloc[:10]

In [8]:
df.shape

(10, 10)

In [9]:
df['summarized'] = df['content'].apply(bert_text_summarizer)

In [10]:
from transformers import pipeline

def zero_shot_classification(text, labels):
    # Load zero-shot classification pipeline
    classifier = pipeline("zero-shot-classification")
    
    # Perform zero-shot classification
    result = classifier(text, labels)
    
    # Return the classification result
    return result

In [11]:
#List out the labels as required to the context
labels = ["war", "country", "rule"]

In [12]:
df['classification'] = df['summarized'].apply(lambda x: zero_shot_classification(x, labels))

No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (h

In [13]:
type(df['classification'][0])

dict

In [14]:
df['label'] = df['classification'].apply(lambda x: x['labels'][0])
df['score'] = df['classification'].apply(lambda x: x['scores'][0])

In [15]:
results = df[['content','summarized','label']]

In [16]:
results

Unnamed: 0,content,summarized,label
0,WASHINGTON — Congressional Republicans have...,House Republicans have a new fear when it come...,rule
1,"After the bullet shells get counted, the blood...",Four of every five shootings in the 40th Preci...,rule
2,"When Walt Disney’s “Bambi” opened in 1942, cri...","Tyrus Wong, a Chinese immigrant, was one of th...",rule
3,"Death may be the great equalizer, but it isn’t...","The pop music world had, hands down, the bleak...",rule
4,"SEOUL, South Korea — North Korea’s leader, ...",North Korea has conducted five nuclear tests i...,country
5,"LONDON — Queen Elizabeth II, who has been b...",Queen Elizabeth II has been battling a cold fo...,country
6,BEIJING — President Tsai of Taiwan sharpl...,Taiwan's President Tsai says China is threaten...,country
7,"Danny Cahill stood, slightly dazed, in a blizz...","Study of ""The Biggest Loser"" contestants yield...",rule
8,"Just how is Hillary Kerr, the founder of ...",Hillary Kerr and Jonathan Leahy met at the 410...,rule
9,Angels are everywhere in the Muñiz family’s ap...,José and Zoraida Muñiz and their children have...,rule


In [17]:
#for more accurate classes setting a threshold to 0.75
threshold = 0.75
df['Filtered_Labels'] = df['classification'].apply(lambda x: [label for label, score in zip(x['labels'], x['scores']) if score >= threshold])

In [18]:
results = df[['content','summarized','label','Filtered_Labels']]

In [19]:
results

Unnamed: 0,content,summarized,label,Filtered_Labels
0,WASHINGTON — Congressional Republicans have...,House Republicans have a new fear when it come...,rule,[]
1,"After the bullet shells get counted, the blood...",Four of every five shootings in the 40th Preci...,rule,[]
2,"When Walt Disney’s “Bambi” opened in 1942, cri...","Tyrus Wong, a Chinese immigrant, was one of th...",rule,[]
3,"Death may be the great equalizer, but it isn’t...","The pop music world had, hands down, the bleak...",rule,[rule]
4,"SEOUL, South Korea — North Korea’s leader, ...",North Korea has conducted five nuclear tests i...,country,[]
5,"LONDON — Queen Elizabeth II, who has been b...",Queen Elizabeth II has been battling a cold fo...,country,[country]
6,BEIJING — President Tsai of Taiwan sharpl...,Taiwan's President Tsai says China is threaten...,country,[]
7,"Danny Cahill stood, slightly dazed, in a blizz...","Study of ""The Biggest Loser"" contestants yield...",rule,[]
8,"Just how is Hillary Kerr, the founder of ...",Hillary Kerr and Jonathan Leahy met at the 410...,rule,[rule]
9,Angels are everywhere in the Muñiz family’s ap...,José and Zoraida Muñiz and their children have...,rule,[]
