### Analyze Emotions

In [25]:
from textblob import TextBlob
from transformers import pipeline
from newsplease import NewsPlease
import re

In [26]:
emotions_class = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
classes_class = pipeline("zero-shot-classification", model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")



In [27]:
article = NewsPlease.from_url("https://www.npr.org/2023/12/22/1221230635/japan-alleged-political-corruption-ldp-slush-fund")

In [28]:
article.description

"Japan's governing Liberal Democratic Party replaced two of its top executives as part of a purge related to investigations into alleged political slush funds."

In [29]:
article_text = article.title + "\n" + article.maintext

In [30]:
# Splitting the text into sentences
sentences = re.split(r'(?<=[.!?])\s+', article_text)

# Creating a dictionary with the number of characters, words, and the content of each sentence
sentence_dict = []
for i, sentence in enumerate(sentences, 1):
    words = sentence.split()
    dict_append = {
        'chars': len(sentence), 
        'words': len(words), 
        'content': sentence,
        'emotions': {}
    }
    for item in emotions_class(sentence)[0]:
        dict_append['emotions'][item['label']] = round(item['score'] * 100, 2)

    sentence_dict.append(dict_append)

In [31]:
emotion_sum_char = 0
emotion_value = 0
emotions_percentage = {}

for emotion in ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']:
    for sentence in sentence_dict:
        emotion_value += sentence['chars'] * sentence['emotions'][emotion]
        emotion_sum_char += sentence['chars']
    emotions_percentage[emotion] = emotion_value / emotion_sum_char
    emotion_value = 0
    emotion_sum_char = 0

In [32]:
emotions_percentage

{'anger': 12.074123116979731,
 'disgust': 25.135253859029195,
 'fear': 3.9958601450623035,
 'joy': 1.4329570392412125,
 'neutral': 49.124999070113454,
 'sadness': 7.03720104147294,
 'surprise': 1.2025813650734611}

In [33]:
sum_emotions = 0
for i in emotions_percentage.values():
    sum_emotions += i
sum_emotions

100.00297563697228

In [34]:
blob = TextBlob(article_text)

blob.sentiment

Sentiment(polarity=-0.0023160173160173144, subjectivity=0.39767316017316023)

In [39]:
candidate_labels = ["Politics", "Economy", "Finance"]
classes_class(article_text, candidate_labels, multi_label=True)['scores'], classes_class(article.description, candidate_labels, multi_label=True)['scores']

([0.3441440463066101, 0.2618587613105774, 0.21763548254966736],
 [0.8734523057937622, 0.257985383272171, 0.15992116928100586])

In [40]:
candidate_labels_2 = ["Elections", "International Relations", "Policy Reforms", "Legislation", "Civil Right", "Defense And Security", "Local Governance", "Politics Scandals", "Public Opinion", "Political Parties", "Global Economy", "Stock Market", "Banking", "Real Estate", "Cryptocurrencies", "Personal Finance", "Insurance", "Taxation", "Corporate Finance", "Economic Policies", "Climate Change", "Renewable Energy", "Wildlife", "Pollution", "Sustainable Living", "Natural Disasters", "Agriculture", "Water Resources", "Environment Laws", "Biodiversity"]

classes_class(article_text, candidate_labels_2[:10], multi_label=True)['scores']

[0.4883885383605957,
 0.3972182273864746,
 0.21122926473617554,
 0.20946666598320007,
 0.2053522765636444,
 0.20320242643356323,
 0.20069660246372223,
 0.19288396835327148,
 0.19221828877925873,
 0.17076162993907928]

In [41]:
classes_class(article.description, candidate_labels_2[:10], multi_label=True)['scores']

[0.9211400151252747,
 0.6830674409866333,
 0.2604702413082123,
 0.1896524429321289,
 0.16527467966079712,
 0.1205194815993309,
 0.09266605228185654,
 0.07585158944129944,
 0.017313361167907715,
 0.006902643013745546]

## Download models

In [54]:
emotions_class.model.save_pretrained('./models/emotions_classifier')
emotions_class.tokenizer.save_pretrained('./models/emotions_classifier')

classes_class.model.save_pretrained('./models/classes_classifier')
classes_class.tokenizer.save_pretrained('./models/classes_classifier')

('./models/classes_classifier\\tokenizer_config.json',
 './models/classes_classifier\\special_tokens_map.json',
 './models/classes_classifier\\spm.model',
 './models/classes_classifier\\added_tokens.json',
 './models/classes_classifier\\tokenizer.json')

In [55]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the emotion classification model
emotion_model_path = './models/emotions_classifier'

# Load the zero-shot classification model
classification_model_path = './models/classes_classifier'

In [58]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_path)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_path)
emotions_class = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer, return_all_scores=True)

classification_tokenizer = AutoTokenizer.from_pretrained(classification_model_path)
classification_model = AutoModelForSequenceClassification.from_pretrained(classification_model_path)
classes_class = pipeline("zero-shot-classification", model=classification_model, tokenizer=classification_tokenizer)




In [57]:
classes_class("I am shocked", ['happiness', 'surprise', 'sad'])

{'sequence': 'I am shocked',
 'labels': ['surprise', 'sad', 'happiness'],
 'scores': [0.9425114393234253, 0.050712697207927704, 0.006775845307856798]}

## NewsScraper

In [None]:
article = NewsPlease.from_url("https://www.npr.org/2023/12/22/1221230635/japan-alleged-political-corruption-ldp-slush-fund")