In [1]:
''' import libraries
Autotokenizer: to count the number of tokens in a text
pipeline: to use the sentiment analysis model from huggingface
pandas: manipulate dataframes'''

from transformers import AutoTokenizer, pipeline
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
''' load the data'''

path = r'..\..\data\cleaned_output.csv'
news = pd.read_csv(path)
print('the number of articles is: ', len(news))

the number of articles is:  1190


In [3]:
''' load the same tokenizer as the one used to train the model'''

tokenizer = AutoTokenizer.from_pretrained("lxyuan/distilbert-base-multilingual-cased-sentiments-student")

In [15]:
''' define a function that takes a text as input and returns the sentiment label and score
if the text is too long, it will be truncated to 500 tokens'''

def get_sentiment(text):
    encoded_input = tokenizer(
        text, 
        truncation=True, 
        max_length=500, 
        return_tensors=None
    )
    truncated_text = tokenizer.decode(encoded_input["input_ids"])
    sentiment_check = pipeline(
        model="lxyuan/distilbert-base-multilingual-cased-sentiments-student", 
        top_k=3
    )
    sentiment_result = sentiment_check(truncated_text)

    # Extract the first label and score from the first list
    sentiment_label = sentiment_result[0][0]['label']
    sentiment_score = sentiment_result[0][0]['score']
    return sentiment_label, sentiment_score

In [31]:
''' For testing purposes create a short dataframe. Otherwise news_short = news
Assign the sentiment and score to the news_short then filter only the positive news as a new DataFrame'''

news_short = news.head(5).copy()
news_short[['sentiment', 'score']] = news_short['cleaned_content'].apply(lambda x: pd.Series(get_sentiment(x)))
positive_news = news_short[news_short['sentiment'] == 'positive']

In [32]:
positive_news.sort_values(by=['score'], ascending=False)

Unnamed: 0,link,title,content,cleaned_content,sentiment,score
2,https://www.today.com/popculture/travis-kelce-...,Travis Kelce and Chiefs hilariously trade Swif...,"<p class=""styles_content__a8lrE"" data-testid=""...",Brands\nAhead of the team's matchup against t...,positive,0.430348
3,https://www.today.com/food/news/applebees-doll...,Applebee’s brings back fan-favorite menu item ...,"<p class=""styles_content__a8lrE"" data-testid=""...",Brands\nOctober is the start of spooky season...,positive,0.414094
