In [2]:
import pandas as pd
from transformers import pipeline

In [2]:
# Model: Hugging Face text-classification pipeline backed by the
# distilbert-base-uncased-emotion checkpoint.
# return_all_scores=True is passed so that downstream code can iterate over
# the per-label scores of each prediction.
# NOTE(review): newer transformers releases appear to deprecate
# `return_all_scores` in favour of `top_k=None` — confirm against the
# installed version before switching.
classifier = pipeline(
    task="text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    return_all_scores=True,
)

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at bhadresh-savani/distilbert-base-uncased-emotion.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [3]:
# Data: read the spreadsheet of collected videos into a dataframe and
# display it as the cell output.
videos = pd.read_excel(io="data/depth_documents.xlsx")
videos

Unnamed: 0,title,video_id,url,channel,parent_id,parent,depth,lang
0,China Navy to BBC: 'Stay away from islands' - ...,LVeKbEgn50o,https://www.youtube.com/watch?v=LVeKbEgn50o,BBC News,nbBsh46aSz4,https://www.youtube.com/watch?v=nbBsh46aSz4,1,en
1,"Nike, H&M face backlash in China over Uighur s...",qC_qOoBFsMc,https://www.youtube.com/watch?v=qC_qOoBFsMc,DW News,s_eNkqwp_LA,https://www.youtube.com/watch?v=s_eNkqwp_LA,1,en
2,What do you do when someone just doesn't like ...,Y4gly9n9RBo,https://www.youtube.com/watch?v=Y4gly9n9RBo,TEDx Talks,s_eNkqwp_LA,https://www.youtube.com/watch?v=s_eNkqwp_LA,1,en
3,99 Years Later... We Solved It,JFFhD5HeByM,https://www.youtube.com/watch?v=JFFhD5HeByM,Physics Girl,tcKw5jiT6wg,https://www.youtube.com/watch?v=tcKw5jiT6wg,1,en
4,"Elon Musk talks Twitter, Tesla and how his bra...",cdZZpaB2kDM,https://www.youtube.com/watch?v=cdZZpaB2kDM,TED,tcKw5jiT6wg,https://www.youtube.com/watch?v=tcKw5jiT6wg,1,en
...,...,...,...,...,...,...,...,...
14327,The art of focus – a crucial ability | Christi...,xF80HzqvAoA,https://www.youtube.com/watch?v=xF80HzqvAoA,TEDx Talks,4AzpmZ7AjaQ,https://www.youtube.com/watch?v=4AzpmZ7AjaQ,5,en
14328,Why We Fear And Hold Back From What We Want | ...,tFDRXgDUea4,https://www.youtube.com/watch?v=tFDRXgDUea4,TEDx Talks,4AzpmZ7AjaQ,https://www.youtube.com/watch?v=4AzpmZ7AjaQ,5,en
14329,5 steps to designing the life you want | Bill ...,SemHh0n19LA,https://www.youtube.com/watch?v=SemHh0n19LA,TEDx Talks,vj-91dMvQQo,https://www.youtube.com/watch?v=vj-91dMvQQo,5,en
14330,Master Shi Heng Yi – 5 hindrances to self-mast...,4-079YIasck,https://www.youtube.com/watch?v=4-079YIasck,TEDx Talks,vj-91dMvQQo,https://www.youtube.com/watch?v=vj-91dMvQQo,5,en


In [5]:
# Collect every video title as a plain Python string (non-string cells are
# converted with astype(str)) and preview the first ten.
titles = videos["title"].astype(str).tolist()
titles[:10]

["China Navy to BBC: 'Stay away from islands' - BBC News",
 'Nike, H&M face backlash in China over Uighur stance | DW News',
 "What do you do when someone just doesn't like you? | Daryl Davis | TEDxCharlottesville",
 '99 Years Later... We Solved It',
 'Elon Musk talks Twitter, Tesla and how his brain works — live at TED2022',
 'MOFA: BBC is not trusted even in the UK',
 'By following @CNN , we find how they make fake news about Xinjiang',
 'British researcher exposes Western propaganda against China',
 'Do we see reality as it is? | Donald Hoffman',
 'By following @CNN , we find how they make fake news about Xinjiang']

In [6]:
# Emotion analysis for all titles using our model.
# The whole list of titles is passed to the pipeline in a single call.
# NOTE(review): the result is presumably one entry per title, each holding
# a list of {'label', 'score'} dicts (the later cell indexes entries that
# way) — confirm against the installed transformers version.
all_title_emotions = classifier(titles)

In [7]:
# For every title keep only the strongest emotion: its label goes into
# title_emotion_labels and its score into title_emotion_scores, in the same
# order as all_title_emotions.
title_emotion_labels = []
title_emotion_scores = []

for candidates in all_title_emotions:
    # Only strictly positive scores can win; when several share the top
    # score, max() keeps the first one listed.
    positive = [entry for entry in candidates if entry['score'] > 0]
    if positive:
        best = max(positive, key=lambda entry: entry['score'])
        title_emotion_labels.append(best['label'])
        title_emotion_scores.append(best['score'])
    else:
        # Nothing scored above zero: record a missing label with score 0.
        title_emotion_labels.append(None)
        title_emotion_scores.append(0)

In [8]:
# Add the predicted emotion label and its score as new dataframe columns.
# The lists are assigned directly so that values are placed positionally,
# row by row. Wrapping them in pd.Series would instead align on the index,
# which silently yields NaN (or misplaced values) whenever the dataframe's
# index is not the default 0..n-1 range or the lengths differ; a plain list
# of the wrong length raises a ValueError instead.
videos['emotion'] = title_emotion_labels
videos['score'] = title_emotion_scores

In [9]:
# Save the annotated dataframe as CSV, leaving out the dataframe index.
videos.to_csv(path_or_buf="data/depth_videos_emotion.csv", index=False)