In [None]:
pip install transformers sentencepiece

In [8]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import torch.nn.functional as F

In [12]:
# Hugging Face checkpoint used for sentiment classification.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the matching tokenizer and classifier, then move the model to `device`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)

In [None]:
# Smoke test: tokenize a single sample sentence into PyTorch tensors.
# NOTE(review): `text` and `inputs` are reassigned by the scoring loop further
# down, so nothing appears to depend on the values set here.
text = "This is amazing!"
inputs = tokenizer(
    text,
    truncation=True,
    return_tensors="pt",
)

In [7]:
# Load the transcript segments and append the four result columns, each
# initialised to None so that rows which never get scored stay visibly empty.
merged = pd.read_csv("../transcribe/data.csv").assign(
    sentiment_score=None,
    sentiment_confidence=None,
    hook_score=None,
    hook_confidence=None,
)

In [13]:
# Score every transcript segment with the sentiment classifier.
#
# For each row, `sentiment_score` receives the 1-based index of the most
# probable class and `sentiment_confidence` the softmax probability of that
# class, rounded to 4 decimals.
BATCH_SIZE = 32  # segments per forward pass; lower this if memory runs out

# Empty CSV fields are read as NaN, which the tokenizer rejects. Skip those
# rows so they keep their None placeholders instead of aborting the whole run.
scorable = merged.index[merged['text'].notna()]

for start in range(0, len(scorable), BATCH_SIZE):
    batch_idx = scorable[start:start + BATCH_SIZE]
    # str() only matters for stray non-string cells; real strings are unchanged.
    batch_texts = merged.loc[batch_idx, 'text'].astype(str).tolist()

    # padding=True pads to the longest text in the batch; the attention mask
    # returned by the tokenizer keeps the padding out of the prediction.
    inputs = tokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = F.softmax(outputs.logits, dim=1)

    # max() yields the winning probability and its class index in one call.
    confidences, classes = probs.max(dim=1)
    for idx, pred_class, confidence in zip(batch_idx, classes.tolist(), confidences.tolist()):
        merged.at[idx, 'sentiment_score'] = pred_class + 1  # shift 0-based index to 1-based score
        merged.at[idx, 'sentiment_confidence'] = round(confidence, 4)

In [14]:
# Preview the first rows. As the last expression of the cell, the frame is
# shown through the notebook's rich display instead of plain print() text.
merged.head()

   start    end                                               text  \
0   0.00   3.24   The following is a conversation with Jack Wea...   
1   3.24   5.32                      anthropologist and historian,   
2   5.32   9.80   specializing in Genghis Khan and the Mongol E...   
3   9.80  12.40      He has written a legendary book on this topic   
4  12.40  15.40   titled Genghis Khan and the Making of the Mod...   

      speaker sentiment_score sentiment_confidence hook_score hook_confidence  
0  SPEAKER_01               4               0.4115       None            None  
1  SPEAKER_01               5                0.336       None            None  
2  SPEAKER_01               4               0.4449       None            None  
3  SPEAKER_01               5               0.6852       None            None  
4  SPEAKER_01               5               0.3773       None            None  


In [15]:
# Persist the scored segments next to the notebook; the DataFrame index is
# left out of the file.
merged.to_csv(
    path_or_buf="analysis.csv",
    index=False,
)