In [1]:
%pip install pandas transformers scipy tqdm


Collecting transformers
  Downloading transformers-4.40.2-py3-none-any.whl.metadata (137 kB)
     ---------------------------------------- 0.0/138.0 kB ? eta -:--:--
     -- ------------------------------------- 10.2/138.0 kB ? eta -:--:--
     -------- ---------------------------- 30.7/138.0 kB 330.3 kB/s eta 0:00:01
     ---------------- -------------------- 61.4/138.0 kB 469.7 kB/s eta 0:00:01
     ------------------------------------ 138.0/138.0 kB 742.7 kB/s eta 0:00:00
Collecting scipy
  Downloading scipy-1.13.0-cp312-cp312-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.6 kB ? eta -:--:--
     ---------------------------------------- 60.6/60.6 kB 3.1 MB/s eta 0:00:00
Collecting tqdm
  Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 0.0/57.6 kB ? eta -:--:--
     ---------------------------------------- 57.6/57.6 kB ? eta 0:00:00
Collecting filelock (from transformers)
  Downloading filelo



In [4]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
from tqdm.notebook import tqdm


In [5]:
# Location of the translated-comments CSV (relative path)
file_path = '../translated_comments.csv'

# Read the file into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(5)


Unnamed: 0,translated_comments
0,South Africa knows very well that Morocco has ...
1,"A year ago, more than a billion dollars evapor..."
2,"Glory be to God, the states of South Africa an..."
3,"South Africa plays on the two ropes, pending t..."
4,From the Corrupt African Congress Party contro...


In [8]:
%pip install torch



Collecting torch
  Downloading torch-2.3.0-cp312-cp312-win_amd64.whl.metadata (26 kB)
Collecting sympy (from torch)
  Downloading sympy-1.12-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Downloading networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting mkl<=2021.4.0,>=2021.1.1 (from torch)
  Downloading mkl-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting intel-openmp==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Downloading intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.2 kB)
Collecting tbb==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Downloading tbb-2021.12.0-py3-none-win_amd64.whl.metadata (1.1 kB)
Collecting mpmath>=0.19 (from sympy->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.3.0-cp312-cp312-win_amd64.whl (159.7 MB)
   ---------------------------------------- 0.0/159.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/159.7 MB ? eta -:--:--
   --------------



In [1]:
import torch
from transformers import AutoTokenizer, RobertaForSequenceClassification

# Checkpoint identifier shared by the tokenizer and the classifier below
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"

# Tokenizer that turns raw text into model inputs
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=MODEL)

# RoBERTa sequence-classification model loaded from the same checkpoint
model = RobertaForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path=MODEL
)





pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [2]:
def polarity_scores_roberta(example, max_length=512):
    """Score one piece of text with the module-level RoBERTa sentiment model.

    Uses the notebook-level ``tokenizer`` and ``model`` objects.

    Parameters
    ----------
    example : str
        Text to classify.
    max_length : int, optional
        Maximum number of tokens handed to the model; longer inputs are
        truncated to this length instead of making the forward pass fail.
        The default of 512 is presumably the input limit of the loaded
        checkpoint -- confirm before switching to a different model.

    Returns
    -------
    dict
        Keys ``'roberta_neg'``, ``'roberta_neu'`` and ``'roberta_pos'``,
        holding the softmax probabilities of logit columns 0, 1 and 2 for
        the single input row.
    """
    encoded_text = tokenizer(
        example,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )

    # Inference only: no_grad avoids building an autograd graph per comment.
    with torch.no_grad():
        output = model(**encoded_text)

    logits = output.logits.detach().cpu().numpy()

    # axis=-1 normalises across the class dimension of each row rather than
    # across the whole array.
    probabilities = softmax(logits, axis=-1)[0]

    return {
        'roberta_neg': probabilities[0],
        'roberta_neu': probabilities[1],
        'roberta_pos': probabilities[2],
    }


In [6]:
# Score every comment, keyed by the row's index label so the scores can be
# matched back to `df` afterwards.
res = {}
for i, text in tqdm(df['translated_comments'].items(), total=len(df)):
    # An empty CSV cell is loaded by pandas as NaN (a float). The tokenizer is
    # expected to reject non-string input with an error that is not a
    # RuntimeError, which would abort the whole run, so skip such rows here.
    if not isinstance(text, str):
        print(f'Skipped index {i}: comment is not text ({text!r})')
        continue
    try:
        res[i] = polarity_scores_roberta(text)
    except RuntimeError as e:
        # Best-effort: report the failing row and keep scoring the rest.
        print(f'Broke for index {i} with error {e}')

# One row per scored comment; the original row label is exposed as 'Id'.
results_df = pd.DataFrame(res).T
results_df = results_df.reset_index().rename(columns={'index': 'Id'})
results_df.head()


  0%|          | 0/948 [00:00<?, ?it/s]

Unnamed: 0,Id,roberta_neg,roberta_neu,roberta_pos
0,0,0.094935,0.449885,0.45518
1,1,0.842448,0.146817,0.010736
2,2,0.01302,0.171196,0.815784
3,3,0.226178,0.748648,0.025175
4,4,0.970052,0.028049,0.0019


In [7]:
# Attach each comment to its scores: results_df['Id'] is matched against the
# index of df
final_df = pd.merge(
    results_df,
    df,
    left_on='Id',
    right_index=True,
)
final_df.head()


Unnamed: 0,Id,roberta_neg,roberta_neu,roberta_pos,translated_comments
0,0,0.094935,0.449885,0.45518,South Africa knows very well that Morocco has ...
1,1,0.842448,0.146817,0.010736,"A year ago, more than a billion dollars evapor..."
2,2,0.01302,0.171196,0.815784,"Glory be to God, the states of South Africa an..."
3,3,0.226178,0.748648,0.025175,"South Africa plays on the two ropes, pending t..."
4,4,0.970052,0.028049,0.0019,From the Corrupt African Congress Party contro...


In [8]:
# Destination CSV for the scored comments (relative path)
output_file_path = '../sentiment_analysis_results.csv'

# Write scores and comments to disk; index=False leaves the row index out
final_df.to_csv(path_or_buf=output_file_path, index=False)

# Preview the first five rows of the frame that was saved
final_df.head(5)


Unnamed: 0,Id,roberta_neg,roberta_neu,roberta_pos,translated_comments
0,0,0.094935,0.449885,0.45518,South Africa knows very well that Morocco has ...
1,1,0.842448,0.146817,0.010736,"A year ago, more than a billion dollars evapor..."
2,2,0.01302,0.171196,0.815784,"Glory be to God, the states of South Africa an..."
3,3,0.226178,0.748648,0.025175,"South Africa plays on the two ropes, pending t..."
4,4,0.970052,0.028049,0.0019,From the Corrupt African Congress Party contro...


In [9]:
# Spot-check: first ten comments next to their three sentiment scores
preview_columns = ['translated_comments', 'roberta_neg', 'roberta_neu', 'roberta_pos']
final_df.loc[:, preview_columns].head(10)


Unnamed: 0,translated_comments,roberta_neg,roberta_neu,roberta_pos
0,South Africa knows very well that Morocco has ...,0.094935,0.449885,0.45518
1,"A year ago, more than a billion dollars evapor...",0.842448,0.146817,0.010736
2,"Glory be to God, the states of South Africa an...",0.01302,0.171196,0.815784
3,"South Africa plays on the two ropes, pending t...",0.226178,0.748648,0.025175
4,From the Corrupt African Congress Party contro...,0.970052,0.028049,0.0019
5,"In fact, when South Africa was under the rule ...",0.090226,0.586994,0.32278
6,Because these countries have comprehensive sys...,0.575419,0.399011,0.025569
7,"For information, Colombia has no camel and no ...",0.225671,0.745329,0.029
8,"There is no peace, no words with the thugs of ...",0.962135,0.036024,0.001841
9,"Thus, future generations will read it in histo...",0.851143,0.14223,0.006627
