In [1]:
import pandas as pd
import numpy as np
from transformers import pipeline
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def sentiment_analysis(df: pd.DataFrame) -> pd.DataFrame:
    sentiment_analyzer = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

    def analyze_sentiment(text):
        try:
            result = sentiment_analyzer(text)  # Limit text length for performance
            return result[0]['score']
        except Exception as e:
            #print(f"Error analyzing sentiment for text: {text}\n{e}")
            return 0

    sentiments = [(analyze_sentiment(text) >= 0.4) * 1  for text in df['text']]
    df['sentiment'] = sentiments

    return df

In [3]:
df = pd.read_csv('/home/eder/projects/big-data-management/Hackathon_GESTDB/datos/datos_raw_jsons/comments_data_final.csv')

In [4]:
df.tail()

Unnamed: 0,id,id_video,text,published_at,like_count
29099,Ugws8zwuN7wYJTv6dpR4AaABAg,cU2dZz18P0c,To think these people reproduce!,2025-10-02T03:41:36Z,0.0
29100,UgzW-5dgfjsjNhSCu_p4AaABAg,cU2dZz18P0c,Examples of why people should have to pass a t...,2025-10-02T01:07:59Z,0.0
29101,UgwKfm6NsRRPDhhuhaR4AaABAg,cU2dZz18P0c,"The sun is moving, not the earth.",2025-10-01T12:40:57Z,0.0
29102,Ugwp8OeNrHcxyLn3Dth4AaABAg,cU2dZz18P0c,364 and one quarter.,2025-10-01T12:38:39Z,0.0
29103,UgyIGNBx2DkIitc7Bd54AaABAg,cU2dZz18P0c,365 and 1/4 days,2025-10-01T05:06:35Z,0.0


In [5]:
sent = sentiment_analysis(df)

Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Token indices sequence length is longer than the specified maximum sequence length for this model (774 > 512). Running this sequence through the model will result in indexing errors


In [6]:
sent.tail()

Unnamed: 0,id,id_video,text,published_at,like_count,sentiment
29099,Ugws8zwuN7wYJTv6dpR4AaABAg,cU2dZz18P0c,To think these people reproduce!,2025-10-02T03:41:36Z,0.0,1
29100,UgzW-5dgfjsjNhSCu_p4AaABAg,cU2dZz18P0c,Examples of why people should have to pass a t...,2025-10-02T01:07:59Z,0.0,0
29101,UgwKfm6NsRRPDhhuhaR4AaABAg,cU2dZz18P0c,"The sun is moving, not the earth.",2025-10-01T12:40:57Z,0.0,0
29102,Ugwp8OeNrHcxyLn3Dth4AaABAg,cU2dZz18P0c,364 and one quarter.,2025-10-01T12:38:39Z,0.0,1
29103,UgyIGNBx2DkIitc7Bd54AaABAg,cU2dZz18P0c,365 and 1/4 days,2025-10-01T05:06:35Z,0.0,0


In [7]:
sent.to_csv('/home/eder/projects/big-data-management/Hackathon_GESTDB/datos/datos_raw_jsons/comments_data_sentiment.csv', index=False)