In [40]:
import pandas as pd
import numpy as np
from transformers import pipeline
import torch

In [41]:
def sentiment_analysis(df: pd.DataFrame) -> pd.DataFrame:
    sentiment_analyzer = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")


    def analyze_sentiment(text):
        try:
            result = sentiment_analyzer(text)  # Limit text length for performance
            return result[0]['label']
        except Exception as e:
            #print(f"Error analyzing sentiment for text: {text}\n{e}")
            return "Neutral"

    sentiments = [analyze_sentiment(text) for text in df['text']]
    df['sentiment_score'] = sentiments

    return df

In [42]:
df = pd.read_csv('/home/eder/projects/big-data-management/Hackathon_GESTDB/data/api_data/comments_data.csv')

In [43]:
df.tail()

Unnamed: 0,id,id_video,text,published_at,like_count,is_possitive
29099,Ugws8zwuN7wYJTv6dpR4AaABAg,cU2dZz18P0c,To think these people reproduce!,2025-10-02T03:41:36Z,0.0,1
29100,UgzW-5dgfjsjNhSCu_p4AaABAg,cU2dZz18P0c,Examples of why people should have to pass a t...,2025-10-02T01:07:59Z,0.0,0
29101,UgwKfm6NsRRPDhhuhaR4AaABAg,cU2dZz18P0c,"The sun is moving, not the earth.",2025-10-01T12:40:57Z,0.0,0
29102,Ugwp8OeNrHcxyLn3Dth4AaABAg,cU2dZz18P0c,364 and one quarter.,2025-10-01T12:38:39Z,0.0,1
29103,UgyIGNBx2DkIitc7Bd54AaABAg,cU2dZz18P0c,365 and 1/4 days,2025-10-01T05:06:35Z,0.0,0


In [44]:
sent = sentiment_analysis(df)

Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (800 > 512). Running this sequence through the model will result in indexing errors


In [45]:
sent.tail()

Unnamed: 0,id,id_video,text,published_at,like_count,is_possitive,sentiment_score
29099,Ugws8zwuN7wYJTv6dpR4AaABAg,cU2dZz18P0c,To think these people reproduce!,2025-10-02T03:41:36Z,0.0,1,Positive
29100,UgzW-5dgfjsjNhSCu_p4AaABAg,cU2dZz18P0c,Examples of why people should have to pass a t...,2025-10-02T01:07:59Z,0.0,0,Neutral
29101,UgwKfm6NsRRPDhhuhaR4AaABAg,cU2dZz18P0c,"The sun is moving, not the earth.",2025-10-01T12:40:57Z,0.0,0,Neutral
29102,Ugwp8OeNrHcxyLn3Dth4AaABAg,cU2dZz18P0c,364 and one quarter.,2025-10-01T12:38:39Z,0.0,1,Neutral
29103,UgyIGNBx2DkIitc7Bd54AaABAg,cU2dZz18P0c,365 and 1/4 days,2025-10-01T05:06:35Z,0.0,0,Neutral


In [46]:
# mapping
mapping = {'Very Negative': 1, 'Negative': 2, 'Neutral': 3, 'Positive': 4, 'Very Positive': 5}

In [47]:
df['sentiment_score'] = df['sentiment_score'].map(mapping)

In [48]:
sent.tail()

Unnamed: 0,id,id_video,text,published_at,like_count,is_possitive,sentiment_score
29099,Ugws8zwuN7wYJTv6dpR4AaABAg,cU2dZz18P0c,To think these people reproduce!,2025-10-02T03:41:36Z,0.0,1,4
29100,UgzW-5dgfjsjNhSCu_p4AaABAg,cU2dZz18P0c,Examples of why people should have to pass a t...,2025-10-02T01:07:59Z,0.0,0,3
29101,UgwKfm6NsRRPDhhuhaR4AaABAg,cU2dZz18P0c,"The sun is moving, not the earth.",2025-10-01T12:40:57Z,0.0,0,3
29102,Ugwp8OeNrHcxyLn3Dth4AaABAg,cU2dZz18P0c,364 and one quarter.,2025-10-01T12:38:39Z,0.0,1,3
29103,UgyIGNBx2DkIitc7Bd54AaABAg,cU2dZz18P0c,365 and 1/4 days,2025-10-01T05:06:35Z,0.0,0,3


In [49]:
sent.to_csv('/home/eder/projects/big-data-management/Hackathon_GESTDB/data/api_data/comments_data.csv', index=False)