In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import logging
import transformers
import pandas as pd

# Raise the transformers library's log threshold to "error" so that routine
# info/warning messages do not clutter the cell output.
transformers.logging.set_verbosity_error()

# Default checkpoint and per-chunk token limit used by analyze_sentiment.
_SENTIMENT_MODEL_NAME = "Walid-Ahmed/arabic-sentiment-model"
_SENTIMENT_MAX_LENGTH = 512

# (model, tokenizer) pairs keyed by checkpoint name. Re-running this cell
# resets the cache, which simply forces one fresh load on the next call.
_SENTIMENT_MODEL_CACHE = {}


def _load_sentiment_model(model_name):
    """Return the ``(model, tokenizer)`` pair for ``model_name``, loading it at most once."""
    if model_name not in _SENTIMENT_MODEL_CACHE:
        _SENTIMENT_MODEL_CACHE[model_name] = (
            AutoModelForSequenceClassification.from_pretrained(model_name),
            AutoTokenizer.from_pretrained(model_name),
        )
    return _SENTIMENT_MODEL_CACHE[model_name]


def analyze_sentiment(text, model_name=_SENTIMENT_MODEL_NAME, max_length=_SENTIMENT_MAX_LENGTH):
    """Score the sentiment of an Arabic text with a pretrained classifier.

    The text is tokenized once. If it does not fit into ``max_length`` tokens
    it is split into consecutive chunks; every chunk is wrapped in the same
    special tokens the tokenizer puts around a whole sequence, so each model
    call sees a well-formed input of at most ``max_length`` tokens. The
    per-chunk scores are averaged with equal weight.

    Args:
        text: The text to score.
        model_name: Checkpoint name passed to ``from_pretrained``. The model
            and tokenizer are loaded on first use and reused afterwards.
        max_length: Maximum number of tokens (special tokens included) sent
            to the model in one forward pass.

    Returns:
        float: Softmax probability of class index 1, averaged over chunks.
        NOTE(review): index 1 is assumed to be the "positive" label -- confirm
        against the checkpoint's ``id2label`` mapping.

    Raises:
        ValueError: If ``max_length`` leaves no room for text tokens once the
            tokenizer's special tokens are accounted for.
    """
    model, tokenizer = _load_sentiment_model(model_name)

    # Plain Python lists (no return_tensors) are easier to slice and re-wrap.
    # The special-tokens mask marks which positions the tokenizer added itself.
    encoded = tokenizer(text, return_special_tokens_mask=True)
    input_ids = list(encoded["input_ids"])
    special_mask = list(encoded["special_tokens_mask"])
    token_type_ids = encoded.get("token_type_ids")
    if token_type_ids is not None:
        token_type_ids = list(token_type_ids)

    # Measure the runs of tokenizer-added special tokens at both ends.
    total = len(input_ids)
    lead = 0
    while lead < total and special_mask[lead]:
        lead += 1
    trail = 0
    while trail < total - lead and special_mask[total - 1 - trail]:
        trail += 1

    body_len = total - lead - trail
    budget = max_length - lead - trail  # text tokens that fit in one chunk
    if budget < 1:
        raise ValueError(
            f"max_length={max_length} leaves no room for text tokens "
            f"next to {lead + trail} special tokens"
        )

    def wrap(seq, start, stop):
        # Leading specials + one slice of the text body + trailing specials.
        return seq[:lead] + seq[lead + start:lead + stop] + seq[total - trail:]

    scores = []
    # max(body_len, 1) guarantees one pass even when the text yields no tokens,
    # in which case the chunk is just the special tokens (as the tokenizer
    # itself produced).
    for start in range(0, max(body_len, 1), budget):
        stop = min(start + budget, body_len)
        chunk_ids = wrap(input_ids, start, stop)
        inputs_chunk = {
            'input_ids': torch.tensor([chunk_ids], dtype=torch.long),
            # Nothing is padded, so every position is attended to.
            'attention_mask': torch.ones((1, len(chunk_ids)), dtype=torch.long),
        }
        if token_type_ids is not None:
            inputs_chunk['token_type_ids'] = torch.tensor(
                [wrap(token_type_ids, start, stop)], dtype=torch.long
            )

        with torch.no_grad():
            outputs = model(**inputs_chunk)

        probabilities = torch.softmax(outputs.logits, dim=1)
        scores.append(probabilities[0][1].item())

    # Always hand back a built-in float, whether one chunk or many were scored.
    return float(np.mean(scores))

# Example usage: score a single short Arabic sentence and print the value
# returned by analyze_sentiment (the probability it assigns to class index 1).
text = "هذا نص جيد"
result = analyze_sentiment(text)
print(f"Sentiment Analysis Result: {result}")

Sentiment Analysis Result: 0.9936437606811523


In [None]:
# Score every episode transcript (tqdm shows progress) and store the results
# as a new column next to the transcripts.
sentiment_scores = [
    analyze_sentiment(transcript)
    for transcript in tqdm(df['episode_transcript'], desc="Analyzing sentiment")
]

df['sentiment_score'] = sentiment_scores
df.sample(10)

Analyzing sentiment:   0%|          | 0/426 [00:00<?, ?it/s]

Unnamed: 0,episode_transcript,creator,length,tags,sentiment_score
394,يا نجف بنّور، صديقي الإنسان. صديقي الإنسان!\nا...,Fi_Al_Hadaraa,00:17:23,,0.176732
209,في نص شهر مارس اللي فات\nسيدة إندونيسية اسمها ...,Al_Mokhbir_Al_Eqtisadi,00:13:40,,0.296271
21,اللي بيملك البطاريات هو اللي بيملك\nالمستقبل ا...,Al_Mokhbir_Al_Eqtisadi,00:17:23,,0.747123
90,المظاهرات اللي بيعملها الشباب الامريكي\nحاليا ...,Al_Mokhbir_Al_Eqtisadi,00:18:23,,0.238708
217,اكبر شبكه لبنوك الطعام في بريطانيا هي\nمؤسسه ا...,Al_Mokhbir_Al_Eqtisadi,00:26:09,,0.171293
172,يوم 8 سبتمبر 2022\nتم إعلان وفاة ملكة بريطانيا...,Al_Mokhbir_Al_Eqtisadi,00:14:54,,0.279408
201,كل شيء ممكن يجي في خيالك واسوا منه الجيش\nالاس...,Al_Mokhbir_Al_Eqtisadi,00:18:17,,0.144687
280,3.906: حسبي الله ونعم الوكيل!\n5.64: أنا مش عا...,Da7ee7,,,0.076963
100,في فبراير 2023\nبنك الاستثمار الصيني الشهير\nC...,Al_Mokhbir_Al_Eqtisadi,00:13:58,,0.171248
423,ها؟ جاب صورة؟\nولا صوت!\nبأقولّك إيه، أنا مش م...,Fi_Al_Hadaraa,00:21:08,,0.616445


In [None]:
# Persist df as it stands at this point in the notebook (including any
# sentiment_score column added above) to CSV; index=False leaves the
# DataFrame index out of the file.
df.to_csv("walid_sentiment_cleaned_data.csv", index=False)