In [42]:
import json
import io
import langid
from flair.models import TextClassifier
from flair.data import Sentence
from transformers import pipeline

# Load English sentiment classifier (flair)
eclassifier = TextClassifier.load('en-sentiment')

# Load Arabic sentiment classifier (Hugging Face text-classification pipeline)
sa = pipeline('text-classification', model='CAMeL-Lab/bert-base-arabic-camelbert-ca-sentiment')


def _format_score(score):
    """Render a fractional score as a percentage string, e.g. ``0.9731 -> "97.31 %"``."""
    return f"{round(score * 100, 2)} %"


def _analyze_reply(reply):
    """Detect the language of *reply* and score its sentiment.

    Args:
        reply: Non-empty reply text.

    Returns:
        A ``(language, sentiment)`` tuple. ``language`` is the code returned by
        ``langid.classify``; ``sentiment`` is a dict with ``"Label"`` and
        ``"Score"`` keys. Languages other than ``'en'`` and ``'ar'`` get the
        label ``"Unknown"`` and a ``None`` score.

    Raises:
        Whatever the underlying classifiers raise; the caller decides how to
        report a failed reply.
    """
    # langid also returns a confidence value, which this script does not use.
    language, _ = langid.classify(reply)

    if language == 'en':
        sentence = Sentence(reply)
        eclassifier.predict(sentence)  # attaches the predicted labels to `sentence`
        top_label = sentence.labels[0]
        sentiment = {
            "Label": top_label.value,
            "Score": _format_score(top_label.score),
        }
    elif language == 'ar':
        # The pipeline returns a list with one prediction per input text.
        # truncation=True asks the tokenizer to cut over-long replies down to
        # its configured maximum length instead of letting the model raise on
        # them (which would drop the reply from the results).
        prediction = sa(reply, truncation=True)[0]
        sentiment = {
            "Label": prediction['label'],
            "Score": _format_score(prediction['score']),
        }
    else:
        sentiment = {
            "Label": "Unknown",
            "Score": None,
        }

    return language, sentiment


# Initialize a dictionary to store results
results = {"tweets": []}

# Only parsing needs the file handle; close it before the slow inference loop.
with open('twitter_replies(597).json', 'r', encoding='utf-8') as json_file:
    tweets = json.load(json_file)

reply_texts = [tweet['reply text'] for tweet in tweets]
total_replies = len(reply_texts)
failed_count = 0

for i, reply in enumerate(reply_texts):
    # Skip replies with no usable text (missing/null/non-string or blank).
    if not isinstance(reply, str) or not reply.strip():
        continue

    try:
        language, sentiment = _analyze_reply(reply)
    except Exception as e:
        # Best-effort batch job: report the failure and keep going so one bad
        # reply does not abort the whole run.
        failed_count += 1
        print(f"Error processing sentiment for: {reply}. Error: {e}")
        continue

    # "ID" is the 1-based position in the input file, so IDs of skipped or
    # failed replies are simply absent from the output.
    results["tweets"].append({
        "ID": i + 1,
        "Text": reply,
        "Language": language,
        "sentiment": sentiment,
    })

    # Print progress
    print(f"Processed {i + 1} of {total_replies} replies.")

# Save the results to a JSON file
with open('sentiment_results.json', 'w', encoding='utf-8') as json_output:
    json.dump(results, json_output, ensure_ascii=False, indent=2)

if failed_count:
    # Make dropped replies visible without scrolling through the progress log.
    print(f"{failed_count} of {total_replies} replies failed and were not saved.")

print("Processing completed.")

Processed 1 of 597 replies.
Processed 2 of 597 replies.
Processed 3 of 597 replies.
Processed 4 of 597 replies.
Processed 5 of 597 replies.
Processed 6 of 597 replies.
Processed 7 of 597 replies.
Processed 8 of 597 replies.
Processed 9 of 597 replies.
Processed 10 of 597 replies.
Processed 11 of 597 replies.
Processed 12 of 597 replies.
Processed 13 of 597 replies.
Processed 14 of 597 replies.
Processed 15 of 597 replies.
Processed 16 of 597 replies.
Processed 17 of 597 replies.
Processed 18 of 597 replies.
Processed 19 of 597 replies.
Processed 20 of 597 replies.
Processed 21 of 597 replies.
Processed 22 of 597 replies.
Processed 23 of 597 replies.
Processed 24 of 597 replies.
Processed 25 of 597 replies.
Processed 26 of 597 replies.
Processed 27 of 597 replies.
Processed 28 of 597 replies.
Processed 29 of 597 replies.
Processed 30 of 597 replies.
Processed 31 of 597 replies.
Processed 32 of 597 replies.
Processed 33 of 597 replies.
Processed 34 of 597 replies.
Processed 35 of 597 rep