In [None]:
# Testing a sarcasm detector from huggingface
from transformers import pipeline
# Sample Arabic sentence used to smoke-test the classifier.
text = "طبعا الجو جميل جداً اليوم، ممطر وبارد وأنا أحب أن أتمشى تحت المطر بدون مظلة!"

# Build a text-classification pipeline backed by the AraBERT sarcasm checkpoint.
sarcasm_detector = pipeline(
    task="text-classification",
    model="MohamedGalal/arabert-sarcasm-detector",
)

# Run the classifier on the sample and show the raw list of label/score dicts.
result = sarcasm_detector(text)
print(result)


[{'label': 'not sarcastic', 'score': 0.9997287392616272}]


In [None]:
# Testing Gemini Flash 2.0 API with LangChain
from langchain_google_genai import ChatGoogleGenerativeAI

def analyze_sarcasm(text, model=None):
    """Ask a Gemini chat model whether an Arabic text is sarcastic.

    The text is embedded in an Arabic prompt that instructs the model to
    answer with only 'ساخر' (sarcastic) or 'غير ساخر' (not sarcastic).

    Args:
        text: The text to classify; it is interpolated into the prompt as-is.
        model: Optional chat model exposing ``invoke(prompt)`` whose result
            has a ``content`` attribute. When omitted, a
            ``ChatGoogleGenerativeAI`` client for ``gemini-2.0-flash`` is
            created with ``temperature=0``, using the API key from the
            ``GOOGLE_API_KEY`` environment variable. Pass a pre-built client
            to avoid re-creating it on every call.

    Returns:
        The ``content`` of the model response, unmodified.

    Raises:
        RuntimeError: If no ``model`` is given and ``GOOGLE_API_KEY`` is
            unset or empty.
    """
    if model is None:
        # Local import keeps this cell self-contained in the notebook.
        import os

        # SECURITY: the API key must never be committed in the notebook.
        # Any key that was ever hardcoded here should be treated as leaked
        # and revoked in the Google console.
        api_key = os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            raise RuntimeError(
                "GOOGLE_API_KEY environment variable is not set; "
                "export it before calling analyze_sarcasm()."
            )

        # Initialize the LangChain Gemini model
        model = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            temperature=0,
            api_key=api_key,
        )

    # Define prompt
    prompt = f"هل النص التالي ساخر؟ '{text}' أجب فقط بـ 'ساخر' أو 'غير ساخر'."

    # Get response
    response = model.invoke(prompt)

    return response.content  # Extract text response

# Example usage: classify one sample Arabic sentence with analyze_sarcasm()
# and print the label text returned by the model.
text = "طبعا الجو جميل جداً اليوم، ممطر وبارد وأنا أحب أن أتمشى تحت المطر بدون مظلة!"
result = analyze_sarcasm(text)
print(f"Sarcasm Detection Result: {result}")


Sarcasm Detection Result: ساخر


It seems that the sarcasm models currently available on Hugging Face do not work well, so let's use the Gemini API to get the sarcasm labels instead.

In [None]:
import time
from tqdm.notebook import tqdm

# Rate-limit and retry settings for the API calls made below.
REQUESTS_PER_BATCH = 15    # number of requests sent between pauses
BATCH_PAUSE_SECONDS = 60   # pause length after each batch, also used between retries
MAX_ATTEMPTS = 3           # tries per transcript before the run is aborted
PARTIAL_RESULTS_PATH = "sarcasm_partial.csv"  # written only if the run is cut short

episode_sarcasm = []

try:
    for i, transcript in enumerate(tqdm(df['episode_transcript'], desc="Analyzing sarcasm")):
        # Pause after every batch of requests to respect API rate limits
        if i > 0 and i % REQUESTS_PER_BATCH == 0:
            print(f"Pausing for {BATCH_PAUSE_SECONDS} seconds after processing {i} episodes...")
            time.sleep(BATCH_PAUSE_SECONDS)

        # Retry a failing request a bounded number of times; the last failure
        # is re-raised so errors are never silently turned into labels.
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                sarcasm = analyze_sarcasm(transcript)
                break
            except Exception as exc:
                if attempt == MAX_ATTEMPTS:
                    raise
                print(
                    f"Episode {i}: attempt {attempt} failed ({exc!r}); "
                    f"retrying in {BATCH_PAUSE_SECONDS} seconds..."
                )
                time.sleep(BATCH_PAUSE_SECONDS)

        episode_sarcasm.append(sarcasm)
except BaseException:
    # Also covers a manual kernel interrupt: keep the labels gathered so far
    # instead of losing them, then let the original error propagate.
    if episode_sarcasm:
        (
            df.iloc[:len(episode_sarcasm)]
            .assign(sarcasm=episode_sarcasm)
            .to_csv(PARTIAL_RESULTS_PATH, index=False)
        )
        print(f"Run interrupted; saved {len(episode_sarcasm)} labels to {PARTIAL_RESULTS_PATH}")
    raise

# All transcripts were labelled: attach the labels and persist the full table.
df['sarcasm'] = episode_sarcasm
df.to_csv("sarcasm.csv", index=False)

print(f"Sarcasm analysis completed for {len(episode_sarcasm)} episodes and saved to sarcasm.csv")

Analyzing sarcasm:   0%|          | 0/426 [00:00<?, ?it/s]

Pausing for 60 seconds after processing 15 episodes...
Pausing for 60 seconds after processing 30 episodes...
Pausing for 60 seconds after processing 45 episodes...
Pausing for 60 seconds after processing 60 episodes...
Pausing for 60 seconds after processing 75 episodes...
Pausing for 60 seconds after processing 90 episodes...
Pausing for 60 seconds after processing 105 episodes...
Pausing for 60 seconds after processing 120 episodes...
Pausing for 60 seconds after processing 135 episodes...
Pausing for 60 seconds after processing 150 episodes...
Pausing for 60 seconds after processing 165 episodes...
Pausing for 60 seconds after processing 180 episodes...
Pausing for 60 seconds after processing 195 episodes...
Pausing for 60 seconds after processing 210 episodes...
Pausing for 60 seconds after processing 225 episodes...
Pausing for 60 seconds after processing 240 episodes...
Pausing for 60 seconds after processing 255 episodes...
Pausing for 60 seconds after processing 270 episodes..