In [None]:
# Cell 1 — install (run once in the notebook)
!pip install -q transformers accelerate datasets sentencepiece scikit-learn tqdm
# (No API keys, no paid services required)


In [None]:
# Cell 2 — imports + model selection
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from datasets import Dataset
import random, json, copy, math, time
from sklearn.metrics import f1_score, accuracy_score
from tqdm.auto import tqdm

# Free Hugging Face checkpoints used in this notebook.
#   TASK_LM_NAME    — small model for the *task* (classification via generation)
#   REFLECT_LM_NAME — reflection model (the same checkpoint here)
# With a GPU and a need for more power, "google/flan-t5-base" can be used instead.
TASK_LM_NAME = REFLECT_LM_NAME = "google/flan-t5-small"




In [None]:
# Cell 3 — load tokenizers & models (the first run downloads the checkpoints from HF)
def _build_text2text_pipeline(checkpoint):
    """Load the tokenizer and seq2seq model for `checkpoint` and wrap them in a pipeline.

    Returns the tuple (tokenizer, model, pipeline).
    """
    tok = AutoTokenizer.from_pretrained(checkpoint)
    lm = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    # NOTE(review): "is_loaded" does not look like a standard model attribute, so this
    # probably always resolves to device_map=None — confirm the intended device handling.
    placement = "auto" if hasattr(lm, "is_loaded") else None
    pipe = pipeline("text2text-generation", model=lm, tokenizer=tok, device_map=placement)
    return tok, lm, pipe


print("Loading models (this can take a minute)...")
tokenizer_task, model_task, task_pipe = _build_text2text_pipeline(TASK_LM_NAME)
tokenizer_ref, model_ref, reflect_pipe = _build_text2text_pipeline(REFLECT_LM_NAME)

# Shared generation settings meant to reduce generation noise: no sampling,
# one returned sequence, max_length of 256.
GEN_KW = dict(max_length=256, do_sample=False, num_return_sequences=1)
print("Models ready.")


In [None]:
import getpass
import os

# SECURITY: API keys must never be hard-coded in a notebook — they leak through version
# control and shared copies. The key that was previously committed in this cell has to
# be treated as compromised and revoked with the provider.
# Reuse a key that is already exported in the environment; otherwise prompt for it
# without echoing the input.
if not os.environ.get("OPENROUTER_API_KEY"):
    os.environ["OPENROUTER_API_KEY"] = getpass.getpass("Enter your OpenRouter API key: ")

# Deliberately blanked, as in the original cell.
# NOTE(review): presumably so that no OpenAI key is picked up by accident — confirm.
os.environ["OPENAI_API_KEY"] = ""


In [None]:
import litellm
import os

# Point LiteLLM at the OpenRouter endpoint and hand it the key stored in the
# environment (the lookup yields None when OPENROUTER_API_KEY is not set).
litellm.api_base = "https://openrouter.ai/api/v1"
litellm.api_key = os.environ.get("OPENROUTER_API_KEY")

print("LiteLLM + OpenRouter configured.")


LiteLLM + OpenRouter configured.


In [None]:
import pandas as pd

# Load the labelled CSV from an absolute path under /content.
# NOTE(review): the cell that uploads this file appears later in the notebook, so on a
# fresh runtime this read presumably fails unless the file is already present.
df = pd.read_csv("/content/final_telugu_multilabel.csv")

def row_to_answer(row):
    """Render the six manifestation labels of `row` as a single answer string.

    `row` must support lookup by column name. The result has the form
    "stereotype=<v>, vilification=<v>, ..." with the fields in a fixed order and
    each value formatted the way an f-string renders it.
    """
    label_columns = (
        "stereotype",
        "vilification",
        "dehumanization",
        "extreme_language",
        "lack_of_empathy",
        "invalidation",
    )
    return ", ".join(f"{column}={row[column]}" for column in label_columns)

# One record per CSV row: the raw text, its gold label string, and an empty
# "additional_context" mapping (flagged by the original author as a required fix).
dataset = [
    {
        "input": record["text"],
        "answer": row_to_answer(record),
        "additional_context": {},
    }
    for _, record in df.iterrows()
]

# Fixed, unshuffled split: the first 50 records train, the following 10 validate.
trainset, valset = dataset[:50], dataset[50:60]

len(trainset), len(valset)


(50, 10)

In [None]:
# Colab-only step: `files.upload()` is used here to bring the CSV into the runtime.
# NOTE(review): the recorded output shows the upload being saved with a " (1)" suffix,
# which suggests a file of the same name was already present — confirm that later
# reads pick up the intended copy.
from google.colab import files
uploaded = files.upload()


Saving final_telugu_multilabel.csv to final_telugu_multilabel (1).csv


In [None]:
import pandas as pd

# Read the CSV through a path relative to the current working directory (this rebinds
# `df`) and preview the first rows as the cell output.
df = pd.read_csv("final_telugu_multilabel.csv")
df.head()


Unnamed: 0,text,translated_te,lang,id,polarization,political,racial/ethnic,religious,gender/sexual,other,stereotype,vilification,dehumanization,extreme_language,lack_of_empathy,invalidation
0,عاجل | حسب صحيفة ليكيب الفرنسية: تم تأجيل كأس ...,"ముఖ్యమైన | ఫ్రెంచ్ వార్తాపత్రిక ""లీకిప్"" ప్రకా...",arb,arb_c014938e00ef553fb45a831b480c9d6d,0,0,0,0,0,0,0,0,0,0,0,0
1,أسعار الفواتير ال بعد الزيادة الجديدة ايه يال...,"కొత్త ధరల ప్రకారం బిల్లుల ధరలు ఎలా ఉన్నాయి, ప్...",arb,arb_f425bc7f89e76e3953a112f5df89c5d5,0,0,0,0,0,0,0,0,0,0,0,0
2,شجار بين لاعبي الشباب السعودي والزمالك المصري ...,సౌదీ యువ ఆటగాళ్లు మరియు ఈజిప్టు జెమాల్క్ మధ్య ...,arb,arb_ea240083705403ad1d839f4c337894f0,0,0,0,0,0,0,0,0,0,0,0,0
3,أرقام نيمار مع سانتوس خلال 3 أسابيع أفضل من 17...,"నైమార, శాంటోస్‌తో ఆ 3 వారాల్లో సాధించిన విజయాల...",arb,arb_d380557a1e7c162fbf0bf8bb8cb7c374,0,0,0,0,0,0,0,0,0,0,0,0
4,"""ديبسيك"".. زلزال عرضي أم تحول استراتيجي في أسه...","""డైబైస్"" - కృత్రిమ మేధస్సు (AI) స్టాక్‌లలో ఒక ...",arb,arb_8421931dbf5d17ebca7533e75a459292,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
import pandas as pd

# Reload the labelled CSV from the absolute /content path; this rebinds `df`.
df = pd.read_csv("/content/final_telugu_multilabel.csv")

def row_to_answer(row):
    """Build the gold answer string for one row.

    Looks up the six manifestation columns on `row` (by name, in a fixed order)
    and joins them as "name=value" pairs separated by ", ".
    """
    parts = []
    for name in (
        "stereotype",
        "vilification",
        "dehumanization",
        "extreme_language",
        "lack_of_empathy",
        "invalidation",
    ):
        parts.append(f"{name}={row[name]}")
    return ", ".join(parts)

# Build one record per CSV row. "additional_context" is included (empty) so these
# records have the same shape as the ones built by the earlier copy of this cell, where
# the key is marked as a required fix; without it, running this cell replaces the
# dataset with records that lack the key.
dataset = []

for _, row in df.iterrows():
    dataset.append({
        "input": row["text"],
        "answer": row_to_answer(row),
        "additional_context": {},
    })

# small train/dev split (unshuffled): first 80 records train, next 20 validate
trainset = dataset[:80]
valset = dataset[80:100]

len(trainset), len(valset)


(80, 20)

In [None]:
# Baseline (seed) prompt for the classifier. The leading and trailing newlines are
# part of the prompt text and are kept exactly.
seed_prompt = (
    "\n"
    "You are a polarization classifier.\n"
    "\n"
    "Given a text, output:\n"
    "stereotype=0/1, vilification=0/1, dehumanization=0/1,\n"
    "extreme_language=0/1, lack_of_empathy=0/1, invalidation=0/1\n"
)
# seed_prompt = """
# You are a multilingual social media moderation model for detecting manifestations of polarization.

# TASK:
# Given a text snippet, classify whether it contains the following manifestations:
# 1. stereotype (0 or 1)
# 2. vilification (0 or 1)
# 3. dehumanization (0 or 1)
# 4. extreme_language (0 or 1)
# 5. lack_of_empathy (0 or 1)
# 6. invalidation (0 or 1)

# OUTPUT FORMAT:
# Return labels as: 0/1,0/1,0/1,0/1,0/1,0/1
# Example:
# Input: "They are all the same, useless people"
# Output: 1,1,0,1,0,0

# Guidelines:
# - Stereotype: A generalized belief that attributes specific characteristics to all members of a group, often neglecting individual differences, thereby reducing complex personalities to simplistic and uniform representations.
# - Vilification: The act of defaming or demonizing a particular group, person, or entity by inciting fear, often through exaggeration, misrepresentation, or biased framing that portrays the subject negatively and harmfully.
# - Dehumanization: The process of depriving a group or individual of their human qualities or personality by comparing them to animals, machines, or objects, or otherwise denying their humanity, dignity, or individuality.
# - Extreme language: The use of language that is extreme or makes definitive, all-encompassing statements, often involving words like "always," "never," "worst," or "best," and presenting issues in a dichotomous manner such as "us versus them" or "right versus wrong."
# - Lack of empathy: The absence of compassion or recognition for other viewpoints or experiences in the text.
# - Invalidation: The act of denying or dismissing the identity and existence of individuals or groups, thereby rejecting their sense of self and their presence. See Appendix A for further details and examples for each manifestation of polarization.

# Be consistent, follow instructions strictly, and do NOT provide explanations.
# """
# print(seed_prompt)


In [None]:
def my_evaluator(data, response):
    """Score one model response against the gold labels of one example.

    Args:
        data: Example record; ``data["answer"]`` holds the gold string, e.g.
            ``"stereotype=0, vilification=1, ..."``.
        response: Output produced by the model. ``None`` is treated as empty output.

    Returns:
        ``(score, feedback)`` where ``score`` is 1.0 for a correct response and 0.0
        otherwise, and ``feedback`` is a string showing the expected and received text.

    A response counts as correct when it contains the gold string verbatim (the
    original rule), or when every gold label occurs in the response with the same
    0/1 value regardless of separator ("=" or ":"), spacing, line breaks or letter
    case. The second rule is needed because the dict-style seed prompt in this
    notebook asks for "label: 0/1" on separate lines, which can never contain the
    comma-separated gold string verbatim.
    """
    import re  # local import keeps this cell self-contained

    def _parse_labels(text):
        # Map each "name=<0|1>" / "name: <0|1>" occurrence to its value; when a
        # label is repeated, the last occurrence wins.
        pairs = re.findall(r"([A-Za-z_]+)\s*[=:]\s*([01])\b", str(text))
        return {name.lower(): value for name, value in pairs}

    gold = data["answer"]
    # A missing response is scored as wrong instead of raising a TypeError.
    pred = "" if response is None else response

    is_correct = gold in pred
    if not is_correct:
        gold_labels = _parse_labels(gold)
        pred_labels = _parse_labels(pred)
        # An unparseable gold string never matches through the fallback.
        is_correct = bool(gold_labels) and all(
            pred_labels.get(name) == value for name, value in gold_labels.items()
        )
    score = 1.0 if is_correct else 0.0

    feedback = f"Expected: {gold}\nGot: {pred}"

    return score, feedback


In [None]:
import getpass
import os

# SECURITY: API keys must never be hard-coded in a notebook — they leak through version
# control and shared copies. The key that was previously committed in this cell has to
# be treated as compromised and revoked with the provider.
# Reuse a key that is already exported in the environment; otherwise prompt for it
# without echoing the input.
if not os.environ.get("OPENROUTER_API_KEY"):
    os.environ["OPENROUTER_API_KEY"] = getpass.getpass("Enter your OpenRouter API key: ")
os.environ["OPENAI_API_KEY"] = ""   # << VERY IMPORTANT TO EMPTY THIS


In [None]:
import os
# Export the retry/backoff settings as environment variables (values are strings).
# NOTE(review): presumably consumed by LiteLLM — confirm these variable names
# against the LiteLLM version in use.
os.environ.update(
    {
        "LITELLM_RATELIMIT_BACKOFF": "3",
        "LITELLM_MAX_RETRIES": "10",
        "LITELLM_RETRY_BACKOFF_FACTOR": "2",
    }
)


In [None]:
import getpass
import os

# Ask for the OpenAI key without echoing it and export it for this session.
# NOTE(review): other cells in this notebook deliberately set OPENAI_API_KEY to an
# empty string; running this cell afterwards overrides that — confirm which is intended.
os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt="Enter your OPENAI API key: ")


Enter your OPENAI API key: ··········


In [None]:
# Seed candidate expressed as a mapping with a single "system_prompt" entry.
# NOTE(review): this rebinds `seed_prompt`, which an earlier cell defines as a plain
# string, and it requests "label: 0/1" lines rather than the "label=0/1" form used
# there — confirm which variant downstream code expects.
seed_prompt = dict(
    system_prompt="""
You are a multilingual classifier for polarization detection (SemEval 2026 Task 9 Subtask 3).

Given a text, output whether each manifestation is present (1) or absent (0):

stereotype
vilification
dehumanization
extreme_language
lack_of_empathy
invalidation

Output format:
stereotype: 0/1
vilification: 0/1
dehumanization: 0/1
extreme_language: 0/1
lack_of_empathy: 0/1
invalidation: 0/1

Only output 0s and 1s. Do not explain or add extra text.
"""
)


In [None]:
# NOTE(review): an earlier cell already runs `import litellm`; on a fresh kernel this
# install has to execute before that import.
!pip install litellm


Collecting litellm
  Downloading litellm-1.81.3-py3-none-any.whl.metadata (30 kB)
Collecting fastuuid>=0.13.0 (from litellm)
  Downloading fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Downloading litellm-1.81.3-py3-none-any.whl (11.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.9/11.9 MB[0m [31m77.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (278 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.1/278.1 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fastuuid, litellm
Successfully installed fastuuid-0.14.0 litellm-1.81.3
