In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
from tqdm import tqdm

In [None]:
# Load the toxic-bert checkpoint: the tokenizer first, then the
# sequence-classification model, both resolved from the same model id.
model_name = "unitary/toxic-bert"
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModelForSequenceClassification.from_pretrained(model_name),
)

In [None]:
# Read the messages CSV into the working DataFrame.
df = pd.read_csv(
    filepath_or_buffer="../../data/processed/cleaned_twitch_messages.csv",
)

In [None]:
def get_toxicity_score(text):
    """Estimate how toxic a single message is.

    The text is tokenized (with truncation enabled), passed through the
    module-level ``model`` with gradient tracking disabled, and each output
    logit is squashed with a sigmoid. The largest of those per-class values
    is returned.

    Args:
        text: The message to score.

    Returns:
        float: The maximum per-class sigmoid score, or NaN when ``text`` is
        not a string (for example a missing cell read from the CSV).
    """
    # Empty CSV cells arrive from pandas as float NaN, which the tokenizer
    # cannot encode; report "no score" instead of aborting the whole apply().
    if not isinstance(text, str):
        return float("nan")

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    # A single sequence was encoded, so row 0 holds its per-class scores;
    # keep the highest one.
    return torch.sigmoid(logits)[0].max().item()

In [None]:
# Score every message. tqdm.pandas() registers progress_apply on pandas
# objects so the pass over the column shows a progress bar.
tqdm.pandas()
message_scores = df["message"].progress_apply(get_toxicity_score)
df["toxicity_score"] = message_scores

In [None]:
# Binary labels: 1 = toxic (score >= 0.9), 0 = everything else.
# An earlier three-way scheme (1 if score >= 0.7, 0 if score <= 0.3,
# -1 = ambiguous in between) is not in use.
df["label"] = df["toxicity_score"].ge(0.9).astype("int64")


In [None]:
# Optional shortcut: when the scoring cells were skipped in this session,
# reload the labelled messages from the file written by the save cell.
# If scores are already in memory, df is left alone so fresh results are not
# replaced by an older file (and a fresh Run All does not need the file to
# exist yet).
if "toxicity_score" not in df.columns:
    df = pd.read_csv("./data/messages_with_toxicity_labels.csv")

In [None]:
# Persist the labelled messages, creating the output folder if needed.
os.makedirs(name="data", exist_ok=True)
df.to_csv(path_or_buf="./data/messages_with_toxicity_labels.csv", index=False)
print("✅ Etichettatura completata.")


In [None]:
# Histogram (with a KDE overlay) of the toxicity scores.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x="toxicity_score", bins=30, kde=True, color="skyblue", ax=ax)
ax.set(
    title="Distribuzione dei punteggi di tossicità",
    xlabel="Toxicity score",
    ylabel="Numero di messaggi",
)
plt.show()

### Prova con CitizenLab distilbert-base-multilingual-cased-toxicity

In [None]:
# Multilingual model specialized in toxicity detection
MODEL_NAME = "citizenlab/distilbert-base-multilingual-cased-toxicity"

In [None]:
# Text-classification pipeline that uses MODEL_NAME for both the model
# weights and the tokenizer.
toxicity_classifier = pipeline(
    task="text-classification",
    model=MODEL_NAME,
    tokenizer=MODEL_NAME,
)

In [None]:
def classify_toxicity(text):
    """Classify one message with the module-level ``toxicity_classifier``.

    Args:
        text: The message to classify.

    Returns:
        pd.Series: ``[label, score]``. ``label`` is 1 when the pipeline's
        predicted label is ``"toxic"`` and 0 otherwise; ``score`` is the
        probability of the toxic class. Non-string input (for example a
        missing CSV cell) yields ``[0, NaN]``.
    """
    # Missing cells come back from pandas as float NaN and cannot be sliced
    # or tokenized; give them a neutral label and no score.
    if not isinstance(text, str):
        return pd.Series([0, float("nan")])

    # Keep the 512-character cap, and also let the tokenizer truncate:
    # 512 characters can still exceed the model's token limit once special
    # tokens are added.
    result = toxicity_classifier(text[:512], truncation=True)[0]
    is_toxic = result["label"] == "toxic"
    # The pipeline reports the confidence of the *predicted* label, so a
    # confident non-toxic prediction must be flipped to get P(toxic).
    # NOTE(review): assumes the model has exactly two labels — confirm
    # against the model card.
    toxic_probability = result["score"] if is_toxic else 1.0 - result["score"]
    return pd.Series([int(is_toxic), toxic_probability])

In [None]:
# Classify every message with a progress bar. Each call returns a two-item
# Series, so the result has two columns that are assigned, in order, to
# "label" and "toxicity_score".
tqdm.pandas()
predictions = df["message"].progress_apply(classify_toxicity)
df[["label", "toxicity_score"]] = predictions

In [None]:
# Etichette: soglie personalizzabili
df["label"] = df["toxicity_score"].apply(lambda x: 1 if x >= 0.6 else 0)

In [None]:
# Write the CitizenLab-labelled messages next to the notebook.
df.to_csv(path_or_buf="messages_labeled_citizenlab.csv", index=False)
print("✅ Etichettatura completata con il modello CitizenLab.")

In [None]:
# Reload the CitizenLab-labelled messages from disk.
df = pd.read_csv(
    filepath_or_buffer="./messages_labeled_citizenlab.csv",
)

In [None]:
# Histogram (with a KDE overlay) of the CitizenLab scores; histplot returns
# the Axes it drew on, which is then labelled directly.
ax = sns.histplot(df["toxicity_score"], bins=70, kde=True, color="salmon")
ax.set(
    title="Distribuzione dei punteggi di tossicità (CitizenLab)",
    xlabel="Toxicity score",
    ylabel="Numero di messaggi",
)
plt.show()

In [None]:
# Show the rows whose score is above 0.99.
df[df["toxicity_score"] > 0.99]