In [58]:
#!pip install pandas numpy emoji langdetect transformers torch



In [2]:
import csv
import urllib.request
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the pre-labelled, balanced 2020 tweet sample and print a schema summary
# (columns, non-null counts, dtypes, memory usage).
df = pd.read_csv(
    filepath_or_buffer='classified samples/2020_Classified_Balanced.csv',
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191798 entries, 0 to 191797
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   label   191798 non-null  object
 1   tweet   191798 non-null  object
dtypes: object(2)
memory usage: 2.9+ MB


In [None]:
# Text normalisation applied before tokenisation,
# kept consistent with how the model's training data was prepared.
def preprocess(text):
    """Lower-case ``text`` and mask user mentions and links.

    The input is converted with ``str()``, lower-cased and split on single
    spaces (other whitespace is left inside the tokens). Each token is then
    rewritten as follows:

    * a token starting with ``@`` that is longer than one character -> ``@user``
    * a token starting with ``http`` -> ``http``
    * anything else is kept unchanged

    The tokens are joined back together with single spaces.
    """
    def _mask(token):
        # A bare "@" is not a mention, hence the length check.
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    lowered = str(text).lower()
    return " ".join(_mask(token) for token in lowered.split(" "))

In [None]:
# Load model and tokenizer
MODEL = 'textdetox/twitter-xlmr-toxicity-classifier'
# Run on the GPU when one is available and fall back to the CPU otherwise,
# so the notebook does not crash on machines without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL, use_safetensors=True).to(DEVICE)
# NOTE(review): the order below is assumed to match the classifier's output
# logits (index 0 = non-toxic, index 1 = toxic) - confirm against the model
# config's id2label mapping.
labels = ["non-toxic", "toxic"] #(Hanu, 2020; Lai-Lopez et al., 2025)

In [None]:
# Classify a single text as toxic / non-toxic and return the class probabilities
def get_toxicity_label(text):
    """Run the toxicity classifier on one text.

    The text is normalised with ``preprocess``, tokenised (truncated to at
    most 128 tokens) and passed through ``model`` with gradient tracking
    disabled.

    Parameters
    ----------
    text
        Raw text to classify; ``preprocess`` converts it with ``str()``.

    Returns
    -------
    tuple
        ``(label, probs)``: ``probs`` is the softmax of the model's logits
        for this text as a NumPy array, and ``label`` is the entry of
        ``labels`` at the position of the largest probability.
    """
    text = preprocess(text)
    encoded_input = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    ).to(model.device)  # follow the model's device instead of hard-coding 'cuda'
    with torch.no_grad():
        output = model(**encoded_input)
    # logits[0]: a single text was encoded, so the batch has exactly one row.
    scores = output.logits[0].detach().cpu().numpy()
    probs = softmax(scores)

    pred_id = np.argmax(probs)
    return labels[pred_id], probs

In [None]:
# Run the classifier over every tweet; each element of `results` is the
# (label, probs) tuple returned by get_toxicity_label.
results = df["tweet"].apply(get_toxicity_label)
# Unpack the tuples into one column per component:
# position 0 is the predicted label, position 1 the probability array.
df["toxicity_label"] = results.map(lambda pair: pair[0])
df["toxicity_probs"] = results.map(lambda pair: pair[1])
df.head()

Unnamed: 0,label,tweet,toxicity_label,toxicity_probs
0,Left,@ USER have the big rating in the US @ USER be...,non-toxic,"[0.9993185, 0.00068155455]"
1,Right,trump even try to taint Christmas with his cor...,toxic,"[0.008295586, 0.99170434]"
2,Right,@ USER @ USER Republicans have to admit @ USER...,toxic,"[0.0010695378, 0.9989304]"
3,Right,We believe in a government with limited power ...,non-toxic,"[0.9971385, 0.0028614667]"
4,Right,Presidential Debate Round 2 Clear debate winne...,non-toxic,"[0.97863686, 0.021363156]"


In [None]:
# Expand the per-tweet probability arrays into one numeric column per class.
# Reusing df's index keeps the rows aligned in the concat below even when df
# does not carry a default RangeIndex.
df_probs = pd.DataFrame(df["toxicity_probs"].tolist(), columns=labels, index=df.index)
# Drop class columns left over from a previous run of this cell so that
# re-running it does not append duplicate 'non-toxic' / 'toxic' columns.
df = pd.concat([df.drop(columns=labels, errors="ignore"), df_probs], axis=1)
#(Hanu, 2020; Lai-Lopez et al., 2025)

**Results**

In [None]:
# Mean class probability per political leaning.
# avg_tox is initialised first so it is always defined for the following
# cells and never holds a stale value from an earlier run when the 'label'
# column is absent.
avg_tox = None
if "label" in df.columns:
    avg_tox = df.groupby("label")[labels].mean().reset_index()

# Save the per-tweet results. The output directory is created first because
# to_csv does not create missing parent directories.
output_path = Path("analysis/2020_toxicity_results.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
#(Hanu, 2020; Lai-Lopez et al., 2025)

In [15]:
# Print the heading followed by the per-leaning averages, one per line.
print("\n Average Toxicity Probabilities by Political Leaning:", avg_tox, sep="\n")


 Average Toxicity Probabilities by Political Leaning:
   label  non-toxic     toxic
0   Left   0.617770  0.382230
1  Right   0.452292  0.547708
