# Toxicity analysis of Reddit AITA dataset for each AITA class
- Uses the ToxiGen RoBERTa model (https://huggingface.co/tomh/toxigen_roberta) as the toxicity classifier for each sample's top comment (`top_comment_1`), where LABEL_0 = "BENIGN" and LABEL_1 = "TOXIC"

In [None]:
# Install the third-party packages for this notebook into the current kernel environment.
%pip install transformers datasets huggingface_hub tqdm matplotlib accelerate

In [None]:
# Mount Google Drive at /content/drive (Colab only); the next cell changes into
# a folder under this mount point.

from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd /content/drive/MyDrive/CPSS_24_Paper/results

import os
os.makedirs('toxicity_analysis', exist_ok=True)

%cd /content/drive/MyDrive/CPSS_24_Paper/results/toxicity_analysis

## Prepare Dataset

In [None]:
from datasets import load_dataset, concatenate_datasets

# Load the Reddit AITA dataset from the Hugging Face Hub by repo id; the cells
# below read and extend its 'train' split.
dataset = load_dataset("MattBoraske/Reddit-AITA-2018-to-2022-with-consensus")

## Classify Samples of Dataset as Toxic or Benign

In [None]:
from transformers import pipeline

# load toxigen roberta model as a text-classification pipeline placed on the GPU
# (device_map='cuda'); truncation=True is passed so over-long comments are
# truncated — presumably to the model's maximum input length, confirm against
# the transformers pipeline docs.
toxigen_roberta = pipeline("text-classification", model="tomh/toxigen_roberta", truncation=True, device_map='cuda')

# Raw labels returned by the pipeline:
# LABEL_0 = BENIGN (non-toxic)
# LABEL_1 = TOXIC

In [None]:
import pandas as pd

counter = 0  # rows classified so far; only used for progress output


def calculate_toxicity(row):
    """Classify a row's top comment as toxic or benign.

    Runs ``row['top_comment_1']`` through the ``toxigen_roberta`` pipeline and
    takes its first result. The raw label ``LABEL_0`` is reported as
    ``'BENIGN'``; any other label is reported as ``'TOXIC'``. Every 250th call
    prints a progress line, tracked through the module-level ``counter``.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) with a
            ``'top_comment_1'`` entry.

    Returns:
        A ``pd.Series`` with ``toxicity_label`` and
        ``toxicity_confidence_score`` (the score the pipeline attached to its
        predicted label).
    """
    global counter

    prediction = toxigen_roberta(row['top_comment_1'])[0]
    label = 'BENIGN' if prediction['label'] == 'LABEL_0' else 'TOXIC'
    confidence = prediction['score']

    # Progress reporting: count this row and announce every 250th one.
    counter += 1
    if counter % 250 == 0:
        print(f'Processed {counter} samples')

    return pd.Series({
        'toxicity_label': label,
        'toxicity_confidence_score': confidence,
    })

# Convert the train split to pandas and classify every row; the expanded result
# of calculate_toxicity supplies the two new toxicity columns.
df = dataset['train'].to_pandas()
toxicity_results = df.apply(calculate_toxicity, axis=1, result_type='expand')
df[['toxicity_label', 'toxicity_confidence_score']] = toxicity_results

In [None]:
# Copy both toxicity columns from the dataframe back onto the train split.
# add_column returns a new dataset, so the split is reassigned on each pass.

for column_name in ('toxicity_label', 'toxicity_confidence_score'):
    dataset['train'] = dataset['train'].add_column(column_name, df[column_name])

In [None]:
# save dataset to huggingface under a separate repo id from the one it was loaded from
# NOTE(review): presumably requires a Hugging Face login with write access — confirm before running.

dataset.push_to_hub("MattBoraske/Reddit-AITA-2018-to-2022-with-consensus-and-toxicity-analysis")

## Analyze Results

In [None]:
# Rebuild the dataframe from the train split for the analysis cells below.
df = dataset['train'].to_pandas()

In [None]:
# Split df into one dataframe per AITA class, keyed on the integer code stored
# in top_comment_1_AITA_class_by_keyword (1 -> NAH, 2 -> NTA, 3 -> YTA, 4 -> ESH).

aita_class_code = df['top_comment_1_AITA_class_by_keyword']

df_NAH = df[aita_class_code == 1]
df_NTA = df[aita_class_code == 2]
df_YTA = df[aita_class_code == 3]
df_ESH = df[aita_class_code == 4]

In [None]:
# save toxicity stats (overall and per AITA class) to output JSON

import json


def _toxicity_label_stats(frame):
    """Return the toxicity label counts of `frame` and their percentage shares."""
    label_counts = frame['toxicity_label'].value_counts()
    return {
        'counts': label_counts.to_dict(),
        'percentiles': ((label_counts / label_counts.sum()) * 100).to_dict(),
    }


# Key order is kept as-is because it determines the order of entries in the JSON file.
toxicity_stats = {
    group_name: _toxicity_label_stats(group_df)
    for group_name, group_df in (
        ("All Classes", df),
        ("NTA", df_NTA),
        ("YTA", df_YTA),
        ("NAH", df_NAH),
        ("ESH", df_ESH),
    )
}

# The path is relative to the current working directory.
with open('toxicity_analysis/toxicity_stats_by_AITA_class.json', 'w') as f:
    json.dump(toxicity_stats, f)

In [None]:
import matplotlib.pyplot as plt

# Histograms of the toxic and benign confidence scores for each AITA decision
# class, drawn as a 2x2 grid (NTA, YTA, ESH, NAH) and saved as a PNG.


def _plot_toxicity_histogram(position, class_name, class_df):
    """Draw one class's toxic/benign confidence-score histograms.

    Args:
        position: 1-based slot in the 2x2 subplot grid.
        class_name: AITA class label used in the subplot title.
        class_df: Rows of that class; must carry the 'toxicity_label' and
            'toxicity_confidence_score' columns.
    """
    labels = class_df['toxicity_label']
    scores = class_df['toxicity_confidence_score']
    toxic_scores = scores[labels == 'TOXIC'].to_list()
    benign_scores = scores[labels == 'BENIGN'].to_list()

    # An empty class would otherwise raise ZeroDivisionError in the annotation.
    total = len(toxic_scores) + len(benign_scores)
    toxic_share = len(toxic_scores) / total if total else 0.0
    benign_share = len(benign_scores) / total if total else 0.0

    plt.subplot(2, 2, position)
    plt.hist(toxic_scores, bins=20, alpha=0.5, label='Toxic', color='red')
    plt.hist(benign_scores, bins=20, alpha=0.5, label='Benign', color='blue')
    plt.title(f'{class_name} Samples', fontsize=28)
    plt.xlabel('Confidence Score', fontsize=22)
    plt.ylabel('Number of Samples', fontsize=22)
    plt.tick_params(axis='both', which='major', labelsize=18)
    # Same legend size on every subplot (previously only ESH set it).
    plt.legend(fontsize=18)
    plt.annotate(f'Toxic: {len(toxic_scores)} ({toxic_share:.2%})\nBenign: {len(benign_scores)} ({benign_share:.2%})',
                 xy=(0.04, 0.80), xycoords='axes fraction', fontsize=16,
                 horizontalalignment='left', verticalalignment='top',
                 bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.5'))


# Plot 4 histograms as a 2x2 subplot
plt.figure(figsize=(12, 12))
plt.suptitle('Reddit AITA Toxic and Benign Samples', fontsize=16)

class_frames = [('NTA', df_NTA), ('YTA', df_YTA), ('ESH', df_ESH), ('NAH', df_NAH)]
for position, (class_name, class_df) in enumerate(class_frames, start=1):
    _plot_toxicity_histogram(position, class_name, class_df)

# Plot details: the rect leaves room at the top for the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.95], h_pad=3.0)
plt.savefig('toxicity_analysis/AITA_toxicity_confidence_scores.png')
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# create 2-bar chart of toxic and benign proportions for each AITA class

categories = ['NTA', 'YTA', 'ESH', 'NAH']

# Count the labels once per class instead of re-running value_counts() for
# every lookup.
label_counts = {
    'NTA': df_NTA['toxicity_label'].value_counts(),
    'YTA': df_YTA['toxicity_label'].value_counts(),
    'ESH': df_ESH['toxicity_label'].value_counts(),
    'NAH': df_NAH['toxicity_label'].value_counts(),
}

# A label that never occurs in a class is absent from value_counts(), so plain
# indexing would raise KeyError; .get(..., 0) reports it as a zero count.
benign_counts = {category: label_counts[category].get('BENIGN', 0) for category in categories}
toxic_counts = {category: label_counts[category].get('TOXIC', 0) for category in categories}
total_counts = {category: label_counts[category].sum() for category in categories}

# Share of each label within its class, in percent. An empty class is reported
# as 0% rather than dividing by zero.
benign_percentages = [
    (benign_counts[category] / total_counts[category]) * 100 if total_counts[category] else 0.0
    for category in categories
]
toxic_percentages = [
    (toxic_counts[category] / total_counts[category]) * 100 if total_counts[category] else 0.0
    for category in categories
]

x = np.arange(len(categories))  # Label locations
width = 0.35  # Width of the bars
fig, ax = plt.subplots(figsize=(10, 12))  # Adjust the figure size (width, height)

# Leave extra space at the top of the figure for the legend
plt.subplots_adjust(top=0.8)

# Two bars per class, side by side around each tick: benign left, toxic right
rects1 = ax.bar(x - width/2, benign_percentages, width, label='Benign')
rects2 = ax.bar(x + width/2, toxic_percentages, width, label='Toxic')

# plot details
ax.set_ylabel('Percentages', fontsize=24)  # Axis label font size
ax.set_xticks(x)
ax.set_xticklabels(categories, fontsize=20)  # Tick label font size
ax.legend(fontsize=24, loc='upper left', ncol=2)  # Legend font size and placement

def add_bar_labels(rects, ax, position_factor=0.99):
    """Annotate every bar with its height formatted as a percentage.

    Args:
        rects: Iterable of bar patches exposing get_x(), get_width() and
            get_height().
        ax: Axes-like object whose annotate() draws the labels.
        position_factor: Fraction of the bar height at which the label is
            anchored (default 0.99, i.e. just below the top of the bar).
    """
    label_box = dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.3')

    for bar in rects:
        bar_height = bar.get_height()
        anchor = (bar.get_x() + bar.get_width() / 2, bar_height * position_factor)
        ax.annotate(
            f'{bar_height:.1f}%',
            xy=anchor,
            xytext=(0, 3),  # nudge the label 3 points above the anchor
            textcoords="offset points",
            ha='center',
            va='bottom',
            fontsize=20,
            bbox=label_box,
        )

# Add labels to bars: rects1 holds the benign bars, rects2 the toxic bars
add_bar_labels(rects1, ax)
add_bar_labels(rects2, ax)

plt.xticks(fontsize=20)  # X-axis tick font size
plt.yticks(fontsize=20)  # Y-axis tick font size

# Save the figure (path is relative to the current working directory), then display it
plt.savefig('toxicity_analysis/AITA_toxicity_proportions.png')
plt.show()