In [None]:
import os

import bitsandbytes as bnb
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, AutoModelForCausalLM, GenerationConfig

import openai
# Read the OpenAI key from the OPENAI_API_KEY environment variable so the
# secret is never stored in the notebook (None when the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")


from googleapiclient import discovery
import json
import time
# Perspective API key, read from the PERSPECTIVE_API_KEY environment variable
# so the secret is never stored in the notebook (None when the variable is unset).
API_KEY = os.getenv("PERSPECTIVE_API_KEY")
# Client for the "commentanalyzer" v1alpha1 service, built from its remote
# discovery document; it is used by the Perspective API section.
client = discovery.build(
  "commentanalyzer",
  "v1alpha1",
  developerKey=API_KEY,
  discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
  static_discovery=False,
)

# Base directory for the dataset and the prediction files. File names are
# appended with plain string concatenation, so keep the trailing slash.
path = "./"  # TODO: set to your data directory

In [None]:
# CSV file to classify, relative to `path`.
dataset = "dataset.csv"  # TODO: set to your dataset file
df = pd.read_csv(path+dataset)

# The inference cells read df['text'] and the export cells read df['id'],
# so fail early with a clear message if either column is absent.
missing_columns = {"id", "text"} - set(df.columns)
if missing_columns:
    raise KeyError(f"{path+dataset} is missing required column(s): {sorted(missing_columns)}")

# Importing Seq2Seq models and running the inference

In [None]:
# Checkpoint shared by the tokenizer and the seq2seq model.
seq2seq_checkpoint = "declare-lab/flan-alpaca-xl"

tokenizer = AutoTokenizer.from_pretrained(seq2seq_checkpoint)

# Automatic dtype selection and device placement, with 8-bit loading enabled.
seq2seq_load_options = dict(
    torch_dtype="auto",
    device_map="auto",
    load_in_8bit=True,
)
model = AutoModelForSeq2SeqLM.from_pretrained(seq2seq_checkpoint, **seq2seq_load_options)

In [None]:
prompt_template = "Classify the input text as 'respectful' or 'toxic'.\nInput text: {text}.\nAnswer: "
# Canonical labels. Each generation is cleaned (surrounding whitespace, quotes,
# periods, letter case) before the lookup, so variants such as "'Toxic'",
# " toxic" or "toxic." all resolve to the same label.
label_map = {
    "toxic": 'toxic',
    "respectful": 'respectful',
}

# Texts are generated one at a time; batch_size only controls how many rows
# are processed between two CUDA cache flushes.
batch_size = 20
predicted_labels = []

total_rows = len(df)
num_batches = (total_rows + batch_size - 1) // batch_size  # ceiling division

for batch_num in tqdm(range(num_batches)):
    start_idx = batch_num * batch_size
    end_idx = min(start_idx + batch_size, total_rows)

    for text in df['text'].iloc[start_idx:end_idx]:
        prompt = prompt_template.format(text=text)

        # Send the prompt to the device the model was placed on instead of
        # assuming "cuda".
        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(input_ids, max_length=20)
        predicted_label = tokenizer.decode(outputs[0], skip_special_tokens=True)

        cleaned_label = predicted_label.strip().strip("'\".").lower()
        # Unrecognised generations are stored verbatim so they can be inspected later.
        predicted_labels.append(label_map.get(cleaned_label, predicted_label))

    torch.cuda.empty_cache()

In [None]:
# Output CSV, relative to `path`, named as predictions/<dataset>_<model>.csv.
filename = "predictions/dataset_flan-alpaca-xl.csv"  # TODO: adjust the dataset/model parts
ids = df['id'].tolist()

# One row per input: the example id and the label predicted for it.
pd.DataFrame({'id':ids, 'results':predicted_labels}).to_csv(path+filename, index=False)

# Importing SeqClassification models and running inference

In [None]:
# Checkpoint shared by the tokenizer and the sequence-classification model.
classifier_checkpoint = "facebook/roberta-hate-speech-dynabench-r2-target"

tokenizer = AutoTokenizer.from_pretrained(classifier_checkpoint)
# device_map="auto" already places the weights on the available device, so the
# model is not moved again with .to("cuda"); that extra call was redundant on
# GPU machines and fails where CUDA is unavailable.
model = AutoModelForSequenceClassification.from_pretrained(classifier_checkpoint,
                                                           torch_dtype="auto",
                                                           device_map="auto")

In [None]:
# Index of the highest logit -> label name written to the prediction file.
label_map = {0: "respectful", 1: "toxic"}

batch_size = 100  # number of texts per forward pass
predicted_labels = []

total_rows = len(df)
num_batches = (total_rows + batch_size - 1) // batch_size  # ceiling division

for batch_num in tqdm(range(num_batches)):
    start_idx = batch_num * batch_size
    end_idx = min(start_idx + batch_size, total_rows)

    batch_texts = df['text'].iloc[start_idx:end_idx].tolist()

    # Tokenize and classify the whole batch in a single forward pass
    # (padding=True pads each text to the longest one in the batch) and keep
    # the tensors on the device the model was placed on.
    inputs = tokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True).to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    # One argmax per row of the (batch, num_labels) logits.
    predicted_label_indices = torch.argmax(outputs.logits, dim=1).tolist()
    predicted_labels.extend(label_map[label_idx] for label_idx in predicted_label_indices)

    torch.cuda.empty_cache()

100%|██████████| 25/25 [00:33<00:00,  1.33s/it]


In [None]:
# Output CSV, relative to `path`, named as predictions/<dataset>_<model>.csv.
filename = "predictions/dataset_roberta-hate-speech.csv"  # TODO: adjust the dataset/model parts
ids = df['id'].tolist()

# One row per input: the example id and the label predicted for it.
pd.DataFrame({'id':ids, 'results':predicted_labels}).to_csv(path+filename, index=False)

# Importing OpenAI models and running inference

In [None]:
def classify_hate_speech(texts, model):
  """Ask an OpenAI chat model to label each text as 'respectful' or 'toxic'.

  Args:
    texts: iterable of texts to classify.
    model: model name forwarded to `openai.ChatCompletion.create`.

  Returns:
    A list with one entry per text: the content of the first choice with
    surrounding whitespace stripped, or the string "Error" when the request
    (or reading its reply) raised an exception.
  """
  user_template = "".join([
      "Classify the input text as 'respectful' or 'toxic'.\n",
      "Input text: {text}.\n",
      "Answer with only one word, 'respectful' or 'toxic': ",
  ])
  system_message = {"role": "system", "content": "You are a helpful assistant specialized in text classification."}

  labels = []
  for text in tqdm(texts):
    user_message = {"role": "user", "content": user_template.format(text=text)}
    try:
      completion = openai.ChatCompletion.create(
          model=model,
          messages=[system_message, user_message],
          max_tokens=10,
      )
      label = completion['choices'][0]['message']['content'].strip()
    except Exception as e:
      # Best effort: report the failure and keep the output aligned with the input.
      print(f"Error processing text: {text}\n{e}")
      label = "Error"
    labels.append(label)

  return labels

In [None]:
# Send every text of the dataset through the classifier defined above,
# requesting the "gpt-4o" model.
texts = df['text'].to_list()

classifications = classify_hate_speech(texts=texts, model="gpt-4o")

In [None]:
# Canonical labels. Each reply is cleaned (surrounding whitespace, quotes,
# periods, letter case) before the lookup, so variants such as "'Toxic'",
# "TOXIC" or "toxic." all resolve to the same label.
label_map = {
    "toxic": 'toxic',
    "respectful": 'respectful',
}

predicted_labels = []
for classification in classifications:
  cleaned_label = classification.strip().strip("'\".").lower()
  # Anything unrecognised (including the "Error" marker) is kept verbatim.
  predicted_labels.append(label_map.get(cleaned_label, classification))

In [None]:
# Output CSV, relative to `path`, named as predictions/<dataset>_<model>.csv.
filename = "predictions/dataset_gpt-4o.csv"  # TODO: adjust the dataset/model parts
ids = df['id'].tolist()

# One row per input: the example id and the label predicted for it.
pd.DataFrame({'id':ids, 'results':predicted_labels}).to_csv(path+filename, index=False)

# OpenAI Moderation API

In [None]:
def classify_hate_speech(texts, model="text-moderation-latest"):
    """Run each text through the OpenAI Moderation endpoint.

    Args:
        texts: iterable of texts to moderate.
        model: moderation model name forwarded to `openai.Moderation.create`.

    Returns:
        A list with one entry per text: the `flagged` value of the first
        result, or the string "Error" when the request raised an exception.
    """
    responses = []
    for text in tqdm(texts):
        try:
            # The moderation endpoint scores the content it receives, so the
            # text itself is sent rather than a chat-style instruction prompt
            # wrapped around it.
            response = openai.Moderation.create(
                model=model,
                input=str(text)
                )

            responses.append(response['results'][0]['flagged'])
        except Exception as e:
            # Best effort: report the failure and keep the output aligned with the input.
            print(f"Error processing text: {text}\n{e}")
            responses.append("Error")

    return responses

In [None]:
# Send every text of the dataset through the classifier defined above,
# using its default model.
texts = df['text'].to_list()

classifications = classify_hate_speech(texts=texts)

In [None]:
# Boolean results -> label names written to the prediction file.
label_map = dict([(False, "respectful"), (True, "toxic")])

# Values that are not keys of the map are passed through unchanged.
predicted_labels = [label_map.get(flag, flag) for flag in classifications]

In [None]:
# Output CSV, relative to `path`, named as predictions/<dataset>_<model>.csv.
filename = "predictions/dataset_openai-moderation.csv"  # TODO: adjust the dataset/model parts
ids = df['id'].tolist()

# One row per input: the example id and the label predicted for it.
pd.DataFrame({'id':ids, 'results':predicted_labels}).to_csv(path+filename, index=False)

# Perspective API

In [None]:
def classify_hate_speech(texts, language='english', threshold=0.5, error_label='respectful'):
  """Label each text with the Perspective API TOXICITY attribute.

  Args:
    texts: iterable of texts to score.
    language: currently unused; every request is sent with languages=['en'].
    threshold: a text is 'toxic' when its TOXICITY summary score is strictly
      greater than this value.
    error_label: label recorded when the request raises an exception. The
      default keeps the historical behaviour of counting failures as
      'respectful'; pass e.g. "Error" to make them visible in the output.

  Returns:
    A list with one label per text.
  """
  responses = []
  for text in tqdm(texts):

    analyze_request = {'comment': { 'text': text },
                       'languages': ['en'],
                       'requestedAttributes': {'TOXICITY': {}}
                       }

    try:
      response = client.comments().analyze(body=analyze_request).execute()
    except Exception as e:
      # Catch Exception rather than everything so that interrupting the kernel
      # still stops the loop, and report the failure instead of hiding it.
      print(f"Error processing text: {text}\n{e}")
      responses.append(error_label)
    else:
      hate_speech_value = response['attributeScores']['TOXICITY']['summaryScore']['value']
      responses.append('toxic' if hate_speech_value > threshold else 'respectful')

    # Pause after every request, failed ones included, so that a run of
    # errors does not turn into a burst of back-to-back calls.
    time.sleep(1)

  return responses

In [None]:
# Send every text of the dataset through the classifier defined above,
# using its default arguments.
texts = df['text'].to_list()

classifications = classify_hate_speech(texts=texts)

In [None]:
# Output CSV, relative to `path`, named as predictions/<dataset>_<model>.csv.
filename = "predictions/dataset_perspective.csv"  # TODO: adjust the dataset/model parts
ids = df['id'].tolist()

# One row per input: the example id and the label returned for it.
pd.DataFrame({'id':ids, 'results':classifications}).to_csv(path+filename, index=False)

# Importing DeepSeek

In [None]:
model_name = "deepseek-ai/deepseek-llm-7b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Start from the checkpoint's own generation settings and reuse its
# end-of-sequence token id as the padding token id.
generation_config = GenerationConfig.from_pretrained(model_name)
generation_config.pad_token_id = generation_config.eos_token_id
model.generation_config = generation_config

In [None]:
prompt_template = "Classify the input text as 'respectful' or 'toxic'.\nInput text: {text}.\nAnswer: "
candidate_labels = ('toxic', 'respectful')
# Texts are generated one at a time; batch_size only controls how many rows
# are processed between two CUDA cache flushes.
batch_size = 20
predicted_labels = []

total_rows = len(df)
num_batches = (total_rows + batch_size - 1) // batch_size  # ceiling division

for batch_num in tqdm(range(num_batches)):
    start_idx = batch_num * batch_size
    end_idx = min(start_idx + batch_size, total_rows)

    for text in df['text'].iloc[start_idx:end_idx]:
        prompt = prompt_template.format(text=text)

        # Send the prompt to the device the model was placed on instead of
        # assuming "cuda".
        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(input_ids, max_new_tokens=20)
        generated_tokens = outputs[0][input_ids.shape[1]:]  # decode only the newly generated tokens, ignore the prompt tokens
        predicted_label = tokenizer.decode(generated_tokens, skip_special_tokens=True)

        # The continuation can mention both labels (for instance when it goes
        # on to repeat the instruction), so the label that appears FIRST is
        # taken as the answer instead of letting 'toxic' win whenever it
        # occurs anywhere in the text.
        lowered = predicted_label.lower()
        first_positions = {label: lowered.find(label) for label in candidate_labels if label in lowered}
        if first_positions:
            standardized_label = min(first_positions, key=first_positions.get)
        else:
            # No known label found: keep the raw generation for later inspection.
            standardized_label = predicted_label

        predicted_labels.append(standardized_label)

    torch.cuda.empty_cache()

100%|██████████| 150/150 [1:19:08<00:00, 31.66s/it]


In [None]:
# Output CSV, relative to `path`, named as predictions/<dataset>_<model>.csv.
filename = "predictions/dataset_deepseek-llm-7b-base.csv"  # TODO: adjust the dataset/model parts
ids = df['id'].tolist()

# One row per input: the example id and the label predicted for it.
pd.DataFrame({'id':ids, 'results':predicted_labels}).to_csv(path+filename, index=False)