I'm using Google Colab to test my baseline.

In [None]:
!pip install transformers

In [None]:
!pip install datasets==2.11
!pip install evaluate

In [3]:
import os
import tempfile
import warnings
import zipfile

import evaluate
import pandas as pd
import spacy
from datasets import load_dataset, load_metric, list_metrics
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from tqdm import tqdm
warnings.warn("ignore")


def create_dataframe(zip_file_path):
    """Load the first ``.tsv`` file found inside a ZIP archive into a DataFrame.

    The archive is unpacked into a private temporary directory which is removed
    (nested folders included) when the function returns, whatever the outcome.

    Args:
        zip_file_path: Path to the ZIP archive.

    Returns:
        The pandas DataFrame parsed from the tab-separated file, or None when
        the archive does not exist, contains no ``.tsv`` file, or the file
        cannot be parsed. A message is printed in each of those cases.
    """
    # check if zip file exists
    if not os.path.exists(zip_file_path):
        print(f"zip file '{zip_file_path}' does not exist.")
        return None

    # A fresh temporary directory avoids picking up stale files from a previous
    # run and is deleted recursively on exit, on every return path.
    with tempfile.TemporaryDirectory(prefix="tmp_extraction_") as extraction_path:
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(extraction_path)

        # Stop at the first match across the whole tree (names sorted per
        # directory so the choice is stable within a folder).
        tsv_file = next(
            (
                os.path.join(root, name)
                for root, _dirs, files in os.walk(extraction_path)
                for name in sorted(files)
                if name.endswith(".tsv")
            ),
            None,
        )

        if tsv_file is None:
            print("No .tsv file found in the extracted ZIP archive.")
            return None

        # read_csv loads the whole file into memory, so the frame stays valid
        # after the temporary directory has been removed.
        try:
            return pd.read_csv(tsv_file, delimiter='\t')
        except Exception as e:
            print(f"Error while creating DataFrame: {str(e)}")
            return None


def create_new_columns(dataset):
    """Re-orient each sentence pair so the more toxic side is always 'toxic'.

    For every row, whichever of 'reference' / 'translation' has the strictly
    higher toxicity score ('ref_tox' vs 'trn_tox') goes to 'toxic' and the
    other to 'nontoxic'; their scores go to 'toxic_tox' / 'nontoxic_tox'.
    On a tie the translation is treated as the toxic side.

    The input frame is left untouched; a new frame is returned.

    Args:
        dataset: DataFrame with columns 'reference', 'translation',
            'similarity', 'lenght_diff', 'ref_tox' and 'trn_tox'.

    Returns:
        A new DataFrame without those six columns and with the four derived
        columns appended.
    """
    # One boolean mask replaces four row-by-row apply() passes.
    reference_is_toxic = dataset['ref_tox'] > dataset['trn_tox']

    # assign() works on a copy, so the caller's frame does not gain columns.
    oriented = dataset.assign(
        toxic=dataset['reference'].where(reference_is_toxic, dataset['translation']),
        nontoxic=dataset['translation'].where(reference_is_toxic, dataset['reference']),
        toxic_tox=dataset['ref_tox'].where(reference_is_toxic, dataset['trn_tox']),
        nontoxic_tox=dataset['trn_tox'].where(reference_is_toxic, dataset['ref_tox']),
    )

    # drop columns that are no longer needed
    # ('lenght_diff' is spelled this way in the column list this function expects)
    return oriented.drop(
        columns=['reference', 'translation', 'similarity', 'lenght_diff', 'ref_tox', 'trn_tox']
    )


def remove_unuseful_data(dataset, min_toxic_tox=0.75, max_nontoxic_tox=0.25):
    """Keep only pairs whose toxic and non-toxic sides are clearly separated.

    Args:
        dataset: DataFrame with 'toxic_tox' and 'nontoxic_tox' columns.
        min_toxic_tox: Rows are kept only if 'toxic_tox' is strictly greater
            than this value. The original note records 0.9243254750535241 for
            the 0.75 cut (presumably the share of rows retained; verify).
        max_nontoxic_tox: Rows are kept only if 'nontoxic_tox' is strictly
            less than this value. The original note records 0.959344176040237
            for the 0.25 cut (presumably the share of rows retained; verify).

    Returns:
        The filtered DataFrame; the original index labels are preserved.
    """
    clearly_toxic = dataset['toxic_tox'] > min_toxic_tox
    clearly_nontoxic = dataset['nontoxic_tox'] < max_nontoxic_tox
    return dataset[clearly_toxic & clearly_nontoxic]


def get_sentences(dataset):
    """Return the 'toxic' and 'nontoxic' columns as two plain Python lists.

    The lists are row-aligned: element i of each comes from the same row.
    """
    toxic_sentences = dataset['toxic'].tolist()
    nontoxic_sentences = dataset['nontoxic'].tolist()
    return toxic_sentences, nontoxic_sentences


def split_train_test(toxic, nontoxic, path, test_size=0.25, random_state=42):
    """Split aligned sentence lists into train/test parts and write them to disk.

    Four UTF-8 text files are written into ``path``, one sentence per line:
    'toxic_train', 'toxic_test', 'nontoxic_train' and 'nontoxic_test'.
    Both lists are split in a single train_test_split call, so line i of a
    toxic file corresponds to line i of the matching nontoxic file.

    Args:
        toxic: List of toxic sentences.
        nontoxic: List of non-toxic sentences, aligned with ``toxic``.
        path: Output directory; it is created if it does not exist.
        test_size: Fraction of pairs placed in the test split.
        random_state: Seed passed to train_test_split for a reproducible split.

    Note:
        Sentences are joined with a newline, so a sentence that itself contains
        a newline would occupy several lines and break the line alignment.
    """
    toxic_train, toxic_test, nontoxic_train, nontoxic_test = train_test_split(
        toxic,
        nontoxic,
        test_size=test_size,
        random_state=random_state,
    )

    # An empty path means "current directory"; makedirs("") would raise.
    if path:
        os.makedirs(path, exist_ok=True)

    splits = {
        'toxic_train': toxic_train,
        'toxic_test': toxic_test,
        'nontoxic_train': nontoxic_train,
        'nontoxic_test': nontoxic_test,
    }
    for file_name, sentences in splits.items():
        with open(os.path.join(path, file_name), "w", encoding="UTF-8") as file:
            file.write("\n".join(sentences))

def save_csv(dataset, path="converted.csv"):
    """Write ``dataset`` to a CSV file (``converted.csv`` by default).

    The DataFrame index is written as the first, unnamed column, which is
    pandas' default for to_csv.

    Args:
        dataset: DataFrame to persist.
        path: Destination file path.
    """
    dataset.to_csv(path)

def get_dict(path="converted.csv"):
    """Load a CSV file through ``datasets.load_dataset`` and return the result.

    Args:
        path: CSV file to read; defaults to ``converted.csv``, the file that
            save_csv writes by default.
    """
    return load_dataset("csv", data_files=path)



In [6]:
import torch
from transformers import RobertaForSequenceClassification, RobertaTokenizer

class ToxicityClassifier:
    """Word-deletion detoxification baseline built on a RoBERTa toxicity classifier.

    Each word of a sentence is scored on its own by the classifier, and words
    whose score reaches the threshold are dropped.
    """

    def __init__(self, model_name='s-nlp/roberta_toxicity_classifier_v1'):
        """Load the classifier and tokenizer, placing the model on GPU if available.

        Args:
            model_name: Hugging Face checkpoint to load.
        """
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = RobertaForSequenceClassification.from_pretrained(model_name).to(self.device)
        self.tokenizer = RobertaTokenizer.from_pretrained(model_name)

    def text_toxicity(self, texts):
        """
        baseline model
        https://huggingface.co/s-nlp/roberta_toxicity_classifier_v1

        Args:
            texts: A string or a list of strings to score.

        Returns:
            A NumPy array with one score per input text: the softmax
            probability of class index 1, which this code treats as "toxic".
            An empty list yields an empty array.
        """
        # Nothing to score: skip the tokenizer/model instead of feeding them an empty batch.
        if isinstance(texts, (list, tuple)) and len(texts) == 0:
            return torch.empty(0).numpy()

        with torch.no_grad():
            # truncation=True caps inputs at the tokenizer's maximum length so
            # over-long texts do not overflow the model's position embeddings.
            encoded = self.tokenizer(
                texts, return_tensors='pt', padding=True, truncation=True
            ).to(self.device)
            logits = self.model(**encoded).logits
            probabilities = torch.softmax(logits, -1)
            toxicity_scores = probabilities[:, 1].cpu().numpy()
        return toxicity_scores

    def delete_toxic(self, toxic_sentences, threshold=0.5):
        """
        remove toxic words from a list of sentences

        Args:
            toxic_sentences: Iterable of sentences (strings).
            threshold: Words with a toxicity score greater than or equal to
                this value are removed.

        Returns:
            A list with one string per input sentence, containing the
            surviving words joined by single spaces. Empty or whitespace-only
            sentences yield an empty string.
        """
        nontoxic_text = []
        for toxic_sentence in toxic_sentences:
            words = toxic_sentence.split()
            if not words:
                # No words to classify; do not call the model with an empty batch.
                nontoxic_text.append("")
                continue
            toxic_scores = self.text_toxicity(words)
            nontoxic_words = [
                word for word, score in zip(words, toxic_scores) if score < threshold
            ]
            nontoxic_text.append(" ".join(nontoxic_words))
        return nontoxic_text

In [8]:
# Load the filtered ParaNMT archive. The relative path presumably points into a
# Google Drive mounted in Colab — TODO confirm the mount happens before this cell.
# create_dataframe prints a message and returns None if the archive or its .tsv is missing.
dataset = create_dataframe("drive/MyDrive/filtered_paranmt.zip")

In [9]:
# Derive the toxic/nontoxic columns, then keep only clearly separated pairs.
# NOTE: `dataset` is rebound here, so re-running this cell without re-running the
# load cell fails: create_new_columns has already dropped the columns it reads.
dataset = create_new_columns(dataset)
dataset = remove_unuseful_data(dataset)

In [11]:
# Row-aligned lists: toxic[i] and nontoxic[i] come from the same row of `dataset`.
toxic, nontoxic = get_sentences(dataset)

In [13]:
# Build the classifier with its default checkpoint, then run the word-deletion
# baseline on the first 20 toxic sentences only.
clf = ToxicityClassifier()

deleted_toxic = clf.delete_toxic(toxic[:20])

Some weights of the model checkpoint at s-nlp/roberta_toxicity_classifier_v1 were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [15]:
# Preview the first ten sentences after word deletion.
deleted_toxic[:10]

['if Alkar floods her with her mental waste, it would explain the high levels of neurotransmitter.',
 "you're becoming",
 'well, we can spare your life.',
 'you have to wake up.',
 'I have orders to her.',
 "I'm not gonna have a child... ...with the same genetic disorder as me who's gonna L...",
 "They're all laughing at us, so we'll kick your",
 'Maine was very short on people back then.',
 'Briggs, what the hell is going on?',
 "another simply didn't know what to do, so whenever he met my brother, he nearly beat the out of him."]

In [16]:
# The corresponding original sentences, for comparison with the word-deleted versions.
toxic[:10]

['if Alkar floods her with her mental waste, it would explain the high levels of neurotransmitter.',
 "you're becoming disgusting.",
 'well, we can spare your life.',
 'monkey, you have to wake up.',
 'I have orders to kill her.',
 "I'm not gonna have a child... ...with the same genetic disorder as me who's gonna die. L...",
 "They're all laughing at us, so we'll kick your ass.",
 'Maine was very short on black people back then.',
 'Briggs, what the hell is going on?',
 "another simply didn't know what to do, so whenever he met my brother, he nearly beat the shit out of him."]

In [17]:
import gc

def cleanup():
    """Run garbage collection and empty PyTorch's CUDA cache.

    Does nothing at all when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        return
    # Collect first, then empty the CUDA cache (same order as before).
    gc.collect()
    torch.cuda.empty_cache()

cleanup()