# Exploring the FLAN model for text detoxification



In [1]:
# installing huggingface libraries for dataset, models and metrics
!pip install datasets transformers[sentencepiece] sacrebleu

!pip install numpy==1.24.3

Collecting sacrebleu
  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.9/118.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting portalocker (from sacrebleu)
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-2.8.2 sacrebleu-2.3.1
Collecting numpy==1.24.3
  Downloading numpy-1.24.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m60.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.23.5
    Uninstalling numpy-1.23.5:
      Successfully uninstalled numpy-1.23.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source

In [2]:
!pip install transformers[torch]
!pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.24.1-py3-none-any.whl (261 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m00:01[0m
Installing collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.22.0
    Uninstalling accelerate-0.22.0:
      Successfully uninstalled accelerate-0.22.0
Successfully installed accelerate-0.24.1


In [3]:
# Necessary inputs
import warnings

from datasets import load_dataset, load_metric
import transformers
import datasets
import random
import pandas as pd
import numpy as np
import torch
from IPython.display import display, HTML

warnings.filterwarnings('ignore')

## Selecting the model
Here I simply use the same model as in the 2.0 Python notebook.

In [4]:
# selecting model checkpoint
model_checkpoint = "t5-small"
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained(model_checkpoint)

# The original cell retried the exact same import + load once inside a bare
# `except:`. The single retry is kept (best-effort behaviour), but written as a
# bounded loop so the code is not duplicated, the first failure is reported
# instead of silently swallowed, and KeyboardInterrupt/SystemExit are no longer
# caught.
_MAX_LOAD_ATTEMPTS = 2
for _attempt in range(1, _MAX_LOAD_ATTEMPTS + 1):
    try:
        from transformers import (
            AutoModelForSeq2SeqLM,
            DataCollatorForSeq2Seq,
            Seq2SeqTrainingArguments,
            Seq2SeqTrainer,
            AutoConfig,
        )
        model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
        # Use the GPU when one is available, otherwise stay on the CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
        break
    except Exception as load_error:
        if _attempt == _MAX_LOAD_ATTEMPTS:
            # Second failure: propagate, exactly as the original except-branch would.
            raise
        print(f"Loading '{model_checkpoint}' failed ({load_error!r}); retrying once...")

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

## Loading the dataset

In [5]:
# setting random seed for transformers library
transformers.set_seed(42)

# Load the BLEU metric (sacreBLEU implementation)
metric = load_metric("sacrebleu")

Downloading builder script:   0%|          | 0.00/2.85k [00:00<?, ?B/s]

In [6]:
df = pd.read_csv("/kaggle/input/processdcorpus/processed.tsv", sep ="\t")
df = df.rename(columns={"translation": "target", "reference": "text"})
df.head(5)

Unnamed: 0,text,target,similarity,lenght_diff,ref_tox,trn_tox
0,I've spent the last seven years with four smel...,I've spent the last seven years with four men.,0.95,0.12963,0.990042,4.1e-05
1,tell me something I don't fucking know.,Tell me something I don't know!,0.949999,0.2,0.971418,7.9e-05
2,you're such a fucking tough guy.,You're such a Tough guy.,0.949997,0.242424,0.977852,0.000238
3,someone tried to kill her with an injection of...,Someone tried to kill her by injecting the virus?,0.949996,0.137931,0.980873,0.10218
4,I could look at women's legs for hours.,I could look at a woman's legs for hours.,0.949996,0.047619,0.941838,0.006438


## Dataset
The processed corpus is shuffled, split 60/20/20 and wrapped in a HuggingFace `DatasetDict`. It contains the keys `["train", "test", "validation"]`, which represent the dataset splits.

In [7]:
# Shuffle every row once, then cut at the 60% and 80% marks:
# first 60% -> train, next 20% -> validate, last 20% -> test.
shuffled = df.sample(frac=1)
train_end = int(.6*len(df))
validate_end = int(.8*len(df))
train = shuffled.iloc[:train_end]
validate = shuffled.iloc[train_end:validate_end]
test = shuffled.iloc[validate_end:]

In [8]:
# Wrap each pandas split in a HuggingFace Dataset (same order as before:
# train, test, validation).
train_dataset, test_dataset, validation_dataset = (
    datasets.Dataset.from_dict(split) for split in (train, test, validate)
)

In [9]:
# Bundle the three splits under their conventional names.
raw_datasets = datasets.DatasetDict(
    {
        "train": train_dataset,
        "test": test_dataset,
        "validation": validation_dataset,
    }
)

## Preprocessing the data
As usual we will need to preprocess data and tokenize it before passing to model

In [10]:
# prefix for model input
prefix = "Detoxify this sentence:"

In [11]:
max_input_length = 128
max_target_length = 128
source_lang = "toxic"
target_lang = "detoxified"
padding = "max_length"

def preprocess_function(examples):
    """Tokenize a batch of (toxic, detoxified) sentence pairs for seq2seq training.

    Args:
        examples: a batch mapping with a "text" column (source sentences) and a
            "target" column (reference rewrites), as passed by `Dataset.map`
            with `batched=True`.

    Returns:
        The tokenizer output for the prefixed source sentences, with an extra
        "labels" entry holding the token ids of the targets.

    Note:
        An earlier definition of this function in the same cell was silently
        shadowed by this one (it also carried a debug `print` and did not
        truncate). Only the definition that was actually in effect is kept.
    """
    # `prefix` is concatenated as-is, with no separator added here.
    inputs = [prefix + doc for doc in examples["text"]]
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)

    # Setup the tokenizer for targets
    labels = tokenizer(text_target=examples["target"], max_length=max_target_length, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [13]:
# example of preprocessing
preprocess_function(raw_datasets['train'][:2])

{'input_ids': [[374, 235, 226, 4921, 48, 7142, 10, 18403, 2335, 16, 1826, 317, 7, 233, 255, 31, 7, 893, 396, 2886, 42, 396, 5551, 42, 396, 424, 5, 1], [374, 235, 226, 4921, 48, 7142, 10, 21310, 35, 6, 428, 140, 39, 1782, 6, 11, 27, 31, 195, 2612, 34, 5, 148, 31, 60, 11446, 5, 1]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'labels': [[334, 2335, 16, 1826, 317, 7, 255, 31, 7, 396, 2886, 42, 396, 28451, 5, 1], [3011, 6, 428, 140, 39, 1782, 6, 11, 27, 31, 195, 2612, 34, 5, 1]]}

In [12]:
cropped_datasets = raw_datasets
tokenized_datasets = cropped_datasets.map(preprocess_function, batched=True)

  0%|          | 0/30 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

In [15]:
type(tokenized_datasets["train"][:2]['labels'][0][0])

int

## Fine-tuning the model

In [18]:
# defining the parameters for training
batch_size = 32
model_name = model_checkpoint.split("/")[-1]
args = Seq2SeqTrainingArguments(
    f"{model_name}-finetuned-detoxify",
    evaluation_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    predict_with_generate=True,
    # Mixed precision is only enabled when CUDA is available: the notebook
    # falls back to the CPU when there is no GPU, and an unconditional
    # fp16=True would break that fallback path.
    fp16=torch.cuda.is_available(),
    report_to='tensorboard',
)

In [19]:
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [20]:
import numpy as np

# simple postprocessing for text
def postprocess_text(preds, labels):
    """Strip surrounding whitespace from decoded predictions and references.

    Each reference is additionally wrapped in a one-element list, so the
    second returned value is a list of lists.

    Returns:
        A `(preds, labels)` tuple of the cleaned predictions and the wrapped,
        cleaned references.
    """
    stripped_preds = []
    for prediction in preds:
        stripped_preds.append(prediction.strip())

    wrapped_labels = []
    for reference in labels:
        wrapped_labels.append([reference.strip()])

    return stripped_preds, wrapped_labels

# compute metrics function to pass to trainer
def compute_metrics(eval_preds):
    """Compute BLEU and the mean generated length for an evaluation batch.

    Args:
        eval_preds: a `(predictions, labels)` pair of token-id arrays. If
            `predictions` is a tuple, only its first element is used.

    Returns:
        A dict with "bleu" (the metric's "score" field) and "gen_len" (mean
        number of non-pad tokens per prediction), both rounded to 4 decimals.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    # Generated sequences can be padded with -100 as well (just like labels);
    # the tokenizer cannot decode that id, so map it to the pad token first.
    preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Some simple post-processing
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}

    # Length of each prediction = number of tokens that are not padding.
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

In [21]:
# instead of writing train loop we will use Seq2SeqTrainer
# (train on the "train" split, evaluate on the "validation" split)
trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss


In [None]:
# saving model
trainer.save_model('best')

In [None]:
from transformers import AutoModelForSeq2SeqLM
# loading the model and run inference for it
model = AutoModelForSeq2SeqLM.from_pretrained('best')
model.eval()
model.config.use_cache = False

In [None]:
def translate(model, inference_request, tokenizer=tokenizer):
    """Generate the model's rewrite of `inference_request`, print it and return it.

    Args:
        model: the seq2seq model used for generation.
        inference_request: the full input string (callers prepend the task
            prefix themselves).
        tokenizer: tokenizer used to encode the request and decode the output.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    input_ids = tokenizer(inference_request, return_tensors="pt").input_ids
    # Keep the inputs on the same device as the model so generation also works
    # when the model lives on the GPU.
    input_ids = input_ids.to(model.device)
    outputs = model.generate(input_ids=input_ids)
    # Decode once and reuse the result. The former `temperature=0` argument was
    # dropped: decoding only maps ids to text, it does no sampling.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(decoded)
    return decoded

In [None]:
inference_request = prefix + 'You are a fucking piece of shit'
translate(model, inference_request,tokenizer)

In [None]:
inference_request = prefix + "kill yourself"
translate(model, inference_request,tokenizer)

The cell below overrides `locale.getpreferredencoding` so that it always returns UTF-8; this is only needed because I'm using Google Colab.

In [None]:
import locale
def getpreferredencoding(do_setlocale = True):
    """Replacement for `locale.getpreferredencoding` that always reports UTF-8.

    `do_setlocale` is accepted so the call signature matches, but it is ignored.
    """
    encoding = "UTF-8"
    return encoding

# Install the override on the `locale` module.
locale.getpreferredencoding = getpreferredencoding

In [None]:
!zip -r /kaggle/working/best_t5.zip /kaggle/working/best

# Inference. Analysis


The code below checks how the model performs on detoxification, using the metrics proposed by the Skolkovo research team.

In [None]:

from transformers import RobertaForSequenceClassification, RobertaTokenizer
clf_name = 'SkolkovoInstitute/roberta_toxicity_classifier_v1'
clf = RobertaForSequenceClassification.from_pretrained(clf_name).to(device);
clf_tokenizer = RobertaTokenizer.from_pretrained(clf_name)

In [None]:
def predict_toxicity(texts):
    """Score a batch of texts with the classifier `clf`.

    Args:
        texts: a list of strings; they are tokenized together with padding.

    Returns:
        A NumPy array with one value per text: the softmax probability of
        class index 1 (presumably the "toxic" class — confirm against the
        classifier's label mapping).
    """
    with torch.inference_mode():
        encoded = clf_tokenizer(texts, return_tensors='pt', padding=True)
        encoded = encoded.to(clf.device)
        logits = clf(**encoded).logits
        probabilities = torch.softmax(logits, -1)
        class_one_scores = probabilities[:, 1].cpu().numpy()
    return class_one_scores

In [None]:
predict_toxicity(['hello world', 'hello aussie', 'hello fucking bitch'])

In [None]:
inference_request = 'This model is not fucking working'

predict_toxicity([inference_request, translate(model, prefix + inference_request,tokenizer)])

In [None]:
from tqdm import tqdm
def get_translation(model, inference_request, tokenizer=tokenizer):
    """Generate the model's rewrite of `inference_request` and return it (no printing).

    Args:
        model: the seq2seq model used for generation.
        inference_request: the full input string (callers prepend the task
            prefix themselves).
        tokenizer: tokenizer used to encode the request and decode the output.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    input_ids = tokenizer(inference_request, return_tensors="pt").input_ids
    # Keep the inputs on the same device as the model so generation also works
    # when the model lives on the GPU.
    input_ids = input_ids.to(model.device)
    outputs = model.generate(input_ids=input_ids)
    # The former `temperature=0` argument was dropped: decoding only maps ids
    # to text, it does no sampling.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)




def check_detoxicity(model, tokenizer = tokenizer, dataset = tokenized_datasets["test"].select(range(300))):
    """Run the model over every row of `dataset` and collect its rewrites.

    Args:
        model: the seq2seq model to evaluate.
        tokenizer: tokenizer forwarded to `get_translation` (it was previously
            accepted but ignored).
        dataset: iterable of rows with a 'text' field. The default (first 300
            rows of the tokenized test split) is evaluated once, when this
            function is defined.

    Returns:
        A NumPy array with one generated string per row, in dataset order.
    """
    answers = [
        get_translation(model, prefix + data_row['text'], tokenizer)
        for data_row in tqdm(dataset)
    ]
    return np.array(answers)

In [None]:
test_csv = check_detoxicity(model)
test_csv

In [None]:
test_df =  pd.DataFrame(test_csv)

In [None]:
test_df.to_csv(model_name+"_test.csv")

In [None]:
test_df.to_csv()

In [22]:
origin_test = pd.DataFrame(np.array(tokenized_datasets["test"].select(range(300))['text']))
origin_test

Unnamed: 0,0
0,like the guy with tattoo on his butt?
1,maybe somebody threw a pie in his face.
2,Who's got my goddamn cigarette?
3,"Look, if h.G. Shot her with the same gun That ..."
4,"'all human children will die, Nogusto."
...,...
295,you're a drunk who never buys a round.
296,These servants are just like animals.
297,She's a wreck.
298,"Your muscles will stop working, and you'll han..."


In [25]:
origin_test = pd.DataFrame(np.array(tokenized_datasets["test"].select(range(300))['target']))
origin_test

Unnamed: 0,0
0,"Oh, like the guy with the butt tattoo?"
1,Hmm. Maybe someone threw a pie in his face.
2,who's got my cigarette?
3,"look, if H.G. shot her with the same gun as us..."
4,"'All of Man's children die, Nogusta."
...,...
295,You're becoming the drunk who never buys a round.
296,servants are like animals.
297,it's a wreck.
298,"the muscles stop working, and you hang yourself."


In [26]:
origin_test.to_csv("origin_target_test.csv")