In [1]:
!pip install datasets -q

[0m

In [2]:
!pip install accelerate -q

[0m

In [9]:
!pip install transformers[sentencepiece] sentencepiece -q

[0m

## Few-shot experiments

In [None]:
!pip install langdetect

In [None]:
import random

from langdetect import detect, LangDetectException

class PropmtSpitter:
    """Build few-shot detoxification prompts from parallel toxic/neutral pairs.

    Parameters
    ----------
    detox_examples : mapping
        Maps a two-letter language code to a table exposing two aligned
        columns, ``"toxic_sentence"`` and ``"neutral_sentence"`` (for example
        a Hugging Face ``DatasetDict`` or a plain dict of lists).
    n_shots : int, default 3
        Maximum number of demonstration pairs placed before the query.
    """

    def __init__(self, detox_examples, n_shots=3):
        self.detox_examples = detox_examples
        self.n_shots = n_shots

    def __call__(self, input_text, lang=None):
        """Return a prompt with up to ``n_shots`` random demonstrations.

        Parameters
        ----------
        input_text : str
            Toxic sentence that the model should rewrite.
        lang : str, optional
            Language code selecting the demonstration pool. When omitted it
            is guessed with ``langdetect`` (first two characters of the
            detected code).

        Returns
        -------
        str
            Demonstrations followed by the query, ending with
            ``'\\nNeutral text: '`` so the model continues with the rewrite.

        Raises
        ------
        KeyError
            If there is no demonstration pool for ``lang``.
        """
        # An instruction header used to be prepended here but is disabled:
        #   "Rewrite input toxic text as neutral by replacing toxic words with
        #    neutral words or by removing them completely. Here are some examples:"
        if lang is None:
            try:
                lang = detect(input_text)[:2]
            except LangDetectException:
                lang = 'am' # only one not supported in langdetect lib

        if lang not in self.detox_examples:
            raise KeyError(
                f"no few-shot examples for language {lang!r}; "
                f"available: {list(self.detox_examples)}"
            )

        # Read each column once instead of once per sampled index.
        examples = self.detox_examples[lang]
        toxic_column = examples["toxic_sentence"]
        neutral_column = examples["neutral_sentence"]

        # Never show the query itself as a demonstration: that would hand the
        # model the reference answer when the query comes from the same pool.
        candidates = [i for i, sent in enumerate(toxic_column) if sent != input_text]
        # Cap the sample size so small pools do not make random.sample fail.
        idx = random.sample(candidates, min(self.n_shots, len(candidates)))

        prompt = "".join(
            '\nToxic text: ' + toxic_column[i] + '\nNeutral text: ' + neutral_column[i]
            for i in idx
        )
        prompt += '\nToxic text: ' + input_text + '\nNeutral text: '
        return prompt

In [None]:
from datasets import load_dataset

dataset_hf = load_dataset("textdetox/multilingual_paradetox")   # this is our dataset for the competition

In [None]:
# Prompt builder backed by the competition dataset; it is reused by the
# generation loop further down.
spitter = PropmtSpitter(dataset_hf)
# Smoke test: no `lang` is passed, so the language is guessed from the text.
print(spitter("You little piece of shit!"))

In [None]:
# pip install accelerate
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the mGPT checkpoint (GPT-2 architecture classes) used for the few-shot
# generation experiment below.
tokenizer = GPT2Tokenizer.from_pretrained("sberbank-ai/mGPT")
model = GPT2LMHeadModel.from_pretrained("sberbank-ai/mGPT")

In [None]:
# Qualitative check: for every language, detoxify one random dataset sentence
# with the few-shot prompt and print it next to the reference rewrite.
lang_codes = ['en', 'ru', 'uk', 'de', 'es', 'am', 'zh', 'ar', 'hi']
for lang in lang_codes:
    # Fetch the sampled row once; it holds both sides of the pair.
    example = dataset_hf[lang][random.randrange(len(dataset_hf[lang]))]
    toxic_example = example["toxic_sentence"]
    neutral_example = example["neutral_sentence"]

    print("Toxic example: " + toxic_example)
    print("Neutral example: " + neutral_example)

    # The language is known here, so pass it explicitly rather than letting
    # the prompt builder guess it (a wrong guess picks the wrong example pool
    # or fails with KeyError).
    prompted_input = spitter(toxic_example, lang=lang)
    input_ids = tokenizer(prompted_input, return_tensors="pt").input_ids
    out = model.generate(
        input_ids,
        min_length=20,
        max_length=512,  # NOTE(review): counts prompt tokens too
        eos_token_id=5,  # NOTE(review): magic id, presumably a line-break token in the mGPT vocab - confirm
        #pad_token=1,
        do_sample=True,
        top_k=0,
        top_p=0.8,
        no_repeat_ngram_size=4
    )

    # Decode only what the model appended after the prompt. Splitting the full
    # text on "Neutral text:" and taking the last piece returns a hallucinated
    # extra pair whenever the model keeps extending the few-shot pattern.
    new_tokens = out[0][input_ids.shape[1]:]
    continuation = tokenizer.decode(new_tokens, skip_special_tokens=True)
    # The rewrite is the first non-empty line of the continuation.
    detox_generation = next(
        (line.strip() for line in continuation.splitlines() if line.strip()),
        "",
    )
    print("Model output: " + detox_generation)
    print("\n")
    print("---"*5)

Problems with the approaches tried so far:
- LLaMA - I was not given access to the weights (raaaaurgh)
- flan-T5 - the published model works well only for English; quality on the other languages is poor
- mGPT (by Sber) - hallucinates and duplicates the input

New things to try:

- produce predictions with the delete baseline and the mT5 baseline
- use a zero-shot pretrained multilingual model (like XGLM) in a COPA-style task (choosing between the two alternatives)

### COPA (Choice of Plausible Alternatives)

In [1]:
import pandas as pd

In [2]:
# Predictions of the two baselines that COPA-style scoring will choose between.
# The mT5 file is tab-separated; the delete-baseline file uses pandas' default
# comma separator.
mt5_predictions = pd.read_csv('mt5_sub_dev.tsv', sep='\t')
delete_predictions = pd.read_csv('delete_baseline_dev.csv')

In [3]:
# Order rows by (lang, toxic_sentence). The delete-baseline frame is sorted the
# same way so the two can be paired positionally; the original index is kept
# and later used to restore the initial row order.
mt5_predictions = mt5_predictions.sort_values(by=['lang', 'toxic_sentence'])

In [4]:
# Same ordering as the mT5 frame so that row i of both frames is expected to
# refer to the same toxic input.
delete_predictions = delete_predictions.sort_values(by=['lang', 'toxic_sentence'])

In [23]:
from transformers import XGLMTokenizer, XGLMForCausalLM

# XGLM checkpoint used as the zero-shot scorer. `model_id` is also embedded in
# the submission file name written at the end of the notebook.
model_id="xglm-1.7B"
tokenizer = XGLMTokenizer.from_pretrained(f"facebook/{model_id}")
model = XGLMForCausalLM.from_pretrained(f"facebook/{model_id}")



tokenizer_config.json:   0%|          | 0.00/335 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/4.92M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/276 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [24]:
# Move the scorer's weights to the default CUDA device (requires a GPU).
model = model.cuda()

In [26]:
import torch
import torch.nn.functional as F

from tqdm import tqdm

@torch.no_grad()
def get_logprobs(prompt):
    """Score every token of ``prompt`` given the tokens before it.

    Uses the module-level ``tokenizer`` and ``model``.

    Parameters
    ----------
    prompt : str
        Text to score.

    Returns
    -------
    torch.Tensor
        Tensor of shape ``(1, T - 1, 1)`` where ``T`` is the number of tokens
        produced by the tokenizer; entry ``t`` is the log-probability the
        model assigns to token ``t + 1`` given tokens ``0..t``. The first
        token has no context and is therefore not scored.
    """
    # Follow the model's own device instead of hard-coding CUDA.
    device = model.device
    encoded = tokenizer(prompt, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    # No `labels`: only the logits are needed, so skip the unused loss.
    logits = model(**inputs).logits

    # logits[:, t] predicts the token at position t + 1, so drop the last
    # position and line the rest up with the input shifted left by one.
    targets = inputs["input_ids"][:, 1:].unsqueeze(2)
    logprobs = torch.gather(F.log_softmax(logits[:, :-1], dim=2), 2, targets)
    return logprobs

@torch.no_grad()
def copa_eval(prompt, alternative1, alternative2):
    """Pick the continuation of ``prompt`` that the language model prefers.

    Each alternative is appended to ``prompt`` after a newline and the summed
    token log-probability of the whole text (from ``get_logprobs``) is
    compared. Scores are plain sums, i.e. not normalised by length.

    Parameters
    ----------
    prompt : str
        Shared premise text.
    alternative1, alternative2 : str
        Candidate continuations.

    Returns
    -------
    int
        ``0`` if ``alternative1`` scores strictly higher, otherwise ``1``
        (ties go to ``alternative2``).
    """
    score_first = get_logprobs(prompt + "\n" + alternative1).sum()
    score_second = get_logprobs(prompt + "\n" + alternative2).sum()
    if score_first > score_second:
        return 0
    return 1

# For every toxic input, let the language model choose between the mT5 rewrite
# and the delete-baseline rewrite.

# The two frames are paired purely by position, so verify that they really
# describe the same inputs in the same order before scoring anything.
key_columns = ["lang", "toxic_sentence"]
mt5_keys = mt5_predictions[key_columns].reset_index(drop=True)
delete_keys = delete_predictions[key_columns].reset_index(drop=True)
if not mt5_keys.equals(delete_keys):
    raise ValueError(
        "mt5_predictions and delete_predictions are not aligned on "
        "(lang, toxic_sentence); refusing to pair rows by position"
    )

copa_predictions = []
model.eval()
paired_rows = zip(
    mt5_predictions["toxic_sentence"],
    mt5_predictions["neutral_sentence"],
    delete_predictions["neutral_sentence"],
)
for toxic_premise, alt1, alt2 in tqdm(paired_rows, total=len(mt5_predictions)):
    if alt1 == alt2:
        # Both baselines agree; the choice is irrelevant, so skip two forward passes.
        copa_predictions.append(alt1)
        continue

    prompt = f'Is sentence {toxic_premise} close to '

    predict = copa_eval(prompt, alt1, alt2)
    copa_predictions.append(alt1 if predict == 0 else alt2)

100%|███████████████████████████████████████████████████████████████████████████████| 3600/3600 [06:52<00:00,  8.73it/s]


In [27]:
# Build the submission: take the mT5 frame, swap in the rewrite chosen for each
# row, restore the original row order via the preserved index, and write a
# tab-separated file without the index column.
copa_subm_df = mt5_predictions.assign(neutral_sentence=copa_predictions).sort_index()
submission_path = f"copa_xglm_{model_id}_submission.tsv"
copa_subm_df.to_csv(submission_path, sep="\t", index=False)