# DETOXIFICATION

In [16]:
%pip install -q torch
%pip install -q transformers[torch]
%pip install -q googletrans==3.1.0a0
%pip install -q fastparquet
%pip install sentencepiece



In [17]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from googletrans import Translator
from transformers import Trainer, TrainingArguments, T5Tokenizer, T5ForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

## Lectura de datos

Vamos a leer el dataset de entrenamiento. Este dataset esta formado por 400 frases tóxicas por idioma en 9 idiomas diferentes y sus versiones no tóxicas. Formaremos un dataset conjunto que contenga para cada frase, su idioma de origen, la frase tóxica y la frase de-toxificada.

In [18]:
splits = {'en': 'data/en-00000-of-00001.parquet', 'ru': 'data/ru-00000-of-00001.parquet', 'uk': 'data/uk-00000-of-00001.parquet', 'de': 'data/de-00000-of-00001.parquet', 'es': 'data/es-00000-of-00001.parquet', 'am': 'data/am-00000-of-00001.parquet', 'zh': 'data/zh-00000-of-00001.parquet', 'ar': 'data/ar-00000-of-00001.parquet', 'hi': 'data/hi-00000-of-00001.parquet'}
df_en = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["en"])
df_uk = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["uk"])
df_ru = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["ru"])
df_de = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["de"])
df_es = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["es"])
df_am = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["am"])
df_zh = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["zh"])
df_ar = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["ar"])
df_hi = pd.read_parquet("hf://datasets/textdetox/multilingual_paradetox/" + splits["hi"])


In [19]:
df_total = pd.concat([df_en, df_uk, df_ru, df_de, df_es, df_am, df_zh, df_ar, df_hi], keys=['en','uk', 'ru', 'de', 'es', 'am', 'zh-cn', 'ar', 'hi']).reset_index(level=[0])

In [20]:
df_total.sample(3)

Unnamed: 0,level_0,toxic_sentence,neutral_sentence
140,de,"Gabriel, was bist Du für ein scheinheiliger Au...","Gabriel, was bist Du für ein unaufrichtiger Au..."
1,es,Conclusión: pura mierda de paranormal....,Conclusión: pura de paranormal.
349,hi,"मादरचोद, मां के लोड़े, लड़ पाते नहीं हो","बातें तो बड़ी करते हो, लड़ पाते नहीं हो"


### Train-test split

In [21]:
train, test = train_test_split(df_total, train_size=0.8, random_state=33)

X_train = list(train['toxic_sentence'])
y_train = list(train['neutral_sentence'])

X_test = list(test['toxic_sentence'])
y_test = list(test['neutral_sentence'])
len(X_test)

720

## Pipeline de detoxificación

Nuestro modelo de detoxificación tendrá un pipeline que consistirá de los siguientes pasos:

1. **Traducción**: Traduciremos las frases del idioma original al inglés
2. **Detoxificación**: El modelo detoxificará la frase en inglés
3. **Retro-traducción**: La frase detoxificada será traducida del inglés al idioma original

### Traducción y retrotraducción

Usaremos la libreria de google translate para crear un modelo que traduzca un texto de un lenguaje a otro, pasados por argumentos.

In [22]:
from googletrans import Translator

translator = Translator()

In [23]:
def translate(text, source_lang, dest_lang):
    if source_lang == dest_lang:
        return text

    sol = translator.translate(text , src = source_lang, dest=dest_lang).text
    return sol

In [24]:
test_trad = df_total.sample(5).to_numpy()

for sentence in test_trad:
    print(f"Original sentence ({sentence[0]}): {sentence[1]}")
    translation = translate(sentence[1], sentence[0], "en")
    print(f"Transalation: {translation}")
    print("-"*50)

Original sentence (am): 3:30 Jaal Maroo አልጀዚራ ላይ ይቀርባል ሲባል አጅሬው 3 ሰዓት ላይ ሙሉ ከተማ መብራት ድርግም
Transalation: 3:30 Jaal Maroo will be presented on Al Jazeera.
--------------------------------------------------
Original sentence (uk): На колір та смак товариш не всякДля когось і Хакслі - хуйовий автор
Transalation: In terms of color and taste, a friend is not for everyone. For some, Huxley is a fucking author
--------------------------------------------------
Original sentence (am): እውነት ነው አባት እንደ ልጅ አይጨክንም . የዚህ አረመኔ ጭካኔ ግን በዛ
Transalation: It is true that a father is not cruel like a child. But the cruelty of this barbarian
--------------------------------------------------
Original sentence (es): JAJAJAJAJJAJA que vas a eclipsar tú y el otro subnormal xd
Transalation: HAHAHAHAHAHA, you and the other subnormal are going to outshine you xd
--------------------------------------------------
Original sentence (ar): انسان غبي وردوك مفهاش احترام للى قدامك ومعرفش مستحمل تبقى كدا ازاى .
Transalat

## Detoxificación

Ahora haremos un finetunning de un modelo de Huggingface para detoxificar el código.

In [25]:
model_name = "google/flan-t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

num_training_layers = 2 # El número de capas que se entrenarán

# Congelar todos los parámetros del modelo
for param in model.parameters():
    param.requires_grad = False

# Descongelar las últimas n capas del decoder
for param in model.decoder.block[-num_training_layers:].parameters():
    param.requires_grad = True

# Mantener la capa de salida (`lm_head`) entrenable
for param in model.lm_head.parameters():
    param.requires_grad = True

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("ESTADÍSTICAS DEL ENTRENAMIENTO:")
print(f"Total de parámetros: {total_params:,}")
print(f"Parámetros entrenables: {trainable_params:,}")
print(f"Parámetros congelados: {total_params - trainable_params:,}")

ESTADÍSTICAS DEL ENTRENAMIENTO:
Total de parámetros: 76,961,152
Parámetros entrenables: 22,744,064
Parámetros congelados: 54,217,088


#### Creación de los dataset

In [26]:
class DetoxificationDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, target_encodings):
        self.encodings = encodings
        self.target_encodings = target_encodings

    def __len__(self):
        return len(self.encodings['input_ids'])

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = self.target_encodings['input_ids'][idx].clone().detach()
        # Reemplazar pad_token_id con -100 para ignorar tokens de relleno en la pérdida
        item["labels"][item["labels"] == tokenizer.pad_token_id] = -100

        return item

In [27]:
X_train_tokenized = tokenizer(X_train, truncation=True, padding=True, return_tensors="pt")
y_train_tokenized = tokenizer(y_train, truncation=True, padding=True, return_tensors="pt")

X_test_tokenized = tokenizer(X_test, truncation=True, padding=True, return_tensors="pt")
y_test_tokenized = tokenizer(y_test, truncation=True, padding=True, return_tensors="pt")

train_dataset = DetoxificationDataset(X_train_tokenized, y_train_tokenized)
val_dataset = DetoxificationDataset(X_test_tokenized, y_test_tokenized)

train_dataset = DetoxificationDataset(X_train_tokenized, y_train_tokenized)
val_dataset = DetoxificationDataset(X_test_tokenized, y_test_tokenized)

print("Tamaño del dataset de entrenamiento:", len(train_dataset))
print("Ejemplo del dataset:", train_dataset[0])


training_args = TrainingArguments(
    output_dir='./checkpoints',
    eval_strategy="no",
    per_device_train_batch_size=128,
    learning_rate=1e-4,
    num_train_epochs=5,
    report_to=["none"],
    fp16=True, # acelerar entrenaminento
)


Tamaño del dataset de entrenamiento: 2880
Ejemplo del dataset: {'input_ids': tensor([ 3,  2,  3,  2,  3,  2,  3,  2,  3,  2,  3,  2,  3,  2,  3,  2,  3,  2,
         3,  2,  3,  2,  3,  2,  3,  2,  3,  2, 55,  1,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 

## Entrenamiento

In [28]:

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
)

# Entrenar el modelo
trainer.train()



  trainer = Trainer(


Step,Training Loss


TrainOutput(global_step=115, training_loss=0.0, metrics={'train_runtime': 67.78, 'train_samples_per_second': 212.452, 'train_steps_per_second': 1.697, 'total_flos': 601239601152000.0, 'train_loss': 0.0, 'epoch': 5.0})

In [33]:
inputs = tokenizer(X_train[:10], truncation=True, padding=True, return_tensors="pt").to(device)
labels = tokenizer(y_train[:10], truncation=True, padding=True, return_tensors="pt").input_ids.to(device)
# Pasa los datos por el modelo y calcula la pérdida
outputs = model(**inputs, labels=labels)
loss = outputs.loss
loss.back

# Imprime los gradientes de las capas entrenables
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"{name} grad: {param.grad}")



decoder.block.6.layer.0.SelfAttention.q.weight grad: tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], device='cuda:0')
decoder.block.6.layer.0.SelfAttention.k.weight grad: tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], device='cuda:0')
decoder.block.6.layer.0.SelfAttention.v.weight grad: tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., 