### Import libraries and Setup

In [2]:
import pandas as pd
from transformers import T5Tokenizer
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

### Load dataset

In [3]:
# Load the TSV file
file_path = "/kaggle/input/paradetox/paradetox.tsv"
df = pd.read_csv(file_path, sep="\t", encoding="utf-8")

# Display the first few rows to verify
print(df.head())
print(f"Dataset size: {len(df)}")

                                               toxic  \
0                           he had steel balls too !   
1  dude should have been taken to api , he would ...   
2  im not gonna sell the fucking picture , i just...   
3  the garbage that is being created by cnn and o...   
4  the reason they dont exist is because neither ...   

                                            neutral1  \
0                                  he was brave too!   
1  It would have been good if he went to api. He ...   
2  I'm not gonna sell the picture, i just want to...   
3  the news that is being created by cnn and othe...   
4  The reason they don't exist is because neither...   

                                            neutral2  \
0                                                NaN   
1                                                NaN   
2                                                NaN   
3  The news that is being created by cnn and othe...   
4                                             

### Data merging and processing

In [4]:
# Melt the DataFrame to pair each toxic sentence with all neutral paraphrases
df_melted = pd.melt(
    df,
    id_vars=["toxic"],
    value_vars=["neutral1", "neutral2", "neutral3"],
    var_name="neutral_type",
    value_name="target_text"
)

# Remove rows where target_text is NaN or empty
df_melted = df_melted[
    df_melted["target_text"].notna() & (df_melted["target_text"].str.strip() != "")
].reset_index(drop=True)

# Add task prefix to toxic sentences
prefix = "detoxify: "
df_melted["input_text"] = prefix + df_melted["toxic"]

# Drop unnecessary column and rename
df_melted = df_melted[["input_text", "target_text"]]

print(df_melted.head(8))
print(f"Expanded dataset size: {len(df_melted)}")

                                          input_text  \
0                 detoxify: he had steel balls too !   
1  detoxify: dude should have been taken to api ,...   
2  detoxify: im not gonna sell the fucking pictur...   
3  detoxify: the garbage that is being created by...   
4  detoxify: the reason they dont exist is becaus...   
5      detoxify: i hope they beat each other silly .   
6  detoxify: no good bastards that we are and we ...   
7       detoxify: stop the coverage and let em rot .   

                                         target_text  
0                                  he was brave too!  
1  It would have been good if he went to api. He ...  
2  I'm not gonna sell the picture, i just want to...  
3  the news that is being created by cnn and othe...  
4  The reason they don't exist is because neither...  
5                       I think they beat each other  
6  no good people that we are and we are unrepent...  
7                           kindly stop the coverage  


We can see that some detoxified data has the same sentence as the toxic input, which isn't ideal. We will do a deeper filtering.

In [5]:
# Additional filter to remove identical pairs
df_melted = df_melted[df_melted["input_text"] != prefix + df_melted["target_text"]].reset_index(drop=True)
print(f"Filtered dataset size: {len(df_melted)}")

Filtered dataset size: 19617


### Load tokenizer

In [6]:
# Load T5 tokenizer
tokenizer = T5Tokenizer.from_pretrained("t5-base")

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [7]:
# Tokenization function
def tokenize_batch(texts, max_length=128):
    encodings = tokenizer(
        texts.tolist(),
        max_length=max_length,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )
    return encodings["input_ids"], encodings["attention_mask"]

# Tokenize inputs and targets
input_ids, input_masks = tokenize_batch(df_melted["input_text"])
target_ids, target_masks = tokenize_batch(df_melted["target_text"])

# Shift target_ids for teacher forcing
target_ids_shifted = torch.cat([torch.full((target_ids.size(0), 1), tokenizer.pad_token_id), target_ids[:, :-1]], dim=1)

print(f"Input IDs shape: {input_ids.shape}")
print(f"Target IDs shape: {target_ids.shape}")

Input IDs shape: torch.Size([19617, 128])
Target IDs shape: torch.Size([19617, 128])


### Dataset Split

In [8]:
# Split indices
train_idx, temp_idx = train_test_split(range(len(df_melted)), test_size=0.2, random_state=42)
val_idx, test_idx = train_test_split(temp_idx, test_size=0.5, random_state=42)

# Create splits
train_data = {
    "input_ids": input_ids[train_idx],
    "input_masks": input_masks[train_idx],
    "target_ids": target_ids[train_idx],
    "target_masks": target_masks[train_idx],
    "target_ids_shifted": target_ids_shifted[train_idx]
}

val_data = {
    "input_ids": input_ids[val_idx],
    "input_masks": input_masks[val_idx],
    "target_ids": target_ids[val_idx],
    "target_masks": target_masks[val_idx],
    "target_ids_shifted": target_ids_shifted[val_idx]
}

test_data = {
    "input_ids": input_ids[test_idx],
    "input_masks": input_masks[test_idx],
    "target_ids": target_ids[test_idx],
    "target_masks": target_masks[test_idx],
    "target_ids_shifted": target_ids_shifted[test_idx]
}

print(f"Train size: {len(train_idx)}, Val size: {len(val_idx)}, Test size: {len(test_idx)}")

Train size: 15693, Val size: 1962, Test size: 1962


### PyTorch Dataset and DataLoader

In [9]:
class DetoxDataset(Dataset):
    def __init__(self, data):
        self.data = data
    
    def __len__(self):
        return len(self.data["input_ids"])
    
    def __getitem__(self, idx):
        return {
            "input_ids": self.data["input_ids"][idx],
            "attention_mask": self.data["input_masks"][idx],
            "labels": self.data["target_ids_shifted"][idx],
            "decoder_attention_mask": self.data["target_masks"][idx]
        }

# Instantiate datasets
train_dataset = DetoxDataset(train_data)
val_dataset = DetoxDataset(val_data)
test_dataset = DetoxDataset(test_data)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

# Test a batch
batch = next(iter(train_loader))
print("Sample batch:", {k: v.shape for k, v in batch.items()})

Sample batch: {'input_ids': torch.Size([16, 128]), 'attention_mask': torch.Size([16, 128]), 'labels': torch.Size([16, 128]), 'decoder_attention_mask': torch.Size([16, 128])}


### Verification

In [10]:
# Filter original DataFrame for a row with multiple neutrals
sample_row = df[df["neutral2"].notna()].iloc[0]
print("Original toxic:", sample_row["toxic"])
print("Neutral1:", sample_row["neutral1"])
print("Neutral2:", sample_row["neutral2"])
print("Neutral3:", sample_row["neutral3"])

# Check corresponding rows in melted DataFrame
print("\nExpanded pairs:")
print(df_melted[df_melted["input_text"] == prefix + sample_row["toxic"]])

Original toxic: the garbage that is being created by cnn and other news agencies is outrageous .
Neutral1: the news that is being created by cnn and other news agencies is outrageous.
Neutral2: The news that is being created by cnn and other news agencies is outrageous.
Neutral3: the garbage that is being created by cnn and other news agencies is outrageous .

Expanded pairs:
                                              input_text  \
3      detoxify: the garbage that is being created by...   
11830  detoxify: the garbage that is being created by...   

                                             target_text  
3      the news that is being created by cnn and othe...  
11830  The news that is being created by cnn and othe...  


The sentence that's identical with the toxic one has been filtered out.

In [11]:
from transformers import T5ForConditionalGeneration, AdamW

# Load T5 model
model = T5ForConditionalGeneration.from_pretrained("t5-base")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimizer
optimizer = AdamW(model.parameters(), lr=3e-5)

# Training loop
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        decoder_attention_mask = batch["decoder_attention_mask"].to(device)
        
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels,
            decoder_attention_mask=decoder_attention_mask
        )
        
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Loss: {avg_loss:.4f}")
    
    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            decoder_attention_mask = batch["decoder_attention_mask"].to(device)
            
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels,
                decoder_attention_mask=decoder_attention_mask
            )
            val_loss += outputs.loss.item()
    
    avg_val_loss = val_loss / len(val_loader)
    print(f"Validation Loss: {avg_val_loss:.4f}")

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch 1/3, Average Loss: 0.3629
Validation Loss: 0.1289
Epoch 2/3, Average Loss: 0.1364
Validation Loss: 0.1110
Epoch 3/3, Average Loss: 0.1200
Validation Loss: 0.1044


### Test

In [12]:
model.eval()
test_sentence = "detoxify: You're a fucking moron!"
inputs = tokenizer(test_sentence, return_tensors="pt", max_length=128, 
                   padding="max_length", truncation=True)
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)

# Generate detoxified output
outputs = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_length=128,
    num_beams=4,
    early_stopping=True
)

detoxified = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"Toxic: {test_sentence}")
print(f"Detoxified: {detoxified}")

Toxic: detoxify: You're a fucking moron!
Detoxified: You're a bad person!


In [13]:
model.save_pretrained("/kaggle/working/")