In [2]:
!pip install loguru

Collecting loguru
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Downloading loguru-0.7.3-py3-none-any.whl (61 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: loguru
Successfully installed loguru-0.7.3


In [4]:
!pip install transformers==4.39.3 peft==0.10.0

Collecting peft==0.10.0
  Using cached peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Using cached peft-0.10.0-py3-none-any.whl (199 kB)
Installing collected packages: peft
  Attempting uninstall: peft
    Found existing installation: peft 0.17.0
    Uninstalling peft-0.17.0:
      Successfully uninstalled peft-0.17.0
Successfully installed peft-0.10.0


In [2]:
!pip uninstall torch
!pip install torch

Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
  Would remove:
    /usr/local/bin/torchfrtrace
    /usr/local/bin/torchrun
    /usr/local/lib/python3.11/dist-packages/functorch/*
    /usr/local/lib/python3.11/dist-packages/torch-2.6.0+cu124.dist-info/*
    /usr/local/lib/python3.11/dist-packages/torch/*
    /usr/local/lib/python3.11/dist-packages/torchgen/*
Proceed (Y/n)? y
  Successfully uninstalled torch-2.6.0+cu124
Collecting torch
  Downloading torch-2.8.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7

In [15]:
!pip install -U peft

Collecting peft
  Downloading peft-0.17.0-py3-none-any.whl.metadata (14 kB)
Downloading peft-0.17.0-py3-none-any.whl (503 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m503.9/503.9 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
  Attempting uninstall: peft
    Found existing installation: peft 0.10.0
    Uninstalling peft-0.10.0:
      Successfully uninstalled peft-0.10.0
Successfully installed peft-0.17.0


In [2]:
!pip install transformers==4.39.3



In [1]:
from peft import get_peft_model, TaskType

In [3]:
import pandas as pd
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
from peft import PeftModel, LoraConfig
from loguru import logger

LANG_PROMPTS = {
   'fr':"Détoxifier"
}

# Use a pipeline as a high-level helper
from transformers import pipeline
from sentence_transformers import SentenceTransformer

sta = pipeline("text-classification", model="textdetox/xlmr-large-toxicity-classifier")
sim = SentenceTransformer('sentence-transformers/LaBSE')


def detoxify(text, lang, model, tokenizer):
    encodings = tokenizer(LANG_PROMPTS[lang] + text, return_tensors='pt')
    with torch.no_grad():
        outputs = model.generate(**encodings.to(model.device),
                                 max_length=128,
                                 num_beams=10,
                                 no_repeat_ngram_size=3,
                                 repetition_penalty=1.2,
                                 num_beam_groups=5,
                                 diversity_penalty=2.5,
                                 num_return_sequences=5,
                                 early_stopping=True,
                                 )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

def select_best_output(text, detoxs, sta, sim):
    vals = []
    for detox in detoxs:
        emb = sim.encode([text, detox], convert_to_tensor=True)

        sim_val = (emb[0] * emb[1]).sum()
        sta_val = sta(detox)[0]
        sta_score = sta_val['score']
        if sta_val['label'] == 'LABEL_1':
            sta_score = 1 - sta_score

        vals.append((detox, (sim_val*sta_score).item()))
    detox, _ = max(vals, key=lambda x: x[1])
    return detox


dataset = pd.read_csv('/content/test_inputs_upd.tsv', sep='\t')
dataset = dataset[dataset["lang"]=="fr"]

base_model_id = "bigscience/mt0-xl"
#base_model_id = "bigscience/mt0-xl"
#lora_model_id = "iproskurina/s-nlp-mt0-xl-detox-orpo-detoxify-prompt"
lora_model_id = "Dalfaxy/mt0_xl_french_detox_v3"

#from huggingface_hub import login
#login("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")

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_id, device_map="auto")


model = PeftModel.from_pretrained(base_model, lora_model_id)

USE_LORA = False # mt0-xxl
if USE_LORA:
    ADAPTER_PATH = 'Dalfaxy/mt0_xl_french_detox_v3'
    peft_config = LoraConfig(
        task_type=TaskType.SEQ_2_SEQ_LM,
        inference_mode=False,
        r=32,
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=['k', 'lm_head', 'o', 'q', 'v', 'wi_0', 'wi_1', 'wo']
    )

    model = get_peft_model(model, peft_config)
    model = PeftModel.from_pretrained(model, ADAPTER_PATH, config=peft_config, device='cuda').to('cuda')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/882 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cuda:0


modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/804 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.88G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/397 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/114 [00:00<?, ?B/s]

2_Dense/model.safetensors:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

2_Dense/pytorch_model.bin:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/430 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/797 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/7.03G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/7.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_config.json:   0%|          | 0.00/882 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/2.36G [00:00<?, ?B/s]

In [4]:
def select_best_and_second_output(text, detoxs, sta, sim):
    vals = []
    for detox in detoxs:
        emb = sim.encode([text, detox], convert_to_tensor=True)

        sim_val = (emb[0] * emb[1]).sum()
        sta_val = sta(detox)[0]
        sta_score = sta_val['score']
        if sta_val['label'] == 'LABEL_1':
            sta_score = 1 - sta_score  # Moins toxique = meilleur

        combined_score = (sim_val * sta_score).item()
        vals.append((detox, combined_score))

    # Trier par score décroissant
    sorted_vals = sorted(vals, key=lambda x: x[1], reverse=True)

    chosen = sorted_vals[0][0]      # meilleur score
    rejected = sorted_vals[1][0]    # deuxième meilleur score

    return chosen, rejected


In [11]:
df = pd.read_csv("/content/french_sub_mt0_xl_v3_filtered.tsv", sep="\t")

In [13]:
df[df["lang"]=="fr"].head()

Unnamed: 0,toxic_sentence,neutral_sentence,lang
6000,# Qu'est ce que tu viens de putain de dire à p...,# Qu'est-ce que tu viens de dire à propos de moi?,fr
6001,Qu'est ce que tu viens de putain de dire à pro...,Qu'est-ce que tu viens de dire à propos des mi...,fr
6002,", je t'offre une vraie traduction maison : Tu...",", je t'offre une vraie traduction maison : Tu ...",fr
6003,"Oh non, je ne vais pas bouger, et c'est juste...","Oh non, je ne vais pas bouger, et c'est juste ...",fr
6004,"Faut pas downvote ce genre de truc, c'est cont...","Vous ne devez pas downvote ce genre de truc, c...",fr


In [5]:
detox_text = detoxify(dataset["toxic_sentence"].iloc[1], "fr", model, tokenizer)

Group Beam Search is scheduled to be moved to a `custom_generate` repository in v4.55.0. To prevent loss of backward compatibility, add `trust_remote_code=True` to your `generate` call.


In [7]:
best_detox = select_best_and_second_output(dataset["toxic_sentence"].iloc[1], detox_text, sta, sim)


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


In [6]:
detox_text

["Qu'est-ce que tu viens de dire à propos des milles feuilles?",
 "Qu'est-ce que tu viens de dire à propos des milles feuilles, petit gars?",
 'Quel est le problème avec les milles feuilles?',
 'Quoi que tu viens de dire à propos des Millesfeuilles?',
 'Quel est le problème avec les milles feuilles']

In [8]:
best_detox

("Qu'est-ce que tu viens de dire à propos des milles feuilles, petit gars?",
 "Qu'est-ce que tu viens de dire à propos des milles feuilles?")

In [None]:
detox = []
best_detoxes = []
for i, row in tqdm(dataset.iterrows(), total=dataset.shape[0]):
    detox_text = detoxify(row['toxic_sentence'], row['lang'], model, tokenizer)

    chosen , rejected = select_best_and_second_output(row['toxic_sentence'], detox_text, sta, sim)

    detox.append(rejected)
    best_detoxes.append(chosen)

    logger.info(f"{i} {row['toxic_sentence']} -> {detox_text} -> {best_detox}")
dataset['rejected'] = detox
dataset['chosen'] = best_detoxes

dataset.to_csv('ORPO_data.tsv', sep='\t', index=False)

  return forward_call(*args, **kwargs)
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
  return forward_call(*args, **kwargs)
[32m2025-08-06 23:49:26.191[0m | [1mINFO    [0m | [36m__main__[0m:[36m<cell line: 0>[0m:[36m11[0m - [1m6000 # Qu'est ce que tu viens de putain de dire à propos de moi, petite salope? -> ["Qu'est-ce que tu viens de dire à propos de moi?", "# Qu'est-ce que tu viens de dire à propos de moi?", 'Quoi que tu viens de dire à propos de moi?', 'Quoi que tu as dit à propos de moi?', 'Quel est le problème à propos de moi?'] -> ("Qu'est-ce que tu viens de dire à propos des milles feuilles, petit gars?", "Qu'est-ce que tu viens de dire à propos des milles feuilles?")[0m
  0%|          | 1/600 [00:01<17:37,  1.76s/it][32m2025-08-06 23:49:28.457[0m | [1mINFO    [0m | [36m__main__[0m:[36m<cell line: 0>[0m:[36m11[0m - [1m6001 Qu'est ce que tu viens de putain de dire à propos des putains de milles