Лабораторная работа №2: Детоксификация контента

Цель: Исследовать методы автоматического обнаружения токсичных высказываний и способы смягчения негативного тона текста с применением современных моделей.
Задания:

- Обучить детектор токсичности на реальных данных с использованием Transformer-based моделей (например, RoBERTa).Метрика оценки: F1-score, AUC ROC
- Использовать свежую версию RealToxicityPrompts для тренировки модели детоксикации и создать механизм исправления негативных формулировок в тексте.Модель сравнения: Google Perspective API, OpenAI's Detoxify

# Детектор

In [1]:
import torch

import pandas as pd
import numpy as np

from datasets import Dataset, DatasetDict 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

from peft import (LoraConfig, 
                  prepare_model_for_kbit_training, 
                  get_peft_model,
                  PeftModelForSequenceClassification,
                  PeftConfig)

from transformers.modeling_outputs import SequenceClassifierOutput
from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    TrainingArguments, 
    Trainer,
    EarlyStoppingCallback,
    DataCollatorWithPadding,
    AutoModelForCausalLM,
    Gemma3ForCausalLM)

import bitsandbytes as bnb
import evaluate
import os
import warnings

warnings.filterwarnings('ignore')

graphic_card = '0'
gpu_device = 'cuda:0'
os.environ['CUDA_VISIBLE_DEVICES'] = graphic_card
os.environ['CUDA_DEVICE_ORDER']= 'PCI_BUS_ID'
device = torch.device(f'cuda:{graphic_card}' if torch.cuda.is_available() else 'cpu')

torch.cuda.set_device(0) 

In [2]:
def load_pairwise(path):
    df = pd.read_csv(path, sep='\t', header=0)
    recs = []
    for toxic, neutral in zip(df['ru_toxic_comment'], df['ru_neutral_comment']):
        recs.append({'text': toxic,   'label': 1})
        recs.append({'text': neutral, 'label': 0})
    return Dataset.from_pandas(pd.DataFrame(recs))

train_ds = load_pairwise('data/train.tsv')
valid_ds = load_pairwise('data/dev.tsv')

dataset_toxic = DatasetDict({
    'train': train_ds,
    'validation': valid_ds,
})

print(dataset_toxic)

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 22180
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2232
    })
})


In [3]:
df = pd.DataFrame(dataset_toxic['train'])
df.head()

Unnamed: 0,text,label
0,"и,чё,блядь где этот херой был до этого со свои...",1
1,"Ну и где этот герой был,со своими доказательст...",0
2,"и,чё,блядь где этот херой был до этого со свои...",1
3,Где этот герой был до этого со своими доказате...,0
4,"и,чё,блядь где этот херой был до этого со свои...",1


In [None]:
hugging_face_model_id = 'google/gemma-3-4b-it'

tokenizer = AutoTokenizer.from_pretrained(
    hugging_face_model_id,
    padding_side='right',
    add_bos_token=True,     
    trust_remote_code=True
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

class2id = {
    'neutral': 0,
    'toxic': 1,
}
id2class = {v: k for k, v in class2id.items()}

def preprocess_function(samples):
    instruction = 'Определите, является ли следующий комментарий токсичным: '
    texts = [instruction + sample for sample in samples['text']]
    
    encodings = tokenizer(
        texts,
        truncation=True,
        padding='max_length',
        max_length=128
    )
    encodings['labels'] = samples['label']
    return encodings

dataset_toxic_tokenized = dataset_toxic.map(
    preprocess_function,
    batched=True,
    remove_columns=['text','label']
)

print(dataset_toxic_tokenized)

Map:   0%|          | 0/22180 [00:00<?, ? examples/s]

Map:   0%|          | 0/2232 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 22180
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 2232
    })
})


In [5]:
sample_index = 0 

sample_input_ids = dataset_toxic_tokenized['train']['input_ids'][sample_index]
sample_label= dataset_toxic_tokenized['train']['labels'][sample_index]

print(f'IDs   : {sample_input_ids}')
print(f'Label : {sample_label}  -->  {id2class[sample_label]}\n')
print(f'Tokens: {tokenizer.decode(sample_input_ids)}')

IDs   : [2, 237248, 18043, 68538, 236764, 24572, 6377, 137447, 95876, 27058, 159361, 57844, 236787, 1079, 236764, 123940, 236764, 220467, 16276, 24489, 31475, 194171, 17421, 17907, 2610, 20708, 2790, 89682, 114347, 41617, 2062, 236881, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Label : 1  -->  toxic

Tokens: <bos>Определите, является ли следующий комментарий токсичным: и,чё,блядь где этот херой был до этого со своими доказательствами?<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><p

In [6]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

sample_batch_ids = dataset_toxic_tokenized['train']['input_ids'][0:3]
sample_batch_ids_collator = data_collator(dataset_toxic_tokenized['train'][:3])['input_ids'][0:3]
print([len(x) for x in sample_batch_ids ])
print([len(x) for x in sample_batch_ids_collator ])

[128, 128, 128]
[128, 128, 128]


In [7]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16)


model = Gemma3ForCausalLM.from_pretrained(hugging_face_model_id, 
                                          torch_dtype=torch.bfloat16, 
                                          device_map=gpu_device,
                                          attn_implementation='eager',
                                          quantization_config=bnb_config  )

model.lm_head = torch.nn.Linear(model.config.hidden_size, len(class2id.keys()), bias=False,device=gpu_device)
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit 
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
        if 'lm_head' in lora_module_names: 
            lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)
modules = ['gate_proj', 'down_proj', 'v_proj', 'k_proj', 'q_proj', 'o_proj', 'up_proj']
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    target_modules=modules,
    lora_dropout=0.1,
    bias='none',
    task_type='SEQ_CLS')

model = get_peft_model(model, lora_config)

In [9]:
class Gemma3ForSequenceClassification(PeftModelForSequenceClassification):
    def __init__(self, peft_config: PeftConfig, model: AutoModelForCausalLM, adapter_name='default'):
        super().__init__(model, peft_config, adapter_name)
        self.num_labels = model.config.num_labels
        self.problem_type = 'multi_label_classification'

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs):
        
        return_dict = return_dict if return_dict is not None else self.config.return_dict

        outputs = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            **kwargs)

        logits = outputs.logits

        sequence_lengths = torch.sum(attention_mask, dim=1)
        last_token_indices = sequence_lengths - 1
        batch_size = logits.shape[0]
       
        logits = logits[torch.arange(batch_size, device=logits.device), last_token_indices, :]

        loss = None
        if labels is not None:
            if self.problem_type == 'regression':
                loss_fct = torch.nn.MSELoss()
                loss = loss_fct(logits.squeeze(), labels.squeeze())
            else:
                loss_fct = torch.nn.CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions)

In [10]:
def custom_binary_crossentropy_loss(logits, labels, epsilon=1e-7):
  
    probs = torch.sigmoid(logits)
    probs = torch.clamp(probs, min=epsilon, max=1-epsilon) 
    loss = -(labels * torch.log(probs) + (1 - labels) * torch.log(1 - probs))
    return torch.mean(loss)

class CustomTrainer(Trainer): 
    def compute_loss(self, model, inputs,num_items_in_batch=4, return_outputs=False): 
        labels = inputs.get('labels')
        inputs = inputs.to(gpu_device)
        outputs = model(**inputs)
        logits = outputs.logits 
        
        loss = custom_binary_crossentropy_loss(logits, labels)

        return (loss, outputs) if return_outputs else loss
    
clf_metrics = evaluate.combine(['accuracy', 'f1', 'precision', 'recall'])

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    probs = torch.softmax(torch.tensor(logits), dim=1)[:, 1].numpy()

    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds),
        'precision': precision_score(labels, preds),
        'recall': recall_score(labels, preds),
        'auc_roc': roc_auc_score(labels, probs),
    }

Using the latest cached version of the module from /home/danya/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Dec  7 02:04:16 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/danya/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/0ca73f6cf92ef5a268320c697f7b940d1030f8471714bffdb6856c641b818974 (last modified on Sat Dec  7 02:04:25 2024) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/danya/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/4e7f439a346715f68500ce6f2be82bf3272abd3f20bdafd203a2c4f85b61dd5f (last modified on Sat Dec  7 02:04:27 2024) since it couldn't be found locally at evaluate-metric--pr

In [11]:
early_stop = EarlyStoppingCallback(early_stopping_patience=3, 
                                   early_stopping_threshold=0.001) 
checkpoints_dir = 'results/gemma_toxicity_classification' 

os.environ['TOKENIZERS_PARALLELISM'] = 'false'

In [12]:
training_args = TrainingArguments(
    gradient_checkpointing=False,  
    gradient_checkpointing_kwargs={'use_reentrant': False},
    logging_strategy='steps',
    logging_steps=100,
    dataloader_num_workers=4,
    output_dir= checkpoints_dir ,  
    learning_rate=5e-5,  
    per_device_train_batch_size=32,  
    per_device_eval_batch_size=32,  
    num_train_epochs=10,  
    weight_decay=0.01,  
    eval_strategy='steps', 
    eval_steps=100,     
    save_strategy='steps',
    save_steps=100,  
    report_to='none',
    load_best_model_at_end=True,  
    push_to_hub=False,  
    bf16=True,
    warmup_ratio=0.05, 
    metric_for_best_model='eval_accuracy',
    greater_is_better=True)  

In [None]:
peft_config = PeftConfig(peft_type='LORA', task_type='SEQ_CLS', inference_mode=False)
for key, value in lora_config.__dict__.items():
    setattr(peft_config, key, value)

wrapped_model = Gemma3ForSequenceClassification(peft_config, model)
wrapped_model.num_labels = len(class2id.keys())
wrapped_model.config.id2label = id2class
wrapped_model.config.label2id = class2id
wrapped_model.config.problem_type = 'single_label_classification'

trainer = Trainer(
    model=wrapped_model,
    args=training_args,
    train_dataset=dataset_toxic_tokenized['train'],
    eval_dataset=dataset_toxic_tokenized['validation'],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[early_stop],
)

trainer.label_names = ['labels']

trainer.train()
trainer.evaluate()

No label_names provided for model class `Gemma3ForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auc Roc
200,0.2253,0.174756,0.942652,0.939962,0.98622,0.897849,0.982661
400,0.1212,0.133208,0.956989,0.955842,0.982042,0.931004,0.988762
600,0.1155,0.136739,0.960573,0.959522,0.985822,0.934588,0.990383
800,0.0623,0.165462,0.959677,0.95814,0.996132,0.922939,0.987666
1000,0.0574,0.1272,0.963262,0.962385,0.985902,0.939964,0.989439
1200,0.0703,0.111196,0.964606,0.963678,0.989613,0.939068,0.991903
1400,0.0615,0.11727,0.965054,0.964055,0.99241,0.937276,0.992465
1600,0.022,0.152956,0.969534,0.969175,0.980734,0.957885,0.99259
1800,0.0242,0.14111,0.965502,0.964888,0.982358,0.948029,0.99165
2000,0.0284,0.141904,0.96819,0.968261,0.966102,0.97043,0.993775


{'eval_loss': 0.15295617282390594,
 'eval_accuracy': 0.9695340501792115,
 'eval_f1': 0.9691749773345422,
 'eval_precision': 0.9807339449541285,
 'eval_recall': 0.9578853046594982,
 'eval_auc_roc': 0.9925898626687735,
 'eval_runtime': 74.1289,
 'eval_samples_per_second': 30.11,
 'eval_steps_per_second': 0.944,
 'epoch': 3.170028818443804}

# Детоксификатор

In [1]:
import os
os.environ['UNSLOTH_COMPILE_DISABLE'] = '1'

from unsloth import FastModel
from unsloth.chat_templates import get_chat_template
from unsloth.chat_templates import standardize_data_formats
from unsloth.chat_templates import train_on_responses_only

import torch
from datasets import Dataset, DatasetDict
import evaluate
import numpy as np
from trl import SFTTrainer, SFTConfig
from transformers import EarlyStoppingCallback
import pandas as pd
from tqdm.auto import tqdm

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
model, tokenizer = FastModel.from_pretrained(
    model_name = 'unsloth/gemma-3-4b-it',
    max_seq_length = 256, 
    load_in_4bit = False,  
    load_in_8bit = True,
    full_finetuning = False,
)

model = FastModel.get_peft_model(
    model,
    finetune_vision_layers = False, 
    finetune_language_layers = True,  
    finetune_attention_modules = True,  
    finetune_mlp_modules = True,  
    r = 16,           
    lora_alpha = 32,  
    lora_dropout = 0,
    bias = 'none',
    random_state = 3407,
)

==((====))==  Unsloth 2025.4.7: Fast Gemma3 patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4070 Ti SUPER. Num GPUs = 1. Max memory: 15.593 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 8.9. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Unsloth: Making `model.base_model.model.language_model.model` require gradients


In [3]:
tokenizer = get_chat_template(
    tokenizer,
    chat_template = 'gemma-3',
)

def load_pairwise(path):
    import pandas as pd
    df = pd.read_csv(path, sep='\t')
    return Dataset.from_dict({
        'toxic': df['ru_toxic_comment'].tolist(),
        'neutral': df['ru_neutral_comment'].tolist(),
    })

ds = DatasetDict({
    'train': load_pairwise('data/train.tsv'),
    'validation': load_pairwise('data/dev.tsv'),
})

tokenizer = get_chat_template(tokenizer, chat_template='gemma-3')
def build_conversations(batch):
    return {
        'conversations': [
            [{'role': 'user', 'content': u},
             {'role': 'assistant', 'content': n}]
            for u, n in zip(batch['toxic'], batch['neutral'])
        ]
    }

ds = ds.map(build_conversations, batched=True, remove_columns=['toxic','neutral'])
ds

Map:   0%|          | 0/11090 [00:00<?, ? examples/s]

Map:   0%|          | 0/1116 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['conversations'],
        num_rows: 11090
    })
    validation: Dataset({
        features: ['conversations'],
        num_rows: 1116
    })
})

In [4]:
def apply_template(batch):
    return {'text': tokenizer.apply_chat_template(batch['conversations'])}

ds = ds.map(apply_template, batched=True)

print(ds['train'][0]['text'])

Map:   0%|          | 0/11090 [00:00<?, ? examples/s]

Map:   0%|          | 0/1116 [00:00<?, ? examples/s]

<bos><start_of_turn>user
и,чё,блядь где этот херой был до этого со своими доказательствами?<end_of_turn>
<start_of_turn>model
Ну и где этот герой был,со своими доказательствами?<end_of_turn>



In [5]:
train_std = standardize_data_formats(ds['train'])
valid_std = standardize_data_formats(ds['validation'])

ds = DatasetDict({
    'train': train_std,
    'validation': valid_std,
})

ds

Unsloth: Standardizing formats (num_proc=12):   0%|          | 0/11090 [00:00<?, ? examples/s]

Unsloth: Standardizing formats (num_proc=12):   0%|          | 0/1116 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['conversations', 'text'],
        num_rows: 11090
    })
    validation: Dataset({
        features: ['conversations', 'text'],
        num_rows: 1116
    })
})

In [6]:
ds['train'][0]

{'conversations': [{'content': 'и,чё,блядь где этот херой был до этого со своими доказательствами?',
   'role': 'user'},
  {'content': 'Ну и где этот герой был,со своими доказательствами?',
   'role': 'assistant'}],
 'text': '<bos><start_of_turn>user\nи,чё,блядь где этот херой был до этого со своими доказательствами?<end_of_turn>\n<start_of_turn>model\nНу и где этот герой был,со своими доказательствами?<end_of_turn>\n'}

In [None]:
def apply_chat_template(examples):
    texts = tokenizer.apply_chat_template(examples['conversations'])
    return { 'text': texts }

dataset = ds.map(apply_chat_template, batched = True)

Map:   0%|          | 0/11090 [00:00<?, ? examples/s]

Map:   0%|          | 0/1116 [00:00<?, ? examples/s]

In [None]:
early_stop_cb = EarlyStoppingCallback(
    early_stopping_patience=3,          
)
    
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = ds['train'],
    eval_dataset = ds['validation'], 
    args = SFTConfig(
        output_dir = 'results/gemma_detoxificator',
        dataset_text_field = 'text',
        per_device_train_batch_size = 16,
        num_train_epochs = 5,
        warmup_steps = 5,
        eval_strategy='steps',
        learning_rate = 5e-5, 
        logging_steps = 100,
        eval_steps=100,
        optim = 'adamw_8bit',
        weight_decay = 0.01,
        lr_scheduler_type = 'linear',
        seed = 3407,
        report_to = 'none',
    ),
    callbacks = [early_stop_cb],
)

trainer.args.save_strategy = 'steps'
trainer.args.save_steps = trainer.args.eval_steps  
trainer.args.load_best_model_at_end = True                       
trainer.args.metric_for_best_model = 'eval_loss'             
trainer.args.greater_is_better = False             

Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/11090 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/1116 [00:00<?, ? examples/s]

In [9]:
trainer = train_on_responses_only(
    trainer,
    instruction_part = '<start_of_turn>user\n',
    response_part = '<start_of_turn>model\n',
)

Map (num_proc=12):   0%|          | 0/11090 [00:00<?, ? examples/s]

Map (num_proc=12):   0%|          | 0/1116 [00:00<?, ? examples/s]

In [10]:
tokenizer.decode(trainer.train_dataset[0]['input_ids'])

'<bos><bos><start_of_turn>user\nи,чё,блядь где этот херой был до этого со своими доказательствами?<end_of_turn>\n<start_of_turn>model\nНу и где этот герой был,со своими доказательствами?<end_of_turn>\n'

In [11]:
tokenizer.decode([tokenizer.pad_token_id if x == -100 else x for x in trainer.train_dataset[0]['labels']]).replace(tokenizer.pad_token, ' ')

'                             Ну и где этот герой был,со своими доказательствами?<end_of_turn>\n'

In [12]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 11,090 | Num Epochs = 5 | Total steps = 1,735
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 2 x 1) = 32
 "-____-"     Trainable parameters = 29,802,496/4,329,881,968 (0.69% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
100,1.1634,0.919399
200,0.8126,0.882307
300,0.7786,0.85726
400,0.7294,0.85001
500,0.6533,0.848046
600,0.647,0.843478
700,0.6428,0.849156
800,0.5254,0.881972
900,0.533,0.883745


Unsloth: Not an error, but Gemma3ForConditionalGeneration does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


In [20]:
tokenizer = get_chat_template(
    tokenizer,
    chat_template = 'gemma-3',
)
messages = [{
    'role': 'user',
    'content': [{
        'type' : 'text',
        'text' : 'Ну ты и тварь всё-таки!',
    }]
}]
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
)
outputs = model.generate(
    **tokenizer([text], return_tensors = 'pt').to('cuda'),
    max_new_tokens = 64, 
    temperature = 1.0, top_p = 0.95, top_k = 64,
)
tokenizer.batch_decode(outputs)

['<bos><start_of_turn>user\nНу ты и тварь всё-таки!<end_of_turn>\n<start_of_turn>model\nТы все-таки не очень добрый человек<end_of_turn>']

In [None]:
model.save_pretrained('results/gemma_detoxificator/best')  
tokenizer.save_pretrained('results/gemma_detoxificator/best')

['results/gemma_detoxificator/best/processor_config.json']

In [2]:
model, tokenizer = FastModel.from_pretrained(
    model_name = 'results/gemma_detoxificator/best',
    max_seq_length = 256,
    load_in_4bit = False,
    load_in_8bit = True,
)
tokenizer = get_chat_template(tokenizer, chat_template='gemma-3')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device).eval()

df = pd.read_csv('data/dev.tsv', sep='\t')

grouped = (
    df
    .groupby('ru_toxic_comment')['ru_neutral_comment']
    .apply(list)
    .reset_index()
)
toxic_texts = grouped['ru_toxic_comment'].tolist()
multi_references = grouped['ru_neutral_comment'].tolist()

batch_size = 32
preds = []

for i in tqdm(range(0, len(toxic_texts), batch_size), desc='Generating'):
    batch_toxic = toxic_texts[i : i + batch_size]
    raw_prompts = []
    for txt in batch_toxic:
        messages = [{'role': 'user', 'content': txt}]
        raw_prompts.append(
            tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
        )

    inputs = tokenizer(
        raw_prompts,
        return_tensors='pt',
        padding=True,
        truncation=True,
        padding_side='left'
    ).to(device)

    prompt_len = inputs.input_ids.shape[1]
    out_ids = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=64,
        temperature=1.0,
        top_p=0.95,
        top_k=64,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

    gen_ids = out_ids[:, prompt_len:]
    gen_texts = tokenizer.batch_decode(gen_ids, skip_special_tokens=True)
    preds.extend([t.strip() for t in gen_texts])

bertscore = evaluate.load('bertscore')
results = bertscore.compute(
    predictions=preds,
    references=multi_references,
    lang='ru',
    batch_size=batch_size,
    device=device,
)

print(f"F1:        {np.mean(results['f1']):.4f}")
print(f"Precision: {np.mean(results['precision']):.4f}")
print(f"Recall:    {np.mean(results['recall']):.4f}")

==((====))==  Unsloth 2025.4.7: Fast Gemma3 patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4070 Ti SUPER. Num GPUs = 1. Max memory: 15.593 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 8.9. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Generating: 100%|██████████| 25/25 [00:56<00:00,  2.26s/it]


F1:        0.8631
Precision: 0.8599
Recall:    0.8685


In [None]:
mapping = {
    text: (pred, refs)
    for text, pred, refs in zip(toxic_texts, preds, multi_references)
}

seen = set()
for i, row in df.iterrows():
    orig = row['ru_toxic_comment']
    if orig in seen:
        continue
    seen.add(orig)
    
    pred, refs = mapping[orig]
    
    print(f"\n[{i}] Оригинал: {orig}")
    for j, ref in enumerate(refs, start=1):
        print(f"    Референс {j}: {ref}")
    print(f"    Модель: {pred}")


[0] Оригинал: пиздеж! температуры горения хватит чтобы её расплавить к херам..
    Референс 1: Враньё! Температуры горения хватит чтобы ее расплавить
    Референс 2: неправда,температуры горения хватит чтобы расплавить её
    Референс 3: Враньё! Температуры горения хватит на чтобы её расплавить полностью.
    Модель: Такого не может быть! Температуры горения хватит чтобы её расплавить

[3] Оригинал: а ты чмо там был.ты вообще служил.гандон
    Референс 1: А ты там был? Ты вообще служил?
    Модель: А ты там был. Ты вообще служил

[4] Оригинал: пиздабол ---- а сам где кормишься ?
    Референс 1: а сам где кормишься ?
    Модель: ----- а сам где кормишься ?

[5] Оригинал: этому сайту я давно не доверяю, пишут разную х...
    Референс 1: Этому сайту давно не доверяю, пишут всякую ерунду
    Модель: этому сайту я давно не доверяю, пишут разную ерунду

[6] Оригинал: у меня сегодня подобный звонок был. достали говнюки.
    Референс 1: у меня сегодня подобный звонок был. Достали.
    Референ