In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv("final_combined_data.csv")

In [3]:
 from sklearn.utils import shuffle
data = shuffle(data, random_state=0)

In [4]:
data_dict = data.to_dict("list")

In [5]:
# print(data_dict["text"][:5])

In [6]:
data_dict["human_metric"] = []
for row in zip(data_dict["Актуальность"], data_dict["Последовательность"], data_dict["Беглость"], data_dict["Согласованность"]):
    metric_value = sum(row) / 20 # [0; 1]
    data_dict["human_metric"].append(metric_value)

In [7]:
mean_human = np.mean(data_dict["human_metric"])
mean_bleu = np.mean(data_dict["bleu"])
mean_rouge = np.mean(data_dict["rouge1"])
mean_bertscore = np.mean(data_dict["bertscore_f1"])
mean_meteor = np.mean(data_dict["meteor"])

# orig_scores["bleu"] = data_dict["bleu"].copy()
# orig_scores["rouge"] = data_dict["rouge1"].copy()
# orig_scores["meteor"] = data_dict["meteor"].copy()
# orig_scores["bertscore"] = data_dict["bertscore_f1"].copy()

deviations = {"bleu": [], "rouge": [], "meteor": [], "bertscore": []}

for i in range(len(data)):
    data_dict["human_metric"][i] -= mean_human
    data_dict["bleu"][i] -= mean_bleu
    data_dict["rouge1"][i] -= mean_rouge
    data_dict["meteor"][i] -= mean_meteor
    data_dict["bertscore_f1"][i] -= mean_bertscore

    deviations["bleu"].append(abs(data_dict["bleu"][i] - data_dict["human_metric"][i]))
    deviations["rouge"].append(abs(data_dict["rouge1"][i] - data_dict["human_metric"][i]))
    deviations["meteor"].append(abs(data_dict["meteor"][i] - data_dict["human_metric"][i]))
    deviations["bertscore"].append(abs(data_dict["bertscore_f1"][i] - data_dict["human_metric"][i]))

print("Median devs:")
print("\tBLEU: ", np.median(deviations["bleu"]), np.max(deviations["bleu"]))
print("\tROUGE: ", np.median(deviations["rouge"]), np.max(deviations["rouge"]))
print("\tMETEOR: ", np.median(deviations["meteor"]), np.max(deviations["meteor"]))
print("\tBERTSCORE: ", np.median(deviations["bertscore"]), np.max(deviations["bertscore"]))

labels = {}
labels["bleu"] = [1.0 if deviations["bleu"][i] < np.quantile(deviations["bleu"], 0.5) else 0.0 for i in range(len(data_dict["text"]))]
labels["rouge"] = [1.0 if deviations["rouge"][i] < np.quantile(deviations["rouge"], 0.5) else 0.0 for i in range(len(data_dict["text"]))]
labels["meteor"] = [1.0 if deviations["meteor"][i] < np.quantile(deviations["meteor"], 0.5) else 0.0 for i in range(len(data_dict["text"]))]
labels["bertscore"] = [1.0 if deviations["bertscore"][i] < np.quantile(deviations["bertscore"], 0.5) else 0.0  for i in range(len(data_dict["text"]))]

Median devs:
	BLEU:  0.09726538223806144 0.66593545429133
	ROUGE:  0.10804984680762901 0.6722531834954013
	METEOR:  0.11309853341103354 0.7875010757096961
	BERTSCORE:  0.07588821450869243 0.5758402320440265


## Data

In [8]:
class MetricsCorrelationDataset(Dataset):

    def __init__(self, texts, summaries, labels, tokenizer, max_length):
        self.texts = texts
        self.summaries = summaries
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        summary = self.summaries[idx]
        label = self.labels[idx]
        # text_encoding = self.tokenizer(text, return_tensors='pt', max_length=self.max_length, padding='max_length', truncation=True)
        # summary_encoding = self.tokenizer(summary, return_tensors='pt', max_length=self.max_length, padding='max_length', truncation=True)
        encoding = self.tokenizer(text, summary, truncation="only_first", max_length=self.max_length)
        ans = {
            'input_ids': encoding['input_ids'],
            'attention_mask': encoding['attention_mask'],
            'label': torch.tensor(label)
        }
        
        return ans

In [9]:
from transformers import DataCollatorWithPadding
from transformers import AutoTokenizer

def get_dataset(tokenizer, metric_name):
    texts_len = len(data_dict["text"])
    train = int(texts_len * 0.9)
    test_val = int(texts_len * 0.1)
    # max_length = 512
    max_length = 482
    return {
        "train": MetricsCorrelationDataset(texts=data_dict["summary"][0:train], summaries=data_dict["machine_summary"][0:train], labels=labels[metric_name][0:train], tokenizer=tokenizer, max_length=max_length),
        "test": MetricsCorrelationDataset(texts=data_dict["summary"][train:texts_len], summaries=data_dict["machine_summary"][train:texts_len], labels=labels[metric_name][train:texts_len], tokenizer=tokenizer, max_length=max_length),
       }

## Training

In [10]:
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score
)
from sklearn.preprocessing import label_binarize
import numpy as np

def compute_metrics(eval_pred):
    predictions = eval_pred.predictions
    labels = eval_pred.label_ids
    probabilities = np.exp(predictions) / np.sum(np.exp(predictions), axis=-1, keepdims=True)
    predictions = torch.tensor([float(round(x)) for x in predictions.flatten()])
    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="macro")
    p = precision_score(labels, predictions, average="macro")
    r = recall_score(labels, predictions, average="macro")
    return {"precision": p, "recall": r, "f1": f1, "accuracy": accuracy}

In [11]:
from transformers import TrainingArguments
from transformers import Trainer
from peft import (
    get_peft_config,
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    PeftType,
    PromptEncoderConfig,
)

peft_config = PromptEncoderConfig(task_type="SEQ_CLS", num_virtual_tokens=30, encoder_hidden_size=256)


training_args = TrainingArguments(
    output_dir="automatic_predictor",
    learning_rate=3e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=25,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
)

### ROUGE1

In [12]:
import os
os.environ["CUBLAS_LOGINFO_DBG"] = "1"
os.environ["CUBLAS_LOGDEST_DBG"] = "stdout"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [13]:
from transformers import AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split



model = AutoModelForSequenceClassification.from_pretrained("ai-forever/ruRoberta-large", num_labels=1)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruRoberta-large")
tokenizer.model_max_length=484
# tokenizer.model_max_length=514

rouge_dataset = get_dataset(tokenizer, "rouge")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=rouge_dataset["train"],
    eval_dataset=rouge_dataset["test"],
    tokenizer=rouge_dataset["train"].tokenizer,
    # data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ai-forever/ruRoberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,672,705 || all params: 357,033,474 || trainable%: 0.4685




Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3007,0.270392,0.63587,0.587121,0.553812,0.6
2,0.3094,0.278423,0.567661,0.518561,0.402273,0.5
3,0.3151,0.265478,0.535844,0.532955,0.526197,0.53913
4,0.3031,0.317094,0.425595,0.492424,0.354451,0.513043
5,0.2965,0.280336,0.524155,0.508712,0.423559,0.526087




KeyboardInterrupt: 

In [None]:
model

### BLEU

In [14]:
from transformers import AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split



model = AutoModelForSequenceClassification.from_pretrained("ai-forever/ruRoberta-large", num_labels=1)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruRoberta-large")
tokenizer.model_max_length=484
# tokenizer.model_max_length=514

rouge_dataset = get_dataset(tokenizer, "bleu")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=rouge_dataset["train"],
    eval_dataset=rouge_dataset["test"],
    tokenizer=rouge_dataset["train"].tokenizer,
    # data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ai-forever/ruRoberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,672,705 || all params: 357,033,474 || trainable%: 0.4685


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3255,0.335566,0.580518,0.510853,0.389034,0.53913
2,0.3062,0.255741,0.585301,0.550167,0.508621,0.569565
3,0.3244,0.29283,0.455979,0.455829,0.455688,0.456522
4,0.324,0.303826,0.507548,0.503339,0.433459,0.526087
5,0.3113,0.374572,0.439027,0.462128,0.396771,0.443478




KeyboardInterrupt: 

### BERTScore

In [16]:
from transformers import AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split



model = AutoModelForSequenceClassification.from_pretrained("ai-forever/ruRoberta-large", num_labels=1)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruRoberta-large")
tokenizer.model_max_length=484
# tokenizer.model_max_length=514

rouge_dataset = get_dataset(tokenizer, "bertscore")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=rouge_dataset["train"],
    eval_dataset=rouge_dataset["test"],
    tokenizer=rouge_dataset["train"].tokenizer,
    # data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ai-forever/ruRoberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,672,705 || all params: 357,033,474 || trainable%: 0.4685


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3235,0.304601,0.772926,0.504762,0.362541,0.547826
2,0.2939,0.261076,0.549091,0.536,0.488889,0.513043
3,0.288,0.255912,0.556154,0.555619,0.555619,0.56087
4,0.2957,0.261551,0.583413,0.559619,0.537262,0.582609
5,0.2798,0.263344,0.491529,0.493143,0.456647,0.473913
6,0.2773,0.266392,0.567935,0.54381,0.511991,0.569565
7,0.2747,0.275571,0.485691,0.490476,0.430223,0.465217
8,0.2831,0.255224,0.540984,0.541143,0.540971,0.543478
9,0.2837,0.266275,0.507505,0.507048,0.49181,0.495652




KeyboardInterrupt: 

### METEOR

In [17]:
from transformers import AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split



model = AutoModelForSequenceClassification.from_pretrained("ai-forever/ruRoberta-large", num_labels=1)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruRoberta-large")
tokenizer.model_max_length=484
# tokenizer.model_max_length=514

rouge_dataset = get_dataset(tokenizer, "meteor")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=rouge_dataset["train"],
    eval_dataset=rouge_dataset["test"],
    tokenizer=rouge_dataset["train"].tokenizer,
    # data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ai-forever/ruRoberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,672,705 || all params: 357,033,474 || trainable%: 0.4685


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3264,0.245078,0.586914,0.550942,0.502165,0.556522
2,0.3098,0.25724,0.495413,0.499092,0.369774,0.491304
3,0.3113,0.248794,0.521739,0.518266,0.49996,0.521739
4,0.2963,0.304246,0.254348,0.5,0.337176,0.508696
5,0.3063,0.247854,0.540623,0.537289,0.526175,0.534783
6,0.296,0.244728,0.610566,0.592126,0.576427,0.595652
7,0.283,0.248803,0.542635,0.537743,0.522423,0.534783
8,0.2842,0.268825,0.609091,0.518153,0.39519,0.526087
9,0.2879,0.259513,0.521739,0.513917,0.463139,0.508696
10,0.2893,0.264208,0.515382,0.50658,0.420909,0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


TypeError: 'method' object is not subscriptable

In [12]:
class MetricsCorrelationDataset(Dataset):

    def __init__(self, texts, summaries, machine_summaries, labels, tokenizer, max_length):
        self.texts = texts
        self.summaries = summaries
        self.machine_summaries = machine_summaries
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        summary = self.summaries[idx]
        machine_summary = self.machine_summaries[idx]
        label = self.labels[idx]
        # text_encoding = self.tokenizer(text, return_tensors='pt', max_length=self.max_length, padding='max_length', truncation=True)
        # summary_encoding = self.tokenizer(summary, return_tensors='pt', max_length=self.max_length, padding='max_length', truncation=True)
        encoding_text = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length)
        encoding_summary = self.tokenizer(summary, truncation=True, padding='max_length', max_length=self.max_length)
        encoding_machine_summary = self.tokenizer(machine_summary, truncation=True, padding='max_length', max_length=self.max_length)
        ans = {
            'input_ids_text': encoding_text['input_ids'],
            'input_ids_summary': encoding_summary['input_ids'],
            'input_ids_machine_summary': encoding_machine_summary['input_ids'],
            'attention_mask_text': encoding_text['attention_mask'],
            'attention_mask_summary': encoding_summary['attention_mask'],
            'attention_mask_machine_summary': encoding_machine_summary['attention_mask'],
            'labels': torch.tensor(label)
        }
        
        return ans

In [13]:
from transformers import DataCollatorWithPadding
from transformers import AutoTokenizer

def get_dataset(tokenizer, metric_name):
    texts_len = len(data_dict["text"])
    train = int(texts_len * 0.9)
    test_val = int(texts_len * 0.1)
    # max_length = 512
    max_length = 512
    return {
        "train": MetricsCorrelationDataset(texts=data_dict["text"][0:train], summaries=data_dict["summary"][0:train], machine_summaries=data_dict["machine_summary"][0:train], labels=labels[metric_name][0:train], tokenizer=tokenizer, max_length=max_length),
        "test": MetricsCorrelationDataset(texts=data_dict["text"][train:texts_len], summaries=data_dict["summary"][train:texts_len], machine_summaries=data_dict["machine_summary"][train:texts_len], labels=labels[metric_name][train:texts_len], tokenizer=tokenizer, max_length=max_length),
       }

In [18]:
import torch
from torch import nn
from transformers import AutoConfig, RobertaModel, RobertaForSequenceClassification, PreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput
from typing import Optional, Union, Tuple

class ClassificationHead(nn.Module):
    """Head for sentence-level classification tasks."""

    def __init__(self, config):#, num_extra_dims):
        super().__init__()
        # total_dims = config.hidden_size+num_extra_dims
        total_dims = 3 * config.hidden_size
        self.dense = nn.Linear(total_dims, total_dims)
        classifier_dropout = (
            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.out_proj = nn.Linear(total_dims, config.num_labels)

    def forward(self, features, **kwargs):
        x = self.dropout(features)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x

class CustomSequenceClassification(RobertaForSequenceClassification):

    def __init__(self, config):#, num_extra_dims):
        print(config)
        super().__init__(config)
        self.num_labels = config.num_labels
        self.config = config

        # self.bert_model =  RobertaModel.from_pretrained("ai-forever/ruRoberta-large")
        # self.bert_model = RobertaMode(config)
        self.classifier = ClassificationHead(config)#, num_extra_dims)

        self.post_init()

    
    def forward(
        self,
        input_ids_text: Optional[torch.LongTensor] = None,
        input_ids_summary: Optional[torch.LongTensor] = None,
        input_ids_machine_summary: Optional[torch.LongTensor] = None,
        attention_mask_text = None,
        attention_mask_summary = None,
        attention_mask_machine_summary = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        extra_data: Optional[torch.FloatTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, SequenceClassifierOutput]:

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs_text = super().base_model(#forward(
            input_ids=input_ids_text, attention_mask=attention_mask_text, token_type_ids=token_type_ids, position_ids=position_ids, head_mask=head_mask, inputs_embeds=inputs_embeds#, labels=labels
        )
        outputs_summary = super().base_model(#forward(
            input_ids=input_ids_summary, attention_mask=attention_mask_summary, token_type_ids=token_type_ids, position_ids=position_ids, head_mask=head_mask, inputs_embeds=inputs_embeds#, labels=labels
        )
        outputs_machine_summary = super().base_model(#forward(
            input_ids=input_ids_machine_summary, attention_mask=attention_mask_machine_summary, token_type_ids=token_type_ids, position_ids=position_ids, head_mask=head_mask, inputs_embeds=inputs_embeds#, labels=labels
        )

        sequence_output_text = outputs_text[0]
        sequence_output_summary = outputs_summary[0]
        sequence_output_machine_summary = outputs_machine_summary[0]


        cls_embedding_text = sequence_output_text[:, 0, :]
        cls_embedding_summary = sequence_output_summary[:, 0, :]
        cls_embedding_machine_summary = sequence_output_machine_summary[:, 0, :]

        # output = cls_embedding
        output = torch.cat((cls_embedding_text, cls_embedding_summary, cls_embedding_machine_summary), dim=-1)

        logits = self.classifier(output)

        loss = None

        if labels is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(logits.squeeze(dim=-1), labels)
            # print(loss)
        

        hidden_states = None
        attentions = None
        # hidden_states = outputs_text.hidden_states + outputs_summary.hidden_states + outputs_machine_summary.hidden_states 
        # attentions = outputs_text.attentions + outputs_summary.attentions + outputs_machine_summary.attentions 
        
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=hidden_states,
            attentions=attentions,
        )

In [19]:
import torch
from transformers import DataCollator

def custom_three_inputs_data_collator(batch):
    """
    Custom data collator that returns three sets of input_ids: 
    input_ids_text, input_ids_summary, and input_ids_machine_summary.
    
    Args:
        batch (List[Dict]): A list of dictionaries representing a batch of samples.
        
    Returns:
        Dict[str, torch.Tensor]: A dictionary containing three sets of input_ids and other necessary tensors.
    """
    # Extracting features from the batch
    input_ids_text = [item['input_ids_text'] for item in batch]
    attention_masks_text = [item['attention_mask_text'] for item in batch]
    
    input_ids_summary = [item['input_ids_summary'] for item in batch]
    attention_masks_summary = [item['attention_mask_summary'] for item in batch]
    
    input_ids_machine_summary = [item['input_ids_machine_summary'] for item in batch]
    attention_masks_machine_summary = [item['attention_mask_machine_summary'] for item in batch]
    
    labels = [item['labels'] for item in batch]
    
    # Determine the maximum sequence length across all three types of input_ids
    max_length = max([
        max(len(ids) for ids in input_ids_text),
        max(len(ids) for ids in input_ids_summary),
        max(len(ids) for ids in input_ids_machine_summary)
    ])
    
    # Padding the sequences to ensure they all have the same length
    def pad_sequences(sequences, max_length):
        return [seq + [0] * (max_length - len(seq)) for seq in sequences]
    
    padded_input_ids_text = pad_sequences(input_ids_text, max_length)
    padded_attention_masks_text = pad_sequences(attention_masks_text, max_length)
    
    padded_input_ids_summary = pad_sequences(input_ids_summary, max_length)
    padded_attention_masks_summary = pad_sequences(attention_masks_summary, max_length)
    
    padded_input_ids_machine_summary = pad_sequences(input_ids_machine_summary, max_length)
    padded_attention_masks_machine_summary = pad_sequences(attention_masks_machine_summary, max_length)
    
    # Converting lists to tensors
    input_ids_tensor_text = torch.tensor(padded_input_ids_text)
    attention_masks_tensor_text = torch.tensor(padded_attention_masks_text)
    
    input_ids_tensor_summary = torch.tensor(padded_input_ids_summary)
    attention_masks_tensor_summary = torch.tensor(padded_attention_masks_summary)
    
    input_ids_tensor_machine_summary = torch.tensor(padded_input_ids_machine_summary)
    attention_masks_tensor_machine_summary = torch.tensor(padded_attention_masks_machine_summary)
    
    labels_tensor = torch.tensor(labels)
    
    # Returning the processed batch
    return {
        'input_ids_text': input_ids_tensor_text,
        'attention_mask_text': attention_masks_tensor_text,
        'input_ids_summary': input_ids_tensor_summary,
        'attention_mask_summary': attention_masks_tensor_summary,
        'input_ids_machine_summary': input_ids_tensor_machine_summary,
        'attention_mask_machine_summary': attention_masks_tensor_machine_summary,
        'labels': labels_tensor
    }

In [20]:
from transformers import TrainingArguments
from transformers import Trainer
from peft import (
    get_peft_config,
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    PeftType,
    PromptEncoderConfig,
)

peft_config = PromptEncoderConfig(task_type="SEQ_CLS", num_virtual_tokens=30, encoder_hidden_size=256)


training_args = TrainingArguments(
    output_dir="automatic_predictor",
    learning_rate=3e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=25,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
    remove_unused_columns=False
)

In [None]:
from transformers import AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split



model = CustomSequenceClassification.from_pretrained("ai-forever/ruRoberta-large", num_labels=1)
# model = get_peft_model(model, peft_config)
# model.print_trainable_parameters()

tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruRoberta-large")
# tokenizer.model_max_length=482
tokenizer.model_max_length=514

rouge_dataset = get_dataset(tokenizer, "rouge")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=rouge_dataset["train"],
    eval_dataset=rouge_dataset["test"],
    tokenizer=rouge_dataset["train"].tokenizer,
    data_collator=custom_three_inputs_data_collator,
    compute_metrics=compute_metrics,
    
)

trainer.train()

Some weights of CustomSequenceClassification were not initialized from the model checkpoint at ai-forever/ruRoberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaConfig {
  "_name_or_path": "ai-forever/ruRoberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 1,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.41.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}



Could not estimate the number of tokens of the input, floating-point operations will not be computed


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,1.4015,2.614234,0.0,0.0,0.0,0.0
2,1.2789,0.859942,0.0,0.0,0.0,0.0
3,0.9617,0.819989,0.0,0.0,0.0,0.0
4,1.6249,0.695216,0.23913,0.5,0.323529,0.478261
5,1.1613,0.760299,0.0,0.0,0.0,0.0
6,1.2409,0.731282,0.23913,0.5,0.323529,0.478261
7,1.0359,0.813214,0.0,0.0,0.0,0.0
8,1.1291,0.898164,0.0,0.0,0.0,0.0
9,0.8402,0.759284,0.0,0.0,0.0,0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize