In [None]:
!git clone https://github.com/ai4se-course/ai4se-hse-course-24-25.git

Cloning into 'ai4se-hse-course-24-25'...
remote: Enumerating objects: 12, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 12 (delta 0), reused 0 (delta 0), pack-reused 5 (from 1)[K
Receiving objects: 100% (12/12), 4.42 KiB | 238.00 KiB/s, done.


In [None]:
!pip install -r "ai4se-hse-course-24-25/01-toxic-review-classification/requirements.txt"
!pip install -r "ai4se-hse-course-24-25/01-toxic-review-classification/requirements_dev.txt"

Collecting datasets==2.20.0 (from -r ai4se-hse-course-24-25/01-toxic-review-classification/requirements.txt (line 1))
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate==0.4.2 (from -r ai4se-hse-course-24-25/01-toxic-review-classification/requirements.txt (line 2))
  Downloading evaluate-0.4.2-py3-none-any.whl.metadata (9.3 kB)
Collecting scikit-learn==1.5.1 (from -r ai4se-hse-course-24-25/01-toxic-review-classification/requirements.txt (line 4))
  Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting tqdm==4.66.4 (from -r ai4se-hse-course-24-25/01-toxic-review-classification/requirements.txt (line 5))
  Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets==2.20.0->-r ai4se-hse-course-24-25/01-toxic-review-classification/re

In [None]:
from statistics import mean, stdev

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import KFold, StratifiedKFold, cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

def classifier(dataset, model):
    X = dataset['message']
    y = dataset['is_toxic']

    if model == 'classic_ml':
        # Преобразование текста в числовое представление

        vectorizers = {
            'tfidf': TfidfVectorizer(),
            'count': CountVectorizer()
        }

        models = {
            'rf': RandomForestClassifier(),
            'lr': LogisticRegression()
        }

        for vect_name, vectorizer in vectorizers.items():
          X_vec = vectorizer.fit_transform(X)
          for model_name, model_instance in models.items():
              X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2)
              model_instance.fit(X_train, y_train)

              kf = KFold(n_splits=10, shuffle=True)
              scores = cross_val_score(model_instance, X_train, y_train, cv=kf, scoring='f1')
              print(f"{vect_name} - {model_name} - f1: {scores.mean():.4f}")


              y_pred = model_instance.predict(X_test)
              print(confusion_matrix(y_test, y_pred))

    elif model == 'microsoft/codebert-base':
        tokenizer = AutoTokenizer.from_pretrained(model)
        model = AutoModelForSequenceClassification.from_pretrained(model, num_labels=2)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)

        class Dataset(torch.utils.data.Dataset):
            def __init__(self, encodings, labels):
                self.encodings = encodings
                self.labels = labels

            def __getitem__(self, idx):
                item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
                item['labels'] = torch.tensor(self.labels[idx]).to(device)
                return item

            def __len__(self):
                return len(self.labels)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        train_encodings = tokenizer(X_train, padding=True, truncation=True, return_tensors='pt')
        test_encodings = tokenizer(X_test, padding=True, truncation=True, return_tensors='pt')

        train_dataset = Dataset(train_encodings, y_train)
        test_dataset = Dataset(test_encodings, y_test)

        training_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=3,
            dataloader_pin_memory=False,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            evaluation_strategy="epoch"
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
            compute_metrics=compute_metrics
        )

        trainer.train()

        codebert_eval_results = trainer.evaluate()
        print(codebert_eval_results)
    else:
        raise ValueError("Invalid model type")



In [None]:
from pathlib import Path

import re
import datasets
import pandas as pd

contractions_dict = {"ain't": "is not", "aren't": "are not",
                       "can't": "cannot", "'cause": "because",
                       "could've": "could have", "couldn't": "could not",
                       "didn't": "did not", "doesn't": "does not",
                       "don't": "do not", "hadn't": "had not", "hasn't": "has not",
                       "haven't": "have not", "he'd": "he would", "he'll": "he will",
                       "he's": "he is", "how'd": "how did", "how'd'y": "how do you",
                       "how'll": "how will", "how's": "how is", "I'd": "I would",
                       "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have",
                       "I'm": "I am", "I've": "I have", "i'd": "i would", "i'd've": "i would have",
                       "i'll": "i will", "i'll've": "i will have", "i'm": "i am",
                       "i've": "i have", "isn't": "is not", "it'd": "it would",
                       "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have",
                       "it's": "it is", "let's": "let us", "ma'am": "madam", "mayn't": "may not",
                       "might've": "might have", "mightn't": "might not",
                       "mightn't've": "might not have", "must've": "must have",
                       "mustn't": "must not", "mustn't've": "must not have",
                       "needn't": "need not", "needn't've": "need not have",
                       "o'clock": "of the clock", "oughtn't": "ought not",
                       "oughtn't've": "ought not have", "shan't": "shall not",
                       "sha'n't": "shall not", "shan't've": "shall not have",
                       "she'd": "she would", "she'd've": "she would have",
                       "she'll": "she will", "she'll've": "she will have",
                       "she's": "she is", "should've": "should have", "shouldn't": "should not",
                       "shouldn't've": "should not have", "so've": "so have", "so's": "so as",
                       "this's": "this is", "that'd": "that would", "that'd've": "that would have",
                       "that's": "that is", "there'd": "there would",
                       "there'd've": "there would have", "there's": "there is",
                       "here's": "here is", "they'd": "they would", "they'd've": "they would have",
                       "they'll": "they will", "they'll've": "they will have", "they're": "they are",
                       "they've": "they have", "to've": "to have", "wasn't": "was not", "we'd": "we would",
                       "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have",
                       "we're": "we are", "we've": "we have", "weren't": "were not",
                       "what'll": "what will",
                       "what'll've": "what will have", "what're": "what are", "what's": "what is",
                       "what've": "what have", "when's": "when is", "when've": "when have",
                       "where'd": "where did", "where's": "where is", "where've": "where have",
                       "who'll": "who will", "who'll've": "who will have", "who's": "who is",
                       "who've": "who have", "why's": "why is", "why've": "why have",
                       "will've": "will have", "won't": "will not", "won't've": "will not have",
                       "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have",
                       "y'all": "you all", "y'all'd": "you all would", "y'all'd've": "you all would have",
                       "y'all're": "you all are", "y'all've": "you all have", "you'd": "you would",
                       "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have",
                       "you're": "you are", "you've": "you have", "aint": "is not", "arent": "are not",
                       "cant": "cannot", "cause": "because",
                       "couldve": "could have", "couldnt": "could not",
                       "didnt": "did not", "doesnt": "does not",
                       "dont": "do not", "hadnt": "had not", "hasnt": "has not",
                       "havent": "have not", "howdy": "how do you",
                       "its": "it is", "lets": "let us", "maam": "madam", "maynt": "may not",
                       "mightve": "might have", "mightnt": "might not",
                       "mightntve": "might not have", "mustve": "must have",
                       "mustnt": "must not", "mustntve": "must not have",
                       "neednt": "need not", "needntve": "need not have",
                       "oclock": "of the clock", "oughtnt": "ought not",
                       "shouldve": "should have", "shouldnt": "should not",
                       "werent": "were not", "yall": "you all", "youre": "you are",
                       "youve": "you have"}

profanity_dict = {
    r'(f)(u|[^a-z0-9 ])(c|[^a-z0-9 ])(k|[^a-z0-9 ])([^ ])*': 'fuck',
    r'(f)([^a-z]*)(u)([^a-z]*)(c)([^a-z]*)(k)': 'fuck',
    r' f[!@#\$%\^\&\*]*u[!@#\$%\^&\*]*k': 'fuck',
    r'f u u c': 'fuck',
    r'(f)(c|[^a-z ])(u|[^a-z ])(k)': 'fuck',
    r'f\*': 'fuck',
    r'feck ': 'fuck',
    r' fux ': 'fuck',
    r'f\*\*': 'fuck',
    r'f\-ing': 'fuck',
    r'f\.u\.': 'fuck',
    r'f###': 'fuck',
    r' fu ': 'fuck',
    r'f@ck': 'fuck',
    r'f u c k': 'fuck',
    r'f uck': 'fuck',
    r'f ck': 'fuck',
    r' (c)(r|[^a-z0-9 ])(a|[^a-z0-9 ])(p|[^a-z0-9 ])([^ ])*': 'crap',
    r' (c)([^a-z]*)(r)([^a-z]*)(a)([^a-z]*)(p)': 'crap',
    r' c[!@#\$%\^\&\*]*r[!@#\$%\^&\*]*p': 'crap',
    r'cr@p': 'crap',
    r' c r a p': 'crap',
    r'[^a-z]ass ': 'ass',
    r'[^a-z]azz ': 'ass',
    r'arrse': 'ass',
    r' arse ': 'ass',
    r'@\\$\\$': 'ass',
    r'[^a-z]anus': 'ass',
    r' a\*s\*s': 'ass',
    r'[^a-z]ass[^a-z ]': 'ass',
    r'a[@#\$%\^&\*][@#\$%\^&\*]': 'ass',
    r'[^a-z]anal ': 'ass',
    r'a s s': 'ass',
    r' a[s|z]*wipe': 'asshole',
    r'a[s|z]*[w]*h[o|0]+[l]*e': 'asshole',
    r'@\\$\\$hole': 'asshole',
    r'bitches': 'bitch',
    r' b[w]*i[t]*ch': 'bitch',
    r' b!tch': 'bitch',
    r' bi\+ch': 'bitch',
    r' b!\+ch': 'bitch',
    r' (b)([^a-z]*)(i)([^a-z]*)(t)([^a-z]*)(c)([^a-z]*)(h)': 'bitch',
    r' biatch': 'bitch',
    r' bi\*\*h': 'bitch',
    r' bytch': 'bitch',
    r'b i t c h': 'bitch',
    r'ba[s|z]+t[e|a]+rd': 'bastard',
    r'transgender': 'transgender',
    r'gay': 'gay',
    r'homo': 'gay',
    r'[^a-z]cock': 'cock',
    r'c0ck': 'cock',
    r'[^a-z]cok ': 'cock',
    r'c0k': 'cock',
    r'[^a-z]cok[^aeiou]': 'cock',
    r' cawk': 'cock',
    r'(c)([^a-z ])(o)([^a-z ]*)(c)([^a-z ]*)(k)': 'cock',
    r'c o c k': 'cock',
    r' dick[^aeiou]': 'dick',
    r'd i c k': 'dick',
    r'sucker': 'suck',
    r'(s)([^a-z ]*)(u)([^a-z ]*)(c)([^a-z ]*)(k)': 'suck',
    r'sucks': 'suck',
    r'5uck': 'suck',
    r's u c k': 'suck',
    r'cunt': 'cunt',
    r'c u n t': 'cunt',
    r'bullsh\*t': 'bullshit',
    r'bull\\$hit': 'bullshit',
    r'bull sh.t': 'bullshit',
    r'jerk': 'jerk',
    r'i[d]+io[t]+': 'idiot',
    r'(i)([^a-z ]*)(d)([^a-z ]*)(i)([^a-z ]*)(o)([^a-z ]*)(t)': 'idiot',
    r'idiots': 'idiot',
    r'i d i o t': 'idiot',
    r'(d)([^a-z ]*)(u)([^a-z ]*)(m)([^a-z ]*)(b)': 'dumb',
    r'shitty': 'shit',
    r'(s)([^a-z ]*)(h)([^a-z ]*)(i)([^a-z ]*)(t)': 'shit',
    r'shite': 'shit',
    r'\\$hit': 'shit',
    r's h i t': 'shit',
    r'sh\*tty': 'shit',
    r'sh\*ty': 'shit',
    r'sh\*t': 'shit',
    r'shythole': 'shit hole',
    r'sh.thole': 'shit hole',
    r'returd': 'retard',
    r'retad': 'retard',
    r'retard': 'retard',
    r'wiktard': 'retard',
    r'wikitud': 'retard',
    r'raped': 'rape',
    r'dumbass': 'dumb ass',
    r'dubass': 'dumb ass',
    r'butthead': 'ass head',
    r'sexy': 'sex',
    r's3x': 'sex',
    r'sexuality': 'sex',
    r'nigger': 'nigger',
    r'ni[g]+a': 'nigger',
    r' nigr ': 'nigger',
    r'negrito': 'nigger',
    r'niguh': 'nigger',
    r'n3gr': 'nigger',
    r'n i g g e r': 'nigger',
    r' stfu': 'shut the fuck up',
    r'^stfu': 'shut the fuck up',
    r' fyfi': 'for your fucking information',
    r'^fyfi': 'for your fucking information',
    r'gtfo': 'get the fuck off',
    r'^gtfo': 'get the fuck off',
    r' omfg': 'oh my fucking god',
    r'^omfg': 'oh my fucking god',
    r' wth': 'what the hell',
    r'^wth': 'what the hell',
    r' wtf': 'what the fuck',
    r'^wtf': 'what the fuck',
    r' sob ': 'son of bitch',
    r'^sob ': 'son of bitch',
    r'pussy[^c]': 'pussy',
    r'pusy': 'pussy',
    r'pussi[^l]': 'pussy',
    r'pusses': 'pussy',
    r'(p)(u|[^a-z0-9 ])(s|[^a-z0-9 ])(s|[^a-z0-9 ])(y)': 'pussy',
    r'faggot': 'faggot',
    r' fa[g]+[s]*[^a-z ]': 'faggot',
    r'fagot': 'faggot',
    r'f a g g o t': 'faggot',
    r'faggit': 'faggot',
    r'(f)([^a-z ]*)(a)([^a-z ]*)([g]+)([^a-z ]*)(o)([^a-z ]*)(t)': 'faggot',
    r'fau[g]+ot': 'faggot',
    r'fae[g]+ot': 'faggot',
    r' motha f': 'mother fucker',
    r' mother f': 'mother fucker',
    r'motherucker': 'mother fucker',
    r' mofo': 'mother fucker',
    r' mf ': 'mother fucker',
    r'wh\*\*\*': 'whore',
    r'w h o r e': 'whore',
    r'ha\*\*\*ha': 'haha',
}


def clean_text(text):
    # Удаление URL-ссылок
    text = re.sub(r'http\S+|www\S+', '', text)

    # Исправление сокращений
    for word, correction in contractions_dict.items():
        text = re.sub(r'\b' + word + r'\b', correction, text)

    # Удаление повторяющихся символов
    text = re.sub(r'(.)\1+', r'\1', text)

    # Удаление специальных символов
    text = re.sub(r'[&^#*]', '', text)

    # Исправление ругательных слов
    #for pattern, replacement in profanity_dict.items():
    #    text = re.sub(r'\b' + pattern + r'\b', replacement, text)

    return text

def prepare(raw_data: Path) -> datasets.Dataset:
    # Загрузка данных
    df = pd.read_excel(raw_data)

    # Удаление пропущенных значений и дубликатов
    df.dropna(inplace=True)
    df.drop_duplicates(inplace=True)

    # Очистка текста
    df['message'] = df['message'].apply(clean_text)

    # Преобразование DataFrame в Dataset
    dataset = datasets.Dataset.from_pandas(df)

    return dataset

def load_dataset(path: Path) -> datasets.Dataset:
    return datasets.load_from_disk(str(path))

def save_dataset(dataset: datasets.Dataset, path: Path) -> None:
    dataset.save_to_disk(str(path))



In [None]:
!wget https://github.com/WSU-SEAL/ToxiCR/raw/refs/heads/master/models/code-review-dataset-full.xlsx -O /content/code-review-dataset-full.xlsx

--2024-11-11 11:57:03--  https://github.com/WSU-SEAL/ToxiCR/raw/refs/heads/master/models/code-review-dataset-full.xlsx
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/WSU-SEAL/ToxiCR/refs/heads/master/models/code-review-dataset-full.xlsx [following]
--2024-11-11 11:57:03--  https://raw.githubusercontent.com/WSU-SEAL/ToxiCR/refs/heads/master/models/code-review-dataset-full.xlsx
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1031798 (1008K) [application/octet-stream]
Saving to: ‘/content/code-review-dataset-full.xlsx’


2024-11-11 11:57:04 (25.0 MB/s) - ‘/content/code-review-dataset-full.xlsx’ sa

In [None]:
default_data_path = Path('./prepared-dataset')
choices=['classic_ml', 'microsoft/codebert-base'],

def prepare_data():
    dataset = prepare(Path('/content/code-review-dataset-full.xlsx'))
    save_dataset(dataset, default_data_path)


def classify(model):
    dataset = load_dataset(default_data_path)
    classifier(dataset, model)

In [None]:
prepare_data()

Saving the dataset (0/1 shards):   0%|          | 0/12904 [00:00<?, ? examples/s]

In [None]:
classify('classic_ml')

tfidf - rf - f1: 0.6522
[[2042   30]
 [ 245  264]]
tfidf - lr - f1: 0.5727
[[2055   30]
 [ 290  206]]
count - rf - f1: 0.6495
[[2025   26]
 [ 254  276]]
count - lr - f1: 0.7278
[[2024   72]
 [ 170  315]]


In [None]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold, cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.pipeline import Pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch


def experiments_lr(dataset):
    X = dataset['message']
    y = dataset['is_toxic']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    kf = KFold(n_splits=10, shuffle=True)

    pipeline = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', LogisticRegression())
    ])

    param_grid = {
        'tfidf__max_features': [1000, 5000, 10000],
        'tfidf__ngram_range': [(1, 1), (1, 2)],
        'clf__C': [0.1, 1, 10],
        'clf__solver': ['liblinear', 'lbfgs']
    }

    grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='f1')
    grid_search.fit(X_train, y_train)

    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best score: {grid_search.best_score_}")

def experiments_rf(dataset):
    X = dataset['message']
    y = dataset['is_toxic']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    kf = KFold(n_splits=10, shuffle=True)

    pipeline = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', RandomForestClassifier())
    ])

    param_grid = {
        'tfidf__max_features': [1000, 5000, 10000],
        'tfidf__ngram_range': [(1, 1), (1, 2)],
        'clf__n_estimators': [100, 200, 300],
        'clf__max_depth': [None, 10, 20],
        'clf__min_samples_split': [2, 5, 10],
        'clf__min_samples_leaf': [1, 2, 4]
    }
    grid_search = GridSearchCV(pipeline, param_grid, cv=kf, scoring='f1')
    grid_search.fit(X_train, y_train)

    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best score: {grid_search.best_score_}")

experiments_lr(load_dataset(default_data_path))

  _data = np.array(data, dtype=dtype, copy=copy,


Best parameters: {'clf__C': 10, 'clf__solver': 'lbfgs', 'tfidf__max_features': 5000, 'tfidf__ngram_range': (1, 1)}
Best score: 0.7231015211867423


In [None]:
import os
os.environ['WANDB_DISABLED'] = 'true'

classify('microsoft/codebert-base')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3596,0.277647,0.904301,0.748216,0.782516,0.716797
2,0.2412,0.260293,0.909337,0.790323,0.730132,0.861328
3,0.1794,0.270786,0.922898,0.797558,0.832272,0.765625


  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).to(device) for key, val in self.encodings.items()}


{'eval_loss': 0.27078625559806824, 'eval_accuracy': 0.9228981015110422, 'eval_f1': 0.797558494404883, 'eval_precision': 0.832271762208068, 'eval_recall': 0.765625, 'eval_runtime': 78.3044, 'eval_samples_per_second': 32.961, 'eval_steps_per_second': 2.069, 'epoch': 3.0}
