In [1]:
from huggingface_hub import login
from secret import HF_TOKEN

login(
  token=HF_TOKEN, 
  add_to_git_credential=True
)

In [2]:
import torch
print("CUDA available:", torch.cuda.is_available())

CUDA available: True


In [3]:
from datasets import load_dataset

ds = load_dataset("Glebosol/spb_POS_data")

print(len(ds['train']))

21996


In [4]:
ds['train']

Dataset({
    features: ['text', 'label'],
    num_rows: 21996
})

In [5]:
from random import randrange

random_id = randrange(len(ds['train']))
ds['train'][random_id]

{'text': ' невский дк стремительно уходящий в забвение', 'label': 7}

In [6]:
from transformers import AutoTokenizer


model_id = "cointegrated/rubert-tiny2"
tokenizer = AutoTokenizer.from_pretrained(model_id)

def tokenize(batch):
    return tokenizer(batch['text'], padding='max_length', truncation=True, return_tensors="pt")

raw_dataset =  ds.rename_column("label", "labels") 
raw_dataset = raw_dataset.shuffle(seed=23)
tokenized_dataset = raw_dataset.map(tokenize, batched=True,remove_columns=["text"])
tokenized_dataset = tokenized_dataset["train"].train_test_split(test_size=0.2)

print(tokenized_dataset["train"].features.keys())

Map:   0%|          | 0/21996 [00:00<?, ? examples/s]

dict_keys(['labels', 'input_ids', 'token_type_ids', 'attention_mask'])


In [7]:
from transformers import AutoModelForSequenceClassification


model_id = "cointegrated/rubert-tiny2"


labels = tokenized_dataset["train"].features["labels"].names
num_labels = len(labels)
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label

model = AutoModelForSequenceClassification.from_pretrained(
    model_id, num_labels=num_labels, label2id=label2id, id2label=id2label
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cointegrated/rubert-tiny2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
import evaluate
import numpy as np


metric = evaluate.load("f1")


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels, average="weighted")

In [9]:
from huggingface_hub import HfFolder
from transformers import Trainer, TrainingArguments
import torch._dynamo
torch._dynamo.config.suppress_errors = True

# Id for remote repository
repository_id = "Glebosol/spb_city_prob_class"

# Define training args
training_args = TrainingArguments(
    output_dir=repository_id,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    learning_rate=5e-5,
	num_train_epochs=3,
	# PyTorch 2.0 specifics
    bf16=True, # bfloat16 training
	torch_compile=True, # optimizations
    optim="adamw_torch_fused", # improved optimizer
    # logging & evaluation strategies
    logging_dir=f"{repository_id}/logs",
    logging_strategy="steps",
    logging_steps=200,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # push to hub parameters
    report_to="tensorboard",
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=repository_id,
    hub_token=HfFolder.get_token(),

)

# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics,
)



In [10]:
trainer.train()

W0123 20:54:39.097000 11248 site-packages\torch\_dynamo\convert_frame.py:1125] WON'T CONVERT forward c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\models\bert\modeling_bert.py line 1636 
W0123 20:54:39.097000 11248 site-packages\torch\_dynamo\convert_frame.py:1125] due to: 
W0123 20:54:39.097000 11248 site-packages\torch\_dynamo\convert_frame.py:1125] Traceback (most recent call last):
W0123 20:54:39.097000 11248 site-packages\torch\_dynamo\convert_frame.py:1125]   File "c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\_dynamo\output_graph.py", line 1446, in _call_user_compiler
W0123 20:54:39.097000 11248 site-packages\torch\_dynamo\convert_frame.py:1125]     compiled_fn = compiler_fn(gm, self.example_inputs())
W0123 20:54:39.097000 11248 site-packages\torch\_dynamo\convert_frame.py:1125]                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
W0123 20:54:39.097000 11248 site-packages\torch\_dynamo\convert_frame.py:11

Epoch,Training Loss,Validation Loss,F1
1,0.8437,0.819343,0.740288
2,0.6405,0.7884,0.753462
3,0.5648,0.80368,0.758882


W0124 02:16:37.931000 11248 site-packages\torch\_dynamo\convert_frame.py:1125] WON'T CONVERT forward c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\accelerate\utils\operations.py line 818 
W0124 02:16:37.931000 11248 site-packages\torch\_dynamo\convert_frame.py:1125] due to: 
W0124 02:16:37.931000 11248 site-packages\torch\_dynamo\convert_frame.py:1125] Traceback (most recent call last):
W0124 02:16:37.931000 11248 site-packages\torch\_dynamo\convert_frame.py:1125]   File "c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\_dynamo\output_graph.py", line 1446, in _call_user_compiler
W0124 02:16:37.931000 11248 site-packages\torch\_dynamo\convert_frame.py:1125]     compiled_fn = compiler_fn(gm, self.example_inputs())
W0124 02:16:37.931000 11248 site-packages\torch\_dynamo\convert_frame.py:1125]                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
W0124 02:16:37.931000 11248 site-packages\torch\_dynamo\convert_frame.py:1125]   File "

TrainOutput(global_step=6600, training_loss=0.7731756973266601, metrics={'train_runtime': 60730.4745, 'train_samples_per_second': 0.869, 'train_steps_per_second': 0.109, 'total_flos': 1559108853891072.0, 'train_loss': 0.7731756973266601, 'epoch': 3.0})

In [11]:
tokenizer.save_pretrained(repository_id)
trainer.create_model_card()
trainer.push_to_hub()

events.out.tfevents.1737654874.DESKTOP-ER3Q3E9.11248.0:   0%|          | 0.00/15.2k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Glebosol/spb_city_prob_class/commit/3343aa59eee38d2c7298c40e4b4209ac88ccbb9b', commit_message='End of training', commit_description='', oid='3343aa59eee38d2c7298c40e4b4209ac88ccbb9b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Glebosol/spb_city_prob_class', endpoint='https://huggingface.co', repo_type='model', repo_id='Glebosol/spb_city_prob_class'), pr_revision=None, pr_num=None)