In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

# Configuration for loading and splitting the labelled titles.
DATA_FILE = "parsed_data.csv"
LABEL_IDS = {"UA": 0, "RU": 1, "NONE": 2}  # pov string -> integer class id
TRAIN_FRACTION = 0.8
SPLIT_SEED = 42  # fixed so the train/test split is the same on every run

# Read only the two columns that are used, from the path declared above
# (previously the literal was repeated and DATA_FILE was never referenced).
df = pd.read_csv(DATA_FILE, usecols=["title", "pov"])
titles = df["title"]
povs = df["pov"].map(LABEL_IDS).rename("label")
df = df.drop("pov", axis=1)
df = df.rename(columns={"title": "text"})
df = df.join(povs)

# Drop rows with a missing title or a pov that is missing / not in LABEL_IDS.
df = df.dropna(axis=0)
# Any unmapped pov makes "label" float64 (NaN cannot live in an int column),
# and it stays float after dropna. Cast back so the labels are integer class ids.
df = df.astype({"label": "int64"})

# Seeded sample -> reproducible split; the remaining rows form the test set.
train_ds = df.sample(frac=TRAIN_FRACTION, random_state=SPLIT_SEED)
test_ds = df.drop(train_ds.index)

from datasets import Dataset

train_ds = Dataset.from_pandas(train_ds, split="train")
test_ds = Dataset.from_pandas(test_ds, split="test")

# Show a small preview instead of dumping the whole frame.
print(df.head())



In [None]:
from transformers import AutoTokenizer

MAX_TITLE_TOKENS = 50  # every title is truncated/padded to exactly this length

# NOTE(review): the tokenizer checkpoint must match the one the model is loaded
# from ("bert-base-cased" in the model cell). The UA/RU labels suggest the titles
# may not be English -- confirm this checkpoint suits the data.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

def tokenize_dataset(data):
    """Tokenize the ``"text"`` field of ``data`` to a fixed length.

    ``data`` is what ``Dataset.map`` passes in; ``data["text"]`` is handed
    straight to the tokenizer, which accepts a single string or a list of
    strings. Sequences are truncated and padded to ``MAX_TITLE_TOKENS`` tokens.

    Returns the tokenizer output, which ``Dataset.map`` merges into the dataset
    as new columns.
    """
    return tokenizer(
        data["text"],
        max_length=MAX_TITLE_TOKENS,
        truncation=True,
        padding="max_length",
    )

# batched=True passes many rows per call rather than one row at a time.
# Padding is to a fixed max_length, so the resulting columns are the same.
train_ds_tokenized = train_ds.map(tokenize_dataset, batched=True)
test_ds_tokenized = test_ds.map(tokenize_dataset, batched=True)

In [None]:
from transformers import AutoModelForSequenceClassification

# Class ids follow the pov encoding used when the labels were built:
# "UA" -> 0, "RU" -> 1, "NONE" -> 2. Storing the names in the model config
# means a saved model reports these names rather than generic LABEL_0..LABEL_2.
ID2LABEL = {0: "UA", 1: "RU", 2: "NONE"}
LABEL2ID = {name: class_id for class_id, name in ID2LABEL.items()}

# Pretrained BERT encoder with a new 3-way sequence classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-cased",
    num_labels=len(ID2LABEL),
    id2label=ID2LABEL,
    label2id=LABEL2ID,
)

In [None]:
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

OUTPUT_DIR = "./urr_transformer/"  # checkpoints go here, logs in a sub-folder

# Logging, checkpointing and evaluation all run once per epoch. The previous
# logging_steps / save_steps / eval_steps values were dropped: they only apply
# when the matching strategy is "steps", so they had no effect here.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` -- confirm against the installed version.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    logging_dir=f"{OUTPUT_DIR}logs",
    logging_strategy="epoch",
    save_strategy="epoch",
    evaluation_strategy="epoch",
    num_train_epochs=2,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=5e-6,
    seed=42,
    # No metric_for_best_model is given, so the library default decides which
    # checkpoint counts as "best" when it is reloaded at the end of training.
    load_best_model_at_end=True,
)

import evaluate

# Load the metric once, up front; previously it was re-loaded inside
# compute_metrics on every evaluation pass.
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    Args:
        eval_pred: a pair ``(logits, labels)``; the predicted class for each
            row is the argmax of ``logits`` along axis 1.

    Returns:
        Whatever the "accuracy" metric's ``compute`` returns for the predicted
        classes against ``labels``.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    return accuracy_metric.compute(predictions=predictions, references=labels)

# Stop training as soon as one evaluation fails to improve on the best so far.
early_stopping = EarlyStoppingCallback(early_stopping_patience=1)

# NOTE(review): the test split doubles as the evaluation set here, so it also
# drives early stopping and best-checkpoint selection -- confirm that a separate
# validation split is not wanted.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_ds_tokenized,
    eval_dataset=test_ds_tokenized,
    callbacks=[early_stopping],
)

# Run fine-tuning with the arguments configured above.
trainer.train()

In [None]:
MODEL_DIR = "./my_model"

# Persist the fine-tuned model weights and config.
trainer.save_model(MODEL_DIR)
# The Trainer was not given the tokenizer, so save it explicitly. With both in
# the same folder, the model can be reloaded for inference from MODEL_DIR alone.
tokenizer.save_pretrained(MODEL_DIR)