In [1]:
from utils.io import read_tabular
from utils.finetuning import get_device, split_data, create_sequence_classification_dataset, preprocess_sequence_classification_dataset

from datasets import DatasetDict
from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification, Trainer, TrainingArguments

from utils.metrics import parse_sequence_classifier_prediction_output, compute_sequence_classification_metrics_binary

In [2]:
# name of the pretrained checkpoint handed to the `from_pretrained` calls below
model_name = 'roberta-base'
# device selected by the project helper (string form is printed for the record)
device = get_device()
print(f'Using device: {device!s}')

Using device: mps


In [3]:
# Alternative labeled dataset; uncomment this line (and comment out the next one) to train on it instead:
# fp = '../data/labeled/barbera_automated_2021/barbera_automated_2021-econ_news_sentiment.tsv'
fp = '../data/labeled/bestvater_sentiment_2023/bestvater_sentiment_2023-motn_responses_sentiment.tsv'
# only the 'text' and 'label' columns are requested from the file
df = read_tabular(fp, columns=['text', 'label'])

In [4]:
# number of labeled examples loaded
len(df)

5417

In [5]:
# relative frequency of each label value (class-balance check before splitting)
df.label.value_counts(normalize=True)

label
0    0.565442
1    0.434558
Name: proportion, dtype: float64

In [6]:
# Build the label <-> id mappings from the *sorted* unique label values.
# `Series.unique()` returns values in order of first appearance, so without
# sorting the mapping would depend on row order (e.g. a file starting with a
# `1` row would yield {1: 0, 0: 1} and silently flip the positive class).
label2id = {label: idx for idx, label in enumerate(sorted(df.label.unique()))}
id2label = {idx: label for label, idx in label2id.items()}
# display the mapping so it can be checked by eye
id2label

{0: 0, 1: 1}

In [7]:
# hold out 15% each for the dev and test splits, stratified on the label column,
# with a fixed seed so the split is reproducible
data_splits = split_data(
    df,
    dev_size=0.15,
    test_size=0.15,
    seed=42,
    stratify_by='label',
    return_dict=True,
)

In [8]:
# convert every split with the project helper and bundle the results in one DatasetDict
# NOTE: rebinds `data_splits`, so this cell cannot simply be re-run on its own output
data_splits = DatasetDict({
    split_name: create_sequence_classification_dataset(split_df)
    for split_name, split_df in data_splits.items()
})

In [9]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# run the project preprocessing helper over every split, one batch at a time
data_splits = data_splits.map(
    lambda batch: preprocess_sequence_classification_dataset(
        batch,
        tokenizer=tokenizer,
        label2id=label2id,
        truncation=True,
    ),
    batched=True,
)



Map:   0%|          | 0/3793 [00:00<?, ? examples/s]

Map:   0%|          | 0/812 [00:00<?, ? examples/s]

Map:   0%|          | 0/812 [00:00<?, ? examples/s]

In [10]:
# drop the raw 'text' and 'label' columns now that preprocessing has run
# NOTE(review): presumably the preprocessing step added the tokenized inputs and the
# encoded label column the model consumes — confirm in utils.finetuning
# NOTE: not idempotent — re-running this cell raises once the columns are gone
data_splits = data_splits.remove_columns(['text', 'label'])

In [11]:
# where checkpoints and logs are written (trailing slash is relied on below)
dest = './../results/example_classifier/'
training_args = TrainingArguments(
    output_dir=dest,
    # hyperparameters
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    optim='adamw_torch',
    # NOTE: `use_mps_device` is intentionally not passed. It is deprecated
    # (scheduled for removal in transformers v5); the MPS backend is selected
    # automatically when available, just like CUDA.
    # mixed precision only on CUDA devices
    fp16=str(device).startswith('cuda'),
    # evaluation on dev set
    eval_strategy='epoch',
    metric_for_best_model='f1', # use 'f1_macro' if multiclass
    # model saving (save and eval strategies must match for load_best_model_at_end)
    save_strategy='epoch',
    load_best_model_at_end=True,
    save_total_limit=2,
    # logging
    logging_strategy='epoch',
    logging_dir=dest+'logs',
    # for reproducibility
    seed=42,
    data_seed=42,
    full_determinism=True
)



In [15]:
def model_init():
    """Build a fresh sequence-classification model for the Trainer.

    Loads `model_name` with one output per entry in `label2id`, defaults the
    problem type to single-label classification when the config leaves it
    unset, and stores the label mappings in the config only when the label
    names are strings.

    Returns:
        The model, moved to `device`.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(label2id),
    )
    config = classifier.config
    if config.problem_type is None:
        config.problem_type = 'single_label_classification'
    # integer labels need no name mapping; only string labels are recorded
    labels_are_named = isinstance(id2label[0], str)
    if labels_are_named:
        config.id2label = id2label
        config.label2id = label2id
    return classifier.to(device)

In [13]:
def compute_metrics(p):
    """Metric callback handed to the Trainer.

    Args:
        p: prediction output passed in by the Trainer during evaluation.

    Returns:
        Whatever `compute_sequence_classification_metrics_binary` produces for
        the gold labels and predictions parsed out of `p`.
    """
    y_true, y_pred = parse_sequence_classifier_prediction_output(p)
    scores = compute_sequence_classification_metrics_binary(y_true=y_true, y_pred=y_pred)
    return scores

In [16]:
trainer = Trainer(
    # pass the factory rather than an instance so the Trainer builds the model itself
    model_init=model_init,
    args=training_args,
    # data: train on 'train', evaluate on 'dev' during training
    train_dataset=data_splits['train'],
    eval_dataset=data_splits['dev'],
    # batching: the collator pads each batch using the tokenizer
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer),
    # metrics reported at every evaluation
    compute_metrics=compute_metrics,
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
# fine-tune; per the TrainingArguments, the dev set is evaluated and a checkpoint
# saved after every epoch, and the best checkpoint (by dev 'f1') is reloaded at the end
trainer.train()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/714 [00:00<?, ?it/s]

{'loss': 0.3736, 'grad_norm': 3.4375319480895996, 'learning_rate': 3.3333333333333335e-05, 'epoch': 1.0}


  0%|          | 0/26 [00:00<?, ?it/s]

{'eval_loss': 0.2506186068058014, 'eval_accuracy': 0.9014778325123153, 'eval_accuracy_balanced': 0.8964987316928661, 'eval_f1': 0.8833819241982507, 'eval_precision': 0.9099099099099099, 'eval_recall': 0.8583569405099151, 'eval_runtime': 21.9676, 'eval_samples_per_second': 36.964, 'eval_steps_per_second': 1.184, 'epoch': 1.0}
{'loss': 0.2161, 'grad_norm': 0.11916719377040863, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}


  0%|          | 0/26 [00:00<?, ?it/s]

{'eval_loss': 0.30391860008239746, 'eval_accuracy': 0.9125615763546798, 'eval_accuracy_balanced': 0.9099008189993025, 'eval_f1': 0.8984263233190272, 'eval_precision': 0.9075144508670521, 'eval_recall': 0.8895184135977338, 'eval_runtime': 9.8305, 'eval_samples_per_second': 82.6, 'eval_steps_per_second': 2.645, 'epoch': 2.0}
{'loss': 0.1383, 'grad_norm': 0.11332330107688904, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/26 [00:00<?, ?it/s]

{'eval_loss': 0.38194358348846436, 'eval_accuracy': 0.9224137931034483, 'eval_accuracy_balanced': 0.9228677936393317, 'eval_f1': 0.9121338912133892, 'eval_precision': 0.8983516483516484, 'eval_recall': 0.9263456090651558, 'eval_runtime': 10.2987, 'eval_samples_per_second': 78.845, 'eval_steps_per_second': 2.525, 'epoch': 3.0}
{'train_runtime': 539.4033, 'train_samples_per_second': 21.096, 'train_steps_per_second': 1.324, 'train_loss': 0.2426774535192495, 'epoch': 3.0}


TrainOutput(global_step=714, training_loss=0.2426774535192495, metrics={'train_runtime': 539.4033, 'train_samples_per_second': 21.096, 'train_steps_per_second': 1.324, 'total_flos': 377377822814460.0, 'train_loss': 0.2426774535192495, 'epoch': 3.0})

In [18]:
# final evaluation on the held-out test split
# NOTE: the metrics are still reported under the default 'eval_' key prefix
trainer.evaluate(data_splits['test'])

  0%|          | 0/26 [00:00<?, ?it/s]

{'eval_loss': 0.46976912021636963,
 'eval_accuracy': 0.9137931034482759,
 'eval_accuracy_balanced': 0.9142612033796836,
 'eval_f1': 0.9025069637883009,
 'eval_precision': 0.8876712328767123,
 'eval_recall': 0.9178470254957507,
 'eval_runtime': 35.6702,
 'eval_samples_per_second': 22.764,
 'eval_steps_per_second': 0.729,
 'epoch': 3.0}