In [35]:
import os
import re
from datasets import Dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

In [36]:
import warnings
warnings.filterwarnings("ignore", message=".*pin_memory.*")

from transformers import logging as hf_logging
hf_logging.set_verbosity_error()


In [19]:

############################################################
# 1. Setup and constants
############################################################

EMOTIONS = ["Angry", "Happy", "Relaxed", "Sad"]

label2id = {label: i for i, label in enumerate(EMOTIONS)}
id2label = {i: label for label, i in label2id.items()}

# Timestamp pattern like [01:23.45]
timestamp_pattern = re.compile(r"\[\d{2}:\d{2}(?:\.\d{2})?\]")

DATASET_DIR = "NJU_MusicMood_v1.0"

############################################################
# 2. Cleaning function (clean text for modeling)
############################################################

def clean_lyrics(text: str) -> str:
    # Remove timestamps like [00:29]
    text = timestamp_pattern.sub("", text)

    # Lowercase
    text = text.lower()

    # Normalize quotes
    text = text.replace("’", "'").replace("“", '"').replace("”", '"')

    # Remove ellipses and repeated dots
    text = re.sub(r"\.{2,}", " ", text)

    # Remove long underscores
    text = re.sub(r"_{2,}", " ", text)

    # Remove trailing "end" markers
    text = re.sub(r"\bend[.\s]*$", "", text.strip())

    # # Replace newlines with space
    # text = text.replace("\n", " ")

    # Remove special characters except letters, digits, spaces, apostrophes
    text = re.sub(r"[^a-z0-9' ]+", " ", text)

    # Collapse multiple spaces
    text = re.sub(r"\s+", " ", text)

    return text.strip()


############################################################
# 3. Load dataset
############################################################

def get_lyrics(path: str) -> str:
    with open(path, "r", encoding="utf-8") as f:
        raw = f.read()
    return clean_lyrics(raw)

def get_lyrics_and_labels(split: str):
    texts, labels = [], []
    for emotion in EMOTIONS:
        folder = os.path.join(DATASET_DIR, emotion, split)

        if not os.path.isdir(folder):
            continue

        for fname in os.listdir(folder):
            if fname.lower() == "info.txt":
                continue
            if not fname.endswith(".txt"):
                continue

            path = os.path.join(folder, fname)
            text = get_lyrics(path)

            if text.strip():
                texts.append(text)
                labels.append(emotion)

    return texts, labels

train_texts, train_labels = get_lyrics_and_labels("Train")
dev_texts, dev_labels = get_lyrics_and_labels("Test")

train_ds = Dataset.from_dict({
    "text": train_texts,
    "label": [label2id[l] for l in train_labels]
})

dev_ds = Dataset.from_dict({
    "text": dev_texts,
    "label": [label2id[l] for l in dev_labels]
})




In [20]:
############################################################
# 4. Helper for evaluation
############################################################

def print_results(true_labels, predicted_labels):
    p, r, f, _ = precision_recall_fscore_support(
        true_labels, predicted_labels, average="macro", zero_division=0
    )
    acc = accuracy_score(true_labels, predicted_labels)

    print("Macro Precision:", p)
    print("Macro Recall:", r)
    print("Macro F1:", f)
    print("Accuracy:", acc)


############################################################
# 5. Position tagging function (START, MID, END)
############################################################

def position_tag(text):
    """Tag each token with its position in the song.
       First 20 percent = _START
       Middle 60 percent = _MID
       Last 20 percent = _END
    """
    tokens = text.split()
    n = len(tokens)
    tagged = []

    for i, tok in enumerate(tokens):
        ratio = i / n
        if ratio < 0.2:
            tagged.append(tok + "_START")
        elif ratio > 0.8:
            tagged.append(tok + "_END")
        else:
            tagged.append(tok + "_MID")

    return " ".join(tagged)

In [21]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def print_results(gold_labels, predicted_labels):
    # Overall macro metrics
    p, r, f, _ = precision_recall_fscore_support(
        gold_labels, predicted_labels, average="macro", zero_division=0
    )
    acc = accuracy_score(gold_labels, predicted_labels)

    print("=== Overall (Macro Avg) ===")
    print("Precision:", p)
    print("Recall:", r)
    print("F1:", f)
    print("Accuracy:", acc)
    print()

    # Per class metrics
    p_i, r_i, f_i, _ = precision_recall_fscore_support(
        gold_labels, predicted_labels, average=None, zero_division=0
    )

    print("=== Per Emotion (Class) Metrics ===")
    for i, emotion in enumerate(EMOTIONS):
        print(f"{emotion}:")
        print("  Precision:", p_i[i])
        print("  Recall:   ", r_i[i])
        print("  F1:       ", f_i[i])
    print()  # empty line at the end



In [23]:
############################################################
# 6. Vectorize with TF-IDF + train Logistic Regression
############################################################

print("=== Training Logistic Regression with Position Tags (START/MID/END) ===")

vectorizer = TfidfVectorizer(
    max_features=50000,
    ngram_range=(1, 2)           # unigrams + bigrams improve performance
)
train_tagged = [position_tag(t) for t in train_texts]
dev_tagged = [position_tag(t) for t in dev_texts]
X_train = vectorizer.fit_transform(train_tagged)
X_dev = vectorizer.transform(dev_tagged)

clf = LogisticRegression(
    max_iter=2000,
    class_weight="balanced",
    # multi_class="multinomial",
    solver="lbfgs"
)

clf.fit(X_train, train_ds["label"])

preds = clf.predict(X_dev)

############################################################
# 7. Print results
############################################################

print("\n=== Results with START/MID/END position tagging ===")
print_results(dev_ds["label"], preds)

=== Training Logistic Regression with Position Tags (START/MID/END) ===

=== Results with START/MID/END position tagging ===
=== Overall (Macro Avg) ===
Precision: 0.3902636986855047
Recall: 0.40970226440661606
F1: 0.39418263460750874
Accuracy: 0.38992042440318303

=== Per Emotion (Class) Metrics ===
Angry:
  Precision: 0.45544554455445546
  Recall:    0.647887323943662
  F1:        0.5348837209302325
Happy:
  Precision: 0.40963855421686746
  Recall:    0.32075471698113206
  F1:        0.35978835978835977
Relaxed:
  Precision: 0.3626373626373626
  Recall:    0.32673267326732675
  F1:        0.34375
Sad:
  Precision: 0.3333333333333333
  Recall:    0.3434343434343434
  F1:        0.3383084577114428



In [25]:
############################################################
# 6. Vectorize with TF-IDF + train Logistic Regression
############################################################

print("=== Training Logistic Regression with Position Tags (START/MID/END) ===")

vectorizer = TfidfVectorizer(
    max_features=50000,
    ngram_range=(1, 2)           # unigrams + bigrams improve performance
)

X_train = vectorizer.fit_transform(train_tagged)
X_dev = vectorizer.transform(dev_tagged)

clf = LogisticRegression(
    max_iter=2000,
    class_weight="balanced",
    # multi_class="multinomial",
    solver="lbfgs"
)

clf.fit(X_train, train_ds["label"])

preds = clf.predict(X_dev)

############################################################
# 7. Print emotion based results
############################################################

print("\n=== Emotion based results with START/MID/END position tagging ===")
print_results(dev_ds["label"], preds)


=== Training Logistic Regression with Position Tags (START/MID/END) ===

=== Emotion based results with START/MID/END position tagging ===
=== Overall (Macro Avg) ===
Precision: 0.3902636986855047
Recall: 0.40970226440661606
F1: 0.39418263460750874
Accuracy: 0.38992042440318303

=== Per Emotion (Class) Metrics ===
Angry:
  Precision: 0.45544554455445546
  Recall:    0.647887323943662
  F1:        0.5348837209302325
Happy:
  Precision: 0.40963855421686746
  Recall:    0.32075471698113206
  F1:        0.35978835978835977
Relaxed:
  Precision: 0.3626373626373626
  Recall:    0.32673267326732675
  F1:        0.34375
Sad:
  Precision: 0.3333333333333333
  Recall:    0.3434343434343434
  F1:        0.3383084577114428



## Separate Models

In [24]:
def get_segment(text, segment="start", portion=0.3):
    """
    Extracts a portion of the lyrics.
    portion=0.3 means 30 percent of lyrics.
    segment can be start, middle, or end.
    """
    tokens = text.split()
    n = len(tokens)

    if n == 0:
        return ""

    cut = int(n * portion)  # number of tokens per section

    if segment == "start":
        return " ".join(tokens[:cut])

    elif segment == "middle":
        start = int(n * 0.35)
        end = int(n * 0.65)
        return " ".join(tokens[start:end])

    elif segment == "end":
        return " ".join(tokens[-cut:])

    else:
        return text


In [26]:
# Build segmented datasets
train_start = [get_segment(t, "start") for t in train_ds["text"]]
train_middle = [get_segment(t, "middle") for t in train_ds["text"]]
train_end = [get_segment(t, "end") for t in train_ds["text"]]

dev_start = [get_segment(t, "start") for t in dev_ds["text"]]
dev_middle = [get_segment(t, "middle") for t in dev_ds["text"]]
dev_end = [get_segment(t, "end") for t in dev_ds["text"]]


In [29]:
def train_segment_model(train_texts, dev_texts, train_labels, dev_labels, name=""):
    print(f"\n=== Training {name} model ===")

    vectorizer = TfidfVectorizer(
        max_features=50000,
        ngram_range=(1, 2)
    )

    X_train = vectorizer.fit_transform(train_texts)
    X_dev = vectorizer.transform(dev_texts)

    clf = LogisticRegression(
        max_iter=2000,
        class_weight="balanced",
        # multi_class="multinomial"
    )

    clf.fit(X_train, train_labels)
    preds = clf.predict(X_dev)

    print(f"=== Results ({name}) ===")
    print_results(dev_labels, preds)


In [30]:
# Train and evaluate beginning model
train_segment_model(
    train_start,
    dev_start,
    train_ds["label"],
    dev_ds["label"],
    name="BEGINNING ONLY"
)

# Train and evaluate middle model
train_segment_model(
    train_middle,
    dev_middle,
    train_ds["label"],
    dev_ds["label"],
    name="MIDDLE ONLY"
)

# Train and evaluate end model
train_segment_model(
    train_end,
    dev_end,
    train_ds["label"],
    dev_ds["label"],
    name="END ONLY"
)



=== Training BEGINNING ONLY model ===
=== Results (BEGINNING ONLY) ===
=== Overall (Macro Avg) ===
Precision: 0.3458269076705815
Recall: 0.3602639861381833
F1: 0.3481884320377146
Accuracy: 0.3421750663129973

=== Per Emotion (Class) Metrics ===
Angry:
  Precision: 0.4375
  Recall:    0.5915492957746479
  F1:        0.5029940119760479
Happy:
  Precision: 0.4157303370786517
  Recall:    0.3490566037735849
  F1:        0.37948717948717947
Relaxed:
  Precision: 0.2911392405063291
  Recall:    0.22772277227722773
  F1:        0.25555555555555554
Sad:
  Precision: 0.23893805309734514
  Recall:    0.2727272727272727
  F1:        0.25471698113207547


=== Training MIDDLE ONLY model ===
=== Results (MIDDLE ONLY) ===
=== Overall (Macro Avg) ===
Precision: 0.4168987689585285
Recall: 0.43011603844413143
F1: 0.4214992552389918
Accuracy: 0.41644562334217505

=== Per Emotion (Class) Metrics ===
Angry:
  Precision: 0.48863636363636365
  Recall:    0.6056338028169014
  F1:        0.5408805031446541
Ha

## DistilBERT with position tags
Fine-tune a transformer on position-tagged lyrics so the model can learn positional cues alongside word content.

In [40]:

import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, DataCollatorWithPadding

import warnings
warnings.filterwarnings("ignore", message=".*pin_memory.*")

from transformers import logging
logging.set_verbosity_error()



def distilbert_tokenize(dataset, tokenizer, text_field="text", max_length=256):
    "Tokenize a dataset column and set torch format for Trainer."
    def _tok(batch):
        return tokenizer(batch[text_field], truncation=True, padding="max_length", max_length=max_length)

    tokenized = dataset.map(_tok, batched=True)
    tokenized = tokenized.remove_columns([text_field])
    tokenized.set_format(type="torch")
    return tokenized


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    p, r, f, _ = precision_recall_fscore_support(labels, preds, average="macro", zero_division=0)
    acc = accuracy_score(labels, preds)
    return {"precision": p, "recall": r, "f1": f, "accuracy": acc}


In [32]:
# Apply START/MID/END tags before feeding lyrics into DistilBERT
position_train_ds = train_ds.map(lambda ex: {"text": position_tag(ex["text"])})
position_dev_ds = dev_ds.map(lambda ex: {"text": position_tag(ex["text"])})

model_name = "distilbert/distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenized_train = distilbert_tokenize(position_train_ds, tokenizer)
tokenized_dev = distilbert_tokenize(position_dev_ds, tokenizer)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(EMOTIONS),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

training_args = TrainingArguments(
    output_dir="distilbert_position_output",
    num_train_epochs=3,
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    eval_strategy="epoch",
    logging_steps=25,
    save_strategy="epoch",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_dev,
    compute_metrics=compute_metrics,
    processing_class=tokenizer,
)

# Fine-tune DistilBERT on position-tagged lyrics
trainer.train()
eval_results = trainer.evaluate()

# Get predictions on the dev split to reuse the earlier reporting helper
position_logits = trainer.predict(tokenized_dev).predictions
position_preds = np.argmax(position_logits, axis=-1)

print("\n=== DistilBERT (position-tagged lyrics) ===")
print_results(dev_ds["label"], position_preds)
print("Trainer eval metrics:", eval_results)


Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [00:00<00:00, 7952.91 examples/s]
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [00:00<00:00, 13835.44 examples/s]
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [00:00<00:00, 3799.65 examples/s]
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [00:00<00:00, 4098.89 examples/s]
Some weights of DistilBertForSequenceClassif

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,1.3926,1.3911,0.047082,0.25,0.079241,0.188329
2,1.3968,1.383823,0.143604,0.283778,0.189136,0.302387
3,1.3866,1.382379,0.327411,0.300948,0.216044,0.328912







=== DistilBERT (position-tagged lyrics) ===
=== Overall (Macro Avg) ===
Precision: 0.32741141732283463
Recall: 0.30094829506867776
F1: 0.21604395491381792
Accuracy: 0.32891246684350134

=== Per Emotion (Class) Metrics ===
Angry:
  Precision: 0.6666666666666666
  Recall:    0.028169014084507043
  F1:        0.05405405405405406
Happy:
  Precision: 0.3346456692913386
  Recall:    0.8018867924528302
  F1:        0.4722222222222222
Relaxed:
  Precision: 0.0
  Recall:    0.0
  F1:        0.0
Sad:
  Precision: 0.30833333333333335
  Recall:    0.37373737373737376
  F1:        0.3378995433789954

Trainer eval metrics: {'eval_loss': 1.382379412651062, 'eval_precision': 0.32741141732283463, 'eval_recall': 0.30094829506867776, 'eval_f1': 0.21604395491381792, 'eval_accuracy': 0.32891246684350134, 'eval_runtime': 9.3576, 'eval_samples_per_second': 40.288, 'eval_steps_per_second': 10.152, 'epoch': 3.0}


## Build DistilBERT-ready segmented datasets

In [41]:
from datasets import Dataset

# Build segmented versions
train_start_ds = Dataset.from_dict({
    "text": [get_segment(t, "start") for t in train_ds["text"]],
    "label": train_ds["label"]
})

train_middle_ds = Dataset.from_dict({
    "text": [get_segment(t, "middle") for t in train_ds["text"]],
    "label": train_ds["label"]
})

train_end_ds = Dataset.from_dict({
    "text": [get_segment(t, "end") for t in train_ds["text"]],
    "label": train_ds["label"]
})

dev_start_ds = Dataset.from_dict({
    "text": [get_segment(t, "start") for t in dev_ds["text"]],
    "label": dev_ds["label"]
})

dev_middle_ds = Dataset.from_dict({
    "text": [get_segment(t, "middle") for t in dev_ds["text"]],
    "label": dev_ds["label"]
})

dev_end_ds = Dataset.from_dict({
    "text": [get_segment(t, "end") for t in dev_ds["text"]],
    "label": dev_ds["label"]
})


## DistilBERT training function

In [44]:
def train_distilbert_segment(train_ds, dev_ds, label2id, id2label, name=""):
    print(f"\n=== Training DistilBERT ({name}) ===")

    model_name = "distilbert-base-uncased"

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenizer mapping
    def tokenize_batch(batch):
        return tokenizer(
            batch["text"],
            truncation=True,
            padding=False,
            max_length=256
        )

    tokenized_train = train_ds.map(tokenize_batch, batched=True)
    tokenized_dev = dev_ds.map(tokenize_batch, batched=True)

    tokenized_train = tokenized_train.remove_columns(["text"])
    tokenized_dev = tokenized_dev.remove_columns(["text"])
    tokenized_train.set_format("torch")
    tokenized_dev.set_format("torch")

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(EMOTIONS),
        id2label=id2label,
        label2id=label2id
    )

    training_args = TrainingArguments(
        output_dir=f"distilbert_{name.lower().replace(' ','_')}",
        num_train_epochs=3,
        learning_rate=5e-5,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        weight_decay=0.01,
        eval_strategy="epoch",          
        save_strategy="epoch",
        logging_steps=25,
        report_to="none"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_dev,
        data_collator=data_collator,
        processing_class=tokenizer,
        compute_metrics=compute_metrics
    )

    trainer.train()

    pred_output = trainer.predict(tokenized_dev)
    pred_ids = np.argmax(pred_output.predictions, axis=-1)

    print(f"\n=== Results for DistilBERT ({name}) ===")
    print_results(dev_ds["label"], pred_ids)


In [45]:
train_distilbert_segment(train_start_ds, dev_start_ds, label2id, id2label, name="BEGINNING ONLY")

train_distilbert_segment(train_middle_ds, dev_middle_ds, label2id, id2label, name="MIDDLE ONLY")

train_distilbert_segment(train_end_ds, dev_end_ds, label2id, id2label, name="END ONLY")



=== Training DistilBERT (BEGINNING ONLY) ===


Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [00:00<00:00, 16225.86 examples/s]
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [00:00<00:00, 21070.73 examples/s]


{'loss': 1.3862, 'grad_norm': 8.852052688598633, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.25}
{'loss': 1.4156, 'grad_norm': 4.586274147033691, 'learning_rate': 4.183333333333334e-05, 'epoch': 0.5}
{'loss': 1.4111, 'grad_norm': 3.341245651245117, 'learning_rate': 3.766666666666667e-05, 'epoch': 0.75}
{'loss': 1.41, 'grad_norm': 3.914538860321045, 'learning_rate': 3.35e-05, 'epoch': 1.0}
{'eval_loss': 1.349113941192627, 'eval_precision': 0.1911770680968633, 'eval_recall': 0.3654302049923302, 'eval_f1': 0.23991321434419519, 'eval_accuracy': 0.35013262599469497, 'eval_runtime': 7.6282, 'eval_samples_per_second': 49.422, 'eval_steps_per_second': 12.454, 'epoch': 1.0}
{'loss': 1.29, 'grad_norm': 4.489414691925049, 'learning_rate': 2.9333333333333336e-05, 'epoch': 1.25}
{'loss': 1.2563, 'grad_norm': 8.825271606445312, 'learning_rate': 2.5166666666666667e-05, 'epoch': 1.5}
{'loss': 1.3849, 'grad_norm': 4.975724220275879, 'learning_rate': 2.1e-05, 'epoch': 1.75}
{'loss': 1.132, 'grad_

Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [00:00<00:00, 11068.08 examples/s]
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [00:00<00:00, 15053.38 examples/s]


{'loss': 1.4118, 'grad_norm': 4.958386421203613, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.25}
{'loss': 1.3816, 'grad_norm': 2.928382158279419, 'learning_rate': 4.183333333333334e-05, 'epoch': 0.5}
{'loss': 1.3188, 'grad_norm': 5.535400867462158, 'learning_rate': 3.766666666666667e-05, 'epoch': 0.75}
{'loss': 1.2922, 'grad_norm': 7.746114253997803, 'learning_rate': 3.35e-05, 'epoch': 1.0}
{'eval_loss': 1.2121134996414185, 'eval_precision': 0.4825542717086835, 'eval_recall': 0.4684201445656126, 'eval_f1': 0.4316324829269062, 'eval_accuracy': 0.4562334217506631, 'eval_runtime': 5.7614, 'eval_samples_per_second': 65.435, 'eval_steps_per_second': 16.489, 'epoch': 1.0}
{'loss': 1.0936, 'grad_norm': 4.896649360656738, 'learning_rate': 2.9333333333333336e-05, 'epoch': 1.25}
{'loss': 0.9986, 'grad_norm': 6.769309043884277, 'learning_rate': 2.5166666666666667e-05, 'epoch': 1.5}
{'loss': 1.1846, 'grad_norm': 8.552339553833008, 'learning_rate': 2.1e-05, 'epoch': 1.75}
{'loss': 0.8799, 'g

Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400/400 [00:00<00:00, 12873.66 examples/s]
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 377/377 [00:00<00:00, 16475.67 examples/s]


{'loss': 1.4134, 'grad_norm': 6.828812599182129, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.25}
{'loss': 1.3891, 'grad_norm': 2.9576778411865234, 'learning_rate': 4.183333333333334e-05, 'epoch': 0.5}
{'loss': 1.3693, 'grad_norm': 3.1087357997894287, 'learning_rate': 3.766666666666667e-05, 'epoch': 0.75}
{'loss': 1.3423, 'grad_norm': 4.609830379486084, 'learning_rate': 3.35e-05, 'epoch': 1.0}
{'eval_loss': 1.2294718027114868, 'eval_precision': 0.510072402565609, 'eval_recall': 0.4874881289617136, 'eval_f1': 0.47069412069412075, 'eval_accuracy': 0.47480106100795755, 'eval_runtime': 5.5845, 'eval_samples_per_second': 67.508, 'eval_steps_per_second': 17.011, 'epoch': 1.0}
{'loss': 1.0863, 'grad_norm': 4.492818355560303, 'learning_rate': 2.9333333333333336e-05, 'epoch': 1.25}
{'loss': 0.9779, 'grad_norm': 7.61391019821167, 'learning_rate': 2.5166666666666667e-05, 'epoch': 1.5}
{'loss': 1.132, 'grad_norm': 7.3603057861328125, 'learning_rate': 2.1e-05, 'epoch': 1.75}
{'loss': 0.8536, 