### LR - 4e-5 epochs 6 weight decay 0.01

In [1]:
import pandas as pd
import re
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import f1_score
import numpy as np
import evaluate


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_data = pd.read_csv('dataset/sent_train.csv')
validation = pd.read_csv('dataset/sent_valid.csv')

In [3]:
def clean_tweet(text):
    text = re.sub(r'http\S+', '',text)
    text = re.sub(r'\$\w+', "",text)
    text = re.sub(r'@\w+',"", text)
    text = re.sub(r'[^a-zA-Z\s]','',text)
    return text.lower().strip()

In [4]:
train_data['clean_tweet'] = train_data['text'].apply(clean_tweet)
validation['clean_tweet'] = validation['text'].apply(clean_tweet)

print(train_data[['clean_tweet','label']].head())
print(train_data['label'].unique())

                                         clean_tweet  label
0      jpmorgan reels in expectations on beyond meat      0
1  nomura points to bookings weakness at carnival...      0
2  cemex cut at credit suisse jp morgan on weak b...      0
3                      btig research cuts to neutral      0
4            funko slides after piper jaffray pt cut      0
[0 1 2]


In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_id = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=3)


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def tokenize(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=128)


In [7]:
train_dataset = train_data[['text', 'label']].rename(columns={'text': 'text', 'label': 'labels'})
val_dataset = validation[['text', 'label']].rename(columns={'text': 'text', 'label': 'labels'})


train_dataset = Dataset.from_pandas(train_dataset)
val_dataset = Dataset.from_pandas(val_dataset)


train_dataset = train_dataset.map(tokenize, batched=True)
val_dataset = val_dataset.map(tokenize, batched=True)

train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])



Map: 100%|██████████| 9543/9543 [00:00<00:00, 19798.10 examples/s]
Map: 100%|██████████| 2388/2388 [00:00<00:00, 18945.76 examples/s]


### LR - 3e-5 epochs 6 weight decay 0.1

In [8]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions= np.argmax(logits, axis=1)
    f1 = f1_score(labels, predictions, average='macro')
    return {"macro_f1":f1}

training_args = TrainingArguments(
    output_dir="./modernbert_2_checkpoints",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=4e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=6,
    weight_decay=0.01,
    logging_dir='./logs',
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
)

In [9]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()




Epoch,Training Loss,Validation Loss,Macro F1
1,No log,0.306577,0.848359
2,0.379500,0.284872,0.862233
3,0.379500,0.49037,0.850741
4,0.094400,0.609151,0.859031
5,0.094400,0.7358,0.85998
6,0.018100,0.74224,0.860826




TrainOutput(global_step=1794, training_loss=0.13745610274731648, metrics={'train_runtime': 15656.3339, 'train_samples_per_second': 3.657, 'train_steps_per_second': 0.115, 'total_flos': 4877812002912768.0, 'train_loss': 0.13745610274731648, 'epoch': 6.0})

In [10]:
results = trainer.evaluate()
print("Final Macro-F1 on validation set:", results['eval_macro_f1'])




Final Macro-F1 on validation set: 0.8622328332127448
