In [1]:
# run sentiment analysis for finbert (AutoModelForSequenceClassification)

from utils import TestDataset, save_to_csv
from tqdm import tqdm
import pandas as pd
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Prefer Apple's MPS backend (the original target); fall back to CUDA or CPU
# so the script still runs on machines without MPS.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print("----- Load Dataset -----")
# use appropriate dataset for inference
# data_path = "data/train_easy.csv"
# data_path = "data/train_hard.csv"
data_path = "data/FPB.csv"

test_dataset = TestDataset(data_path)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=8,
    collate_fn=test_dataset.collate_fn,
)


print("----- Load Tokenizer and Model -----")
model_name_or_path = "ProsusAI/finbert"
model_name = "finbert"
# NOTE: do not force padding_side="left" here. BERT-style sequence classifiers
# pool the hidden state at position 0 ([CLS]); left padding would put [PAD]
# at position 0 for every sentence shorter than the longest one in its batch
# and shift the absolute position embeddings of the real tokens.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    device_map=device,
)

print("----- Run Sentiment Analysis -----")
# Class index -> label string. Loop-invariant, so it is built once.
# NOTE(review): expected to mirror model.config.id2label for this checkpoint
# -- confirm if model_name_or_path is ever changed.
label_map = {0: 'positive', 1: 'negative', 2: 'neutral'}

labels = []
# Inference only: disabling autograd avoids building a graph for each batch.
with torch.no_grad():
    # Iterating the dataloader directly lets tqdm pick up its length, when it
    # has one, and show a total instead of a bare counter.
    for batch in tqdm(test_dataloader):
        tokenized_batch = tokenizer(
            batch['sentence'],
            padding=True,
            truncation=True,
            return_tensors="pt",
        ).to(device)
        output_logits = model(**tokenized_batch).logits

        # One device->host transfer per batch instead of one .item() per row.
        predicted_ids = torch.argmax(output_logits, dim=-1).tolist()
        labels.extend(label_map[class_id] for class_id in predicted_ids)

print("----- Save Outputs -----")
save_to_csv(data_path, model_name, labels)


  from .autonotebook import tqdm as notebook_tqdm


----- Load Dataset -----
----- Load Tokenizer and Model -----
----- Run Sentiment Analysis -----


38it [00:04,  7.86it/s]

----- Save Outputs -----



