<a href="https://colab.research.google.com/github/ArjunNPatel/finbertuconn2024/blob/main/Benchmarking_Model_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers --quiet
!pip install torch --quiet
!pip install tqdm boto3 requests regex sentencepiece sacremoses --quiet
import numpy as np
from transformers import BertTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import torch.nn as nn
import tqdm, boto3, requests, regex, sentencepiece, sacremoses



In [None]:
!pip install huggingface_hub --quiet
from huggingface_hub import PyTorchModelHubMixin

# Module-level tokenizer shared by the rest of this notebook (stored on the
# model class, used by tokenize_function and by the inference cell). It is
# fetched through the 'tokenizer' entry point of the
# huggingface/pytorch-transformers torch.hub repo for the
# 'yiyanghkust/finbert-tone' checkpoint.
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'yiyanghkust/finbert-tone')
class BertForSequenceClassification(nn.Module, PyTorchModelHubMixin):
    """BERT encoder followed by a small feed-forward classification head.

    The head is: dropout -> Linear(hidden_size, 128) -> ReLU ->
    Linear(128, num_labels) -> softmax over dim 1.

    Note that the value returned under the ``"logits"`` key is the
    *softmax output* (normalized scores), not raw logits; the key name is
    kept for compatibility with existing callers.
    """

    def __init__(self, pretrained_model_name, num_labels=3):
        """Build the encoder and the classification head.

        Args:
            pretrained_model_name: Checkpoint name passed to the 'model'
                entry point of the huggingface/pytorch-transformers
                torch.hub repo.
            num_labels: Number of output classes.
        """
        super().__init__()
        self.num_labels = num_labels
        self.bert = torch.hub.load('huggingface/pytorch-transformers', 'model', pretrained_model_name)
        # Reference to the module-level tokenizer, kept on the model for convenience.
        self.tokenizer = tokenizer
        self.loss_fn = nn.CrossEntropyLoss()
        # nn.Dropout() with no argument uses PyTorch's default p=0.5.
        self.dropout = nn.Dropout()
        self.fc1 = nn.Linear(self.bert.config.hidden_size, 128)
        self.finaloutput = nn.Linear(128, num_labels)
        self.softmaxlayer = nn.Softmax(dim=1)

    def forward(self, input_ids, attention_mask, labels=None, *args, **kwargs):
        """Run the encoder and head; optionally compute the training loss.

        Args:
            input_ids: Token ids forwarded to the BERT encoder.
            attention_mask: Attention mask forwarded to the BERT encoder.
            labels: Optional class indices. When given, a cross-entropy
                loss is computed.
                # assumes one integer class index per example - TODO confirm
            *args, **kwargs: Accepted and ignored, so extra tokenizer
                outputs can be splatted into the call.

        Returns:
            dict with ``"logits"`` (softmax scores over dim 1) and
            ``"loss"`` (``None`` when ``labels`` is not provided).
        """
        encoder_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = self.dropout(encoder_out.pooler_output)
        hidden = nn.functional.relu(self.fc1(pooled_output))
        raw_logits = self.finaloutput(hidden)

        loss = None
        if labels is not None:
            # nn.CrossEntropyLoss applies log-softmax internally, so it must
            # receive the raw head output. Feeding it the softmaxed scores
            # (as before) applied softmax twice and flattened the gradients.
            loss = self.loss_fn(raw_logits.view(-1, self.num_labels), labels.view(-1))

        return {"logits": self.softmaxlayer(raw_logits),
                "loss": loss
                }

def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of ``examples`` with the module-level tokenizer.

    Every sequence is padded to, and truncated at, 128 tokens, and the
    result is requested as PyTorch tensors (``return_tensors="pt"``).

    # presumably `examples` is a dataset batch/row with a "text" field -
    # verify against the caller.
    """
    encoding_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
        "return_tensors": "pt",
    }
    texts = examples["text"]
    return tokenizer(texts, **encoding_options)

# Instantiate the classifier from the "ANP1/finbert-tone-v0" repo id.
# `from_pretrained` is not defined on the class itself; presumably it is
# supplied by PyTorchModelHubMixin - confirm against the huggingface_hub docs.
model = BertForSequenceClassification.from_pretrained("ANP1/finbert-tone-v0")

In [8]:
# Sample sentences used to sanity-check the classifier.
myinputs = ["we don't expect a lot",
            "growth is strong and will continue to be",
            "the CEO had a meeting",
            "stock market meltdown"]
encoded_inputs = tokenizer(myinputs, return_tensors='pt', padding="max_length", truncation=True, max_length=128)

# Put the model in eval mode so the dropout layer in the classification head
# is disabled; otherwise the scores can differ from run to run.
# NOTE: call model.train() again before any further training.
model.eval()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    # `outputs` ends up as a NumPy array with one row of class scores per sentence.
    outputs = model(**encoded_inputs)["logits"].cpu().numpy()

def score(logit):
    """Collapse a 3-element score vector into one signed value.

    Index 0 is weighted by -1, index 1 by 0 and index 2 by +1, and the
    three weighted terms are summed.

    # presumably index 0/1/2 are the negative/neutral/positive classes -
    # confirm against the label order used in training.
    """
    minus_term = logit[0] * -1
    zero_term = logit[1] * 0
    plus_term = logit[2] * 1
    return minus_term + zero_term + plus_term
def magnitude(logit):
    """Return ``1.5 * max(logit) - 0.5``.

    For a 3-way probability vector the largest entry lies in [1/3, 1], so
    this affine map rescales it to [0, 1].

    # assumes `logit` holds 3 softmax probabilities - confirm with the caller.
    """
    largest = np.amax(logit)
    return 1.5 * largest - 0.5

# Print each sentence with its signed score and its magnitude.
for row_index, sentence in enumerate(myinputs):
    row = outputs[row_index, :]
    print(sentence, score(row), magnitude(row))

we don't expect a lot -0.11002052575349808 0.6259264349937439
growth is strong and will continue to be 0.5824856609106064 0.6269700825214386
the CEO had a meeting -0.0010115168988704681 0.8955188393592834
stock market meltdown -0.18174418807029724 0.3567809760570526
