# PT-BR Financial News Sentiment

In [None]:
#packages and configuration

!pip install transformers torch numpy xmltodict
!pip install ipywidgets
!pip install --upgrade notebook

In [24]:
from transformers import AutoTokenizer, BertForSequenceClassification
import os
import pandas as pd

In [2]:
from transformers import (
    AutoTokenizer, 
    BertForSequenceClassification,
    pipeline,
)

# Hub id of the checkpoint, named once so the tokenizer and the model are
# always loaded from the same place.
FINBERT_PT_BR_MODEL_ID = "lucas-leme/FinBERT-PT-BR"

finbert_pt_br_tokenizer = AutoTokenizer.from_pretrained(FINBERT_PT_BR_MODEL_ID)
finbert_pt_br_model = BertForSequenceClassification.from_pretrained(FINBERT_PT_BR_MODEL_ID)

# Text-classification pipeline built from the tokenizer and model loaded above.
finbert_pt_br_pipeline = pipeline(task='text-classification', model=finbert_pt_br_model, tokenizer=finbert_pt_br_tokenizer)



In [25]:
def article_classification(directory, max_length=512):
    """Classify the sentiment of every file in ``directory``.

    Each regular file is read as UTF-8 text and passed to the module-level
    ``finbert_pt_br_pipeline``. Truncation to ``max_length`` tokens is done by
    the pipeline's own tokenizer, so the text is tokenized exactly once.

    Args:
        directory: Path of the folder that holds the article files.
        max_length: Maximum token count handed to the tokenizer for one article.

    Returns:
        pandas.DataFrame with one row per file and the columns ``file``,
        ``sentiment`` (label of the first prediction) and ``score``. The
        columns are present even when ``directory`` contains no files.

    Raises:
        OSError: If ``directory`` or one of its files cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    columns = ['file', 'sentiment', 'score']
    records = []

    # os.listdir returns entries in arbitrary order; sort so the rows are
    # reproducible from run to run.
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            # Sub-directories cannot be opened as text; skip them.
            continue

        # Close the file before running the model; only the read needs it open.
        with open(path, 'r', encoding='utf-8') as fhand:
            article = fhand.read()

        # NOTE(review): the file content is classified as-is. The sample files
        # end in .xml, so markup may be part of the input - confirm whether the
        # article body should be extracted first.
        sentiment = finbert_pt_br_pipeline(article, truncation=True, max_length=max_length)

        records.append({
            'file': filename,
            'sentiment': sentiment[0]['label'],
            'score': sentiment[0]['score'],
        })

    return pd.DataFrame(records, columns=columns)

# Keep the result in a variable so later cells can reuse it without re-running
# inference, and end the cell with the frame itself so the notebook renders it
# as a table instead of plain printed text.
andre_sentiment = article_classification('News_Sample/andre')
andre_sentiment

                                        file sentiment     score
0   13794279-7660-4991-8f26-95333f8ca11f.xml  NEGATIVE  0.823842
1   1863b43e-cf14-4f9b-9226-7ac40bff2c46.xml  POSITIVE  0.593563
2   18af844d-3cab-45b2-97e8-0c30d3a95cc6.xml  NEGATIVE  0.750617
3   1a5bca42-e31a-4336-81bd-916b7a3c503c.xml  NEGATIVE  0.791573
4   2e21b65c-2d3c-46a4-bd26-93238ba0bbb1.xml   NEUTRAL  0.536406
5   31785a5f-5762-4d70-8447-7a5f8799cc8a.xml  NEGATIVE  0.720258
6   5948f06f-5fd8-476e-8d7d-b7bf1af16fa0.xml  POSITIVE  0.451566
7   66061c43-3cf9-4a92-a72c-b55c2bf23324.xml  NEGATIVE  0.831076
8   85931aef-92f7-4e24-b9bf-4a0949f041d4.xml  NEGATIVE  0.578087
9   9ad2c6c3-7c2c-45d4-8b19-da699c9eb335.xml  POSITIVE  0.371683
10  a4930391-601d-458b-af1a-3e0af3a7cc59.xml  POSITIVE  0.827733
11  b51b53b8-34cb-4a83-b928-e2b8f5277377.xml  NEGATIVE  0.780306
12  bfbd2175-164e-4f02-b136-0685c1c53dc1.xml   NEUTRAL  0.671244
13  c1667639-f956-4794-b03f-03f6042f31e6.xml  NEGATIVE  0.685905
14  c1d0c39f-de76-4442-ad