## Using FinBERT to Analyze NKE, LULU, and ATZ Sentiment

**Load Data (Yahoo Finance)**

In [54]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, UTC
import time

# Tickers to analyze
TICKERS = ["NKE", "LULU", "ATZ.TO"]

# How many days back to look. Not applied in this cell: `Ticker.news` is
# read below without any date filter.
LOOKBACK_DAYS = 365 

# Output filename (not written in this cell).
OUTPUT_FILE = "financial_news_dataset.csv"

# Flat list of article records (one dict per article) across every ticker.
all_news_data = []

for tic in TICKERS:
    ticker = yf.Ticker(tic)
    # Treat a missing/empty response as "no articles" so len() and the
    # accumulation below stay safe.
    news_list = ticker.news or []
    print(f"{len(news_list)}  articles downloaded from yfinance for ticker {tic}")

    # Per-ticker frame is only used for the preview printed here.
    news_df = pd.DataFrame(news_list)
    print(news_df.head())
    print(f"{tic} new successfuly converted to dataframe")

    # Accumulate the raw records, NOT the DataFrame: iterating a DataFrame
    # yields its column labels, so `list.extend(news_df)` would append the
    # strings 'id' and 'content' instead of the articles. Each record is also
    # tagged with its ticker so rows stay attributable after merging.
    all_news_data.extend({**article, "ticker": tic} for article in news_list)
    print(f"Successfully added {tic} news to all_news_dataframe")
    #let API rest
    time.sleep(1)

# One row per article, combined over all tickers.
all_news_df = pd.DataFrame(all_news_data)
print(all_news_df.head())



10  articles downloaded from yfinance for ticker NKE
                                     id  \
0  353f79bb-28d4-3576-9604-ea027dbc1863   
1  099cdb70-42ad-33dd-8710-67e58926300c   
2  3521219f-b614-39c3-bf7c-d576239c10c7   
3  fd56221d-e8af-3194-8936-31f4dfc34eed   
4  c3f8eea9-6fdb-31ae-b81a-ab73c4adeaa0   

                                             content  
0  {'id': '353f79bb-28d4-3576-9604-ea027dbc1863',...  
1  {'id': '099cdb70-42ad-33dd-8710-67e58926300c',...  
2  {'id': '3521219f-b614-39c3-bf7c-d576239c10c7',...  
3  {'id': 'fd56221d-e8af-3194-8936-31f4dfc34eed',...  
4  {'id': 'c3f8eea9-6fdb-31ae-b81a-ab73c4adeaa0',...  
NKE new successfuly converted to dataframe
Successfully added NKE news to all_news_dataframe
10  articles downloaded from yfinance for ticker LULU
                                     id  \
0  2dbad12b-5395-3ee7-b817-315712ad780f   
1  c8aa2195-917d-3b04-9c7d-329ca64c9588   
2  fa4c4729-e5c3-34d3-9a32-8bb08ecbfe34   
3  561a0c2e-1eb8-3441-887a-e67df5f3a8d9

**Config**

**Load Model**

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch


# Hugging Face checkpoint id: "tabularisai/ModernFinBERT" or "ProsusAI/finbert"
MODEL_NAME = "ProsusAI/finbert"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# `.eval()` returns the module itself, so the model is put into inference
# mode in the same statement that binds it.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).eval()

# Class index -> label name, e.g. {0: 'negative', 1: 'neutral', 2: 'positive'}
id2label = model.config.id2label

print(f"{MODEL_NAME} loaded")


ProsusAI/finbert loaded


**Scoring Function**

In [27]:
def finbert_score(texts, max_length=128, batch_size=32):
    """Classify one or more texts with the module-level FinBERT model.

    Uses the notebook globals ``tokenizer``, ``model`` and ``id2label``.

    Parameters
    ----------
    texts : str or iterable of str
        A single text, or any iterable of texts (list, tuple, generator,
        pandas Series, ...). Non-string iterables are materialised to a list
        before tokenisation.
    max_length : int, default 128
        Token limit passed to the tokenizer; longer inputs are truncated.
    batch_size : int, default 32
        Number of texts sent through the model per forward pass, so memory
        use stays bounded for large inputs.

    Returns
    -------
    labels : list of str
        Predicted label for each text, looked up in ``id2label``.
    probs : numpy.ndarray
        Softmax probabilities, one row per text and one column per class
        index. For empty input the array has shape ``(0, len(id2label))``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    # Normalise the input to a concrete list so it can be sliced into batches.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to score: return empty results instead of tokenising an empty batch.
    if not texts:
        return [], torch.empty((0, len(id2label))).numpy()

    prob_chunks = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            enc = tokenizer(
                texts[start:start + batch_size],
                padding=True,
                truncation=True,
                max_length=max_length,
                return_tensors="pt"
            )
            logits = model(**enc).logits
            prob_chunks.append(torch.softmax(logits, dim=-1))

    # Stitch the per-batch results back together in input order.
    probs = torch.cat(prob_chunks, dim=0).numpy()
    labels = [id2label[int(i)] for i in probs.argmax(axis=1)]
    return labels, probs
