In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from db_helper_functions import get_stock_news_from_db
from sklearn.decomposition import NMF, LatentDirichletAllocation, MiniBatchNMF
import matplotlib.pyplot as plt
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    LongformerTokenizer,
)
from bertopic import BERTopic
from finbert_embedding.embedding import FinbertEmbedding
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Fetch the stored news rows for ticker AAPL through the project's DB helper,
# then drop rows whose `article` text is missing.
df = get_stock_news_from_db("AAPL")
df = df[df.article.notna()]
# Work on an explicit copy of the first 100 rows: new columns are assigned onto
# t_df further down, and writing into a bare slice of `df` raises
# SettingWithCopyWarning and has ambiguous write semantics.
t_df = df.iloc[:100].copy()
t_df

Unnamed: 0,id,ticker,title,url,article,date
0,4235,AAPL,"Microsoft, Alphabet, Meta, Apple, And Amazon L...",https://www.benzinga.com/news/earnings/22/07/2...,(Monday Market Open) Investors appear optimist...,2022-07-25
1,4376,AAPL,Benzinga Before The Bell: More Amazon Workers ...,https://www.benzinga.com/news/22/08/28532545/b...,CNBCBlackRock To Pledge A$1B In Australian Bat...,2022-08-17
2,3,AAPL,"Market Rebounds On Trade Optimism, Tech Bounce...",https://www.benzinga.com/node/12934767,A stronger-than-expected government report on ...,2019-01-04
3,4,AAPL,American Shippers And Carriers React To Signs ...,https://www.benzinga.com/node/12938837,The first week of 2019 saw three of America's ...,2019-01-07
4,5,AAPL,TD Ameritrade IMX Continued Its Dip In Decembe...,https://www.benzinga.com/node/12953011,"Declining for the third month in a row, TD Ame...",2019-01-08
...,...,...,...,...,...,...
95,133,AAPL,Mid-Afternoon Market Update: Nu Skin Enterpris...,https://www.benzinga.com/node/13643990,"Toward the end of trading Wednesday, the Dow t...",2019-05-01
96,135,AAPL,60 Biggest Movers From Yesterday,https://www.benzinga.com/node/13649199,"Loading...Loading...Gainers\nARCA biopharma, I...",2019-05-02
97,137,AAPL,"After Sliding On Powell's Words, Stocks Look F...",https://www.benzinga.com/node/13652243,The Fed threw one right down the middle late W...,2019-05-02
98,139,AAPL,Learn About The Everything Store To The Everyt...,https://www.benzinga.com/node/13656252,"In 2013, when Brad Stone set out to write abo...",2019-05-02


In [3]:
# Tokenizer that belongs to the ProsusAI/finbert checkpoint.
finbert_tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="ProsusAI/finbert",
)

In [4]:
# Tokenizer for the allenai/longformer-base-4096 checkpoint.
longform_tokenizer = LongformerTokenizer.from_pretrained(
    pretrained_model_name_or_path="allenai/longformer-base-4096",
)

In [5]:
# Sequence-classification head + weights for the ProsusAI/finbert checkpoint.
finbert_model = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path="ProsusAI/finbert",
)

In [6]:
# Character (not token) window cut out of every article: characters 512..1023,
# i.e. the second 512-character chunk. Articles shorter than `start_pos`
# characters contribute an empty string.
start_pos = 512
end_pos = 2 * 512
raw_list_data_for_finbert = [
    article_text[start_pos:end_pos] for article_text in t_df.article
]

In [7]:
# Tokenize the whole batch into padded PyTorch tensors.
# truncation/max_length cap every sequence at 512 tokens (BERT's positional
# limit) so an unusually token-dense text window cannot crash the forward pass;
# inputs that already fit are unaffected.
finbert_mat = finbert_tokenizer(
    raw_list_data_for_finbert,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)

In [8]:
# Inference only: run the forward pass with autograd disabled so no computation
# graph is built or kept alive for the batch (lower memory, faster), and the
# resulting logits are plain tensors without a grad_fn.
with torch.no_grad():
    outputs = finbert_model(**finbert_mat)
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[-1.5397,  2.6536, -0.6232],
        [ 0.1548,  1.4477, -2.0372],
        [ 2.0611, -1.5176, -1.7251],
        [-1.8773,  3.0955, -0.8240],
        [-1.8005,  2.9926, -0.8952],
        [ 1.8799, -1.6165, -1.3588],
        [ 1.6774, -1.8672, -0.6992],
        [-1.0231,  1.6744, -0.4548],
        [-1.2595, -0.3496,  2.2209],
        [-1.1794,  2.6108, -1.2826],
        [-1.3115,  2.6786, -1.2263],
        [-1.5596,  2.7557, -0.8460],
        [-1.2293,  2.5503, -1.0207],
        [-1.5249,  2.3742, -0.4214],
        [ 1.8723, -0.9890, -1.9238],
        [ 1.4085, -0.9537, -1.2859],
        [ 1.1091, -0.1662, -1.7661],
        [ 1.7246, -1.4443, -1.1983],
        [-0.5723,  1.9858, -1.4996],
        [-0.0505, -1.4916,  1.7669],
        [-1.0068, -0.5763,  2.1593],
        [-1.1289,  1.2519,  0.4317],
        [-1.5063,  2.6327, -0.8569],
        [ 0.3653, -2.0091,  1.5959],
        [-0.8133,  1.4951, -0.3326],
        [-1.3708,  2.5284, -0.84

In [9]:
# Turn the raw logits into per-row class probabilities by normalising over the
# last (class) dimension, then show them as nested Python lists.
class_logits = outputs.logits
predictions = class_logits.softmax(dim=-1)
predictions.tolist()

[[0.014338287524878979, 0.9498084783554077, 0.03585319221019745],
 [0.21031425893306732, 0.7661962509155273, 0.023489536717534065],
 [0.9518447518348694, 0.026567813009023666, 0.021587451919913292],
 [0.006743359845131636, 0.973922610282898, 0.019333966076374054],
 [0.008054638281464577, 0.9720286130905151, 0.01991666667163372],
 [0.9349943995475769, 0.028337769210338593, 0.03666779398918152],
 [0.891463577747345, 0.025747155770659447, 0.08278926461935043],
 [0.0567924939095974, 0.8429578542709351, 0.10024965554475784],
 [0.027809573337435722, 0.06908045709133148, 0.9031099081039429],
 [0.02165941707789898, 0.9588050246238708, 0.019535573199391365],
 [0.017811015248298645, 0.9627949595451355, 0.019394056871533394],
 [0.012840550392866135, 0.9609473347663879, 0.026212140917778015],
 [0.021724779158830643, 0.95151287317276, 0.02676239050924778],
 [0.01873663067817688, 0.9247810244560242, 0.056482404470443726],
 [0.9262186884880066, 0.05297951027750969, 0.020801706239581108],
 [0.86074447

In [10]:
# Take the column names from the checkpoint's own id2label mapping so every
# probability column is tied to the logit index it was computed from, instead
# of relying on a hand-typed label order.
sentiment_labels = [
    finbert_model.config.id2label[class_idx]
    for class_idx in range(predictions.shape[-1])
]
# assign() returns a new frame rather than writing into what may be a slice of
# `df` (avoids SettingWithCopyWarning); re-running the cell just overwrites the
# same columns. A row-count mismatch with `predictions` still raises ValueError.
t_df = t_df.assign(
    **{
        label: predictions[:, class_idx].tolist()
        for class_idx, label in enumerate(sentiment_labels)
    }
)
t_df

Unnamed: 0,id,ticker,title,url,article,date,positive,negative,neutral
0,4235,AAPL,"Microsoft, Alphabet, Meta, Apple, And Amazon L...",https://www.benzinga.com/news/earnings/22/07/2...,(Monday Market Open) Investors appear optimist...,2022-07-25,0.014338,0.949808,0.035853
1,4376,AAPL,Benzinga Before The Bell: More Amazon Workers ...,https://www.benzinga.com/news/22/08/28532545/b...,CNBCBlackRock To Pledge A$1B In Australian Bat...,2022-08-17,0.210314,0.766196,0.023490
2,3,AAPL,"Market Rebounds On Trade Optimism, Tech Bounce...",https://www.benzinga.com/node/12934767,A stronger-than-expected government report on ...,2019-01-04,0.951845,0.026568,0.021587
3,4,AAPL,American Shippers And Carriers React To Signs ...,https://www.benzinga.com/node/12938837,The first week of 2019 saw three of America's ...,2019-01-07,0.006743,0.973923,0.019334
4,5,AAPL,TD Ameritrade IMX Continued Its Dip In Decembe...,https://www.benzinga.com/node/12953011,"Declining for the third month in a row, TD Ame...",2019-01-08,0.008055,0.972029,0.019917
...,...,...,...,...,...,...,...,...,...
95,133,AAPL,Mid-Afternoon Market Update: Nu Skin Enterpris...,https://www.benzinga.com/node/13643990,"Toward the end of trading Wednesday, the Dow t...",2019-05-01,0.954219,0.021828,0.023953
96,135,AAPL,60 Biggest Movers From Yesterday,https://www.benzinga.com/node/13649199,"Loading...Loading...Gainers\nARCA biopharma, I...",2019-05-02,0.944363,0.018706,0.036930
97,137,AAPL,"After Sliding On Powell's Words, Stocks Look F...",https://www.benzinga.com/node/13652243,The Fed threw one right down the middle late W...,2019-05-02,0.034162,0.932216,0.033623
98,139,AAPL,Learn About The Everything Store To The Everyt...,https://www.benzinga.com/node/13656252,"In 2013, when Brad Stone set out to write abo...",2019-05-02,0.056778,0.024473,0.918749
