In [33]:
from pathlib import Path
from datetime import datetime
from alpaca.data import NewsClient
from alpaca.data.requests import NewsRequest
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from tqdm import tqdm
from bs4 import BeautifulSoup
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings("ignore")

# functionality prototype test

In [2]:
# Load the pretrained FinBERT checkpoint: the tokenizer pre-processes text and
# the sequence-classification model produces the logits used for sentiment.
finbert_checkpoint = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(finbert_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(finbert_checkpoint)

2024-12-31 05:40:26.324361: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# A headline to be used as input
headline = "Microsoft fails to hit profit expectations"
# Pre-process the input phrase into PyTorch tensors.
# The result is named `encoded` (not `input`) so the built-in `input()` is not
# shadowed for the rest of the notebook session.
encoded = tokenizer(headline, padding=True, truncation=True, return_tensors='pt')
# Run inference on the tokenized phrase; gradients are not needed for prediction.
with torch.no_grad():
    output = model(**encoded)
# Pass model output logits through a softmax layer to get class probabilities.
sentim_scores = torch.nn.functional.softmax(output.logits, dim=-1)

In [4]:
# Score order used below: (positive, negative, neutral).
# Flatten the softmax output into a 1-D NumPy array before indexing it.
scores = sentim_scores.detach().numpy().ravel()
print(f"Positive score: {scores[0]}. Negative score: {scores[1]}. Neutral score: {scores[2]}")

Positive score: 0.03408437967300415. Negative score: 0.9329333305358887. Neutral score: 0.03298230841755867


# let's wrap it in a function

In [5]:
def sentim_analyzer(df, tokenizer, model, batch_size=32):
    """
    Analyze the sentiment of headlines with FinBERT.

    Headlines are tokenized and scored in batches with gradient tracking
    disabled. The softmax probabilities are written to the 'Positive',
    'Negative' and 'Neutral' columns of ``df`` (the passed dataframe is
    modified in place, as before).

    Args:
        df: A dataframe that contains headlines in a column called 'headline'.
        tokenizer: AutoTokenizer object, a pre-processing tokenizer object from Hugging Face lib.
        model: AutoModelForSequenceClassification object, a hugging face transformer model.
        batch_size: Number of headlines scored per forward pass (default 32).

    Returns: The initial dataframe plus 3 sentiment features (+, -, neutral) as columns
        for each headline. If all price/metadata columns of the preferred layout are
        present, a re-ordered view restricted to those columns is returned instead.

    Raises:
        KeyError: if ``df`` has no 'headline' column.
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if 'headline' not in df.columns:
        raise KeyError("'headline' column is not found in the dataframe.")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer.")

    # Work on positional values so non-unique or non-integer index labels are fine.
    headlines = df['headline'].tolist()
    probabilities = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for start in tqdm(range(0, len(headlines), batch_size)):
            batch = headlines[start:start + batch_size]
            # Pre-process the batch of phrases
            encoded = tokenizer(batch, padding=True, truncation=True, return_tensors='pt')
            # Estimate output and pass the logits through a softmax layer.
            logits = model(**encoded).logits
            probabilities.extend(torch.nn.functional.softmax(logits, dim=-1).tolist())

    # Class order of the scores: 0 -> Positive, 1 -> Negative, 2 -> Neutral.
    for position, column in enumerate(('Positive', 'Negative', 'Neutral')):
        df[column] = [row[position] for row in probabilities]

    # rearrange column order, but only when every expected column is available
    preferred_order = ['date', 'stock', 'Open', 'Close', 'Volume', 'headline',
                       'Positive', 'Negative', 'Neutral', 'Price_change']
    if all(column in df.columns for column in preferred_order):
        df = df[preferred_order]
    return df

# get financial news with alpaca

In [6]:
# Current working directory of the kernel; displayed in resolved (absolute) form.
p = Path.cwd()
p.resolve()

PosixPath('/Users/Ethan/Library/CloudStorage/OneDrive-Personal/CourseQuant/AlgoTrading/learning/algo_trading_components/Sentiment/MyImplementations')

In [7]:
# Go four directory levels up from the working directory and point at the
# "config/Alpaca2" entry there (this path is opened later to read the API keys).
d = p.parent.parent.parent.parent.resolve() / "config/Alpaca2"
d

PosixPath('/Users/Ethan/Library/CloudStorage/OneDrive-Personal/CourseQuant/AlgoTrading/config/Alpaca2')

In [8]:
d.exists()

True

In [9]:
# Read the Alpaca API credentials from the config file at `d`.
# Both names are initialised up front so they are always defined, even when the
# file cannot be found (previously that case left them unbound).
key = None
secret = None
try:
    with open(d, 'r') as f:
        for line in f:
            fields = line.split()
            # The value is taken from the third whitespace-separated token;
            # skip lines that are too short to carry one.
            if len(fields) < 3:
                continue
            if "Key" in line:
                key = fields[2]
            elif "Secret" in line:
                secret = fields[2]
except FileNotFoundError as e:
    print(f"Error: {e}")

# Surface incomplete credentials here instead of failing later inside the client.
# The values themselves are never printed.
if key is None or secret is None:
    print(f"Warning: could not read both 'Key' and 'Secret' entries from {d}.")

In [10]:
# Create the Alpaca news client from the API key / secret read from the config file.
news_client = NewsClient(api_key=key, secret_key=secret)

In [11]:
# Query parameters: ticker symbol, date window (as 'YYYY-MM-DD' strings) and
# article count — presumably intended for the news request; verify they match it.
symbol = 'NVDA'
start = '2024-01-03'
end = '2024-12-04'
limit = 50

In [12]:
# Build the news request for `symbol`, newest articles first, including the
# article body and skipping items that have no content.
# The article cap now comes from the `limit` parameter instead of a duplicated literal.
# NOTE(review): the `start` / `end` strings defined with the other query
# parameters ('2024-01-03' / '2024-12-04') are not used here and differ from
# the dates below — confirm which window is intended.
request_params = NewsRequest(start=datetime(2024, 1, 1),
                             end=datetime(2024, 12, 1),
                             sort ='desc',
                             symbols=symbol,
                             limit=limit,
                             include_content=True,
                             exclude_contentless=True)

In [13]:
# Send the request through the news client and keep the response object.
news_list = news_client.get_news(request_params=request_params)

In [14]:
# Convert response to a pandas DataFrame
news_list.df.columns

Index(['headline', 'source', 'url', 'summary', 'created_at', 'updated_at',
       'symbols', 'author', 'content', 'images'],
      dtype='object')

In [15]:
len(news_list.df)

50

In [16]:
# Keep the response's DataFrame view under its own name and preview the first rows.
news_df = news_list.df
news_df.head()

Unnamed: 0_level_0,headline,source,url,summary,created_at,updated_at,symbols,author,content,images
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
42249801,Friday's Top 5 Trending Stocks: What's Going O...,benzinga,https://www.benzinga.com/news/large-cap/24/11/...,"SMCI, Nvidia, MicoStrategy, SoFi Technologies ...",2024-11-29 18:51:59+00:00,2024-11-29 18:52:00+00:00,"[MSTR, NVDA, SMCI, SOFI, TSLA]",Erica Kollmann,"<p><strong>Super Micro Computer, Inc.</strong>...","[{'size': 'NewsImageSize.LARGE', 'url': 'https..."
42248220,10 Information Technology Stocks With Whale Al...,benzinga,https://www.benzinga.com/insights/options/24/1...,,2024-11-29 17:35:11+00:00,2024-11-29 17:35:11+00:00,"[AAPL, AMD, BTDR, CORZ, IREN, MARA, MSTR, MU, ...",Benzinga Insights,<p>This whale alert can help traders discover ...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https..."
42245224,Exploring The Competitive Space: NVIDIA Versus...,benzinga,https://www.benzinga.com/insights/news/24/11/4...,,2024-11-29 15:00:13+00:00,2024-11-29 15:00:14+00:00,[NVDA],Benzinga Insights,<p>In today's rapidly changing and highly comp...,[]
42244387,"Nvidia, Chip Stocks Gain On Reports Of Softer ...",benzinga,https://www.benzinga.com/24/11/42244387/nvidia...,Nvidia stock up on US-China semiconductor sanc...,2024-11-29 14:13:03+00:00,2024-11-29 14:13:03+00:00,"[AMAT, AMD, ASML, AVGO, INTC, KLAC, LRCX, MU, ...",Anusuya Lahiri,<p><strong>Nvidia Corp</strong> (NASDAQ:<a cla...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https..."
42241846,Nvidia And Other Chip-Stock Linked ETFs Surge ...,benzinga,https://www.benzinga.com/markets/24/11/4224184...,The deliberations have involved U.S. officials...,2024-11-29 11:55:03+00:00,2024-11-29 11:55:03+00:00,"[AMD, AVGO, NVDA, SMH, SOXL, SOXX]",Pooja Rajkumari,<p>Global chip stocks and ETFs experienced a n...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https..."


# analyze the sentiment of news in the dataframe

In [17]:
# analyze the sentiment of news in the dataframe
# (sentim_analyzer writes the 'Positive' / 'Negative' / 'Neutral' score columns)
sa_scores = sentim_analyzer(news_df, tokenizer, model)
sa_scores

100%|██████████| 50/50 [00:02<00:00, 21.40it/s]


Unnamed: 0_level_0,headline,source,url,summary,created_at,updated_at,symbols,author,content,images,Positive,Negative,Neutral
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
42249801,Friday's Top 5 Trending Stocks: What's Going O...,benzinga,https://www.benzinga.com/news/large-cap/24/11/...,"SMCI, Nvidia, MicoStrategy, SoFi Technologies ...",2024-11-29 18:51:59+00:00,2024-11-29 18:52:00+00:00,"[MSTR, NVDA, SMCI, SOFI, TSLA]",Erica Kollmann,"<p><strong>Super Micro Computer, Inc.</strong>...","[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.030585,0.302817,0.666599
42248220,10 Information Technology Stocks With Whale Al...,benzinga,https://www.benzinga.com/insights/options/24/1...,,2024-11-29 17:35:11+00:00,2024-11-29 17:35:11+00:00,"[AAPL, AMD, BTDR, CORZ, IREN, MARA, MSTR, MU, ...",Benzinga Insights,<p>This whale alert can help traders discover ...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.083199,0.030645,0.886155
42245224,Exploring The Competitive Space: NVIDIA Versus...,benzinga,https://www.benzinga.com/insights/news/24/11/4...,,2024-11-29 15:00:13+00:00,2024-11-29 15:00:14+00:00,[NVDA],Benzinga Insights,<p>In today's rapidly changing and highly comp...,[],0.297767,0.010019,0.692214
42244387,"Nvidia, Chip Stocks Gain On Reports Of Softer ...",benzinga,https://www.benzinga.com/24/11/42244387/nvidia...,Nvidia stock up on US-China semiconductor sanc...,2024-11-29 14:13:03+00:00,2024-11-29 14:13:03+00:00,"[AMAT, AMD, ASML, AVGO, INTC, KLAC, LRCX, MU, ...",Anusuya Lahiri,<p><strong>Nvidia Corp</strong> (NASDAQ:<a cla...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.509447,0.425387,0.065166
42241846,Nvidia And Other Chip-Stock Linked ETFs Surge ...,benzinga,https://www.benzinga.com/markets/24/11/4224184...,The deliberations have involved U.S. officials...,2024-11-29 11:55:03+00:00,2024-11-29 11:55:03+00:00,"[AMD, AVGO, NVDA, SMH, SOXL, SOXX]",Pooja Rajkumari,<p>Global chip stocks and ETFs experienced a n...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.726765,0.220619,0.052616
42240097,AMD Stock Could Trump Nvidia As AI Chip Market...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Advanced Micro Devices Inc. could emerge as a ...,2024-11-29 09:27:34+00:00,2024-11-29 09:27:35+00:00,"[AMD, IBM, NVDA, ORCL]",Kaustubh Bagalkote,<p><strong>Advanced Micro Devices Inc.</strong...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.866483,0.053693,0.079824
42235319,10 Information Technology Stocks Whale Activit...,benzinga,https://www.benzinga.com/insights/options/24/1...,,2024-11-28 17:35:12+00:00,2024-11-28 17:35:12+00:00,"[AAPL, ALAB, CRM, KC, MARA, MSFT, MSTR, NVDA, ...",Benzinga Insights,<p>This whale alert can help traders discover ...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.067397,0.07132,0.861282
42234954,Will Apple Or NVIDIA Be The World's Most Valua...,benzinga,https://www.benzinga.com/markets/cryptocurrenc...,Apple and NVIDIA are frontrunners for world&#3...,2024-11-28 16:39:09+00:00,2024-11-28 16:39:10+00:00,"[AAPL, MSFT, NVDA]",Murtuza Merchant,<p><strong>Apple Inc.</strong> (NASDAQ:<a clas...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.092443,0.017852,0.889704
42233726,Market Analysis: NVIDIA And Competitors In Sem...,benzinga,https://www.benzinga.com/insights/news/24/11/4...,,2024-11-28 15:00:11+00:00,2024-11-28 15:00:11+00:00,[NVDA],Benzinga Insights,<p>In today's fast-paced and highly competitiv...,[],0.038162,0.02163,0.940208
42230493,Nvidia's Next Big Catalyst? Analyst Sees Jense...,benzinga,https://www.benzinga.com/analyst-ratings/analy...,,2024-11-28 09:10:24+00:00,2024-11-28 12:25:13+00:00,"[NVDA, TSLA]",Pooja Rajkumari,<p>Investors are eyeing a potential surge in&n...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.915404,0.014492,0.070104


In [18]:
# let's do a sanity check with a few samples
# first take out the positive news with a positivenss > 0.6
sa_scores['filter'] = sa_scores.loc[:, 'Positive'] > 0.6

In [19]:
# select the headlines of the rows flagged as good news
good_news = sa_scores.loc[sa_scores['filter'], 'headline']

In [20]:
# check one entry to see if it's actually good news
good_news.iloc[0]

'Nvidia And Other Chip-Stock Linked ETFs Surge Amid Potential Easing Of US Restrictions On China'

### parse the news headlines and content and remove the HTML tags

In [21]:
def html_to_text(html):
    """Strip the HTML markup from ``html`` and return its text on a single line.

    The markup is parsed with BeautifulSoup's 'html.parser'. In the extracted
    text every non-breaking space, newline, tab and carriage return is replaced
    by a plain space.
    """
    extracted = BeautifulSoup(html, 'html.parser').get_text()
    # One translation table instead of a chain of single-character replaces.
    to_plain_space = str.maketrans({'\xa0': ' ', '\n': ' ', '\t': ' ', '\r': ' '})
    return extracted.translate(to_plain_space)

# Replace the raw HTML in both text columns with plain text, then preview.
for text_column in ('content', 'headline'):
    sa_scores[text_column] = sa_scores[text_column].apply(html_to_text)
sa_scores.head()

Unnamed: 0_level_0,headline,source,url,summary,created_at,updated_at,symbols,author,content,images,Positive,Negative,Neutral,filter
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
42249801,Friday's Top 5 Trending Stocks: What's Going O...,benzinga,https://www.benzinga.com/news/large-cap/24/11/...,"SMCI, Nvidia, MicoStrategy, SoFi Technologies ...",2024-11-29 18:51:59+00:00,2024-11-29 18:52:00+00:00,"[MSTR, NVDA, SMCI, SOFI, TSLA]",Erica Kollmann,"Super Micro Computer, Inc. (NASDAQ:SMCI), Nvid...","[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.030585,0.302817,0.666599,False
42248220,10 Information Technology Stocks With Whale Al...,benzinga,https://www.benzinga.com/insights/options/24/1...,,2024-11-29 17:35:11+00:00,2024-11-29 17:35:11+00:00,"[AAPL, AMD, BTDR, CORZ, IREN, MARA, MSTR, MU, ...",Benzinga Insights,This whale alert can help traders discover the...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.083199,0.030645,0.886155,False
42245224,Exploring The Competitive Space: NVIDIA Versus...,benzinga,https://www.benzinga.com/insights/news/24/11/4...,,2024-11-29 15:00:13+00:00,2024-11-29 15:00:14+00:00,[NVDA],Benzinga Insights,In today's rapidly changing and highly competi...,[],0.297767,0.010019,0.692214,False
42244387,"Nvidia, Chip Stocks Gain On Reports Of Softer ...",benzinga,https://www.benzinga.com/24/11/42244387/nvidia...,Nvidia stock up on US-China semiconductor sanc...,2024-11-29 14:13:03+00:00,2024-11-29 14:13:03+00:00,"[AMAT, AMD, ASML, AVGO, INTC, KLAC, LRCX, MU, ...",Anusuya Lahiri,Nvidia Corp (NASDAQ:NVDA) stock is up Friday a...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.509447,0.425387,0.065166,False
42241846,Nvidia And Other Chip-Stock Linked ETFs Surge ...,benzinga,https://www.benzinga.com/markets/24/11/4224184...,The deliberations have involved U.S. officials...,2024-11-29 11:55:03+00:00,2024-11-29 11:55:03+00:00,"[AMD, AVGO, NVDA, SMH, SOXL, SOXX]",Pooja Rajkumari,Global chip stocks and ETFs experienced a nota...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.726765,0.220619,0.052616,True


In [22]:
sa_scores.content.iloc[0]

"Super Micro Computer, Inc. (NASDAQ:SMCI), Nvidia Corp. (NASDAQ:NVDA), MicoStrategy, Inc. (NASDAQ:MSTR), SoFi Technologies, Inc. (NASDAQ:SOFI) and Tesla, Inc. (NASDAQ:TSLA) are the top five trending tickers on Yahoo Finance Friday. Here's a look at what grabbed retail investors' attention as much of Wall Street took the day off.  SMCI: Super Micro shares are dropping Friday as the stock continues its rollercoaster of ups and downs. Allegations of financial misconduct at SMCI, the resignation of its auditor and the potential delisting of its stock have caused shares to tumble more than 60% over the past six months. Earlier this week, the company disclosed that it had prepaid and ended its loan agreements with Cathay Bank and Bank of America N.A. Read Next: Tiger Woods, Rory McIlroy, PGA Tour, SoFi Prep ‘Golf Remixed’ For 2025 TGL Launch NVDA: Traders are hopeful that less stringent sanctions on semiconductor equipment and AI memory chips are coming, following reports that an announcemen

### check relevance

In [23]:
# Count how many times each article body mentions the company, using plain
# (case-sensitive, non-overlapping) substring counts of the terms below.
# NOTE(review): the all-caps spelling 'NVIDIA' is not counted — confirm whether
# it should be added to the terms.
nvidia_terms = ('NVDA', 'Nvidia')
# Build the column in a single assignment on a new frame. The previous chained
# `news.count_Nvidia.iloc[i] = ...` is not guaranteed by pandas to write back
# into the frame, and `news = sa_scores` only aliased the earlier dataframe.
news = sa_scores.assign(
    count_Nvidia=[
        sum(article.count(term) for term in nvidia_terms)
        for article in sa_scores['content']
    ]
)
news

Unnamed: 0_level_0,headline,source,url,summary,created_at,updated_at,symbols,author,content,images,Positive,Negative,Neutral,filter,count_Nvidia
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
42249801,Friday's Top 5 Trending Stocks: What's Going O...,benzinga,https://www.benzinga.com/news/large-cap/24/11/...,"SMCI, Nvidia, MicoStrategy, SoFi Technologies ...",2024-11-29 18:51:59+00:00,2024-11-29 18:52:00+00:00,"[MSTR, NVDA, SMCI, SOFI, TSLA]",Erica Kollmann,"Super Micro Computer, Inc. (NASDAQ:SMCI), Nvid...","[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.030585,0.302817,0.666599,False,4
42248220,10 Information Technology Stocks With Whale Al...,benzinga,https://www.benzinga.com/insights/options/24/1...,,2024-11-29 17:35:11+00:00,2024-11-29 17:35:11+00:00,"[AAPL, AMD, BTDR, CORZ, IREN, MARA, MSTR, MU, ...",Benzinga Insights,This whale alert can help traders discover the...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.083199,0.030645,0.886155,False,3
42245224,Exploring The Competitive Space: NVIDIA Versus...,benzinga,https://www.benzinga.com/insights/news/24/11/4...,,2024-11-29 15:00:13+00:00,2024-11-29 15:00:14+00:00,[NVDA],Benzinga Insights,In today's rapidly changing and highly competi...,[],0.297767,0.010019,0.692214,False,4
42244387,"Nvidia, Chip Stocks Gain On Reports Of Softer ...",benzinga,https://www.benzinga.com/24/11/42244387/nvidia...,Nvidia stock up on US-China semiconductor sanc...,2024-11-29 14:13:03+00:00,2024-11-29 14:13:03+00:00,"[AMAT, AMD, ASML, AVGO, INTC, KLAC, LRCX, MU, ...",Anusuya Lahiri,Nvidia Corp (NASDAQ:NVDA) stock is up Friday a...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.509447,0.425387,0.065166,False,7
42241846,Nvidia And Other Chip-Stock Linked ETFs Surge ...,benzinga,https://www.benzinga.com/markets/24/11/4224184...,The deliberations have involved U.S. officials...,2024-11-29 11:55:03+00:00,2024-11-29 11:55:03+00:00,"[AMD, AVGO, NVDA, SMH, SOXL, SOXX]",Pooja Rajkumari,Global chip stocks and ETFs experienced a nota...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.726765,0.220619,0.052616,True,3
42240097,AMD Stock Could Trump Nvidia As AI Chip Market...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Advanced Micro Devices Inc. could emerge as a ...,2024-11-29 09:27:34+00:00,2024-11-29 09:27:35+00:00,"[AMD, IBM, NVDA, ORCL]",Kaustubh Bagalkote,Advanced Micro Devices Inc. (NASDAQ:AMD) could...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.866483,0.053693,0.079824,True,6
42235319,10 Information Technology Stocks Whale Activit...,benzinga,https://www.benzinga.com/insights/options/24/1...,,2024-11-28 17:35:12+00:00,2024-11-28 17:35:12+00:00,"[AAPL, ALAB, CRM, KC, MARA, MSFT, MSTR, NVDA, ...",Benzinga Insights,This whale alert can help traders discover the...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.067397,0.07132,0.861282,False,3
42234954,Will Apple Or NVIDIA Be The World's Most Valua...,benzinga,https://www.benzinga.com/markets/cryptocurrenc...,Apple and NVIDIA are frontrunners for world&#3...,2024-11-28 16:39:09+00:00,2024-11-28 16:39:10+00:00,"[AAPL, MSFT, NVDA]",Murtuza Merchant,Apple Inc. (NASDAQ:AAPL) and NVIDIA (NASDAQ:NV...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.092443,0.017852,0.889704,False,1
42233726,Market Analysis: NVIDIA And Competitors In Sem...,benzinga,https://www.benzinga.com/insights/news/24/11/4...,,2024-11-28 15:00:11+00:00,2024-11-28 15:00:11+00:00,[NVDA],Benzinga Insights,In today's fast-paced and highly competitive b...,[],0.038162,0.02163,0.940208,False,4
42230493,Nvidia's Next Big Catalyst? Analyst Sees Jense...,benzinga,https://www.benzinga.com/analyst-ratings/analy...,,2024-11-28 09:10:24+00:00,2024-11-28 12:25:13+00:00,"[NVDA, TSLA]",Pooja Rajkumari,Investors are eyeing a potential surge in Nvid...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.915404,0.014492,0.070104,True,11


In [24]:
news.loc[:, 'count_Nvidia'].max()

25

In [25]:
news.loc[:, 'count_Nvidia'].min()

1

#### filter articles that mention NVDA more than 5 times

In [26]:
# Keep only the articles whose mention count is strictly greater than 5.
mentions_often = news['count_Nvidia'].gt(5)
news_filtered = news[mentions_often]
news_filtered.head()

Unnamed: 0_level_0,headline,source,url,summary,created_at,updated_at,symbols,author,content,images,Positive,Negative,Neutral,filter,count_Nvidia
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
42244387,"Nvidia, Chip Stocks Gain On Reports Of Softer ...",benzinga,https://www.benzinga.com/24/11/42244387/nvidia...,Nvidia stock up on US-China semiconductor sanc...,2024-11-29 14:13:03+00:00,2024-11-29 14:13:03+00:00,"[AMAT, AMD, ASML, AVGO, INTC, KLAC, LRCX, MU, ...",Anusuya Lahiri,Nvidia Corp (NASDAQ:NVDA) stock is up Friday a...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.509447,0.425387,0.065166,False,7
42240097,AMD Stock Could Trump Nvidia As AI Chip Market...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Advanced Micro Devices Inc. could emerge as a ...,2024-11-29 09:27:34+00:00,2024-11-29 09:27:35+00:00,"[AMD, IBM, NVDA, ORCL]",Kaustubh Bagalkote,Advanced Micro Devices Inc. (NASDAQ:AMD) could...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.866483,0.053693,0.079824,True,6
42230493,Nvidia's Next Big Catalyst? Analyst Sees Jense...,benzinga,https://www.benzinga.com/analyst-ratings/analy...,,2024-11-28 09:10:24+00:00,2024-11-28 12:25:13+00:00,"[NVDA, TSLA]",Pooja Rajkumari,Investors are eyeing a potential surge in Nvid...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.915404,0.014492,0.070104,True,11
42231266,GOP Megadonor Ken Griffin Slashes Citadel's Pa...,benzinga,https://www.benzinga.com/markets/equities/24/1...,GOP Megadonor Ken Griffin&#39;s fund Citadel A...,2024-11-28 10:49:08+00:00,2024-11-28 10:49:08+00:00,"[AAPL, NVDA, PLTR]",Rishabh Mishra,GOP Megadonor Ken Griffin‘s fund Citadel Advis...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.024042,0.935611,0.040348,False,9
42228496,Nvidia Crushes Tesla In Stock Gains — Gary Bla...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Black explained why Nvidia remains the fund&#3...,2024-11-28 03:52:42+00:00,2024-11-28 03:52:42+00:00,"[FFND, NVDA, SPY, TSLA]",Kaustubh Bagalkote,Prominent investment analyst Gary Black of The...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.662104,0.026327,0.311569,True,8


In [27]:
len(news_filtered)

17

### Inspect novelty (remove duplicates)

In [29]:
# Order the articles from newest to oldest, drop rows that repeat an already
# seen body or headline (the first occurrence in that order is kept), and
# renumber the rows from 0.
news_filtered = (
    news_filtered
    .sort_values(by='created_at', ascending=False)
    .drop_duplicates(subset='content', keep='first')
    .drop_duplicates(subset='headline', keep='first')
    .reset_index(drop=True)
)
print(len(news_filtered))
news_filtered.head()

17


Unnamed: 0,headline,source,url,summary,created_at,updated_at,symbols,author,content,images,Positive,Negative,Neutral,filter,count_Nvidia
0,"Nvidia, Chip Stocks Gain On Reports Of Softer ...",benzinga,https://www.benzinga.com/24/11/42244387/nvidia...,Nvidia stock up on US-China semiconductor sanc...,2024-11-29 14:13:03+00:00,2024-11-29 14:13:03+00:00,"[AMAT, AMD, ASML, AVGO, INTC, KLAC, LRCX, MU, ...",Anusuya Lahiri,Nvidia Corp (NASDAQ:NVDA) stock is up Friday a...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.509447,0.425387,0.065166,False,7
1,AMD Stock Could Trump Nvidia As AI Chip Market...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Advanced Micro Devices Inc. could emerge as a ...,2024-11-29 09:27:34+00:00,2024-11-29 09:27:35+00:00,"[AMD, IBM, NVDA, ORCL]",Kaustubh Bagalkote,Advanced Micro Devices Inc. (NASDAQ:AMD) could...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.866483,0.053693,0.079824,True,6
2,GOP Megadonor Ken Griffin Slashes Citadel's Pa...,benzinga,https://www.benzinga.com/markets/equities/24/1...,GOP Megadonor Ken Griffin&#39;s fund Citadel A...,2024-11-28 10:49:08+00:00,2024-11-28 10:49:08+00:00,"[AAPL, NVDA, PLTR]",Rishabh Mishra,GOP Megadonor Ken Griffin‘s fund Citadel Advis...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.024042,0.935611,0.040348,False,9
3,Nvidia's Next Big Catalyst? Analyst Sees Jense...,benzinga,https://www.benzinga.com/analyst-ratings/analy...,,2024-11-28 09:10:24+00:00,2024-11-28 12:25:13+00:00,"[NVDA, TSLA]",Pooja Rajkumari,Investors are eyeing a potential surge in Nvid...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.915404,0.014492,0.070104,True,11
4,Nvidia Crushes Tesla In Stock Gains — Gary Bla...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Black explained why Nvidia remains the fund&#3...,2024-11-28 03:52:42+00:00,2024-11-28 03:52:42+00:00,"[FFND, NVDA, SPY, TSLA]",Kaustubh Bagalkote,Prominent investment analyst Gary Black of The...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.662104,0.026327,0.311569,True,8


# Ensuring novelty: drop highly similar articles using cosine similarity

In [31]:
# Fit a TF-IDF vocabulary on the article bodies and vectorize them in one step.
article_bodies = news_filtered['content']
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(article_bodies)

In [32]:
tfidf_matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 4643 stored elements and shape (17, 2256)>

In [34]:
# Pairwise cosine similarity between all article vectors (with a single
# argument the matrix is compared against itself), labelled on both axes
# with the article row labels.
similarity_mat = cosine_similarity(tfidf_matrix)
article_labels = news_filtered.index
similarity_df = pd.DataFrame(similarity_mat, index=article_labels, columns=article_labels)
similarity_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,1.0,0.254343,0.279004,0.260604,0.204051,0.183791,0.376714,0.333043,0.216897,0.27437,0.30408,0.172435,0.291078,0.292517,0.238202,0.237412,0.234117
1,0.254343,1.0,0.368571,0.355084,0.336983,0.292611,0.308831,0.423799,0.198757,0.316085,0.349983,0.191773,0.327214,0.34609,0.298861,0.259818,0.265378
2,0.279004,0.368571,1.0,0.445728,0.32583,0.30194,0.273926,0.441465,0.219582,0.319125,0.385764,0.190068,0.365234,0.408498,0.255859,0.365491,0.27912
3,0.260604,0.355084,0.445728,1.0,0.313848,0.303036,0.290125,0.428314,0.236999,0.333835,0.338055,0.202539,0.360324,0.375093,0.237137,0.272031,0.302142
4,0.204051,0.336983,0.32583,0.313848,1.0,0.234393,0.241192,0.309204,0.17908,0.265852,0.292689,0.163855,0.260636,0.303653,0.194965,0.261725,0.238515


In [35]:
# Articles whose cosine similarity reaches this value are treated as duplicates.
threshold = 0.8
kept_articles = []
articles_to_remove = []
# Greedy pass in row order: an article is dropped only if it is too similar to
# an article that is actually being kept. The previous row-wise
# "drop everything but the first match" rule could also drop an article that
# was not similar to any retained article.
for candidate in similarity_df.index:
    similarity_to_kept = similarity_df.loc[candidate, kept_articles]
    if (similarity_to_kept >= threshold).any():
        articles_to_remove.append(candidate)
    else:
        kept_articles.append(candidate)
# Each label is appended at most once above, so the list is already unique.
articles_to_remove_unique = list(articles_to_remove)
news_filtered_novel = news_filtered.drop(articles_to_remove_unique)
news_filtered_novel.head()

Unnamed: 0,headline,source,url,summary,created_at,updated_at,symbols,author,content,images,Positive,Negative,Neutral,filter,count_Nvidia
0,"Nvidia, Chip Stocks Gain On Reports Of Softer ...",benzinga,https://www.benzinga.com/24/11/42244387/nvidia...,Nvidia stock up on US-China semiconductor sanc...,2024-11-29 14:13:03+00:00,2024-11-29 14:13:03+00:00,"[AMAT, AMD, ASML, AVGO, INTC, KLAC, LRCX, MU, ...",Anusuya Lahiri,Nvidia Corp (NASDAQ:NVDA) stock is up Friday a...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.509447,0.425387,0.065166,False,7
1,AMD Stock Could Trump Nvidia As AI Chip Market...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Advanced Micro Devices Inc. could emerge as a ...,2024-11-29 09:27:34+00:00,2024-11-29 09:27:35+00:00,"[AMD, IBM, NVDA, ORCL]",Kaustubh Bagalkote,Advanced Micro Devices Inc. (NASDAQ:AMD) could...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.866483,0.053693,0.079824,True,6
2,GOP Megadonor Ken Griffin Slashes Citadel's Pa...,benzinga,https://www.benzinga.com/markets/equities/24/1...,GOP Megadonor Ken Griffin&#39;s fund Citadel A...,2024-11-28 10:49:08+00:00,2024-11-28 10:49:08+00:00,"[AAPL, NVDA, PLTR]",Rishabh Mishra,GOP Megadonor Ken Griffin‘s fund Citadel Advis...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.024042,0.935611,0.040348,False,9
3,Nvidia's Next Big Catalyst? Analyst Sees Jense...,benzinga,https://www.benzinga.com/analyst-ratings/analy...,,2024-11-28 09:10:24+00:00,2024-11-28 12:25:13+00:00,"[NVDA, TSLA]",Pooja Rajkumari,Investors are eyeing a potential surge in Nvid...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.915404,0.014492,0.070104,True,11
4,Nvidia Crushes Tesla In Stock Gains — Gary Bla...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Black explained why Nvidia remains the fund&#3...,2024-11-28 03:52:42+00:00,2024-11-28 03:52:42+00:00,"[FFND, NVDA, SPY, TSLA]",Kaustubh Bagalkote,Prominent investment analyst Gary Black of The...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.662104,0.026327,0.311569,True,8


In [37]:
print(len(news_filtered))
print(len(news_filtered_novel))

17
17


In [39]:
news_filtered_novel.sort_values(by='Positive', ascending=False)

Unnamed: 0,headline,source,url,summary,created_at,updated_at,symbols,author,content,images,Positive,Negative,Neutral,filter,count_Nvidia
3,Nvidia's Next Big Catalyst? Analyst Sees Jense...,benzinga,https://www.benzinga.com/analyst-ratings/analy...,,2024-11-28 09:10:24+00:00,2024-11-28 12:25:13+00:00,"[NVDA, TSLA]",Pooja Rajkumari,Investors are eyeing a potential surge in Nvid...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.915404,0.014492,0.070104,True,11
1,AMD Stock Could Trump Nvidia As AI Chip Market...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Advanced Micro Devices Inc. could emerge as a ...,2024-11-29 09:27:34+00:00,2024-11-29 09:27:35+00:00,"[AMD, IBM, NVDA, ORCL]",Kaustubh Bagalkote,Advanced Micro Devices Inc. (NASDAQ:AMD) could...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.866483,0.053693,0.079824,True,6
12,Jensen Huang Credits China's 'Amazing' Tech Ec...,benzinga,https://www.benzinga.com/news/global/24/11/421...,Nvidia CEO Jensen Huang emphasized the company...,2024-11-25 12:05:21+00:00,2024-11-25 12:05:22+00:00,"[AAPL, MSFT, NVDA, ORCL]",Kaustubh Bagalkote,Nvidia Corp. (NASDAQ:NVDA) CEO Jensen Huang em...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.804501,0.008274,0.187225,True,8
4,Nvidia Crushes Tesla In Stock Gains — Gary Bla...,benzinga,https://www.benzinga.com/markets/equities/24/1...,Black explained why Nvidia remains the fund&#3...,2024-11-28 03:52:42+00:00,2024-11-28 03:52:42+00:00,"[FFND, NVDA, SPY, TSLA]",Kaustubh Bagalkote,Prominent investment analyst Gary Black of The...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.662104,0.026327,0.311569,True,8
8,Nvidia Blackwell Supplier Vishay Intertechnolo...,benzinga,https://www.benzinga.com/news/global/24/11/421...,VSH stock gained after analyst Ming-Chi Kuo re...,2024-11-26 18:58:01+00:00,2024-11-26 18:58:02+00:00,"[NVDA, VSH]",Anusuya Lahiri,"Vishay Intertechnology, Inc (NYSE:VSH) stock g...","[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.65825,0.100999,0.240751,True,7
13,Raging AI Debate Could Deliver Fuel For Direxi...,benzinga,https://www.benzinga.com/trading-ideas/long-id...,,2024-11-25 10:56:14+00:00,2024-11-25 12:52:29+00:00,"[NVDA, NVDD, NVDU]",Benzinga Newsdesk,Nvidia Corp (NASDAQ:NVDA) investors have no sh...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.593031,0.036613,0.370356,False,17
14,Amazon's AI Chip Strategy Targets Nvidia's Dom...,benzinga,https://www.benzinga.com/24/11/42158933/amazon...,Amazon challenges Nvidia in AI chip market wor...,2024-11-25 10:53:41+00:00,2024-11-25 10:53:41+00:00,"[AMZN, MSFT, NVDA]",Anusuya Lahiri,Amazon.com Inc (NASDAQ:AMZN) is ramping up eff...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.560775,0.008714,0.430512,False,11
0,"Nvidia, Chip Stocks Gain On Reports Of Softer ...",benzinga,https://www.benzinga.com/24/11/42244387/nvidia...,Nvidia stock up on US-China semiconductor sanc...,2024-11-29 14:13:03+00:00,2024-11-29 14:13:03+00:00,"[AMAT, AMD, ASML, AVGO, INTC, KLAC, LRCX, MU, ...",Anusuya Lahiri,Nvidia Corp (NASDAQ:NVDA) stock is up Friday a...,"[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.509447,0.425387,0.065166,False,7
5,Nvidia's Latest Record Quarter Show Its AI Dom...,benzinga,https://www.benzinga.com/24/11/42221310/nvidia...,"Last week, Nvidia Corporation (NASDAQ: NVDA) i...",2024-11-27 18:28:19+00:00,2024-11-27 18:29:01+00:00,"[MSFT, NVDA, ORCL]",Upwallstreet,"Last week, Nvidia Corporation (NASDAQ: NVDA) i...","[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.367754,0.564188,0.068058,False,8
16,Weekend Round-Up: AI Dominates Headlines With ...,benzinga,https://www.benzinga.com/24/11/42154224/weeken...,From Nvidia&#39;s impressive Q3 earnings to El...,2024-11-24 14:00:34+00:00,2024-11-24 14:00:35+00:00,"[BABA, NVDA]",Benzinga Neuro,"The past week has been a whirlwind of news, wi...","[{'size': 'NewsImageSize.LARGE', 'url': 'https...",0.309052,0.021053,0.669895,False,9


In [40]:
# Persist the de-duplicated articles next to the notebook (path is relative to
# the working directory). pandas infers bz2 compression from the file suffix.
news_filtered_novel.to_pickle("nvda_alpaca_news.bz2")

The remaining articles are useful information that can be used to develop trading alpha.

# What now? 

You can make stock price predictions based on the sentiment scores and generate trading signals.

In [41]:
from pathlib import Path
Path.cwd().resolve()

PosixPath('/Users/Ethan/Library/CloudStorage/OneDrive-Personal/CourseQuant/AlgoTrading/learning/algo_trading_components/Sentiment/MyImplementations')