In [48]:
!pip install feedparser
!pip install textblob spacytextblob
!python -m spacy download en_core_web_md
import requests
import feedparser
import spacy
import pandas as pd
from spacytextblob.spacytextblob import SpacyTextBlob

Collecting spacytextblob
  Downloading spacytextblob-5.0.0-py3-none-any.whl.metadata (4.8 kB)
Downloading spacytextblob-5.0.0-py3-none-any.whl (4.2 kB)
Installing collected packages: spacytextblob
Successfully installed spacytextblob-5.0.0
Collecting en-core-web-md==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl (33.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.5/33.5 MB[0m [31m52.6 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [41]:
# Load company data from the Nasdaq Trader symbol directory (pipe-delimited text files).
nasdaq_df = pd.read_csv("https://www.nasdaqtrader.com/dynamic/symdir/nasdaqlisted.txt", sep='|')

# otherlisted.txt names its ticker column "ACT Symbol" rather than "Symbol".
# Without the rename, concat leaves "Symbol" as NaN for every non-Nasdaq listing,
# so those tickers can never match a lookup on stocks_df["Symbol"].
# The rename is a no-op if the column is absent.
otherlisted_df = pd.read_csv(
    "https://www.nasdaqtrader.com/dynamic/symdir/otherlisted.txt", sep='|'
).rename(columns={"ACT Symbol": "Symbol"})

# ignore_index avoids duplicate row labels in the combined frame.
stocks_df = pd.concat([nasdaq_df, otherlisted_df], ignore_index=True)

# Each file ends with a "File Creation Time: ..." footer line that read_csv parses
# as a data row; drop it so it is not mistaken for a listing.
is_footer_row = stocks_df["Symbol"].astype(str).str.startswith("File Creation Time")
stocks_df = stocks_df[~is_footer_row].reset_index(drop=True)

In [52]:
# Load the medium English spaCy pipeline; `nlp` is shared by extract_tickers()
# and get_sentiment() in later cells.
nlp = spacy.load("en_core_web_md")
# Append the spacytextblob component as the last pipeline step. get_sentiment()
# reads doc._.blob.polarity, which presumably is provided by this component.
nlp.add_pipe("spacytextblob", last=True)

<spacytextblob.spacytextblob.SpacyTextBlob at 0x791f111c6d90>

In [43]:
# Fetch RSS feed.
url = "https://www.nasdaq.com/feed/rssoutbound?category=Stocks"
# NOTE(review): a browser-like User-Agent is sent explicitly; presumably the
# endpoint rejects the default python-requests agent -- confirm.
headers = {"User-Agent": "Mozilla/5.0"}

# Bound the request so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of handing an error page to the feed parser.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

feed = feedparser.parse(response.content)

In [55]:
# Extract ticker.
def extract_tickers(text, max_len=5):
    """Return candidate ticker strings found in ``text``.

    The text is run through the module-level ``nlp`` pipeline and every named
    entity labelled ``ORG`` whose text is at most ``max_len`` characters long is
    kept. This is a heuristic: the results are only *candidates* and are not
    checked against any symbol list here.

    Args:
        text: Text to scan. ``None`` or an empty string yields an empty list.
        max_len: Maximum entity length (in characters) to accept as a ticker.
            Defaults to 5, the previously hard-coded limit.

    Returns:
        A list of entity strings in document order; duplicates are preserved.
    """
    # Guard against missing text so the pipeline is never called with None.
    if not text:
        return []

    doc = nlp(text)
    return [
        ent.text
        for ent in doc.ents
        if ent.label_ == "ORG" and len(ent.text) <= max_len
    ]

# Get sentiment (naively).
def get_sentiment(text):
    """Classify ``text`` as "positive", "negative" or "neutral".

    The label is derived solely from the sign of the polarity score exposed at
    ``doc._.blob.polarity`` after running ``text`` through the module-level
    ``nlp`` pipeline: above zero is positive, below zero is negative, and
    anything else is neutral.
    """
    score = nlp(text)._.blob.polarity

    # Guard-clause form: the two signed cases return early, neutral is the fallthrough.
    if score > 0:
        return "positive"
    if score < 0:
        return "negative"
    return "neutral"

In [56]:
# Build a symbol -> security-name lookup once, instead of scanning the whole
# frame with a boolean mask for every candidate ticker. Keeping the first
# occurrence of each symbol mirrors the previous `.values[0]` behaviour.
symbol_to_name = (
    stocks_df
    .dropna(subset=["Symbol"])
    .drop_duplicates(subset="Symbol")
    .set_index("Symbol")["Security Name"]
    .to_dict()
)

results = []
for entry in feed.entries:
    # Feed entries are dict-like; tolerate items that lack a title or description
    # rather than raising AttributeError part-way through the feed.
    title = entry.get("title", "")
    description = entry.get("description", "")

    # Extract candidate tickers from title and description, then keep only those
    # that are real listed symbols. Sorting makes the row order deterministic
    # (plain set iteration order is not).
    candidates = set(extract_tickers(title) + extract_tickers(description))
    matched_tickers = sorted(t for t in candidates if t in symbol_to_name)
    if not matched_tickers:
        continue

    # Sentiment depends only on the description, so compute it once per entry
    # rather than once per matched ticker.
    sentiment_label = get_sentiment(description)

    for ticker in matched_tickers:
        results.append({
            'Ticker': ticker,
            'Company Name': symbol_to_name[ticker],
            'Sentiment': sentiment_label,
            'Description': description
        })

# Explicit columns keep the frame well-formed even when no entry matched.
result_df = pd.DataFrame(results, columns=['Ticker', 'Company Name', 'Sentiment', 'Description'])
print(result_df)

  Ticker                            Company Name Sentiment  \
0    QQQ             Invesco QQQ Trust, Series 1   neutral   
1    CME   CME Group Inc. - Class A Common Stock  positive   
2    CME   CME Group Inc. - Class A Common Stock  negative   
3    NXT  Nextracker Inc. - Class A Common Stock  positive   

                                         Description  
0  The S&amp;P 500 Index ($SPX ) (SPY ) today is ...  
1  Live cattle futures started of Monday with str...  
2  Lean hog futures snuck out gains of a tick to ...  
3  In trading on Tuesday, shares of Nextracker In...  
