In [3]:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Retrieves the Dow Jones Industrial Average 30 stock symbols from CNBC

# Scrape the ticker symbols listed on CNBC's Dow 30 page with headless Firefox
# and save them, space-separated, to the file 'DJIA-30'.
options = Options()
options.add_argument("--headless")
driver = webdriver.Firefox(options=options)

try:
    driver.get("https://www.cnbc.com/dow-30/")

    # Wait up to 8 seconds for the quote links to be present in the DOM.
    symbols = WebDriverWait(driver, 8).until(
        EC.presence_of_all_elements_located((By.XPATH, '//a[contains(@href, "quotes")]'))
    )

    # Read each element's text once and drop anchors with no visible text.
    symbol_texts = [text for text in (symbol.text for symbol in symbols) if text]

    if len(symbol_texts) == 30:
        # Open the output file only after the scrape has been validated, so a
        # failed or incomplete run does not truncate an existing 'DJIA-30' file.
        with open('DJIA-30', 'w') as f:
            for symbol in symbol_texts:
                f.write(f'{symbol} ')
    else:
        print('missing symbols')

except Exception as e:
    print(f"Error: {e}")

finally:
    # Always shut the browser down, whether or not the scrape succeeded.
    driver.quit()


In [32]:
import finnhub # type: ignore
import os
from datetime import datetime, timedelta

# TODO: create a pandas DataFrame to hold the news for each individual ticker
# TODO: complete the CSV export first

# Collect roughly one year of company news for SYMBOL from Finnhub, requesting
# it in 30-day windows and accumulating the articles in STOCK_DATA.
API_KEY = os.environ.get('FINNHUB_API_KEY') # set from conda env variable
if not API_KEY:
    # Fail early with a clear message instead of sending requests with no key.
    raise RuntimeError('FINNHUB_API_KEY environment variable is not set')

finnhub_client = finnhub.Client(api_key=API_KEY)

SYMBOL = 'AAPL'
STOCK_DATA = []

# Init times
end_date = datetime.today()
from_date = end_date - timedelta(days=365)
delta = timedelta(days=30)  # 30 day intervals

# Consecutive windows share their boundary date, so the same article can be
# returned by two requests; remember the ids already stored to skip repeats.
seen_ids = set()

# 30 day batch collect
batch_start = from_date  # separate cursor so from_date keeps the range start
while batch_start < end_date:
    # End of batch time, clamped so the last window does not run past end_date
    to_date = min(batch_start + delta, end_date)
    news_chunk = finnhub_client.company_news(
        SYMBOL,
        _from=batch_start.strftime('%Y-%m-%d'),
        to=to_date.strftime('%Y-%m-%d'),
    )
    for item in news_chunk:
        news_id = item.get('id')
        if news_id is not None and news_id in seen_ids:
            continue
        seen_ids.add(news_id)
        STOCK_DATA.append(item)
    batch_start += delta # Extend lower bound of batch

# Show a count and a short preview instead of dumping every article.
print(f'{len(STOCK_DATA)} news items collected for {SYMBOL}')
for news in STOCK_DATA[:5]:
    print(news)

{'category': 'company', 'datetime': 1686936240, 'headline': 'Apple Inc. stock outperforms competitors despite losses on the day', 'id': 120940838, 'image': 'https://images.mktw.net/im-213861/social', 'related': 'AAPL', 'source': 'MarketWatch', 'summary': 'Shares of Apple Inc. slipped 0.59% to $184.92 Friday, on what proved to be an all-around poor trading session for the stock market, with the NASDAQ Composite...', 'url': 'https://finnhub.io/api/news?id=7bd42a87ac1391074aa871fc154059847a998d3735c42cdd68a6c3f23e26171d'}
{'category': 'company', 'datetime': 1686934859, 'headline': 'SPY Triggers Our Buy Signal, But Apple Does Not (Technical Analysis Upgrade)', 'id': 120918326, 'image': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/1369021012/image_1369021012.jpg?io=getty-c-w1536', 'related': 'AAPL', 'source': 'SeekingAlpha', 'summary': 'There is an interesting divergence with SPY triggering our proprietary Buy Signal, but Apple failing to do so. Click here to read a technica

In [1]:
# Keep only the date of the news, the ticker symbol, and the news headline
# Write it out in CSV format (Date, Stock, Headline)
import csv

# Column order of the cleaned rows (and of the planned CSV export).
order = ['Date', 'Stock', 'Headline']

# Reduce every raw news dict in STOCK_DATA to a (Date, Stock, Headline) row.
# New dicts are built instead of editing the raw items in place, so the raw
# data stays intact and this cell can be re-run without errors.
CLEANED_DATA = [
    {
        # 'datetime' is a Unix timestamp; it is converted using the local timezone.
        'Date': datetime.fromtimestamp(news['datetime']).strftime('%Y-%m-%d'),
        'Stock': SYMBOL,
        'Headline': news['headline'],
    }
    for news in STOCK_DATA
]

# Short preview of the cleaned rows.
for row in CLEANED_DATA[:5]:
    print(row)

NameError: name 'STOCK_DATA' is not defined