In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup

# ChromeDriver setup
# Before running, replace 'path/to/chromedriver' in initialize_driver() with the actual path to your ChromeDriver executable.
# You can download it from https://chromedriver.chromium.org/downloads

`initialize_driver()` initializes the Chrome WebDriver.

Returns:

- `driver` (WebDriver): Selenium WebDriver instance.

In [None]:
def initialize_driver(driver_path: str = "path/to/chromedriver"):
  """Create a Chrome WebDriver backed by a local ChromeDriver executable.

  Args:
    driver_path (str): Filesystem path to the ChromeDriver executable.
      The default is a placeholder and must be replaced (or overridden by
      the caller) with the real location of ChromeDriver on this machine.

  Returns:
    WebDriver: A Selenium Chrome WebDriver instance. The browser is left
    open; the caller is responsible for calling ``quit()`` on it.
  """
  # Service wraps the ChromeDriver executable that Selenium talks to.
  service = Service(driver_path)
  return webdriver.Chrome(service=service)

"""
  Scrapes the articles linked from Nasdaq's "News + Insights" > "Stocks" page and saves each article's title, URL, and HTML to nasdaq_articles.csv.
  """

In [None]:
def scrape_nasdaq_articles(output_path: str = "nasdaq_articles.csv"):
  """Scrape the Nasdaq "Stocks" news articles and save them to a CSV file.

  Opens https://www.nasdaq.com/ in Chrome, clicks "News + Insights" and then
  "Stocks", collects every link whose href starts with "/articles", visits
  each article and records its link text, URL and prettified page HTML.

  Args:
    output_path (str): Destination CSV file. Defaults to
      "nasdaq_articles.csv".

  Raises:
    selenium.common.exceptions.NoSuchElementException: If the
      "News + Insights" or "Stocks" link cannot be found on the page.

  The WebDriver is always closed, even when scraping fails part-way.
  """
  driver = initialize_driver()

  try:
    # Step 1: Navigate to the Nasdaq homepage
    driver.get("https://www.nasdaq.com/")
    time.sleep(5)  # Wait for the page to load

    # Step 2: Click on "News + Insights"
    news_insights = driver.find_element(By.LINK_TEXT, "News + Insights")
    ActionChains(driver).move_to_element(news_insights).click().perform()
    time.sleep(3)  # Wait for the page to load

    # Step 3: Click on "Stocks"
    stocks_link = driver.find_element(By.LINK_TEXT, "Stocks")
    stocks_link.click()
    time.sleep(5)  # Wait for the Stocks page to load

    # Step 4: Scroll down to ensure all articles are loaded
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Allow time for articles to load

    # Step 5: Find all article links in the "Latest News in Stocks" section
    link_elements = driver.find_elements(By.CSS_SELECTOR, "a[href^='/articles']")

    # Read href/text from every element BEFORE navigating anywhere. Once
    # driver.get() loads another page, these WebElements become stale and
    # any further access raises StaleElementReferenceException.
    # The dict also de-duplicates URLs (insertion order is preserved) so the
    # same article is not fetched twice when several anchors point to it.
    titles_by_url = {}
    for link in link_elements:
      url = link.get_attribute("href")
      if not url:
        continue
      # Keep the first non-empty link text seen for this URL; some anchors
      # may carry no text at all.
      if not titles_by_url.get(url):
        titles_by_url[url] = link.text

    # Visit each article and capture its HTML
    article_data = []  # List to store article details
    for article_url, article_title in titles_by_url.items():
      driver.get(article_url)
      time.sleep(3)  # Wait for the article page to load

      # Parse the article HTML content
      soup = BeautifulSoup(driver.page_source, "html.parser")

      article_data.append({
        "Article": article_title,
        "URL": article_url,
        "HTML Data": soup.prettify()
      })

    # Step 6: Save the extracted data to a CSV file.
    # Explicit columns keep the header row even when no article was found.
    df = pd.DataFrame(article_data, columns=["Article", "URL", "HTML Data"])
    df.to_csv(output_path, index=False, encoding="utf-8")
    print(f"Data successfully saved to '{output_path}'")

  finally:
    # Close the WebDriver
    driver.quit()

# Execute the scraping function

In [None]:
# Run the full scrape: this launches Chrome, browses nasdaq.com and writes nasdaq_articles.csv.
scrape_nasdaq_articles()