In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager  # Automatically manages ChromeDriver
import pandas as pd
import time

# Function to scrape historical stock data
def scrape_stock_data(ticker, start_date, end_date, page_load_wait=10):
    """Scrape the historical-price table for one ticker from Yahoo Finance.

    Loads the ticker's history page in headless Chrome, waits a fixed time for
    it to render, and parses the first ``<table>`` found in the page source.

    Args:
        ticker: Symbol interpolated into the URL path (e.g. ``"TSLA"``).
        start_date: Value sent as the ``period1`` query parameter; callers in
            this notebook pass a Unix timestamp in seconds.
        end_date: Value sent as the ``period2`` query parameter; callers in
            this notebook pass a Unix timestamp in seconds.
        page_load_wait: Seconds to sleep after requesting the page before the
            HTML is read. Defaults to 10, the previously hard-coded value.

    Returns:
        A list of dicts, one per table row that has exactly seven ``<td>``
        cells, with keys ``Date``, ``Open``, ``High``, ``Low``, ``Close``,
        ``Adj Close`` and ``Volume``. All values are the stripped cell text
        (strings); no numeric or date conversion is done.

    Raises:
        ValueError: If the page source contains no ``<table>`` element.
    """
    # Imported before the browser is started so that a missing bs4 install
    # fails fast instead of leaving a Chrome process behind.
    from bs4 import BeautifulSoup

    # Construct the URL for historical data
    url = f"https://finance.yahoo.com/quote/{ticker}/history?period1={start_date}&period2={end_date}&interval=1d&filter=history&frequency=1d"

    # Set up Selenium WebDriver
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run in headless mode (no browser UI)
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        driver.get(url)

        # Fixed wait for client-side rendering to finish
        time.sleep(page_load_wait)

        page_source = driver.page_source
    finally:
        # Always release the browser, even if loading the page failed.
        driver.quit()

    # Parse the captured HTML and locate the historical data table
    soup = BeautifulSoup(page_source, "html.parser")
    table = soup.find("table")
    if table is None:
        raise ValueError("Table not found. The page structure may have changed.")

    # Skip the first (header) row. The last row is no longer dropped
    # unconditionally: the seven-cell check below already rejects any row that
    # is not a full price row, so slicing it off could only lose real data.
    rows = table.find_all("tr")[1:]

    # Extract data into a list of dictionaries
    data = []
    for row in rows:
        cols = row.find_all("td")
        if len(cols) != 7:
            # Not a full price row (presumably a dividend/split notice or a
            # footnote -- confirm against the live page); ignore it.
            continue
        data.append({
            "Date": cols[0].text.strip(),
            "Open": cols[1].text.strip(),
            "High": cols[2].text.strip(),
            "Low": cols[3].text.strip(),
            "Close": cols[4].text.strip(),
            "Adj Close": cols[5].text.strip(),
            "Volume": cols[6].text.strip()
        })

    return data

# Define the stock ticker and time period
ticker = "TSLA"
start_date = 1630454400  # Unix timestamp for September 1, 2021
end_date = 1661990400    # Unix timestamp for September 1, 2022

# Scrape the data
try:
    stock_data = scrape_stock_data(ticker, start_date, end_date)

    # An empty result would otherwise be written out as empty files and
    # reported as a success, so surface it through the handler below instead.
    if not stock_data:
        raise ValueError(f"No price rows were scraped for {ticker}.")

    # Convert to a pandas DataFrame
    df = pd.DataFrame(stock_data)

    # Save the data to CSV, Excel, and JSON
    df.to_csv(f"{ticker}_historical_data.csv", index=False)
    df.to_excel(f"{ticker}_historical_data.xlsx", index=False)
    df.to_json(f"{ticker}_historical_data.json", orient="records", indent=4)

    print(f"Data for {ticker} saved to CSV, Excel, and JSON files.")
except Exception as e:
    # Best-effort cell: report the failure (scrape or file export) and carry on.
    print(f"An error occurred: {e}")

Data for TSLA saved to CSV, Excel, and JSON files.


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

# Function to scrape historical stock data
def scrape_stock_data(ticker, start_date, end_date, page_load_wait=10):
    """Scrape the historical-price table for one ticker from Yahoo Finance.

    Loads the ticker's history page in headless Chrome, waits a fixed time for
    it to render, and parses the first ``<table>`` found in the page source.
    Every returned row is tagged with the ticker so results for several
    symbols can be concatenated.

    Args:
        ticker: Symbol interpolated into the URL path (e.g. ``"TSLA"``).
        start_date: Value sent as the ``period1`` query parameter; callers in
            this notebook pass a Unix timestamp in seconds.
        end_date: Value sent as the ``period2`` query parameter; callers in
            this notebook pass a Unix timestamp in seconds.
        page_load_wait: Seconds to sleep after requesting the page before the
            HTML is read. Defaults to 10, the previously hard-coded value.

    Returns:
        A list of dicts, one per table row that has exactly seven ``<td>``
        cells, with keys ``Ticker``, ``Date``, ``Open``, ``High``, ``Low``,
        ``Close``, ``Adj Close`` and ``Volume``. Apart from ``Ticker`` (the
        argument as given), values are the stripped cell text (strings); no
        numeric or date conversion is done.

    Raises:
        ValueError: If the page source contains no ``<table>`` element.
    """
    # Construct the URL for historical data
    url = f"https://finance.yahoo.com/quote/{ticker}/history?period1={start_date}&period2={end_date}&interval=1d&filter=history&frequency=1d"

    # Set up Selenium WebDriver
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run in headless mode (no browser UI)
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        driver.get(url)

        # Fixed wait for client-side rendering to finish
        time.sleep(page_load_wait)

        page_source = driver.page_source
    finally:
        # Always release the browser, even if loading the page failed.
        driver.quit()

    # Parse the captured HTML and locate the historical data table
    soup = BeautifulSoup(page_source, "html.parser")
    table = soup.find("table")
    if table is None:
        raise ValueError(f"Table not found for ticker {ticker}. The page structure may have changed.")

    # Skip the first (header) row. The last row is no longer dropped
    # unconditionally: the seven-cell check below already rejects any row that
    # is not a full price row, so slicing it off could only lose real data.
    rows = table.find_all("tr")[1:]

    # Extract data into a list of dictionaries
    data = []
    for row in rows:
        cols = row.find_all("td")
        if len(cols) != 7:
            # Not a full price row (presumably a dividend/split notice or a
            # footnote -- confirm against the live page); ignore it.
            continue
        data.append({
            "Ticker": ticker,
            "Date": cols[0].text.strip(),
            "Open": cols[1].text.strip(),
            "High": cols[2].text.strip(),
            "Low": cols[3].text.strip(),
            "Close": cols[4].text.strip(),
            "Adj Close": cols[5].text.strip(),
            "Volume": cols[6].text.strip()
        })

    return data

# Define the list of stock tickers and time period
# NOTE(review): Meta's symbol reportedly changed from FB to META in 2022 --
# confirm that Yahoo still serves history under "FB" for this date range.
tickers = ["TSLA", "AAPL", "MSFT", "GOOGL", "AMZN", "FB"]
start_date = 1630454400  # Unix timestamp for September 1, 2021
end_date = 1661990400    # Unix timestamp for September 1, 2022

# Initialize an empty list to hold all data
all_data = []

# Scrape data for each ticker; one failing ticker must not abort the rest
for ticker in tickers:
    try:
        stock_data = scrape_stock_data(ticker, start_date, end_date)
    except Exception as e:
        print(f"An error occurred while scraping {ticker}: {e}")
        continue

    # A page that loads but yields no price rows is not a success.
    if not stock_data:
        print(f"No price rows were found for {ticker}; skipping.")
        continue

    all_data.extend(stock_data)
    print(f"Data for {ticker} scraped successfully.")

# Convert the combined data to a pandas DataFrame
df = pd.DataFrame(all_data)

if df.empty:
    # Avoid writing empty files and claiming they were saved.
    print("No data was scraped for any ticker; nothing was saved.")
else:
    # Save the combined data to CSV, Excel, and JSON
    df.to_csv("combined_historical_data.csv", index=False)
    df.to_excel("combined_historical_data.xlsx", index=False)
    df.to_json("combined_historical_data.json", orient="records", indent=4)

    print("Combined data saved to CSV, Excel, and JSON files.")


Data for TSLA scraped successfully.
Data for AAPL scraped successfully.
Data for MSFT scraped successfully.
Data for GOOGL scraped successfully.
Data for AMZN scraped successfully.
Data for FB scraped successfully.
Combined data saved to CSV, Excel, and JSON files.
