# EPS scraper
The script below scrapes historical EPS (earnings per share) data for stocks from Yahoo Finance.

## Saved data format
The data is saved to the EPS.csv file. The columns are as follows: Ticker, Earnings announcement date, EPS estimated, EPS reported, Surprise (%)

### Add the tickers to scrape to the list below

In [None]:
# Ticker symbols to scrape; the main loop passes each one to scrape_EPS in turn.
symbols = ["AAPL"]

In [1]:
import requests
import time
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv

# Build the Selenium Service from whatever ChromeDriverManager().install()
# returns (presumably the path to a chromedriver binary -- confirm against
# the webdriver_manager docs). Created once and reused when starting Chrome.
service = Service(ChromeDriverManager().install())

In [94]:
# Configure Chrome and start the browser session.
chrome_options = Options()
# Flag name suggests it disables Chrome's search-engine choice screen.
chrome_options.add_argument("--disable-search-engine-choice-screen")
# Run Chrome in headless mode.
chrome_options.add_argument("--headless")
# `driver` is a module-level global; scrape_EPS reads it directly.
driver = webdriver.Chrome(service=service, options=chrome_options)

In [92]:
def scrape_EPS(symbol, isFirst):
    """Scrape the earnings table for one ticker from Yahoo Finance.

    Uses the module-level ``driver`` to load the earnings-calendar page for
    ``symbol`` and reads the table cells located by their ``aria-label``.

    Args:
        symbol: Ticker symbol interpolated into the calendar URL.
        isFirst: True on the first page visit, when a cookie-consent
            "Accept" button is expected and gets clicked.

    Returns:
        A list of dicts, one per successfully scraped row, with the keys
        ``symbol``, ``earnings_date``, ``esp_estimate``, ``esp_reported``
        and ``esp_surprise``. Rows that fail are skipped after a message
        is printed.
    """
    # Local import: keeps the function self-contained without requiring a
    # change to the file's top-level import block.
    from selenium.common.exceptions import TimeoutException

    driver.get(f"https://finance.yahoo.com/calendar/earnings?symbol={symbol}")

    if isFirst:  # Accept cookies if it's the first visit
        try:
            accept_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept')]"))
            )
        except TimeoutException:
            # No consent button showed up within 10 s; carry on rather than
            # aborting the whole run on the first symbol.
            print("No cookie consent button found; continuing without accepting.")
        else:
            accept_button.click()

    # Fixed pause to give the page time to render the earnings table.
    time.sleep(4)

    date_cells = driver.find_elements(By.XPATH, '//td[@aria-label="Earnings Date"]')
    estimate_cells = driver.find_elements(By.XPATH, '//td[@aria-label="EPS Estimate"]')
    reported_cells = driver.find_elements(By.XPATH, '//td[@aria-label="Reported EPS"]')
    surprise_cells = driver.find_elements(By.XPATH, '//td[@aria-label="Surprise(%)"]')

    data = []
    for i, date_cell in enumerate(date_cells):
        try:
            earnings_date = date_cell.find_element(By.TAG_NAME, 'span').text.strip()
            # NOTE: the 'esp_*' key spelling is kept unchanged on purpose;
            # the keys are part of the returned data format.
            row = {
                'symbol': symbol,
                'earnings_date': earnings_date,
                'esp_estimate': estimate_cells[i].text,
                'esp_reported': reported_cells[i].text,
                'esp_surprise': surprise_cells[i].text,
            }
        except Exception as exc:
            # Best-effort per row, but unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            print(f"Scraping data for row {i} has failed: {type(exc).__name__}")
        else:
            data.append(row)
    return data

def save_to_csv(data, filename):
    """Write a list of row dicts to ``filename`` as a UTF-8 CSV file.

    The columns are the keys of the first row, in that order. When ``data``
    is empty the file contains only an empty header line.
    """
    if data:
        columns = list(data[0])
    else:
        columns = []

    with open(filename, mode='w', encoding='utf-8', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()
        for row in data:
            writer.writerow(row)

In [95]:
# Scrape every requested ticker and collect all rows into one list.
all_eps_data = []

# scrape_EPS only tries to accept the cookie dialog on the first visit.
isFirst = True
for symbol in symbols:
    if not isFirst:
        # Throttle between consecutive page loads; there is nothing to wait
        # for after the last symbol.
        time.sleep(5)
    print(f"Scraping data for {symbol}")
    symbol_data = scrape_EPS(symbol, isFirst)
    all_eps_data.extend(symbol_data)
    isFirst = False

# Write everything gathered above to a single CSV file.
save_to_csv(all_eps_data, "EPS.csv")

AAPL
Symbol: AAPL
