# EPS scraper
The script below scrapes historical EPS (earnings per share) data for stocks from Yahoo Finance.

## Saved data format
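The data is saved to the `datasets/EPS.csv` file. The columns are as follows: ticker (`symbol`), earnings announcement date (`earnings_date`), estimated EPS (`eps_estimate`), reported EPS (`eps_reported`), and surprise in % (`eps_surprise`).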

### 1. Tickers
The list of S&P 500 tickers is fetched from Wikipedia.

In [4]:
import pandas as pd
# The first table on the Wikipedia page is taken as the S&P 500 constituents list.
tickers = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
# Wikipedia writes share classes with a dot (e.g. "BRK.B"), whereas Yahoo
# Finance uses a hyphen ("BRK-B"). Normalise the separator so that the
# per-symbol Yahoo lookups further down resolve for those tickers as well.
symbols = tickers.Symbol.str.replace('.', '-', regex=False)

### 2. Imports and helper function

In [5]:
import requests
import time
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv

# Resolve the chromedriver binary through webdriver_manager and hand the
# resulting path to Selenium's Chrome service.
service = Service(executable_path=ChromeDriverManager().install())

def save_to_csv(data, filename):
    """Write a list of row dicts to ``filename`` as a UTF-8 CSV file.

    The header is taken from the keys of the first row, so every row is
    expected to share those keys. Any missing parent directory of
    ``filename`` is created first, so a long scrape is not lost at the very
    end just because the output folder does not exist yet.

    Args:
        data: List of dicts, one per CSV row. May be empty, in which case
            only an (empty) header line is written.
        filename: Path of the CSV file to create or overwrite.
    """
    # Local import: keeps this helper self-contained without touching the
    # notebook's import cell.
    import os

    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    fieldnames = list(data[0].keys()) if data else []
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(data)

### 3. Run Chrome

In [13]:
# Start a headless Chrome session with the search-engine choice screen disabled.
chrome_options = Options()
for _chrome_flag in ("--disable-search-engine-choice-screen", "--headless"):
    chrome_options.add_argument(_chrome_flag)
driver = webdriver.Chrome(options=chrome_options, service=service)

### 4. Main function

In [7]:
def scrape_EPS(symbol, isFirst):
    """Scrape the Yahoo Finance earnings-calendar rows for one ticker.

    Loads ``https://finance.yahoo.com/calendar/earnings?symbol=<symbol>`` in
    the module-level ``driver`` and reads the table cells labelled
    "Earnings Date", "EPS Estimate", "Reported EPS" and "Surprise(%)".

    Args:
        symbol: Ticker symbol to look up.
        isFirst: When true, try to dismiss the cookie-consent dialog before
            reading the page (intended for the first page load of a session).

    Returns:
        A list of dicts, one per table row that could be read, with the keys
        ``symbol``, ``earnings_date``, ``eps_estimate``, ``eps_reported`` and
        ``eps_surprise``. All scraped values are the raw cell texts. Rows
        that cannot be read are skipped and reported via ``print``.
    """
    # Local import so this fix does not require editing the import cell;
    # selenium is already a dependency of this notebook.
    from selenium.common.exceptions import TimeoutException

    driver.get(f"https://finance.yahoo.com/calendar/earnings?symbol={symbol}")

    if isFirst:  # Accept cookies if it's the first visit
        try:
            accept_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept')]"))
            )
        except TimeoutException:
            # The consent dialog may not appear at all (e.g. consent not
            # required or already stored); that must not abort the whole run.
            print("Cookie consent button not found; continuing without it.")
        else:
            accept_button.click()

    # Fixed pause, presumably to give the earnings table time to render.
    time.sleep(4)

    earnings_date_elements = driver.find_elements(By.XPATH, '//td[@aria-label="Earnings Date"]')
    eps_estimate_elements = driver.find_elements(By.XPATH, '//td[@aria-label="EPS Estimate"]')
    eps_reported_elements = driver.find_elements(By.XPATH, '//td[@aria-label="Reported EPS"]')
    eps_surprise_elements = driver.find_elements(By.XPATH, '//td[@aria-label="Surprise(%)"]')

    data = []
    for i, element in enumerate(earnings_date_elements):
        try:
            span = element.find_element(By.TAG_NAME, 'span')
            earnings_date = span.text.strip()
            # Columns are matched to the date cell by position; a shorter
            # column raises IndexError and the row is skipped below.
            eps_estimate = eps_estimate_elements[i].text
            eps_reported = eps_reported_elements[i].text
            eps_surprise = eps_surprise_elements[i].text
        except Exception:
            # Deliberately best-effort per row, but no longer a bare
            # ``except`` so Ctrl-C / SystemExit still stop the scraper.
            print(f"Scraping data for row {i} has failed.")
            continue

        data.append({
            'symbol': symbol,
            'earnings_date': earnings_date,
            'eps_estimate': eps_estimate,
            'eps_reported': eps_reported,
            'eps_surprise': eps_surprise,
        })
    return data

### 5. Program execution

In [None]:
# Scrape every symbol in turn and write the combined rows to datasets/EPS.csv.
all_eps_data = []

isFirst = True  # only the first page load needs the cookie dialog handled
scrape_completed = False
try:
    for symbol in symbols:
        symbol_data = scrape_EPS(symbol, isFirst)
        all_eps_data.extend(symbol_data)
        isFirst = False
        time.sleep(5)  # pause between symbols
    scrape_completed = True
finally:
    # The full run takes a long time; if it is interrupted or a page fails,
    # still persist whatever was collected instead of discarding it. When
    # the run failed before collecting anything, the file is left untouched
    # so an earlier result is not replaced by an empty one.
    if scrape_completed or all_eps_data:
        save_to_csv(all_eps_data, "datasets/EPS.csv")