In [20]:
import io
import sqlite3 as sq
import time

import pandas as pd
import requests

In [21]:
# Load the symbol sheet; note the source is an Excel workbook even though the
# variable is named `tickers_csv` (name kept so later cells keep working).
tickers_csv = pd.read_excel('Copy of NYSE_Stocks.xlsx')

# Drop blank cells (parsed as NaN, which would otherwise become the URL
# ticker "nan"), trim stray whitespace, and remove repeats while preserving
# sheet order, so the count printed below really is the number of unique tickers.
tickers_list = (
    tickers_csv['Symbol']
    .dropna()
    .astype(str)
    .str.strip()
    .drop_duplicates()
    .tolist()
)

ticker_count = len(tickers_list)
print(f"Number of unique tickers: {ticker_count}")

Number of unique tickers: 8970


In [22]:
def get_earnings_history_for_ticker(ticker, timeout=10):
    """Scrape the "Earnings History" table for one ticker from Yahoo Finance.

    Parameters
    ----------
    ticker : str
        Symbol interpolated into the Yahoo Finance analysis-page URL.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up. Without a
        timeout a stalled connection would block the whole batch forever.

    Returns
    -------
    pandas.DataFrame
        The first parsed table that has an ``'Earnings History'`` column,
        with its leading columns renamed to ``Metrics``, ``Q-4``, ``Q-3``,
        ``Q-2``, ``Q-1`` and a trailing ``Ticker`` column holding ``ticker``.
        An empty DataFrame is returned when the request fails, the page has
        no tables, or no table carries that column.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    url = f'https://finance.yahoo.com/quote/{ticker}/analysis?p={ticker}'

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable page must not abort a run over thousands of tickers.
        print(f"Request failed for ticker {ticker}: {exc}")
        return pd.DataFrame()

    try:
        # Wrap the markup in StringIO: passing literal HTML strings to
        # read_html is deprecated in recent pandas releases.
        tables = pd.read_html(io.StringIO(response.text))
    except ValueError:
        # read_html raises ValueError when the page contains no <table>.
        print(f"No tables found for ticker {ticker}")
        return pd.DataFrame()

    standard_names = ['Metrics', 'Q-4', 'Q-3', 'Q-2', 'Q-1']
    for table in tables:
        if 'Earnings History' in table.columns:
            # zip() stops at the shorter sequence, so a table with fewer
            # than five columns is renamed as far as it goes instead of
            # raising IndexError or clobbering the Ticker column.
            columns_mapping = dict(zip(table.columns, standard_names))
            # rename/assign return a new frame; the parsed table is not mutated.
            return table.rename(columns=columns_mapping).assign(Ticker=ticker)

    return pd.DataFrame()  # Return empty dataframe if table not found

def get_data_for_tickers(ticker_list, sleep_duration=5):
    """Fetch earnings history for several tickers and stack the results.

    Parameters
    ----------
    ticker_list : iterable of str
        Symbols to look up, processed in order.
    sleep_duration : float, optional
        Seconds to pause between consecutive requests (default 5). No pause
        is made before the first request or after the last one.

    Returns
    -------
    pandas.DataFrame
        All non-empty per-ticker frames concatenated with a fresh integer
        index, or an empty DataFrame when nothing was retrieved.
    """
    frames = []

    for position, ticker in enumerate(ticker_list):
        # Throttle only *between* requests; sleeping after the final ticker
        # would just delay the return.
        if position:
            time.sleep(sleep_duration)

        ticker_frame = get_earnings_history_for_ticker(ticker)
        if not ticker_frame.empty:
            frames.append(ticker_frame)

    # pd.concat raises on an empty list, so handle "nothing found" explicitly.
    if not frames:
        return pd.DataFrame()

    # Concatenate once: growing a frame inside the loop re-copies every
    # previously collected row on each iteration.
    return pd.concat(frames, ignore_index=True)

In [23]:
# Trial run: scrape only the first 20 symbols, then display the combined frame.
df = get_data_for_tickers(tickers_list[:20])
df

No tables found for ticker AAC
No tables found for ticker AACI
No tables found for ticker AACT
No tables found for ticker AAMC


Unnamed: 0,Metrics,Q-4,Q-3,Q-2,Q-1,Ticker
0,EPS Est.,1.39,1.3,1.26,1.36,A
1,EPS Actual,1.53,1.37,1.27,1.43,A
2,Difference,0.14,0.07,0.01,0.07,A
3,Surprise %,10.10%,5.40%,0.80%,5.10%,A
4,EPS Est.,0.19,-0.92,-0.11,-0.52,AA
5,EPS Actual,-0.33,-0.7,-0.23,-0.35,AA
6,Difference,-0.52,0.22,-0.12,0.17,AA
7,Surprise %,-273.70%,23.90%,-109.10%,32.70%,AA
8,EPS Est.,,-0.15,-0.27,-0.25,AACG
9,EPS Actual,,-0.13,-0.01,0.08,AACG


In [24]:
# Connect to SQLite database (will be created if doesn't exist)
conn = sq.connect('earnings_data.db')
try:
    # Write the frame to the "earnings_data" table, replacing any earlier copy
    df.to_sql('earnings_data', conn, if_exists='replace', index=False)
finally:
    # Close the database connection even if the write above fails
    conn.close()

In [26]:
# Export the same rows to Excel. index=False matches the SQLite export above:
# the frame's index is just a running row number, not data worth a column.
df.to_excel('earnings.xlsx', engine='xlsxwriter', index=False)