In [30]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import yfinance as yf
import time
from pathlib import Path
import os
import openpyxl
from sec_cik_mapper import StockMapper


In [2]:
# Get the current working directory
current_dir = os.getcwd()

# Resolve the parent directories
project_general_path = Path(current_dir).resolve()
print(project_general_path)

C:\Users\Adam Krupa\OneDrive\Pulpit\Investing\earnings-report-analysis


In [None]:
def get_tickers():
    # URL of the website containing the S&P 500 tickers
    url = 'https://www.slickcharts.com/sp500'

    # Fetch the page content with headers to avoid being blocked
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
    }
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find('table', {'class': 'table table-hover table-borderless table-sm'})
        tickers = []
        
        # Extract tickers from the table
        if table:
            for row in table.find('tbody').find_all('tr'):
                columns = row.find_all('td')
                ticker = columns[2].text.strip()  # The third column contains the ticker symbol
                tickers.append(ticker)
        else:
            print("Table not found in the page")
    else:
        print("Failed to fetch S&P 500 tickers")
        tickers = []

    return tickers
    


In [None]:
tickers = get_tickers()

In [19]:
tickers_short = tickers[:1]

In [None]:
def get_dates(tickers):
    # Initialize an empty DataFrame to store valid earnings data
    data = pd.DataFrame(columns=['Ticker', 'Date'])

    # Loop through each stock ticker
    for ticker_symbol in tickers:
        ticker = yf.Ticker(ticker_symbol)
        # Fetch the earnings history (past earnings reports)
        earnings_history = ticker.earnings_dates
        
        # Skip if no earnings data is available
        if earnings_history is None:
            print(f"{ticker_symbol}: possibly delisted; no earnings dates found")
            continue

        # Iterate through each earnings report and determine the correct date classification
        for index, row in earnings_history.iterrows():
            report_time = pd.to_datetime(row.name)  # Access the index (which is the earnings date) and convert it to datetime
            # if report_time.hour >= 16:  # After 4 PM, classify as next day because the trade based on this knowledge can effectively only be executed the day after
            #     adjusted_date = (report_time + pd.Timedelta(days=1)).date()
            # else:  # Otherwise, use the same day
            #     adjusted_date = report_time.date()

            # Add each adjusted earnings date as a separate row in the DataFrame
            new_row = {'Ticker': ticker_symbol, 'Date': report_time}
            data = pd.concat([data, pd.DataFrame([new_row])], ignore_index=True)

    return data


In [33]:
def get_CIK(data):
    mapper = StockMapper()
    ticker_to_cik = mapper.ticker_to_cik

    # Ensure tickers are in uppercase to match the mapping keys
    data['Ticker'] = data['Ticker'].str.upper()

    # Map the 'Ticker' column to CIK numbers
    data['CIK'] = data['Ticker'].map(ticker_to_cik)

    return data


In [None]:
data = get_dates(tickers_short)

  data = pd.concat([data, pd.DataFrame([new_row])], ignore_index=True)


In [35]:
print(data)

   Ticker                      Date         CIK
0    AAPL 2025-10-28 20:00:00-04:00  0000320193
1    AAPL 2025-07-29 20:00:00-04:00  0000320193
2    AAPL 2025-04-29 20:00:00-04:00  0000320193
3    AAPL 2025-01-29 19:00:00-05:00  0000320193
4    AAPL 2024-10-30 20:00:00-04:00  0000320193
5    AAPL 2024-07-31 20:00:00-04:00  0000320193
6    AAPL 2024-05-01 20:00:00-04:00  0000320193
7    AAPL 2024-01-31 19:00:00-05:00  0000320193
8    AAPL 2023-11-01 20:00:00-04:00  0000320193
9    AAPL 2023-08-02 20:00:00-04:00  0000320193
10   AAPL 2023-05-03 20:00:00-04:00  0000320193
11   AAPL 2023-02-01 19:00:00-05:00  0000320193


In [34]:
data = get_CIK(data)