In [1]:
import concurrent.futures
import json
import os
import time
from datetime import datetime, timedelta

import pandas as pd
import requests
from sec_api import ExtractorApi
from tqdm import tqdm

## List of Stocks to Analyze

| Sector | Stock 1 Name | Stock 1 Ticker | Stock 2 Name | Stock 2 Ticker |
|---|---|---|---|---|
| Technology | Apple | AAPL | Microsoft | MSFT |
| Healthcare | Johnson & Johnson | JNJ | UnitedHealth Group | UNH |
| Financials | JPMorgan Chase & Co. | JPM | Bank of America | BAC |
| Consumer Discretionary | Amazon.com | AMZN | Home Depot | HD |
| Industrials | Boeing | BA | Caterpillar | CAT |
| Consumer Staples | Procter & Gamble | PG | Coca-Cola | KO |
| Utilities | NextEra Energy | NEE | Duke Energy | DUK |
| Materials | Dow Inc. | DOW | Nucor Corporation | NUE |
| Communication Services | Verizon Communications | VZ | Comcast Corporation | CMCSA |
| Real Estate | American Tower Corporation | AMT | Prologis | PLD |
| Energy | Exxon Mobil Corporation | XOM | Chevron Corporation | CVX |

# Stock Data API Call

In [None]:
# Marketstack API key
# Read from the MARKETSTACK_API_KEY environment variable when it is set, so the
# real key never has to be typed into (or committed with) the notebook. The
# placeholder below is only the fallback used when the variable is absent.
API_KEY = os.environ.get("MARKETSTACK_API_KEY", "(Hidden)")

# List of Stocks to Analyze (the same tickers as the sector table above)
companies = [
    "AAPL", "MSFT", "JNJ", "UNH", "JPM", "BAC", "AMZN", "HD", "BA", "CAT", "PG", "KO", 
    "NEE", "DUK", "DOW", "NUE", "VZ", "CMCSA", "AMT", "PLD", "XOM", "CVX"
]

# Base URL for Marketstack API (end-of-day endpoint)
BASE_URL = "https://api.marketstack.com/v1/eod"

# Fetches data from the Marketstack API within a specified date range
def fetch_data(symbol, timeout=30):
    """Request the last 730 days of end-of-day records for one ticker.

    Args:
        symbol: Ticker sent as the ``symbols`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would freeze the whole download loop.

    Returns:
        The value stored under ``"data"`` in the JSON response, or ``None``
        when the response has no such key.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Read the clock once so both ends of the window derive from the same instant.
    now = datetime.now()
    params = {
        "access_key": API_KEY,
        "symbols": symbol,
        "date_from": (now - timedelta(days=730)).strftime('%Y-%m-%d'),
        "date_to": now.strftime('%Y-%m-%d'),
        "limit": 1000,
    }

    response = requests.get(BASE_URL, params=params, timeout=timeout)
    response.raise_for_status()  # Validates response status
    return response.json().get("data")

# Converts raw API response to a DataFrame with selected columns
def get_daily_data(symbol):
    """Build a tidy price table for one ticker.

    Calls ``fetch_data(symbol)``; when it yields nothing (``None`` or an empty
    list) the function returns ``None``. Otherwise the records become a
    DataFrame whose ``date`` column is parsed to datetimes, whose price/volume
    columns are capitalised, and which carries a constant ``Symbol`` column.

    Returns:
        A DataFrame with the columns ``date, Symbol, Open, High, Low, Close,
        Volume`` (in that order), or ``None`` when there is no data.
    """
    records = fetch_data(symbol)
    if not records:
        return None

    # Lower-case API field -> capitalised column name; insertion order is the output order.
    price_columns = {
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }

    frame = (
        pd.DataFrame(records)
        .assign(date=lambda raw: pd.to_datetime(raw["date"]), Symbol=symbol)
        .rename(columns=price_columns)
    )
    return frame[["date", "Symbol", *price_columns.values()]]

# Aggregates data for multiple stocks into a single DataFrame
def fetch_company_data(companies):
    """Download daily data for every ticker and stack the results.

    Args:
        companies: Iterable of ticker symbols; each is passed to
            ``get_daily_data``.

    Returns:
        One DataFrame holding the rows of every ticker that produced data, in
        the order the tickers were given. Each ticker keeps its own row index
        (indices are not renumbered). An empty DataFrame is returned when no
        ticker produced data.
    """
    # Collect the per-ticker frames and concatenate once at the end. Calling
    # pd.concat inside the loop re-copies everything gathered so far on every
    # iteration and concatenates onto an empty seed frame.
    frames = []
    for symbol in companies:
        print(f"Fetching daily data for {symbol}")
        daily_df = get_daily_data(symbol)
        if daily_df is not None:
            frames.append(daily_df)

    if not frames:
        # pd.concat raises on an empty list; keep the original empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames)

# Fetch, organize, and save stock data
if __name__ == "__main__":
    # Download every ticker listed in `companies` into one table.
    daily_data_df = fetch_company_data(companies)

    # Capitalise the date column, then order rows by ticker (A-Z) with the
    # most recent date first inside each ticker.
    daily_data_df = (
        daily_data_df
        .rename(columns={"date": "Date"})
        .sort_values(by=["Symbol", "Date"], ascending=[True, False])
    )

    # Write the result next to the notebook, leaving the row index out.
    daily_data_df.to_csv("daily_stock_data.csv", index=False)

Fetching daily data for AAPL
Fetching daily data for MSFT
Fetching daily data for JNJ
Fetching daily data for UNH
Fetching daily data for JPM
Fetching daily data for BAC
Fetching daily data for AMZN
Fetching daily data for HD
Fetching daily data for BA
Fetching daily data for CAT
Fetching daily data for PG
Fetching daily data for KO
Fetching daily data for NEE
Fetching daily data for DUK
Fetching daily data for DOW
Fetching daily data for NUE
Fetching daily data for VZ
Fetching daily data for CMCSA
Fetching daily data for AMT
Fetching daily data for PLD
Fetching daily data for XOM
Fetching daily data for CVX


In [3]:
# Preview the first five rows of the combined stock table
daily_data_df.head(5)

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume
0,2024-11-13 00:00:00+00:00,AAPL,223.95,226.65,222.76,225.12,47686733.0
1,2024-11-12 00:00:00+00:00,AAPL,224.55,225.59,223.36,224.23,40375300.0
2,2024-11-11 00:00:00+00:00,AAPL,225.0,225.7,221.5,224.23,41949000.0
3,2024-11-08 00:00:00+00:00,AAPL,227.17,228.66,226.41,226.96,38308700.0
4,2024-11-07 00:00:00+00:00,AAPL,224.63,227.88,224.57,227.48,42083800.0


In [4]:
# Export the combined stock table to CSV without the row index
# (the same file the fetch cell above already writes)
daily_data_df.to_csv(path_or_buf="daily_stock_data.csv", index=False)

# 8-K Disclosure API Call

In [None]:
# List of company tickers for SEC filings
company_tickers = [
    "AAPL", "MSFT", "JNJ", "UNH", "JPM", "BAC", "AMZN", "HD", "BA", "CAT", 
    "PG", "KO", "NEE", "DUK", "DOW", "NUE", "VZ", "CMCSA", "AMT", "PLD", 
    "XOM", "CVX"
]

# API configuration
# Prefer the SEC_API_KEY environment variable so the real key is never stored
# in the notebook; the placeholder is only the fallback.
SEC_API_KEY = os.environ.get("SEC_API_KEY", "Hidden")
# NOTE: API_KEY is also the name fetch_data() reads for the Marketstack key.
# The rebinding is kept so anything that relied on it keeps working, but this
# cell itself only uses SEC_API_KEY. Re-run the Marketstack configuration cell
# before calling fetch_data() again.
API_KEY = SEC_API_KEY
QUERY_API_ENDPOINT = "https://api.sec-api.io"
EXTRACTOR_API_ENDPOINT = "https://api.sec-api.io/extractor"

REQUEST_TIMEOUT = 60   # seconds to wait on any single HTTP call before giving up
QUERY_PAGE_SIZE = 50   # filings requested per query page
MAX_QUERY_PAGES = 20   # safety bound so paging can never loop forever

# Define sections to extract for 8-K filings
sections_8k = [
    "1-1", "1-2", "1-3", "1-4", "1-5", "2-1", "2-2", "2-3", "2-4", "2-5", "2-6",
    "3-1", "3-2", "3-3", "4-1", "4-2", "5-1", "5-2", "5-3", "5-4", "5-5", "5-6", 
    "5-7", "5-8", "6-1", "6-2", "6-3", "6-4", "6-5", "6-6", "6-10", "7-1", "8-1", "9-1"
]

# Column order of the final table; also used to give an empty result a schema
EIGHT_K_COLUMNS = [
    "ticker", "filing_type", "filing_url", "accession_number",
    "disclosure_date", "section", "content",
]

# Calculate the date two years ago (clock read once so both bounds agree)
_now = datetime.now()
two_years_ago = (_now - timedelta(days=2*365)).strftime('%Y-%m-%d')
current_date = _now.strftime('%Y-%m-%d')
date_range = f"filedAt:[{two_years_ago} TO {current_date}]"


def _fetch_8k_filings(session, ticker, filed_range):
    """Return the metadata of every 8-K `ticker` filed inside `filed_range`.

    Results are requested newest first, QUERY_PAGE_SIZE at a time, advancing
    the `from` offset until a page comes back short. A single request would
    silently drop everything past the first 50 filings.

    Raises:
        requests.HTTPError: If the query endpoint answers with an error status.
        requests.Timeout: If a page does not arrive within REQUEST_TIMEOUT.
    """
    filings = []
    for page_index in range(MAX_QUERY_PAGES):
        query = {
            "query": f"ticker:{ticker} AND formType:\"8-K\" AND {filed_range}",
            "from": str(page_index * QUERY_PAGE_SIZE),
            "size": str(QUERY_PAGE_SIZE),
            "sort": [{"filedAt": {"order": "desc"}}],
        }
        response = session.post(
            QUERY_API_ENDPOINT,
            json=query,
            headers={"Authorization": SEC_API_KEY},
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly: an error payload has no "filings" key and would
        # otherwise look exactly like "this ticker filed nothing".
        response.raise_for_status()
        page = response.json().get("filings", [])
        filings.extend(page)
        if len(page) < QUERY_PAGE_SIZE:
            break
    return filings


def _extract_section(session, filing_url, section_code):
    """Return the stripped text of one 8-K item, or None when there is none.

    None is returned when the request fails, when the body is empty, when the
    body is the literal text "processing", or when the server answers with an
    error status. In the last case the body is an error message rather than
    filing text, so it must not be stored as content.
    """
    try:
        response = session.get(
            EXTRACTOR_API_ENDPOINT,
            params={
                "url": filing_url,
                "item": section_code,
                "type": "text",
                "token": SEC_API_KEY,
            },
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        # One failed section should not abort a run that takes hours.
        tqdm.write(f"Skipping item {section_code} of {filing_url}: {exc}")
        return None

    text = response.text.strip()  # Remove extra whitespace
    if not text or text.lower() == "processing":
        return None
    if not response.ok:
        tqdm.write(f"Skipping item {section_code} of {filing_url}: HTTP {response.status_code}")
        return None
    return text


# List to collect extracted data
extracted_8k_data = []

# One session for the whole run so connections to the API host are reused
with requests.Session() as session:
    # Outer loop to iterate over each ticker with progress bar
    for ticker in tqdm(company_tickers, desc="Processing Tickers"):
        # Retrieve all 8-K filings of the last 2 years for this ticker
        filings_8k = _fetch_8k_filings(session, ticker, date_range)

        # Inner loop to extract sections for each 8-K filing with progress bar
        for filing in tqdm(filings_8k, desc=f"Extracting Sections for {ticker}", leave=False):
            filing_detail_url = filing["linkToFilingDetails"]
            accession_number = filing["accessionNo"]
            disclosure_date = filing["filedAt"]

            for section_code in sections_8k:
                section_content = _extract_section(session, filing_detail_url, section_code)
                if section_content is None:
                    continue
                extracted_8k_data.append({
                    "ticker": ticker,
                    "filing_type": "8-K",
                    "filing_url": filing_detail_url,
                    "accession_number": accession_number,
                    "disclosure_date": disclosure_date,
                    "section": section_code,
                    "content": section_content
                })

# Convert collected data to DataFrame; naming the columns keeps the schema
# intact even when nothing was extracted, so later cells can still index it.
eight_k_sections_df = pd.DataFrame(extracted_8k_data, columns=EIGHT_K_COLUMNS)


Processing Tickers:   0%|          | 0/22 [00:00<?, ?it/s]
Extracting Sections for AAPL:   0%|          | 0/15 [00:00<?, ?it/s][A
Extracting Sections for AAPL:   7%|▋         | 1/15 [00:25<05:57, 25.56s/it][A
Extracting Sections for AAPL:  13%|█▎        | 2/15 [00:50<05:26, 25.14s/it][A
Extracting Sections for AAPL:  20%|██        | 3/15 [01:19<05:24, 27.00s/it][A
Extracting Sections for AAPL:  27%|██▋       | 4/15 [01:44<04:48, 26.23s/it][A
Extracting Sections for AAPL:  33%|███▎      | 5/15 [02:14<04:36, 27.70s/it][A
Extracting Sections for AAPL:  40%|████      | 6/15 [02:42<04:08, 27.64s/it][A
Extracting Sections for AAPL:  47%|████▋     | 7/15 [03:09<03:38, 27.30s/it][A
Extracting Sections for AAPL:  53%|█████▎    | 8/15 [03:36<03:10, 27.24s/it][A
Extracting Sections for AAPL:  60%|██████    | 9/15 [04:03<02:43, 27.33s/it][A
Extracting Sections for AAPL:  67%|██████▋   | 10/15 [04:29<02:14, 26.97s/it][A
Extracting Sections for AAPL:  73%|███████▎  | 11/15 [04:58<01:49, 2

In [11]:
# Swap the hyphen in each section code for a period (literal match, not a
# regex), e.g. "2-2" becomes "2.2" and "6-10" becomes "6.10"
dotted_sections = eight_k_sections_df['section'].str.replace('-', '.', regex=False)
eight_k_sections_df['section'] = dotted_sections

# Preview the first five rows
eight_k_sections_df.head(5)

Unnamed: 0,ticker,filing_type,filing_url,accession_number,disclosure_date,section,content
0,AAPL,8-K,https://www.sec.gov/Archives/edgar/data/320193...,0000320193-24-000120,2024-10-31T16:30:25-04:00,2.2,Item 2.02 Results of Operations and Financial ...
1,AAPL,8-K,https://www.sec.gov/Archives/edgar/data/320193...,0000320193-24-000120,2024-10-31T16:30:25-04:00,9.1,Item 9.01 Financial Statements and Exhibits. \...
2,AAPL,8-K,https://www.sec.gov/Archives/edgar/data/320193...,0001140361-24-040659,2024-09-10T09:06:34-04:00,7.1,Item 7.01 Regulation FD Disclosure.\n\nOn Augu...
3,AAPL,8-K,https://www.sec.gov/Archives/edgar/data/320193...,0001140361-24-038601,2024-08-26T17:20:33-04:00,5.2,Item 5.02 Departure of Directors or Certain Of...
4,AAPL,8-K,https://www.sec.gov/Archives/edgar/data/320193...,0001140361-24-038403,2024-08-23T16:30:44-04:00,5.3,Item 5.03 Amendments to Articles of Incorporat...


In [12]:
# Export the extracted 8-K sections to CSV without the row index
eight_k_sections_df.to_csv(path_or_buf="form_8k_disclosures.csv", index=False)