In [4]:
import requests
import pandas as pd
from typing import List, Dict, Any
import time
import json

class SECDataExtractor:
    """Fetch recent 10-K / 10-Q filing metadata from sec-api.io and export it.

    The API token is sent as the ``token`` query parameter but is never
    printed, so the cell output can be shared without exposing it.
    """

    # Endpoint the search query is POSTed to (token is added via ``params``).
    QUERY_URL = "https://api.sec-api.io/"

    # Seconds to wait after a 429 when no usable Retry-After header is present.
    DEFAULT_RETRY_AFTER = 60

    def __init__(self, api_key: str):
        self.api_key = api_key

    def _redact(self, text: str) -> str:
        """Return ``text`` with the API token masked out."""
        if not self.api_key:
            return text
        return text.replace(self.api_key, "***")

    @staticmethod
    def _retry_delay(response, default: int) -> int:
        """Read Retry-After as whole seconds, falling back to ``default``."""
        raw = response.headers.get('Retry-After')
        try:
            return max(0, int(raw))
        except (TypeError, ValueError):
            # Header missing, or given as an HTTP date rather than seconds.
            return default

    def get_filings(self, limit: int = 200, max_retries: int = 3,
                    timeout: float = 30.0) -> List[Dict[str, Any]]:
        """
        Fetch SEC filings using the EDGAR API endpoint.

        Args:
            limit (int): Number of filings to retrieve (default: 200)
            max_retries (int): How many times a rate-limited (HTTP 429)
                request is retried before giving up (default: 3)
            timeout (float): Seconds to wait for the server before the
                request is abandoned (default: 30)

        Returns:
            List[Dict]: List of filing data; empty when the request fails
            or the response cannot be decoded.
        """
        # Construct the query according to documentation
        payload = {
            "query": "formType:\"10-K\" OR formType:\"10-Q\"",
            "from": 0,
            "size": limit,
            "sort": [{"filedAt": {"order": "desc"}}]
        }
        headers = {
            'Content-Type': 'application/json'
        }

        # Log the endpoint only: the token must not end up in notebook output.
        print(f"Making request to: {self.QUERY_URL}")
        print(f"With payload: {json.dumps(payload, indent=2)}")

        try:
            retries = 0
            while True:
                response = requests.post(
                    self.QUERY_URL,
                    params={'token': self.api_key},
                    headers=headers,
                    json=payload,
                    timeout=timeout,
                )
                # Bounded retry loop instead of unbounded recursion on 429.
                if response.status_code != 429 or retries >= max_retries:
                    break
                retries += 1
                retry_after = self._retry_delay(response, self.DEFAULT_RETRY_AFTER)
                print(f"Rate limit hit. Waiting {retry_after} seconds...")
                time.sleep(retry_after)

            response.raise_for_status()
            data = response.json()

        except requests.exceptions.RequestException as e:
            # HTTP error messages embed the full URL, token included.
            print(f"Error fetching SEC filings: {self._redact(str(e))}")
            # ``response`` is None for connection errors and timeouts.
            failed_response = getattr(e, 'response', None)
            if failed_response is not None:
                print(f"Status code: {failed_response.status_code}")
                if hasattr(failed_response, 'text'):
                    print(f"Response text: {failed_response.text}")
            return []
        except ValueError as e:
            # Body was not valid JSON.
            print(f"Error decoding SEC filings response: {e}")
            return []

        # Process the filings
        raw_filings = (data.get('filings') or []) if isinstance(data, dict) else []
        return [
            {
                'accessionNo': filing.get('accessionNo', ''),
                'cik': filing.get('cik', ''),
                'ticker': filing.get('ticker', ''),
                'companyName': filing.get('companyName', ''),
                'formType': filing.get('formType', ''),
                'filedAt': filing.get('filedAt', ''),
                'filing_url': filing.get('linkToFilingDetails', '')
            }
            for filing in raw_filings
        ]

    def save_to_csv(self, filings: List[Dict[str, Any]], filename: str = 'sec_filings.csv'):
        """
        Save the filing data to a CSV file.

        Args:
            filings (List[Dict]): List of filing data
            filename (str): Output filename
        """
        if not filings:
            print("No data to save")
            return

        df = pd.DataFrame(filings)
        df.to_csv(filename, index=False)
        print(f"Data saved to {filename}")

def main():
    """Fetch the latest filings, print a short sample and save them to CSV.

    The sec-api.io token is read from the ``SEC_API_KEY`` environment
    variable so that it is never stored in the notebook. Any token that was
    previously committed in this file should be treated as compromised and
    rotated.
    """
    import os  # local import: this cell's import block does not bring in os

    api_key = os.environ.get("SEC_API_KEY", "").strip()
    if not api_key:
        print("SEC_API_KEY environment variable is not set.")
        print("Set it to your sec-api.io token and run this cell again.")
        return

    # Initialize the extractor
    extractor = SECDataExtractor(api_key)

    # Get the filings
    print("Fetching SEC filings...")
    filings = extractor.get_filings(limit=200)

    if filings:
        print(f"\nRetrieved {len(filings)} filings")

        # Display first few entries
        print("\nSample of retrieved data:")
        for filing in filings[:5]:
            print(f"\nAccession No: {filing['accessionNo']}")
            print(f"CIK: {filing['cik']}")
            print(f"Ticker: {filing['ticker']}")
            print(f"Company: {filing['companyName']}")
            print(f"Form Type: {filing['formType']}")
            print(f"Filed At: {filing['filedAt']}")
            print(f"Filing URL: {filing['filing_url']}")

        # Save to CSV
        extractor.save_to_csv(filings)
    else:
        print("\nNo filings retrieved. Make sure:")
        print("1. Your API key is valid")
        print("2. You have an active subscription")
        print("3. You're not exceeding rate limits")

# Entry point for this cell: main() runs only when __name__ is "__main__",
# i.e. not when the code is imported as a module.
if __name__ == "__main__":
    main()

Fetching SEC filings...
Making request to: https://api.sec-api.io/?token=<REDACTED>
With payload: {
  "query": "formType:\"10-K\" OR formType:\"10-Q\"",
  "from": 0,
  "size": 200,
  "sort": [
    {
      "filedAt": {
        "order": "desc"
      }
    }
  ]
}

Retrieved 200 filings

Sample of retrieved data:

Accession No: 0001628280-25-005486
CIK: 1702780
Ticker: ATUS
Company: Altice USA, Inc.
Form Type: 10-K
Filed At: 2025-02-13T17:29:28-05:00
Filing URL: https://www.sec.gov/Archives/edgar/data/1702780/000162828025005486/atus-20241231.htm

Accession No: 0000927066-25-000012
CIK: 927066
Ticker: DVA
Company: DAVITA INC.
Form Type: 10-K
Filed At: 2025-02-13T17:25:47-05:00
Filing URL: https://www.sec.gov/Archives/edgar/data/927066/000092706625000012/dva-20241231.htm

Accession No: 0001411685-25-000019
CIK: 1411685
Ticker: VTGN
Company: Vistagen Therapeutics, Inc.
Form Type: 10-Q
Filed At: 2025-02-13T17:25:15-05:00
Filing URL: https://www.sec.gov/Archives/edgar/data/1411685/000141168525000019/vtgn-20241231.htm

In [1]:
import pandas as pd
import requests
import os
from typing import Optional
import time
from urllib.parse import quote

class SECPDFDownloader:
    """Download SEC filings as PDF files through the sec-api.io filing reader.

    Failed downloads are collected in ``failed_downloads`` as
    ``(filing_url, error_message)`` tuples, with the API token masked out of
    the message.
    """

    # Seconds to wait after a 429 when no usable Retry-After header is present.
    DEFAULT_RETRY_AFTER = 60

    def __init__(self, api_key: str):
        self.api_key = api_key
        self.base_url = "https://api.sec-api.io/filing-reader"
        self.failed_downloads = []

    def _redact(self, text: str) -> str:
        """Return ``text`` with the API token masked out."""
        if not self.api_key:
            return text
        return text.replace(self.api_key, "***")

    @staticmethod
    def _retry_delay(response, default: int) -> int:
        """Read Retry-After as whole seconds, falling back to ``default``."""
        raw = response.headers.get('Retry-After')
        try:
            return max(0, int(raw))
        except (TypeError, ValueError):
            # Header missing, or given as an HTTP date rather than seconds.
            return default

    @staticmethod
    def _default_filename(filing_url: str) -> str:
        """Build ``filing_<document>.pdf`` from the last URL path segment."""
        stem = filing_url.split('/')[-1]
        # Strip a trailing .htm/.html only; a plain replace('.htm', '') would
        # turn "report.html" into "reportl".
        for suffix in ('.html', '.htm'):
            if stem.lower().endswith(suffix):
                stem = stem[:-len(suffix)]
                break
        return f"filing_{stem}.pdf"

    def download_pdf(self, filing_url: str, output_dir: str, filename: Optional[str] = None,
                     max_retries: int = 3, timeout: float = 60.0) -> bool:
        """
        Download a filing as PDF using the SEC API.

        Args:
            filing_url (str): URL of the filing
            output_dir (str): Directory to save the PDF
            filename (str, optional): Custom filename for the PDF; path
                separators in it are replaced with '-' so the file always
                lands directly inside ``output_dir``
            max_retries (int): How many times a rate-limited (HTTP 429)
                request is retried before giving up (default: 3)
            timeout (float): Seconds to wait for the server before the
                request is abandoned (default: 60)

        Returns:
            bool: True if download successful, False otherwise
        """
        try:
            # Construct the API URL
            params = {
                'token': self.api_key,
                'url': filing_url
            }

            print(f"Downloading: {filing_url}")

            retries = 0
            while True:
                response = requests.get(self.base_url, params=params, timeout=timeout)
                # Bounded retry loop instead of unbounded recursion on 429.
                if response.status_code != 429 or retries >= max_retries:
                    break
                retries += 1
                retry_after = self._retry_delay(response, self.DEFAULT_RETRY_AFTER)
                print(f"Rate limit hit. Waiting {retry_after} seconds...")
                time.sleep(retry_after)

            response.raise_for_status()

            # Generate filename if not provided
            if not filename:
                filename = self._default_filename(filing_url)

            # A form type such as "10-K/A" would otherwise be read as a
            # sub-directory and make open() fail.
            filename = filename.replace('/', '-').replace('\\', '-')

            # Ensure .pdf extension
            if not filename.endswith('.pdf'):
                filename += '.pdf'

            # Create output directory if it doesn't exist
            os.makedirs(output_dir, exist_ok=True)

            # Save the PDF
            output_path = os.path.join(output_dir, filename)
            with open(output_path, 'wb') as f:
                f.write(response.content)

            print(f"Successfully downloaded: {output_path}")
            return True

        except requests.exceptions.RequestException as e:
            # HTTP error messages embed the request URL, token included.
            message = self._redact(str(e))
            print(f"Error downloading {filing_url}: {message}")
            failed_response = getattr(e, 'response', None)
            if failed_response is not None and hasattr(failed_response, 'text'):
                print(f"Response text: {failed_response.text}")
            self.failed_downloads.append((filing_url, message))
            return False
        except Exception as e:
            message = self._redact(str(e))
            print(f"Unexpected error downloading {filing_url}: {message}")
            self.failed_downloads.append((filing_url, message))
            return False

def _safe_filename_part(value, replacement: str = "", fallback: str = "unknown") -> str:
    """Keep only letters, digits, space, '-' and '_' from a CSV cell value."""
    if pd.isna(value):
        return fallback
    cleaned = "".join(
        ch if (ch.isalnum() or ch in (' ', '-', '_')) else replacement
        for ch in str(value)
    )
    return cleaned or fallback


def main():
    """Download every filing listed in ``sec_filings.csv`` as a PDF.

    The sec-api.io token is read from the ``SEC_API_KEY`` environment
    variable so that it is never stored in the notebook. Any token that was
    previously committed in this file should be treated as compromised and
    rotated.
    """
    api_key = os.environ.get("SEC_API_KEY", "").strip()
    if not api_key:
        print("SEC_API_KEY environment variable is not set.")
        print("Set it to your sec-api.io token and run this cell again.")
        return

    # Create downloader instance
    downloader = SECPDFDownloader(api_key)

    # Output directory for PDFs
    output_dir = "sec_filings_pdf"

    try:
        # Read the CSV file
        df = pd.read_csv('sec_filings.csv')

        print(f"Found {len(df)} filings in CSV file")

        # Create counters
        successful = 0
        failed = 0
        skipped = 0

        # Download each filing
        for index, row in df.iterrows():
            try:
                filing_url = row['filing_url']
                if pd.isna(filing_url) or not filing_url:
                    print(f"Skipping row {index}: No filing URL")
                    skipped += 1
                    continue

                # Create filename using company name, form type and filing date.
                # Every part is sanitised: a form type such as "10-K/A" would
                # otherwise be read as a directory, and empty CSV cells come
                # back from pandas as NaN rather than as strings.
                safe_company_name = _safe_filename_part(row['companyName'])
                safe_form_type = _safe_filename_part(row['formType'], replacement="-")
                filed_date = _safe_filename_part(row['filedAt'], fallback="unknown-date")[:10]
                filename = f"{safe_company_name}_{safe_form_type}_{filed_date}.pdf"

                # Download the PDF
                if downloader.download_pdf(filing_url, output_dir, filename):
                    successful += 1
                else:
                    failed += 1

                # Add a small delay to avoid rate limits
                time.sleep(1)

            except Exception as e:
                print(f"Error processing row {index}: {e}")
                failed += 1
                continue

        print(f"\nDownload summary:")
        print(f"Successfully downloaded: {successful} / {len(df)} filings")
        print(f"Failed downloads: {failed}")
        print(f"Skipped (no filing URL): {skipped}")
        print(f"PDFs saved in: {os.path.abspath(output_dir)}")

        if downloader.failed_downloads:
            print("\nFailed downloads details:")
            for url, error in downloader.failed_downloads:
                print(f"URL: {url}")
                print(f"Error: {error}\n")

    except FileNotFoundError:
        print("Error: sec_filings.csv not found in the current directory!")
        print(f"Current directory: {os.getcwd()}")
        print("Please make sure the CSV file exists in this location.")
    except pd.errors.EmptyDataError:
        print("Error: The CSV file is empty!")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Entry point for this cell: main() runs only when __name__ is "__main__",
# i.e. not when the code is imported as a module.
if __name__ == "__main__":
    main()

Found 200 filings in CSV file
Downloading: https://www.sec.gov/Archives/edgar/data/1702780/000162828025005486/atus-20241231.htm
Successfully downloaded: sec_filings_pdf\Altice USA Inc_10-K_2025-02-13.pdf
Downloading: https://www.sec.gov/Archives/edgar/data/927066/000092706625000012/dva-20241231.htm
Successfully downloaded: sec_filings_pdf\DAVITA INC_10-K_2025-02-13.pdf
Downloading: https://www.sec.gov/Archives/edgar/data/1411685/000141168525000019/vtgn-20241231.htm
Successfully downloaded: sec_filings_pdf\Vistagen Therapeutics Inc_10-Q_2025-02-13.pdf
Downloading: https://www.sec.gov/Archives/edgar/data/704172/000149315225006484/formnt10-q.htm
Successfully downloaded: sec_filings_pdf\PHI GROUP INC_NT 10-Q_2025-02-13.pdf
Downloading: https://www.sec.gov/Archives/edgar/data/1506307/000150630725000008/kmi-20241231.htm
Successfully downloaded: sec_filings_pdf\KINDER MORGAN INC_10-K_2025-02-13.pdf
Downloading: https://www.sec.gov/Archives/edgar/data/1409970/000140997025000009/lc-20241231.htm