In [None]:
# !pip install sec_edgar_downloader

In [1]:
import os
import requests
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
FMP_API_KEY = os.getenv("FMP_API_KEY")

In [None]:
FMP_API_KEY

In [40]:

import pdfkit
# Ensure the directory exists
os.makedirs('data', exist_ok=True)

# Path to the wkhtmltopdf executable
path_to_wkhtmltopdf = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'

# URL of the SEC filing
url = 'https://www.sec.gov/Archives/edgar/data/1018724/000095010901500823/d10q.htm'

# Path to save the PDF
pdf_path = 'data/sec_filing.pdf'

# Configuration
config = pdfkit.configuration(wkhtmltopdf=path_to_wkhtmltopdf)

# Convert the URL to PDF
try:
    pdfkit.from_url(url, pdf_path, configuration=config)
    print(f"PDF generated and saved at {pdf_path}")
except Exception as e:
    print(f"PDF generation failed: {e}")


PDF generated and saved at data/sec_filing.pdf


In [9]:
#!/usr/bin/env python
import os
import json
import re
import pdfkit
import logging
from datetime import datetime

try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

import certifi

logging.basicConfig(level=logging.INFO)

def get_jsonparsed_data(url):
    response = urlopen(url, cafile=certifi.where())
    data = response.read().decode("utf-8")
    return json.loads(data)

def is_within_quarter(date_str, year, start_month, end_month):
    date_parts = date_str.split('-')
    date_year = date_parts[0]
    date_month = int(date_parts[1])
    return date_year == year and start_month <= date_month <= end_month

def which_fiscal_year(date_str, fiscal_year):
    filing_date = datetime.strptime(date_str.split()[0], "%Y-%m-%d")
    filing_month = filing_date.month
    fiscal_year = int(fiscal_year)

    if (filing_month <= 3):
        fiscal_year += 1
    
    return str(fiscal_year)


def document_downloader(pipeline_output):
    api_key = os.getenv('FMP_API_KEY')
    
    if api_key is None:
        raise ValueError("No API key found. Please set the FMP_API_KEY environment variable.")

    document_type_mapping = {
        "Form 10K": "10-k",
        "Form 10Q": "10-q"
    }

    quarter_map = {
        "Q1": (1, 3),
        "Q2": (4, 6),
        "Q3": (7, 9),
        "Q4": (10, 12)
    }

    filing_type = document_type_mapping.get(pipeline_output.get('document_type'))
    if not filing_type:
        raise ValueError(f"Unsupported document type: {pipeline_output.get('document_type')}")

    ticker = pipeline_output.get('ticker')
    if not ticker:
        raise ValueError("Ticker is required.")

    year_quarter = pipeline_output.get('year')
    year_star = pipeline_output.get('year')
    if not year_quarter:
        raise ValueError("Year/Quarter is required.")

    url = f"https://financialmodelingprep.com/api/v3/sec_filings/{ticker}?type={filing_type}&page=0&apikey={api_key}"

    try:
        fmp_results = get_jsonparsed_data(url)
    except Exception as e:
        raise RuntimeError(f"Failed to fetch data from FMP: {e}")

    if filing_type == "10-k":
        year_quarter = which_fiscal_year(fmp_results[0]['fillingDate'],year_quarter)
        filtered_fmp_results = [i for i in fmp_results if re.search(year_quarter, i['fillingDate'])]
        print(filtered_fmp_results)
        year = year_star
        output_dir = os.path.join("data", "sec-edgar-filings", ticker, filing_type, year)
    elif filing_type == "10-q":
        year, quarter = year_quarter.split()
        start_month, end_month = quarter_map[quarter]
        filtered_fmp_results = [i for i in fmp_results if is_within_quarter(i['fillingDate'], year, start_month, end_month)]
        print(filtered_fmp_results)
        output_dir = os.path.join("data", "sec-edgar-filings", ticker, filing_type, year, quarter)

    if not filtered_fmp_results:
        raise ValueError("No matching SEC filings found.")

    os.makedirs(output_dir, exist_ok=True)

    path_to_wkhtmltopdf = os.getenv('WKHTMLTOPDF_PATH', r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe')
    if not os.path.exists(path_to_wkhtmltopdf):
        raise FileNotFoundError(f"wkhtmltopdf not found at {path_to_wkhtmltopdf}")

    url = filtered_fmp_results[0]['finalLink']
    pdf_path = os.path.join(output_dir, 'primary_document.pdf')

    config = pdfkit.configuration(wkhtmltopdf=path_to_wkhtmltopdf)

    try:
        pdfkit.from_url(url, pdf_path, configuration=config)
        logging.info(f"PDF generated and saved at {pdf_path}")
    except Exception as e:
        logging.error(f"PDF generation failed: {e}")
        raise




In [70]:
# #!/usr/bin/env python
# import os
# import json
# import re
# import pdfkit
# import logging
# from datetime import datetime

# try:
#     from urllib.request import urlopen
# except ImportError:
#     from urllib2 import urlopen

# import certifi

# # Set up logging
# logging.basicConfig(level=logging.INFO)

# def get_jsonparsed_data(url):
#     response = urlopen(url, cafile=certifi.where())
#     data = response.read().decode("utf-8")
#     return json.loads(data)

# def is_within_quarter(date_str, year, start_month, end_month):
#     date_parts = date_str.split('-')
#     date_year = date_parts[0]
#     date_month = int(date_parts[1])
#     return date_year == year and start_month <= date_month <= end_month

# def which_fiscal_year(date_str, fiscal_year):
#     filing_date = datetime.strptime(date_str.split()[0], "%Y-%m-%d")
#     filing_month = filing_date.month
#     fiscal_year = int(fiscal_year)

#     if filing_month <= 3:
#         fiscal_year += 1
    
#     return str(fiscal_year)

# def document_downloader(pipeline_output):
#     api_key = os.getenv('FMP_API_KEY')
    
#     if api_key is None:
#         raise ValueError("No API key found. Please set the FMP_API_KEY environment variable.")

#     document_type_mapping = {
#         "Form 10K": "10-k",
#         "Form 10Q": "10-q"
#     }

#     quarter_map = {
#         "Q1": (1, 3),
#         "Q2": (4, 6),
#         "Q3": (7, 9),
#         "Q4": (10, 12)
#     }

#     filing_type = document_type_mapping.get(pipeline_output.get('document_type'))
#     if not filing_type:
#         raise ValueError(f"Unsupported document type: {pipeline_output.get('document_type')}")

#     ticker = pipeline_output.get('ticker')
#     if not ticker:
#         raise ValueError("Ticker is required.")

#     year_quarter = pipeline_output.get('year')
#     if not year_quarter:
#         raise ValueError("Year/Quarter is required.")

#     url = f"https://financialmodelingprep.com/api/v3/sec_filings/{ticker}?type={filing_type}&page=0&apikey={api_key}"

#     try:
#         fmp_results = get_jsonparsed_data(url)
#     except Exception as e:
#         raise RuntimeError(f"Failed to fetch data from FMP: {e}")

#     if filing_type == "10-k":
#         year_quarter = which_fiscal_year(fmp_results[0]['fillingDate'], year_quarter)
#         filtered_fmp_results = [i for i in fmp_results if re.search(year_quarter, i['fillingDate'])]
#         year = year_quarter
#         output_dir = os.path.join("data", "sec-edgar-filings", filtered_fmp_results[0]['cik'], filing_type, year)
#     elif filing_type == "10-q":
#         year, quarter = year_quarter.split()
#         start_month, end_month = quarter_map[quarter]
#         filtered_fmp_results = [i for i in fmp_results if is_within_quarter(i['fillingDate'], year, start_month, end_month)]
#         output_dir = os.path.join("data", "sec-edgar-filings", filtered_fmp_results[0]['cik'], filing_type, year, quarter)

#     if not filtered_fmp_results:
#         raise ValueError("No matching SEC filings found.")

#     os.makedirs(output_dir, exist_ok=True)

#     path_to_wkhtmltopdf = os.getenv('WKHTMLTOPDF_PATH', r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe')
#     if not os.path.exists(path_to_wkhtmltopdf):
#         raise FileNotFoundError(f"wkhtmltopdf not found at {path_to_wkhtmltopdf}")

#     url = filtered_fmp_results[0]['finalLink']
#     pdf_path = os.path.join(output_dir, 'primary_document.pdf')

#     config = pdfkit.configuration(wkhtmltopdf=path_to_wkhtmltopdf)

#     try:
#         pdfkit.from_url(url, pdf_path, configuration=config)
#         logging.info(f"PDF generated and saved at {pdf_path}")
#     except Exception as e:
#         logging.error(f"PDF generation failed: {e}")
#         raise




In [5]:
which_fiscal_year("2023-10-03 00:00:00","2023")

'2023'

In [13]:
def check_document_exists(pipeline_output):
    document_type_mapping = {
        "Form 10K": "10-k",
        "Form 10Q": "10-q"
    }

    quarter_map = {
        "Q1": (1, 3),
        "Q2": (4, 6),
        "Q3": (7, 9),
        "Q4": (10, 12)
    }

    filing_type = document_type_mapping.get(pipeline_output.get('document_type'))
    if not filing_type:
        raise ValueError(f"Unsupported document type: {pipeline_output.get('document_type')}")

    ticker = pipeline_output.get('ticker')
    if not ticker:
        raise ValueError("Ticker is required.")

    year_quarter = pipeline_output.get('year')
    if not year_quarter:
        raise ValueError("Year/Quarter is required.")
    
    if filing_type == "10-k":
        output_dir = os.path.join("data", "sec-edgar-filings", ticker, filing_type, year_quarter)
    elif filing_type == "10-q":
        year, quarter = year_quarter.split()
        start_month, end_month = quarter_map[quarter]
        output_dir = os.path.join("data", "sec-edgar-filings", ticker, filing_type, year, quarter)

    pdf_path = os.path.join(output_dir, 'primary_document.pdf')

    if os.path.exists(pdf_path):
        logging.info(f"PDF already exists at {pdf_path}. Skipping download.")
        return True
    
    return False

In [16]:
# Example usage
pipeline_output = {'ticker': 'AAPL', 'document_type': 'Form 10K', 'year': '2003'}
if not check_document_exists(pipeline_output):
    document_downloader(pipeline_output)

  response = urlopen(url, cafile=certifi.where())


[{'symbol': 'AAPL', 'fillingDate': '2003-12-19 00:00:00', 'acceptedDate': '2003-12-19 17:25:45', 'cik': '0000320193', 'type': '10-K', 'link': 'https://www.sec.gov/Archives/edgar/data/320193/000104746903041604/0001047469-03-041604-index.htm', 'finalLink': 'https://www.sec.gov/Archives/edgar/data/320193/000104746903041604/a2124888z10-k.htm'}]


INFO:root:PDF generated and saved at data\sec-edgar-filings\AAPL\10-k\2003\primary_document.pdf


In [37]:
import re
pipeline_output = {'ticker': 'AMZN', 'document_type': 'Form 10K', 'year': '2023'}
filter_fmp_results = [i for i in fmp_results if re.search(pipeline_output['year'], i['fillingDate'])]
print(filter_fmp_results)

[{'symbol': 'AMZN', 'fillingDate': '2023-10-27 00:00:00', 'acceptedDate': '2023-10-26 18:36:51', 'cik': '0001018724', 'type': '10-Q', 'link': 'https://www.sec.gov/Archives/edgar/data/1018724/000101872423000018/0001018724-23-000018-index.htm', 'finalLink': 'https://www.sec.gov/Archives/edgar/data/1018724/000101872423000018/amzn-20230930.htm'}, {'symbol': 'AMZN', 'fillingDate': '2023-08-04 00:00:00', 'acceptedDate': '2023-08-03 18:24:35', 'cik': '0001018724', 'type': '10-Q', 'link': 'https://www.sec.gov/Archives/edgar/data/1018724/000101872423000012/0001018724-23-000012-index.htm', 'finalLink': 'https://www.sec.gov/Archives/edgar/data/1018724/000101872423000012/amzn-20230630.htm'}, {'symbol': 'AMZN', 'fillingDate': '2023-04-28 00:00:00', 'acceptedDate': '2023-04-27 18:24:51', 'cik': '0001018724', 'type': '10-Q', 'link': 'https://www.sec.gov/Archives/edgar/data/1018724/000101872423000008/0001018724-23-000008-index.htm', 'finalLink': 'https://www.sec.gov/Archives/edgar/data/1018724/0001018

In [73]:
import requests

api_key = ''
url = f'https://financialmodelingprep.com/api/v3/quote/AAPL?apikey={api_key}'

response = requests.get(url)

if response.status_code == 200:
    print("API key is working.")
    print(response.json())
else:
    print("Failed to retrieve data. Check your API key and try again.")
    print(response.text)


API key is working.
[{'symbol': 'AAPL', 'name': 'Apple Inc.', 'price': 228.88, 'changesPercentage': -2.5296, 'change': -5.94, 'dayLow': 226.64, 'dayHigh': 231.4599, 'yearHigh': 237.23, 'yearLow': 164.08, 'marketCap': 3509668808000, 'priceAvg50': 203.5336, 'priceAvg200': 186.58105, 'exchange': 'NASDAQ', 'volume': 55692237, 'avgVolume': 68137873, 'open': 229.45, 'previousClose': 234.82, 'eps': 6.43, 'pe': 35.6, 'earningsAnnouncement': '2024-08-01T20:00:00.000+0000', 'sharesOutstanding': 15334100000, 'timestamp': 1721246401}]
