# SEC Filings with MuleBot

This notebook demonstrates the different formats in which an SEC filing can be represented when it is used as input to an LLM.

In [20]:
import os
from pathlib import Path

# Run the rest of the notebook from the parent of the directory the kernel
# started in. The starting directory is remembered the first time this cell
# executes, so re-running the cell does not climb one level higher each time.
_NOTEBOOK_DIR = globals().get("_NOTEBOOK_DIR", Path.cwd())
os.chdir(_NOTEBOOK_DIR.parent)

# References

- https://github.com/dgunning/edgartools
- https://py-xbrl.readthedocs.io/en/latest/usage.html
- https://github.com/john-friedman/datamule-python
- https://john-friedman.github.io/datamule-python/usage/mulebot.html

MuleBot uses gpt-4o-mini to chat with SEC filings. Source:
- https://github.com/john-friedman/datamule-python/blob/main/datamule/datamule/mulebot/mulebot.py

In [21]:
# from finmas.data.sec.tool import SECSemanticSearchTool
from IPython.display import Markdown, display
from IPython.core.display import HTML
from edgar import Company
from datamule import parse_textual_filing

In [22]:
# Stock ticker symbol of the company whose filing is used throughout the notebook.
TICKER: str = "NVDA"

In [23]:
# Look up the company by ticker, restrict to its 10-Q filings, and take the
# result of latest(1). Later cells call `.document.url` and `.markdown()` on
# `filing`, so it is presumably a single filing object - confirm against edgartools.
company = Company(TICKER)
quarterly_filings = company.get_filings(form="10-Q")
filing = quarterly_filings.latest(1)

In [None]:
# Show the URL of the filing's document as the cell output.
document_url = filing.document.url
document_url

## Markdown

In [25]:
# Save the filing's Markdown rendering to a file named after the ticker.
# UTF-8 is requested explicitly: without it open() falls back to the platform's
# locale encoding, which can raise UnicodeEncodeError on non-ASCII characters.
with open(f"{TICKER}_filing_markdown", "w", encoding="utf-8") as f:
    f.write(filing.markdown())

## JSON

In [26]:
# Parse the filing document at its URL with datamule, requesting JSON output.
source_url = filing.document.url
json_content = parse_textual_filing(source_url, return_type="json")

In [None]:
from pprint import pprint

# Pretty-print the parsed filing returned by parse_textual_filing to inspect its structure.
pprint(json_content)

## JSON -> HTML

In [28]:
from datamule.filing_viewer.filing_viewer import json_to_html

In [None]:
# Convert the parsed JSON filing to an HTML string, then render it inline.
html_from_json = json_to_html(json_content)
rendered_filing = HTML(html_from_json)
display(rendered_filing)

## HTML -> TEXT

In [30]:
import html2text

In [None]:
# Build an html2text converter for turning the generated HTML into text.
h = html2text.HTML2Text()
# NOTE(review): per html2text's documented options this drops table markup
# from the output - confirm it is the intended treatment of filing tables.
h.ignore_tables = True

# Convert the HTML produced from the JSON filing into text.
text_content = h.handle(html_from_json)

In [None]:
# Render the converted text as Markdown in the cell output.
markdown_view = Markdown(text_content)
display(markdown_view)