In [15]:
!pip install sec-api



In [26]:
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration
from sec_api import QueryApi, ExtractorApi

In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

SEC_API_KEY = os.environ.get("SEC_API_KEY")
if not SEC_API_KEY:
    raise RuntimeError("SEC_API_KEY not found in .env file. Please add it to your .env file.")

TICKERS = ['AAPL', 'MSFT', 'JPM', 'PG', 'JNJ', 'XOM']

In [28]:
print("Loading T5 model and tokenizer...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
print("Model loaded successfully onto GPU.")

Loading T5 model and tokenizer...
Using device: cuda
Model loaded successfully onto GPU.


In [30]:
all_summaries = {}
for ticker in TICKERS:
    try:
        print(f"\n--- Processing {ticker} ---")

        queryApi = QueryApi(api_key=SEC_API_KEY)
        query = {
          "query": { "query_string": { "query": f"ticker:{ticker} AND formType:\"10-K\"" } },
          "from": "0", "size": "1", "sort": [{ "filedAt": { "order": "desc" } }]
        }
        filings = queryApi.get_filings(query)
        filing_url = filings['filings'][0]['linkToFilingDetails']

        extractorApi = ExtractorApi(SEC_API_KEY)
        mda_text = extractorApi.get_section(filing_url=filing_url, section="7", return_type="text")


        summary_input_text = "summarize: " + mda_text[:4000]
        inputs = tokenizer.encode(summary_input_text, return_tensors="pt", max_length=1024, truncation=True).to(device)
        summary_ids = model.generate(inputs, max_length=2500, min_length=1000, length_penalty=2.0, num_beams=4, early_stopping=True)
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        print(f"✅ SUMMARY FOR {ticker}:")
        print(summary)
        all_summaries[ticker] = summary

    except Exception as e:
        error_message = f"Could not process {ticker}. Reason: {e}"
        print(error_message)
        all_summaries[ticker] = "Summary could not be generated due to document complexity or API error."



--- Processing AAPL ---
✅ SUMMARY FOR AAPL:
the Company&#8217;s fiscal year is the 52- or 53-week period that ends on the last Saturday of September. the Company&#8217;s fiscal year is the 52- or 53-week period that ends on the last Saturday of September. the weakness in the yen relative to the U.S. dollar had an unfavorable year-over-year impact on Greater China net sales during 2024 compared to 2023 due primarily to higher net sales of Services. the weakness in &&&#8217&&&##8217.. 'the.. &#. &#. &#8217.... the...................................................................................................................................................................................................................................................................................................................................................................................................................................

--- Processing MSFT ---
✅ SUMMARY FOR MSFT:
the following Man