In [None]:
from src.custom_agents.planner_agent import PlannerAgent

planner_agent = PlannerAgent()


In [None]:
await planner_agent.execute("Make a SWOT analysis for apple Inc.")

In [None]:
from src.custom_agents.base import BaseAgent
import os 
agent = BaseAgent("test bot", "", os.getenv(""))
await agent.execute("Make a SWOT analysis for apple Inc.")

In [None]:
import os
from dotenv import load_dotenv
from openai import AzureOpenAI, AsyncAzureOpenAI

_ = load_dotenv()

client = AsyncAzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

In [None]:
from agents import Agent, Runner, OpenAIChatCompletionsModel

agent = Agent(
    name="Assistant",
    instructions="You are a helpful assistant",
    model=OpenAIChatCompletionsModel(
        model=os.getenv("GPT4O_MINI_DEPLOYMENT"),
        openai_client=client,
    )
)

In [None]:
result = await Runner.run(
    agent,
    "Make a SWOT analysis for apple Inc.",
)

In [None]:
result.raw_responses

In [None]:
from custom_agents.swot_agent import SWOTAgent

planner_agent = SWOTAgent()
resutl = await planner_agent.execute("Make a SWOT analysis for apple Inc.")

In [None]:
for output in resutl.final_output:
    print(output)

In [None]:
from custom_agents.base_agent import BaseAgent

base_agent = BaseAgent(
    name="Chatbot",
)

In [None]:
result = await base_agent.execute("Wie alt wurde Albert Einstein am 14. März 1879?")

In [None]:
result.final_output

In [None]:
# from dotenv import load_dotenv
# import os
from manager import SWOTAnalysisManager

# load_dotenv()

# print(os.getenv("O3_MINI_DEPLOYMENT"))

swot_manager = SWOTAnalysisManager()

In [None]:
result = await swot_manager.run("Make a very detailed SWOT analysis for SAP and use data from 2025.")

# Baue den SWOTAnalysisManager

## Aufbau
1. Planner
2. Searcher
3. SWOT

### 1. Planner

In [None]:
from agents import Runner

query = "Make a very detailed SWOT analysis for SAP and use data from 2025."

In [None]:
from custom_agents.planner_agent import PlannerAgent, WebSearchPlan, WebSearchItem
from openai.types.responses import ResponseTextDeltaEvent

planner_agent = PlannerAgent()
planner_result = planner_agent.execute_streamed(f"Query: {query}")

In [None]:
for plan in planner_result.final_output_as(WebSearchPlan).searches:
    print(f"{plan.query} ----> {plan.reason}")

### 2. Search

In [None]:
from custom_agents.search_agent import SearchAgent

search_agent = SearchAgent()

In [None]:
async def search(item: WebSearchItem):
    """Run the notebook-level ``search_agent`` for one planned search item.

    Args:
        item: Planned search; its ``query`` and ``reason`` attributes are
            formatted into the text handed to the agent.

    Returns:
        The ``final_output`` attribute of the agent's run result.
    """
    prompt = "".join(
        (
            f"Search term: {item.query}\n",
            f"Reason: {item.reason}\n",
        )
    )
    run_result = await search_agent.execute(prompt)
    return run_result.final_output
    

In [None]:
import asyncio
from agents import custom_span

tasks = [asyncio.create_task(search(item)) for item in planner_result.final_output_as(WebSearchPlan).searches]

with custom_span("Search for all items"):
    # await asyncio.gather(*tasks)
    search_resutls = []
    for task in asyncio.as_completed(tasks):
        result = await task
        search_resutls.append(result)

In [None]:
search_resutls

### 3. SWOT Analysis

In [None]:
from custom_agents.swot_agent import SWOTAgent

swot_agent = SWOTAgent()

input_data = f"Original query: {query}\nSummarized search results: {search_resutls}"

swot_result = await swot_agent.execute(input_data, max_turns=20)

In [None]:
swot_result.final_output.swot_summary

### 4. Test New Manager Class

In [None]:
from manager import SWOTAnalysisManager

query = "Make a very detailed SWOT analysis for SAP and use data from 2025."

swot_manager = SWOTAnalysisManager()
swot_result = await swot_manager.execute_streamed(query)

In [None]:
for event in swot_result.stream_events():
    if isinstance(event, ResponseTextDeltaEvent):
        print(event.delta.content, end="")
    else:
        print(event)

# New Idea
## Aufbau
1. Planner ohne Stream
2. Search ohne Stream
3. SWOT mit Stream

In [None]:
from manager import SWOTAnalysisManager

agent = SWOTAnalysisManager()

query = "Make a SWOT analysis for Deutsche Telekom"

In [None]:
planner_result = await agent.planner_agent.execute(f"Query: {query}")

In [None]:
n = 1
planner_answer = ""
for item in planner_result.final_output.searches:
    planner_answer += f"{n}. Search Query: {item.query} <--> Reason {item.reason}\n"
    n += 1

In [None]:
planner_answer 

# 10k-Filings Retrieval

In [None]:
# 1. Import only the edgartools names this notebook uses. A star import hides
#    where names come from and can silently shadow notebook variables.
from edgar import Company, get_filings, set_identity

# 2. Tell the SEC who you are
set_identity("mike@indigo.com")

# 3. Start using the library
filings = get_filings()

In [None]:
# Competitor of SAP
competitors = {
    "Competitors_by_Sector": {
        "Apple Inc.": "AAPL",
        "Microsoft Corporation": "MSFT",
        "NVIDIA Corporation": "NVDA",
        "Broadcom Inc.": "AVGO",
        "Oracle Corporation": "ORCL",
        "Salesforce, Inc.": "CRM",
        "Cisco Systems, Inc.": "CSCO",
        "International Business Machines Corporation": "IBM",
        "Palantir Technologies Inc.": "PLTR",
        "Accenture plc": "ACN"
    },
    "Competitors_by_Industry": {
        "Salesforce, Inc.": "CRM",
        "Intuit Inc.": "INTU",
        "ServiceNow, Inc.": "NOW",
        "Adobe Inc.": "ADBE",
        "Uber Technologies, Inc.": "UBER",
        "Automatic Data Processing, Inc.": "ADP",
        "AppLovin Corporation": "APP",
        "Cadence Design Systems, Inc.": "CDNS",
        "MicroStrategy Incorporated": "MSTR",
        "Workday, Inc.": "WDAY"
    }
}

### Retrieve the latest three 10-K forms

In [None]:
# 1. Import the necessary functions from edgartools
from edgar import set_identity, get_filings, Company

def retrieve_tenk_forms(company_ticker: str, top_k: int, path: str):
    """Download attachment ``[1]`` of a company's first ``top_k`` 10-K filings.

    Args:
        company_ticker: Ticker symbol passed to ``edgar.Company``.
        top_k: Number of filings to download, taken in the order in which
            ``company.get_filings(form="10-K")`` yields them (presumably
            newest first -- confirm against the edgartools documentation).
        path: Target folder; created if it does not exist. Each file is
            written as ``<path>/<document stem>.html``.
    """
    import os  # local import so this cell does not rely on an earlier `import os`

    set_identity("mike@indigo.com")
    company = Company(company_ticker)
    tenk_filings = company.get_filings(form="10-K")

    # download() is given a file path inside `path`, so the folder has to exist.
    if path:
        os.makedirs(path, exist_ok=True)

    for position, filing in enumerate(tenk_filings):
        if position >= top_k:
            # Stop here instead of walking the rest of the filing history.
            break
        # NOTE(review): attachments[1] is assumed to be the main 10-K document.
        attachment = filing.attachments[1]
        filing_name = attachment.document.split(sep=".")[0]
        attachment.download(path=f"{path}/{filing_name}.html")
    

In [None]:
# Download the first `top_k` 10-K forms for every sector competitor.
top_k = 3
for company_name, ticker in competitors["Competitors_by_Sector"].items():
    print(f"Saving top {top_k} 10k forms for {company_name}")
    retrieve_tenk_forms(ticker, top_k, "./10k-data")


In [None]:
# Download the first `top_k` 10-K forms for every industry competitor.
top_k = 3
for company_name, ticker in competitors["Competitors_by_Industry"].items():
    print(f"Saving top {top_k} 10k forms for {company_name}")
    retrieve_tenk_forms(ticker, top_k, "./10k-data")

In [None]:
retrieve_tenk_forms('SAP', 4, "./10k-data")

In [None]:
company = Company("CRM") # Sales

In [None]:
tenk_filings = company.get_filings(form="10-K")

In [None]:
# Download attachment [1] of the first three 10-K filings, then stop iterating
# instead of counting through the rest of the filing history.
for position, filing in enumerate(tenk_filings):
    if position >= 3:
        break
    attachment = filing.attachments[1]
    filing_name = attachment.document.split(sep=".")[0]
    attachment.download(path=f"./10k-data/{filing_name}.html")

In [None]:
filings.attachments[1].download("./")

In [None]:
filing = filings.attachments[1]

In [None]:
type(filing)

In [None]:
type(filings)

# 10-K Filings: Chunking, Embedding, and Storing

### SAP competitors dict

In [None]:
# Competitor of SAP
competitors = {
    "Competitors_by_Sector": {
        "Apple Inc.": "AAPL",
        "Microsoft Corporation": "MSFT",
        "NVIDIA Corporation": "NVDA",
        "Broadcom Inc.": "AVGO",
        "Oracle Corporation": "ORCL",
        "Salesforce, Inc.": "CRM",
        "Cisco Systems, Inc.": "CSCO",
        "International Business Machines Corporation": "IBM",
        "Palantir Technologies Inc.": "PLTR",
        "Accenture plc": "ACN"
    },
    "Competitors_by_Industry": {
        "Salesforce, Inc.": "CRM",
        "Intuit Inc.": "INTU",
        "ServiceNow, Inc.": "NOW",
        "Adobe Inc.": "ADBE",
        "Uber Technologies, Inc.": "UBER",
        "Automatic Data Processing, Inc.": "ADP",
        "AppLovin Corporation": "APP",
        "Cadence Design Systems, Inc.": "CDNS",
        "MicroStrategy Incorporated": "MSTR",
        "Workday, Inc.": "WDAY"
    }
}

### Parse HTML file

In [None]:
from bs4 import BeautifulSoup, Tag

def split_into_pages(html_content):
    """
    Split an HTML document into per-page HTML strings.

    The top-level children of ``<body>`` (or of the whole document when there
    is no body) are walked in order. An ``<hr>`` tag whose ``style`` attribute
    contains 'page-break-after:always' closes the current page and is itself
    dropped. Pages that are empty after stripping whitespace are skipped.

    Returns a list with the HTML of each page.
    """
    parsed = BeautifulSoup(html_content, 'html.parser')
    root = parsed.body if parsed.body else parsed

    def _flush(buffer, out):
        # Serialize the buffered nodes and keep the page only if it has content.
        html = ''.join(str(node) for node in buffer).strip()
        if html:
            out.append(html)

    pages = []
    pending = []
    for node in root.contents:
        is_page_break = (
            isinstance(node, Tag)
            and node.name == 'hr'
            and 'page-break-after:always' in node.get('style', '')
        )
        if is_page_break:
            _flush(pending, pages)
            pending = []
        else:
            pending.append(node)

    # Whatever follows the last page break forms the final page.
    _flush(pending, pages)
    return pages

def process_pages(pages: list):
    """Convert page HTML to plain text, removing a leading "Table of Contents".

    Args:
        pages: HTML strings, one per page.

    Returns:
        One text string per input page, in the same order. A page whose text
        starts with "Table of Contents" has that prefix cut off; every other
        page is returned unchanged.
    """
    marker = "Table of Contents"
    texts = []
    for page_html in pages:
        page_text = BeautifulSoup(page_html, "html.parser").get_text()
        texts.append(page_text[len(marker):] if page_text.startswith(marker) else page_text)
    return texts

def process_file(file_path):
    """
    Read an HTML file and split its content into pages.

    Args:
        file_path: Path of the UTF-8 encoded HTML file.

    Returns:
        A ``(pages, processed_pages)`` tuple: the per-page HTML produced by
        ``split_into_pages`` and the matching output of ``process_pages``.
        If anything fails, the error is printed and ``([], [])`` is returned.
    """
    try:
        with open(file_path, encoding="utf-8") as f:
            html_content = f.read()
        pages = split_into_pages(html_content)
        processed_pages = process_pages(pages)
        return pages, processed_pages
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        # Keep the two-element shape of the success path: callers unpack
        # `pages, texts = process_file(...)`, which raised ValueError on a bare [].
        return [], []

### Process HTML files

In [None]:
# process_aapl_file.py

import os
from bs4 import BeautifulSoup
from ingest.chunking import chunk_text
from ingest.embedding import generate_embedding
from vectorstore.azure_search_client import upload_documents_to_index

def process_html_file(file_path: str, company_name: str, max_chunk_size: int = 20000):
    """
    Turn one 10-K HTML file into embedded documents ready for upload.

    The file is split into pages by ``process_file``. Each page text is
    embedded with ``generate_embedding``; a page of ``max_chunk_size``
    characters or more is first cut into consecutive slices of that size.

    Args:
        file_path: "/"-separated path to the ``.html`` file. The last four
            digits before the final four characters of the file stem are read
            as the year (e.g. ``aapl-20240928`` -> 2024).
        company_name: Stored in every document and in its metadata text.
        max_chunk_size: Maximum number of characters per embedded text.

    Returns:
        ``(documents, token_text_ratios)``: the document dicts and, in the same
        order, the second value returned by ``generate_embedding`` for each.
    """
    _, page_texts = process_file(file_path)
    file_name = file_path.split("/")[-1][:-5]
    print(f"Processing file {file_name}")
    year = int(file_name[-8:-4])

    documents = []
    token_text_ratios = []

    def _embed_and_store(doc_id, text, page_number):
        # Embed one piece of text and record its document plus token/text ratio.
        embedding, ratio = generate_embedding(text)
        documents.append({
            "id": doc_id,
            "data": text,
            "dataVector": embedding,
            "metadata": f"10k filing page {page_number} for {company_name}",
            "year": year,
            "companyname": company_name,
            "tenkpage": page_number,
        })
        token_text_ratios.append(ratio)

    total = len(page_texts)
    for page_number, page_text in enumerate(page_texts, start=1):
        print(f"-- Processing chunk {page_number}/{total} for {file_name}")
        if len(page_text) < max_chunk_size:
            # Small page: one document, id without a slice suffix.
            _embed_and_store(f"{file_name}_{page_number}", page_text, page_number)
            continue
        # Large page: one document per slice, id suffixed with the slice number.
        for part, offset in enumerate(range(0, len(page_text), max_chunk_size)):
            _embed_and_store(
                f"{file_name}_{page_number}_{part}",
                page_text[offset:offset + max_chunk_size],
                page_number,
            )

    return documents, token_text_ratios

In [None]:
import os

# Path to the folder containing the HTML files
folder_path = "./10k-data"

def find_html_files(folder_path: str, ticker: str):
    """List the ``.html`` files in a folder whose name contains a ticker.

    The ticker is matched case-insensitively as a plain substring of the file
    name, so a short ticker can also match unrelated files that happen to
    contain the same letters.

    Args:
        folder_path: Folder to scan (not recursive).
        ticker: Ticker symbol in any letter case, e.g. ``"AAPL"`` or ``"aapl"``.

    Returns:
        Matching file names (without the folder), in ``os.listdir`` order.
    """
    # Lowercase both sides; previously an upper-case ticker never matched.
    needle = ticker.lower()
    return [
        file_name
        for file_name in os.listdir(folder_path)
        if file_name.endswith(".html") and needle in file_name.lower()
    ]

In [None]:
# Load the names of files that were already embedded and uploaded.
# On a fresh checkout the log does not exist yet, so start with an empty list.
try:
    with open("./processed_files.txt", "r") as file:
        # Skip blank lines so they do not end up as empty entries.
        processed_files_list = [line.strip() for line in file if line.strip()]
except FileNotFoundError:
    processed_files_list = []

# Print the list to verify
print(processed_files_list)

In [None]:
# Embed and upload every not-yet-processed 10-K HTML file of each competitor.
folder_path = "./10k-data"

file_token_text_ratios = {}
for key, sap_competitors in competitors.items():
    print(f"Processing {key}")
    for competitor, ticker in sap_competitors.items():
        files = find_html_files(folder_path, ticker.lower())
        for file in files:
            if file in processed_files_list:
                print(f"File {file} already processed. Skipping...")
                continue
            file_path = folder_path + "/" + file
            documents, token_text_rations = process_html_file(file_path, competitor, 20000)
            file_token_text_ratios[file_path] = token_text_rations
            # Print a count only: every document carries a full embedding vector.
            print(f"documents: {len(documents)}")
            try:
                upload_documents_to_index(documents=documents)
                with open("./processed_files.txt", "a") as processed_file:
                    processed_file.write(file + "\n")
                # Remember the file for this run as well. A ticker listed in both
                # groups (e.g. CRM) would otherwise be embedded and uploaded twice.
                processed_files_list.append(file)
            except Exception as e:
                # Best effort: keep going, but say why the file was skipped.
                print(f"Pass file {file_path}: {e}")


In [None]:
# Path to your local HTML file
file_path = "aapl-20240928.html"

# Process the HTML file into chunked documents with embeddings.
# process_html_file requires the company name and returns a
# (documents, token_text_ratios) tuple, so unpack it before counting or uploading.
documents, token_text_ratios = process_html_file(file_path, "Apple Inc.")
print(f"Processed {len(documents)} chunks from {file_path}.")

# Upload the processed documents to your Azure Cognitive Search index
upload_documents_to_index(documents)
print("Documents uploaded to Azure Search index.")

In [None]:
import os

# Path to the folder containing the HTML files
folder_path = "./10k-data"

# Iterate through all files in the folder
for file_name in os.listdir(folder_path):
    if file_name.endswith(".html"):  # Check if the file is an HTML file
        file_path = os.path.join(folder_path, file_name)
        print(f"Processing file: {file_path}")
        # Add your processing logic here

In [None]:
int(file_name[-8:-4])

# Read HTML files

In [None]:
from bs4 import BeautifulSoup

file_path = "./10k-data/crm-20250131.html"

with open(file_path, 'r', encoding='utf-8') as f:
    html_content = f.read()

# Use BeautifulSoup to extract plain text
soup = BeautifulSoup(html_content, "html.parser")
text = soup.get_text(separator="\n")

In [None]:
text

### Parse HTML by pages

In [None]:
from bs4 import BeautifulSoup, Tag

def split_into_pages(html_content):
    """
    Split an HTML document into per-page HTML strings.

    An ``<hr>`` tag among the top-level children of ``<body>`` (or of the whole
    document when there is no body) whose ``style`` attribute contains
    'page-break-after:always' acts as a page delimiter and is not part of any
    page. Pages that are empty after stripping whitespace are left out.

    NOTE(review): a function with this name and the same behaviour is already
    defined in the "Parse HTML file" section; running this cell rebinds it.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    root = document.body if document.body else document

    # Group the top-level nodes into runs separated by page-break <hr> tags.
    groups = [[]]
    for node in root.contents:
        if (
            isinstance(node, Tag)
            and node.name == 'hr'
            and 'page-break-after:always' in node.get('style', '')
        ):
            groups.append([])
        else:
            groups[-1].append(node)

    # Serialize each group and drop the ones without any content.
    pages = []
    for group in groups:
        page_html = ''.join(str(node) for node in group).strip()
        if page_html:
            pages.append(page_html)
    return pages

def process_pages(pages: list):
    """Convert page HTML to plain text, removing a leading "Table of Contents".

    Args:
        pages: HTML strings, one per page.

    Returns:
        One text string per input page, in the same order. A page whose text
        starts with "Table of Contents" has that prefix removed.
    """
    prefix = "Table of Contents"
    processed_pages = []
    for page in pages:
        text = BeautifulSoup(page, "html.parser").get_text()
        if text.startswith(prefix):
            text = text[len(prefix):]
        # Keep every page, like the process_pages defined in the "Parse HTML
        # file" section. Without this, pages lacking the prefix were silently
        # dropped and page numbers no longer lined up with `pages`.
        processed_pages.append(text)
    return processed_pages

def process_file(file_path):
    """
    Read an HTML file and split its content into pages.

    Args:
        file_path: Path of the UTF-8 encoded HTML file.

    Returns:
        A ``(pages, processed_pages)`` tuple: the per-page HTML produced by
        ``split_into_pages`` and the matching output of ``process_pages``.
        If anything fails, the error is printed and ``([], [])`` is returned.
    """
    try:
        with open(file_path, encoding="utf-8") as f:
            html_content = f.read()
        pages = split_into_pages(html_content)
        processed_pages = process_pages(pages)
        return pages, processed_pages
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        # Same shape as the success path, so two-value unpacking by callers
        # does not raise ValueError after a failure.
        return [], []

In [None]:
file_path = "./10k-data/crm-20250131.html"
# file_path = "./10k-data/wday-20250131.html"
# Bind the page texts to `processed_pages`. Assigning them to `process_pages`
# replaced the function of that name, so every later process_file() call failed.
pages, processed_pages = process_file(file_path)

In [None]:
for page in pages:
    soup = BeautifulSoup(page, "html.parser")
    text = soup.get_text()#[len("Table of Contents"):]
    # text[:len("Table of Contents")]
    if "Table of Contents" == text[:len("Table of Contents")]:
        print(text[len("Table of Contents"):])

### Parse PDF

In [None]:
#!/usr/bin/env python3
import os
import argparse
from PyPDF2 import PdfReader
import numpy as np

def parse_pdf(file_path, start_string="Integrated  Report  2024   \nTo Our  \nStakeholders  Consolidated Group \nManagement Report  Consolidated Financial \nStatements IFRS  Additional  \nInformation  \n \n"):
    """
    Extract the text of every page of a PDF file using PyPDF2.

    Args:
        file_path: Path of the PDF file.
        start_string: Text removed from every page wherever it occurs
            (the default looks like a repeated page header -- confirm).

    Returns:
        A list with one text string per page. If the file cannot be parsed,
        the error is printed and an empty list is returned, so callers can
        always iterate over the result.
    """
    try:
        reader = PdfReader(file_path)
        pages = []
        for page in reader.pages:
            # Guard against a page without extractable text yielding None
            # (behaviour depends on the PyPDF2 version -- confirm).
            page_text = page.extract_text() or ""
            pages.append(page_text.replace(start_string, ""))
        return pages
    except Exception as e:
        print(f"Error parsing PDF {file_path}: {e}")
        # Return the same type as the success path (was an empty string).
        return []

In [None]:
from ingest.embedding import generate_embedding
from vectorstore.azure_search_client import upload_documents_to_index

def process_pdf_file(file_path: str, company_name: str, max_chunk_size: int = 20000):
    """
    Turn one PDF report into embedded documents ready for upload.

    The per-page texts come from ``parse_pdf``. Each page text is embedded with
    ``generate_embedding``; a page of ``max_chunk_size`` characters or more is
    first cut into consecutive slices of that size.

    Args:
        file_path: "/"-separated path to the PDF. The first four characters of
            the file name are read as the year
            (e.g. ``2023-SAP-Integrated-Report.pdf`` -> 2023).
        company_name: Stored in every document and in its metadata text.
        max_chunk_size: Maximum number of characters per embedded text.

    Returns:
        ``(documents, token_text_ratios)``: the document dicts and, in the same
        order, the second value returned by ``generate_embedding`` for each.
    """
    page_texts = parse_pdf(file_path)
    file_name = file_path.split("/")[-1][:-4]
    year = int(file_name[:4])
    print(f"Processing file {file_name}")

    documents = []
    token_text_ratios = []

    def _embed_and_store(doc_id, text, page_number):
        # Embed one piece of text and record its document plus token/text ratio.
        embedding, ratio = generate_embedding(text)
        documents.append({
            "id": doc_id,
            "data": text,
            "dataVector": embedding,
            "metadata": f"10k filing page {page_number} for {company_name}",
            "year": year,
            "companyname": company_name,
            "tenkpage": page_number,
        })
        token_text_ratios.append(ratio)

    total = len(page_texts)
    for page_number, page_text in enumerate(page_texts, start=1):
        print(f"-- Processing chunk {page_number}/{total} for {file_name}")
        if len(page_text) < max_chunk_size:
            # Small page: one document, id without a slice suffix.
            _embed_and_store(f"{file_name}_{page_number}", page_text, page_number)
            continue
        # Large page: one document per slice, id suffixed with the slice number.
        for part, offset in enumerate(range(0, len(page_text), max_chunk_size)):
            _embed_and_store(
                f"{file_name}_{page_number}_{part}",
                page_text[offset:offset + max_chunk_size],
                page_number,
            )

    return documents, token_text_ratios

In [None]:
file_path = "./10k-data/2023-SAP-Integrated-Report.pdf"

documents, token_text_rations = process_pdf_file(file_path, 'SAP', 20000)
print(f"documents: {documents} and token_text_rations: {token_text_rations}")
upload_documents_to_index(documents=documents)

In [None]:
mean_token_text_ratio = np.mean(token_text_rations)
print(f"Mean token text ratio: {mean_token_text_ratio}")

# RAG Bot for 10k Forms

## RAG

In [None]:
import os
from custom_agents.config import settings
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

llm = AzureChatOpenAI(
    openai_api_version=settings.azure_openai_api_version,
    openai_api_key=settings.azure_openai_api_key,
    azure_endpoint=settings.azure_openai_endpoint,
    azure_deployment=settings.gpt4o_mini_deployment,
    temperature=0,
    max_tokens=500,
)

credential = AzureKeyCredential(settings.azure_search_api_key)
search_client = SearchClient(endpoint=settings.azure_search_service_endpoint,
                      index_name=settings.azure_search_index_name,
                      credential=credential)


In [None]:
from typing import List
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever  

class AzureSearchRetriever(BaseRetriever):
    """LangChain retriever that queries an Azure ``SearchClient`` by text."""

    # Pydantic fields: the search client to query and the number of hits to request.
    search_client: SearchClient
    k: int = 5

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Search the index for ``query`` and wrap each hit as a ``Document``.

        The hit's ``data`` field becomes the page content; ``companyname``,
        ``year``, ``tenkpage`` and ``metadata`` are copied into the metadata.
        """
        hits = self.search_client.search(search_text=query, top=self.k)
        return [
            Document(
                page_content=hit["data"],
                metadata={
                    "company": hit["companyname"],
                    "year": hit["year"],
                    "tenkpage": hit["tenkpage"],
                    "metadata": hit["metadata"],
                },
            )
            for hit in hits
        ]


retriever = AzureSearchRetriever(search_client=search_client, k=5)

In [None]:
from langchain_core.prompts import PromptTemplate 

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a company analysis research planner. Given a request for SWOT analysis, 
you are given a search query to retrieve chunks of 10k filings, which are stored in a vector storage.
Provide a summary of the search results and then answer the question based on the context.
Include financial data if available in the context. 
Identify the company name and year from the context and state them in the answer.

Context:
{context}

Question: {question}
""",
)


In [None]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",               # or "map_reduce", "refine", etc.
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [None]:
query = "Apple Total Assets 2023 Balance Sheet"
result = qa_chain.invoke({"query": query})

print("▶️ Answer:\n", result["result"])
print("\n📑 Sources:")
for doc in result["source_documents"]:
    print(f"- {doc.metadata['company']} ({doc.metadata['year']}, {doc.metadata['tenkpage']}): {doc.page_content[:500]}…")


### Planner LLM

In [None]:
import os
from custom_agents.config import settings
from langchain_openai import AzureChatOpenAI
from langchain.chains import LLMChain

planner_llm = AzureChatOpenAI(
    openai_api_version=settings.azure_openai_api_version,
    openai_api_key=settings.azure_openai_api_key,
    azure_endpoint=settings.azure_openai_endpoint,
    azure_deployment=settings.gpt4o_mini_deployment,
    temperature=0,
    max_tokens=500,
)

planner_prompt = PromptTemplate(
    input_variables=["user_query"],
    template="""
You are a company analysis research planner. Given a request for SWOT analysis, 
generate document search queries that help to gather comprehensive information on the company. 
You are generating search queries to search in 10k filings. 
Think about what financial information is relevant to the SWOT analysis and how it can be extracted from the 10k filings.
Provide between 5 and 15 detailed and relevant search queries, each clearly justified. 
The queries should be specific to the company and the year of the 10k filings.
The queries should ALWAYS be in the format WITHOUT any justification: ['search query 1', 'search query 2', ...]
The queries should be relevant to the following user query:
{user_query}
""",
)

planner = planner_prompt | planner_llm

In [None]:
result = planner.invoke({"user_query": "Make a SWOT analysis for Apple in 2025."})

In [None]:
import ast
import re

# The planner is asked for a Python-style list literal, but a chat model may
# wrap it in prose or a code fence, so parse only the outermost [...] span.
planner_text = result.content
list_match = re.search(r"\[.*\]", planner_text, flags=re.DOTALL)
if list_match is None:
    raise ValueError(f"Planner did not return a list of search queries: {planner_text!r}")
search_queries = ast.literal_eval(list_match.group(0))

for search_query in search_queries:
    # Use a separate name so `result` (the planner message) is not overwritten
    # and this cell can be re-run.
    qa_result = qa_chain.invoke({"query": search_query})
    print(f"Query: {qa_result['query']} - Result: {qa_result['result']}")
    for doc in qa_result["source_documents"]:
        print(f"- {doc.metadata['company']} ({doc.metadata['year']}, {doc.metadata['tenkpage']}): {doc.page_content[:500]}…")