<a href="https://colab.research.google.com/github/Joel-Mk/Financial-Report-Market-Insights-Generator/blob/main/Financial_Report.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# financial_rag_demo.py

import os
import yfinance as yf
import requests
from PyPDF2 import PdfReader
from openai import OpenAI
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer



In [None]:
# Prefer a key that is already exported in the environment (shell variable,
# notebook secret, ...). Fall back to the inline placeholder, which must be
# replaced with a real key before any OpenAI request can succeed.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your_api_key_here")
# NOTE(review): MODEL_NAME is not referenced by any code visible in this file;
# the embeddings used here come from the local `embedder` defined below.
MODEL_NAME = "text-embedding-3-small"
# Keep the environment in sync with the constant so the client constructed
# below (which is given no explicit key) can pick it up.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Shared, module-level handles used by the functions in this file.
client = OpenAI()
embedder = SentenceTransformer('all-MiniLM-L6-v2')

In [None]:
def download_sec_report():
    """Return the path of the PDF report the pipeline should process.

    No download is performed: the function simply hands back a fixed local
    filename.
    """
    report_path = "sample_report.pdf"
    return report_path


In [None]:
def parse_pdf_to_chunks(pdf_path, chunk_size=300):
    """Extract the text of a PDF and split it into fixed-size word chunks.

    Args:
        pdf_path: Path (or file-like object) handed to ``PdfReader``.
        chunk_size: Maximum number of whitespace-separated words per chunk.
            Must be at least 1.

    Returns:
        A list of strings, each holding up to ``chunk_size`` words joined by
        single spaces. The list is empty when no text could be extracted.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # Validate up front: a step of 0 makes range() fail with an unrelated
    # message, and a negative step would silently produce no chunks at all.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    reader = PdfReader(pdf_path)
    # `or ""` guards against a page that yields no text (e.g. a scanned image);
    # joining once avoids repeated string concatenation in the loop.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    words = "\n".join(page_texts).split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]


In [None]:
def create_faiss_index(chunks):
    """Embed the text chunks and store the vectors in a flat L2 FAISS index.

    Args:
        chunks: Non-empty sequence of text chunks to embed with the
            module-level ``embedder``.

    Returns:
        A ``(index, chunks)`` tuple: the populated ``faiss.IndexFlatL2`` and
        the same ``chunks`` object, so row ``i`` of the index corresponds to
        ``chunks[i]``.

    Raises:
        ValueError: If ``chunks`` is empty (there is nothing to embed, so the
            embedding dimension cannot be determined).
    """
    if len(chunks) == 0:
        raise ValueError("cannot build a FAISS index from an empty chunk list")

    # FAISS expects a C-contiguous float32 matrix; coerce explicitly instead
    # of relying on whatever dtype/layout the encoder happens to return.
    embeddings = np.ascontiguousarray(embedder.encode(chunks), dtype="float32")
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index, chunks

In [None]:
def retrieve_chunks(query, index, chunks, top_k=3):
    """Return the chunks whose embeddings are nearest to the query.

    Args:
        query: Text to embed with the module-level ``embedder``.
        index: FAISS index whose rows line up with ``chunks``.
        chunks: Sequence of text chunks that were added to ``index``.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks in the order reported by the index search.
        Fewer are returned when the index holds fewer vectors, and an empty
        list when there are no chunks or ``top_k`` is smaller than 1.
    """
    if len(chunks) == 0 or top_k < 1:
        return []

    q_emb = np.asarray(embedder.encode([query]), dtype="float32")
    _distances, neighbor_ids = index.search(q_emb, top_k)
    # FAISS pads missing neighbours with label -1 when it finds fewer than
    # top_k results; indexing chunks[-1] would silently return the last chunk
    # as a bogus hit, so those placeholders are dropped.
    return [chunks[i] for i in neighbor_ids[0] if i >= 0]

In [None]:
def ask_llm(query, context, *, model="gpt-4o-mini"):
    """Ask the chat model to answer ``query`` using ``context``.

    Args:
        query: The user's question.
        context: Text inserted into the prompt ahead of the question.
        model: Name of the chat-completion model to call. Defaults to
            ``"gpt-4o-mini"``, the value that was previously hard-coded.

    Returns:
        The message content of the first choice in the response.
    """
    prompt = f"Use the following context to answer the question:\n\n{context}\n\nQuestion: {query}"
    # Context and question go out together as a single user message.
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [None]:
def get_stock_summary(ticker):
    """Build a one-line summary of the most recent closing price.

    Args:
        ticker: Ticker symbol passed to ``yf.Ticker``.

    Returns:
        A string of the form ``"<ticker> latest close: <price> USD"`` with the
        price formatted to two decimals. The ``USD`` label is a fixed part of
        the message; it is not looked up per ticker.

    Raises:
        ValueError: If no price history is returned for ``ticker``.
    """
    stock = yf.Ticker(ticker)
    hist = stock.history(period="1mo")
    if hist.empty:
        raise ValueError(f"No price history returned for ticker {ticker!r}")
    # Use .iloc for positional access: plain [-1] on a label-indexed Series is
    # treated as a label lookup in newer pandas versions.
    latest_close = hist["Close"].iloc[-1]
    return f"{ticker} latest close: {latest_close:.2f} USD"

In [None]:
if __name__ == "__main__":
    # End-to-end demo: locate the report, index it, answer one question with
    # retrieved context, and print a stock price summary alongside.

    # Was `download_sec_report_dummy()`, a name that is never defined in this
    # file and therefore raised NameError.
    pdf_file = download_sec_report()

    chunks = parse_pdf_to_chunks(pdf_file)

    index, chunks = create_faiss_index(chunks)

    query = "Summarize the company's revenue performance"
    relevant_chunks = retrieve_chunks(query, index, chunks)

    # The retrieved chunks are newline-joined into a single context string.
    answer = ask_llm(query, "\n".join(relevant_chunks))

    stock_info = get_stock_summary("AAPL")

    print("\n--- LLM Answer ---\n", answer)
    print("\n--- Stock Summary ---\n", stock_info)