# 🚀 Financial Report Analyzer (PDF Edition)

This notebook analyzes quarterly financial reports and generates a professional **PDF Report**, CSV, and Summary.

### Instructions
1. Put your PDF files (Q1, Q2, etc.) into a folder (e.g., `data/Bank_A`).
2. Run the cells below.
3. Enter your API Key and Folder Name when prompted.

In [None]:
# 1. Install Required Libraries
!pip install pdfplumber matplotlib pandas fpdf langchain-google-genai langchain-core

In [None]:
# 2. Imports
import pdfplumber
import json
import os
import getpass
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage

In [None]:
# 3. Logic Class (EarningsAnalyzer)
class EarningsAnalyzer:
    """Extract metrics from quarterly-report PDFs with an LLM and render them.

    The class reads a PDF's text with pdfplumber, asks a Gemini chat model to
    return a fixed JSON structure of financial metrics, asks the same model for
    an executive summary, and writes both into a PDF via FPDF.
    """

    # Typographic characters that LLM prose commonly contains but that the
    # Latin-1-only core PDF fonts cannot encode, mapped to ASCII stand-ins.
    _PDF_TRANSLATION = str.maketrans({
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2013": "-",
        "\u2014": "-",
        "\u2022": "*",
        "\u2026": "...",
    })

    def __init__(self, api_key):
        """Create the chat model client.

        Args:
            api_key: Google API key passed to ``ChatGoogleGenerativeAI``.
        """
        # temperature=0 is requested so extraction is as repeatable as the
        # model allows.
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-2.5-flash",
            google_api_key=api_key,
            temperature=0
        )

    def extract_text(self, file_path):
        """Read the text of every page of the PDF at ``file_path``.

        Pages are joined with a newline so the last word of one page does not
        fuse with the first word of the next.

        Returns:
            The concatenated text (possibly ``""`` when no page has a text
            layer), or ``None`` if the file could not be read.
        """
        pages = []
        try:
            with pdfplumber.open(file_path) as pdf:
                for page in pdf.pages:
                    page_text = page.extract_text()
                    if page_text:
                        pages.append(page_text)
        except Exception as e:
            # Deliberately broad: any read/parse failure should skip this file,
            # not abort the whole batch.
            print(f"❌ Error reading {file_path}: {e}")
            return None
        return "\n".join(pages)

    def clean_json(self, raw_output):
        """Parse the model's reply into Python data.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON as a whole, the span from the first ``{`` to the last ``}``
        is tried, which tolerates replies that add text around the object.

        Returns:
            The parsed JSON value, or ``None`` if ``raw_output`` is not a
            string or no JSON could be parsed from it.
        """
        if not isinstance(raw_output, str):
            return None

        text = raw_output.replace("```json", "").replace("```", "").strip()
        try:
            return json.loads(text)
        except ValueError:  # json.JSONDecodeError is a ValueError
            pass

        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            return None
        try:
            return json.loads(text[start:end + 1])
        except ValueError:
            return None

    # --- Customizing the extracted data points -------------------------------
    # The "REQUIRED JSON STRUCTURE" section of the prompt below decides which
    # values are pulled out of each report. To add one:
    #   1. Add a line to that structure, e.g. "number_of_employees": 0,
    #   2. Re-run this cell to update the class definition.
    #   3. Re-run the analysis loop; the results dictionary will then include
    #      the new data point for every quarter.
    #
    # ⚠️ Important note on missing data:
    # Not every quarterly report contains every data point. For example,
    # headcount is often only reported in the annual (Q4) report, so asking
    # for it in Q1-Q3 will likely yield 0 or null. Handle this downstream,
    # e.g. by checking that the value is > 0.
    # --------------------------------------------------------------------------

    def analyze_full_report(self, text, q_name):
        """Ask the model to extract the metric structure from one report.

        Args:
            text: Full report text to embed in the prompt.
            q_name: Label of the report (used in the prompt wording).

        Returns:
            The ``content`` of the model's reply, unparsed.
        """
        prompt = f"""
        You are a financial analyst. Extract data from this {q_name} report.
        
        CRITICAL RULES:
        1. Ignore "Year Ended" columns. ONLY use "Three Months Ended" (Quarterly).
        2. Return ONLY a valid JSON object. No intro text.
        3. If a data point is not mentioned, return 0.
        
        REQUIRED JSON STRUCTURE:
        {{
            "quarterly_revenue_bn": 0.0,
            "eps": 0.0,
            "net_interest_income_millions": 0,
            "dividend_per_share": 0.0,
            "assets_under_supervision_bn": 0.0,
            "total_headcount": 0,
            "write_offs_millions": 0.0,
            "credit_loss_provisions_millions": 0.0,
            "operating_expenses_bn": 0.0
        }}
        
        REPORT TEXT:
        {text}
        """
        return self.llm.invoke([HumanMessage(content=prompt)]).content

    def generate_summary(self, data):
        """Ask the model for an executive summary of the collected metrics.

        Args:
            data: JSON-serializable results (quarter name -> metrics).

        Returns:
            The ``content`` of the model's reply.
        """
        context = json.dumps(data, indent=2)
        prompt = f"""
        Write a professional executive summary for these quarterly results.
        Focus on Revenue Trend, Headcount changes, and Write-offs/Risk.
        
        Data: {context}
        """
        return self.llm.invoke([HumanMessage(content=prompt)]).content

    @classmethod
    def _pdf_safe(cls, value):
        """Return ``value`` as text that the Latin-1 core PDF fonts can encode."""
        text = str(value).translate(cls._PDF_TRANSLATION)
        # Anything still outside Latin-1 becomes "?" instead of raising.
        return text.encode("latin-1", "replace").decode("latin-1")

    def save_to_pdf(self, folder_path, report_name, summary_text, results):
        """Write the summary and a metrics table to ``<report_name>_Report.pdf``.

        Args:
            folder_path: Directory the PDF is written into.
            report_name: Used in the title and the output file name.
            summary_text: Executive summary prose.
            results: Mapping of quarter name -> mapping of metric -> value.

        Returns:
            The path of the written PDF.
        """
        pdf = FPDF()
        pdf.add_page()

        # Title
        pdf.set_font("Arial", 'B', 16)
        title = self._pdf_safe(f"Financial Analysis: {report_name}")
        pdf.cell(200, 10, txt=title, ln=True, align='C')
        pdf.ln(10)

        # Executive Summary Section
        pdf.set_font("Arial", 'B', 14)
        pdf.cell(200, 10, txt="Executive Summary", ln=True, align='L')
        pdf.set_font("Arial", size=11)
        summary = "" if summary_text is None else summary_text
        pdf.multi_cell(0, 7, txt=self._pdf_safe(summary))
        pdf.ln(10)

        # Data Table Section
        pdf.set_font("Arial", 'B', 14)
        pdf.cell(200, 10, txt="Quarterly Metrics", ln=True, align='L')
        pdf.ln(5)

        # Collect columns (quarters) and rows (metrics). Metrics are the union
        # over all quarters in first-seen order, so a key missing from the
        # first quarter is still shown; this also copes with empty results.
        quarters = sorted(results)
        rows = {
            q: results[q] if isinstance(results[q], dict) else {}
            for q in quarters
        }
        metrics = list(dict.fromkeys(m for q in quarters for m in rows[q]))

        # Header Row
        pdf.set_font("Arial", 'B', 10)
        pdf.cell(60, 10, "Metric", 1)
        for q in quarters:
            pdf.cell(30, 10, self._pdf_safe(q), 1)
        pdf.ln()

        # Data Rows
        pdf.set_font("Arial", size=10)
        for metric in metrics:
            # e.g. 'quarterly_revenue_bn' -> 'Quarterly Revenue Bn'
            clean_metric = str(metric).replace("_", " ").title()
            pdf.cell(60, 10, self._pdf_safe(clean_metric), 1)
            for q in quarters:
                pdf.cell(30, 10, self._pdf_safe(rows[q].get(metric, 0)), 1)
            pdf.ln()

        # Save
        filename = os.path.join(folder_path, f"{report_name}_Report.pdf")
        pdf.output(filename)
        return filename

In [None]:
# 4. Configuration
# The key is stripped like the other inputs: a trailing space or newline from
# a paste would otherwise be sent as part of the key and fail authentication.
GOOGLE_API_KEY = getpass.getpass("🔑 Enter your Google API Key: ").strip()
# expanduser lets a path such as ~/reports resolve to the home directory.
folder_path = os.path.expanduser(
    input("📂 Enter the folder path containing PDFs (e.g., my_financial_reports): ").strip()
)
report_name = input("📝 Enter a name for this analysis output (e.g., Bank_A_2025): ").strip()

In [None]:
# 5. Run Analysis & Generate PDF
# Defined up front so later cells can test `if results:` even when no file was
# found or no API key was entered.
results = {}

if os.path.isdir(folder_path):
    # The report produced by this cell is written into the same folder; skip
    # it so a re-run does not analyze its own output as a quarterly report.
    own_report = f"{report_name}_Report.pdf"
    # Sorted for a deterministic processing order (os.listdir order is
    # arbitrary); the extension check is case-insensitive so ".PDF" is found.
    files_to_process = sorted(
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.lower().endswith('.pdf') and f != own_report
    )
    print(f"\n✅ Found {len(files_to_process)} PDF files in '{folder_path}'")
else:
    print(f"❌ Error: Folder '{folder_path}' does not exist.")
    files_to_process = []

if files_to_process and GOOGLE_API_KEY:
    analyzer = EarningsAnalyzer(GOOGLE_API_KEY)

    print("🚀 Starting Analysis...")
    for filename in files_to_process:
        # splitext removes only the extension, whatever its case, and never
        # touches a ".pdf" that appears in the middle of the name.
        q_name = os.path.splitext(os.path.basename(filename))[0]
        print(f"   📄 Analyzing {q_name}...")

        text = analyzer.extract_text(filename)
        if text is None:
            # extract_text has already reported the read error.
            continue
        if not text:
            print(f"      ❌ Skipped {q_name}: no extractable text in the PDF.")
            continue

        try:
            raw_json = analyzer.analyze_full_report(text, q_name)
            data = analyzer.clean_json(raw_json)
        except Exception as e:
            # One failing report must not abort the remaining ones.
            print(f"      ❌ Error: {e}")
            continue

        # Downstream code calls .get() on each entry, so keep JSON objects only.
        if isinstance(data, dict) and data:
            results[q_name] = data
        else:
            print(f"      ❌ Skipped {q_name}: model response was not a JSON object.")

    if results:
        # 1. Save CSV (one row per quarter, one column per metric)
        df = pd.DataFrame(results).T
        csv_path = os.path.join(folder_path, f"{report_name}.csv")
        df.to_csv(csv_path)
        print(f"\n✅ CSV Saved: {csv_path}")

        # 2. Generate Summary
        print("   📝 Generating Summary...")
        summary_text = analyzer.generate_summary(results)

        # 3. Save PDF Report
        print("   📄 Generating PDF Report...")
        try:
            pdf_path = analyzer.save_to_pdf(folder_path, report_name, summary_text, results)
            print(f"✅ PDF Report Saved: {pdf_path}")
        except Exception as e:
            print(f"   ❌ PDF Error: {e}")

        # Display Data
        display(df)

In [None]:
# 6. Visualizations
def metric_series(results, quarters, key):
    """Return one plottable float per quarter for ``key``.

    Missing keys, ``None`` (JSON null), and values that cannot be converted
    to a number are plotted as 0.0 so a single gap does not break the chart.
    """
    series = []
    for q in quarters:
        row = results[q]
        value = row.get(key, 0) if isinstance(row, dict) else 0
        try:
            series.append(float(value))
        except (TypeError, ValueError):
            series.append(0.0)
    return series


# Tolerate running this cell when the analysis cell has not defined `results`.
results = globals().get("results") or {}

if results:
    quarters = sorted(results)
    revs = metric_series(results, quarters, 'quarterly_revenue_bn')
    headcount = metric_series(results, quarters, 'total_headcount')
    write_offs = metric_series(results, quarters, 'write_offs_millions')
    expenses = metric_series(results, quarters, 'operating_expenses_bn')

    fig, axs = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle(f'Financial Dashboard: {report_name}', fontsize=16)

    axs[0, 0].bar(quarters, revs, color='#4285F4')
    axs[0, 0].set_title('Revenue ($bn)')

    axs[0, 1].bar(quarters, expenses, color='#EA4335')
    axs[0, 1].set_title('Operating Expenses ($bn)')

    axs[1, 0].plot(quarters, headcount, marker='o', color='#34A853', linewidth=2)
    axs[1, 0].set_title('Total Headcount')
    axs[1, 0].grid(True, alpha=0.3)

    axs[1, 1].bar(quarters, write_offs, color='#FBBC05')
    axs[1, 1].set_title('Write-offs ($m)')

    # Leave room at the top for the figure-level title.
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()