In [1]:
import sys
import os
import pandas as pd
from pprint import pprint

PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

print("Project root:", PROJECT_ROOT)


Project root: c:\Users\ADMIN\Documents\My Documents\MyDataAnalysis\Financial statement analysis\financial-analysis-pipeline


In [2]:
from orchestrator.orchestrator import afap_run, ANALYSIS_PROFILES, AFAP_OUTPUT_KEYS


In [3]:
financials_df = pd.read_csv("../data/cleaned/financial_statements.csv")

financials_df.head()

Unnamed: 0,Company,Year,FS Category,FS Subcategory,Statement,Amount
0,Acme Manufacturing Ltd,2020,Assets,Current Assets,Balance Sheet,3109667
1,Acme Manufacturing Ltd,2020,Assets,Non-Current Assets,Balance Sheet,905812
2,Acme Manufacturing Ltd,2020,Equity,Equity,Balance Sheet,2152630
3,Acme Manufacturing Ltd,2020,Expenses,COGS,Income Statement,373114
4,Acme Manufacturing Ltd,2020,Expenses,Finance Costs,Income Statement,2929304


In [4]:
AIF_PROFILES = [
    "full_diagnostic",
    "solvency_focus",
    "liquidity_focus",
    "performance_focus",
    "risk_scan",
    "going_concern_screen"
]


In [5]:
def validate_output_contract(outputs: dict):
    missing = set(AFAP_OUTPUT_KEYS) - set(outputs.keys())
    assert not missing, f"Missing output keys: {missing}"


In [6]:
def validate_ai_interpretation(ai_output):
    assert isinstance(ai_output, list)
    assert len(ai_output) > 0
    for rec in ai_output:
        assert "Company" in rec
        assert "Year" in rec
        assert "interpretation" in rec


In [10]:
results_by_profile = {}

for profile_name in ANALYSIS_PROFILES.keys():
    print(f"\n--- Running profile: {profile_name} ---")

    # Run AFAP with mock AI for fast validation
    outputs = afap_run(
        financials_df=financials_df,
        analysis_profile=profile_name,
        use_mock_ai=True  # avoids long API calls
    )

    # Validate that all expected keys are present
    try:
        validate_output_contract(outputs)
        print("✅ Output contract validated successfully.")
    except AssertionError as e:
        print("❌ Output contract validation failed:", e)

    # Validate AI interpretation format (even if mocked)
    try:
        validate_ai_interpretation(outputs["ai_interpretation"])
        print("✅ AI interpretation structure validated successfully.")
    except AssertionError as e:
        print("❌ AI interpretation validation failed:", e)

    # Store results for this profile
    results_by_profile[profile_name] = outputs

print("\n✅ All profiles executed successfully (mock AI)")



--- Running profile: full_diagnostic ---
✅ ratio_engine output validated successfully.
✅ trend_engine output validated successfully.
✅ cash_flow_engine output validated successfully.
✅ anomaly_efficiency_engine output validated successfully.
✅ solvency_engine output validated successfully.
✅ Output contract validated successfully.
✅ AI interpretation structure validated successfully.

--- Running profile: solvency_focus ---
✅ ratio_engine output validated successfully.
✅ solvency_engine output validated successfully.
✅ Output contract validated successfully.
✅ AI interpretation structure validated successfully.

--- Running profile: liquidity_focus ---
✅ ratio_engine output validated successfully.
✅ trend_engine output validated successfully.
✅ cash_flow_engine output validated successfully.
✅ Output contract validated successfully.
✅ AI interpretation structure validated successfully.

--- Running profile: performance_focus ---
✅ ratio_engine output validated successfully.
✅ trend_en

In [7]:
AFAP_OUTPUT_KEYS = [
    "profile_used",
    "ratios",
    "trend",
    "cash_flow",
    "anomaly",
    "solvency",
    "composite_risk",
    "ai_interpretation"
]

# initialize outputs
outputs = {k: [] for k in AFAP_OUTPUT_KEYS if k != "profile_used"}
print(outputs)


{'ratios': [], 'trend': [], 'cash_flow': [], 'anomaly': [], 'solvency': [], 'composite_risk': [], 'ai_interpretation': []}


In [8]:
outputs["profile_used"] = "solvency_focus"
outputs["ai_interpretation"] = [{"Company": "TestCo", "Year": 2025, "interpretation": "MOCK"}]

# validate contract
missing = set(AFAP_OUTPUT_KEYS) - set(outputs.keys())
assert not missing, f"Missing output keys: {missing}"
print("Contract validation passed!")

Contract validation passed!


In [9]:
outputs = afap_run(
    financials_df=financials_df,
    analysis_profile="solvency_focus"
)

validate_output_contract(outputs)
print("Outputs keys:", outputs.keys())


✅ ratio_engine output validated successfully.
✅ solvency_engine output validated successfully.
Outputs keys: dict_keys(['ratios', 'trend', 'cash_flow', 'anomaly', 'solvency', 'composite_risk', 'ai_interpretation', 'profile_used'])


In [11]:
results_by_profile = {}

for profile_name in ANALYSIS_PROFILES.keys():
    print(f"\n--- Running profile: {profile_name} ---")

    # Run AFAP with real AI
    outputs = afap_run(
        financials_df=financials_df,
        analysis_profile=profile_name,
        use_mock_ai=False  # real LLM output
    )

    # Validate that all expected keys are present
    try:
        validate_output_contract(outputs)
        print("✅ Output contract validated successfully.")
    except AssertionError as e:
        print("❌ Output contract validation failed:", e)

    # Validate AI interpretation format
    try:
        validate_ai_interpretation(outputs["ai_interpretation"])
        print("✅ AI interpretation structure validated successfully.")
    except AssertionError as e:
        print("❌ AI interpretation validation failed:", e)

    # Optionally: print which engines actually ran
    engines_ran = [k for k in outputs.keys() if k in AFAP_OUTPUT_KEYS and outputs[k]]
    print("Engines executed:", engines_ran)

    # Store results for this profile
    results_by_profile[profile_name] = outputs

print("\n✅ All profiles executed successfully with real AI")



--- Running profile: full_diagnostic ---
✅ ratio_engine output validated successfully.
✅ trend_engine output validated successfully.
✅ cash_flow_engine output validated successfully.
✅ anomaly_efficiency_engine output validated successfully.
✅ solvency_engine output validated successfully.
✅ Output contract validated successfully.
✅ AI interpretation structure validated successfully.
Engines executed: ['ratios', 'trend', 'cash_flow', 'anomaly', 'solvency', 'composite_risk', 'ai_interpretation', 'profile_used']

--- Running profile: solvency_focus ---
✅ ratio_engine output validated successfully.
✅ solvency_engine output validated successfully.
✅ Output contract validated successfully.
✅ AI interpretation structure validated successfully.
Engines executed: ['ratios', 'solvency', 'ai_interpretation', 'profile_used']

--- Running profile: liquidity_focus ---
✅ ratio_engine output validated successfully.
✅ trend_engine output validated successfully.
✅ cash_flow_engine output validated suc

In [12]:
# Example: full_diagnostic profile
profile = "full_diagnostic"
outputs = results_by_profile[profile]

# Print the top-level keys
print("Output keys for profile:", outputs.keys())

# Print AI interpretation text for the first company/year
if outputs["ai_interpretation"]:
    print("\n--- AI Interpretation ---")
    print(outputs["ai_interpretation"][0]["interpretation"])


Output keys for profile: dict_keys(['ratios', 'trend', 'cash_flow', 'anomaly', 'solvency', 'composite_risk', 'ai_interpretation', 'profile_used'])

--- AI Interpretation ---
summary:
- This diagnostic reviews Acme Manufacturing Ltd for fiscal year 2020 using the provided ratios. The company shows strong gross and operating margins but material weaknesses in profitability after non-operating items and in coverage of interest expense. Leverage is near a conservative limit and returns are negative. The following conservative benchmark thresholds were applied for the purpose of highlighting metrics in this report: current_ratio < 1.5 (liquidity concern), quick_ratio < 1.5 (liquidity concern), debt_equity > 1.0 (elevated leverage), interest_coverage < 1.0 (insufficient earnings to cover interest), net_margin < 0 (loss), roa < 0 (negative asset returns), roe < 0 (negative equity returns). Metrics that exceed these conservative thresholds are explicitly highlighted below.
- Metrics exceeding 

# AFAP Orchestrator Validation Notebook
## Profile-by-Profile Simulation & Contract Assurance

### Purpose
This notebook validates the AFAP orchestrator across all supported
Analysis Interpretation Frameworks (AIFs).

Each profile is treated as an **independent client case**, ensuring:
- Correct engine selection
- Stable output contracts
- Profile-aware AI interpretation
- Reproducible, auditable execution

This notebook is intentionally structured to support:
- Engineering validation
- Audit defensibility
- Investor / stakeholder review


In [2]:
# ---------------------------------------------------------------
# Environment setup
# ---------------------------------------------------------------

import sys
import os
import pandas as pd
from pprint import pprint

# Ensure project root is on PYTHONPATH
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

print("Project root resolved to:", PROJECT_ROOT)


Project root resolved to: c:\Users\ADMIN\Documents\My Documents\MyDataAnalysis\Financial statement analysis\financial-analysis-pipeline


### Step 1: Import AFAP Orchestrator & Frozen Contracts

This cell imports:
- The AFAP orchestrator
- The frozen output contract
- All supported analysis profiles

These are treated as **non-negotiable system interfaces**.


In [3]:
from orchestrator.orchestrator import (
    afap_run,
    ANALYSIS_PROFILES,
    AFAP_OUTPUT_KEYS
)

pprint(ANALYSIS_PROFILES)


{'full_diagnostic': {'engines': ['ratio',
                                 'trend',
                                 'cash_flow',
                                 'anomaly',
                                 'solvency',
                                 'composite_risk'],
                     'metrics_scope': 'all'},
 'going_concern_screen': {'engines': ['ratio',
                                      'trend',
                                      'solvency',
                                      'composite_risk'],
                          'metrics_scope': 'critical_only'},
 'liquidity_focus': {'engines': ['ratio',
                                 'trend',
                                 'cash_flow',
                                 'composite_risk'],
                     'metrics_scope': ['current_ratio',
                                       'quick_ratio',
                                       'cash_ratio']},
 'performance_focus': {'engines': ['ratio', 'trend'],
                    

### Step 2: Load Financial Statements

This dataset simulates a **single recurring client**
across multiple years, allowing trend and risk analysis.


In [4]:
financials_df = pd.read_csv("../data/cleaned/financial_statements.csv")
financials_df.head()


Unnamed: 0,Company,Year,FS Category,FS Subcategory,Statement,Amount
0,Acme Manufacturing Ltd,2020,Assets,Current Assets,Balance Sheet,3109667
1,Acme Manufacturing Ltd,2020,Assets,Non-Current Assets,Balance Sheet,905812
2,Acme Manufacturing Ltd,2020,Equity,Equity,Balance Sheet,2152630
3,Acme Manufacturing Ltd,2020,Expenses,COGS,Income Statement,373114
4,Acme Manufacturing Ltd,2020,Expenses,Finance Costs,Income Statement,2929304


### Step 3: Define Validation Functions

These validators ensure:
- Output contract integrity
- AI interpretation structural consistency

They deliberately avoid business logic
and only validate **system correctness**.


In [5]:
def validate_output_contract(outputs: dict):
    """
    Ensures all required AFAP output keys are present.
    """
    missing = set(AFAP_OUTPUT_KEYS) - set(outputs.keys())
    assert not missing, f"Missing output keys: {missing}"


def validate_ai_interpretation(ai_output):
    """
    Validates AI interpretation structure (schema-level).
    """
    assert isinstance(ai_output, list)
    assert len(ai_output) > 0

    for rec in ai_output:
        assert "Company" in rec
        assert "Year" in rec
        assert "interpretation" in rec


### Step 4: Define Independent Client Analysis Cases

Each profile below represents a **realistic engagement scenario**:
- Targeted diagnostics
- Focused decision support
- Reduced computational scope

Each will be run **independently**.


In [6]:
CLIENT_CASES = [
    "full_diagnostic",
    "solvency_focus",
    "liquidity_focus",
    "performance_focus",
    "risk_scan",
    "going_concern_screen"
]


## Step 5: Execute a Single AFAP Profile (Case Simulation)

This cell is the **canonical execution pattern**.
It will be reused for every profile.

We:
1. Run the orchestrator
2. Validate the output contract
3. Validate AI interpretation
4. Inspect executed engines
5. Review sample outputs


In [7]:
# Select profile to simulate
profile_name = "solvency_focus"

print(f"\n--- Running AFAP Case: {profile_name} ---")

outputs = afap_run(
    financials_df=financials_df,
    analysis_profile=profile_name,
    use_mock_ai=False  # toggle True for fast dry-runs
)

# --- Contract validation ---
validate_output_contract(outputs)
print("✅ Output contract validated")

# --- AI validation ---
validate_ai_interpretation(outputs["ai_interpretation"])
print("✅ AI interpretation structure validated")

# --- Inspect executed engines ---
engines_ran = [
    k for k in AFAP_OUTPUT_KEYS
    if k in outputs and outputs[k]
]

print("Engines executed:", engines_ran)



--- Running AFAP Case: solvency_focus ---
✅ ratio_engine output validated successfully.
✅ solvency_engine output validated successfully.
✅ Output contract validated
✅ AI interpretation structure validated
Engines executed: ['profile_used', 'ratios', 'solvency', 'ai_interpretation']


### Step 6: Inspect AI Interpretation Output

This validates **semantic usability**, not correctness.


In [8]:
sample_interp = outputs["ai_interpretation"][0]

print("Company:", sample_interp["Company"])
print("Year:", sample_interp["Year"])
print("\n--- AI Interpretation ---\n")
print(sample_interp["interpretation"])


Company: Acme Manufacturing Ltd
Year: 2020

--- AI Interpretation ---

summary
Acme Manufacturing Ltd (2020) shows a mixed solvency profile. Liquidity ratios (current and quick) are marginally above a conservative liquidity threshold; profitability margins are uneven with high gross and operating margins but a material negative net margin; leverage is near a conservative debt/equity limit; interest coverage is severely weak; returns on assets and equity are negative. The pattern indicates operational profitability at gross and operating levels but insufficient bottom‑line income to support debt servicing and deliver positive returns to equity.

key_risks
- Interest coverage (0.2777) — exceeds conservative concern threshold: far below a conservative minimum of 3.0, indicating acute difficulty meeting interest obligations from operating earnings.
- Net margin (-1.5461) — negative, breaching a conservative floor of 0; indicates losses after all expenses and taxes.
- Return on assets (ROA 

### Step 7: Inspect Numerical Engine Outputs

This confirms:
- Ratios exist
- Engines populated data
- Schema consistency


In [9]:
if outputs.get("ratios"):
    display(pd.DataFrame(outputs["ratios"]).head())

if outputs.get("solvency"):
    display(pd.DataFrame(outputs["solvency"]).head())


Unnamed: 0,engine,Company,Year,metrics,flags,severity,explanation
0,ratio_engine,Acme Manufacturing Ltd,2020,"{'current_ratio': 1.5876421245028514, 'quick_r...",{},stable,Canonical financial ratios
1,ratio_engine,Acme Manufacturing Ltd,2021,"{'current_ratio': 3.0794864428132476, 'quick_r...",{},stable,Canonical financial ratios
2,ratio_engine,Acme Manufacturing Ltd,2022,"{'current_ratio': 3.4109333800409956, 'quick_r...",{},stable,Canonical financial ratios
3,ratio_engine,Acme Manufacturing Ltd,2023,"{'current_ratio': 4.67288599769455, 'quick_rat...",{},stable,Canonical financial ratios
4,ratio_engine,Banyan Retail Co,2020,"{'current_ratio': 2.513537113097586, 'quick_ra...",{},stable,Canonical financial ratios


Unnamed: 0,engine,Company,Year,metrics,flags,severity,explanation
0,solvency_engine,Acme Manufacturing Ltd,2020,"{'debt_equity': 0.9825237035626188, 'interest_...","{'high_leverage': False, 'weak_coverage': True}",watch,Capital structure shows solvency risk.
1,solvency_engine,Acme Manufacturing Ltd,2021,"{'debt_equity': 1.3584280927054282, 'interest_...","{'high_leverage': False, 'weak_coverage': True}",watch,Capital structure shows solvency risk.
2,solvency_engine,Acme Manufacturing Ltd,2022,"{'debt_equity': 0.6969581313037093, 'interest_...","{'high_leverage': False, 'weak_coverage': False}",stable,Solvency position acceptable.
3,solvency_engine,Acme Manufacturing Ltd,2023,"{'debt_equity': 1.4765422770808412, 'interest_...","{'high_leverage': False, 'weak_coverage': False}",stable,Solvency position acceptable.
4,solvency_engine,Banyan Retail Co,2020,"{'debt_equity': 1.0475669775251273, 'interest_...","{'high_leverage': False, 'weak_coverage': False}",stable,Solvency position acceptable.


## Step 8: Full System Validation (All Profiles)

This cell simulates **multiple client engagements**,
ensuring no profile breaks the orchestrator.


In [10]:
results_by_profile = {}

for profile_name in CLIENT_CASES:
    print(f"\n--- Executing profile: {profile_name} ---")

    outputs = afap_run(
        financials_df=financials_df,
        analysis_profile=profile_name,
        use_mock_ai=False
    )

    validate_output_contract(outputs)
    validate_ai_interpretation(outputs["ai_interpretation"])

    engines_ran = [
        k for k in AFAP_OUTPUT_KEYS
        if k in outputs and outputs[k]
    ]

    print("Engines executed:", engines_ran)

    results_by_profile[profile_name] = outputs

print("\n✅ All AFAP profiles validated successfully")



--- Executing profile: full_diagnostic ---
✅ ratio_engine output validated successfully.
✅ trend_engine output validated successfully.
✅ cash_flow_engine output validated successfully.
✅ anomaly_efficiency_engine output validated successfully.
✅ solvency_engine output validated successfully.
Engines executed: ['profile_used', 'ratios', 'trend', 'cash_flow', 'anomaly', 'solvency', 'composite_risk', 'ai_interpretation']

--- Executing profile: solvency_focus ---
✅ ratio_engine output validated successfully.
✅ solvency_engine output validated successfully.
Engines executed: ['profile_used', 'ratios', 'solvency', 'ai_interpretation']

--- Executing profile: liquidity_focus ---
✅ ratio_engine output validated successfully.
✅ trend_engine output validated successfully.
✅ cash_flow_engine output validated successfully.
Engines executed: ['profile_used', 'ratios', 'trend', 'cash_flow', 'composite_risk', 'ai_interpretation']

--- Executing profile: performance_focus ---
✅ ratio_engine output v

### Step 9: Compare AI Interpretations Across Profiles

This demonstrates **profile-aware reasoning differences**.


In [11]:
for profile, outputs in results_by_profile.items():
    interp = outputs["ai_interpretation"][0]
    print(f"\n=== {profile.upper()} ===")
    print(interp["interpretation"][:600], "...")



=== FULL_DIAGNOSTIC ===
summary:
Acme Manufacturing Ltd — 2020. The company exhibits a solvent short-term position (current_ratio 1.59, quick_ratio 1.59) and a strong gross margin (0.867). However, profitability and coverage measures raise material concerns: net margin is negative (-1.546), return on assets and equity are negative (ROA -1.083, ROE -2.021), and interest coverage is very low (0.278). Leverage is moderate-to-high (debt_equity 0.983). Operating margin is positive (0.289) indicating core operations generate profit before non-operating items and financing costs, but net loss indicates those non-operating/f ...

=== SOLVENCY_FOCUS ===
summary
Acme Manufacturing Ltd — 2020 snapshot. Liquidity appears adequate on a single-period basis (current ratio and quick ratio ≈ 1.59). Profitability and solvency indicators show material weaknesses: net profitability is negative, return measures are negative, and interest coverage is well below commonly used conservative safety limits. Lev