# Financial Metrics Calculation (Ground Truth)

Input:
- Clean pandas DataFrame from file_parser

Output:
- Dictionary of financial metrics

Rules:
- Deterministic math only
- No AI
- No risk judgement yet



In [1]:
import pandas as pd
from typing import Dict

In [2]:
REQUIRED_COLUMNS = {
    "date",
    "category",
    "description",
    "amount",
    "type"   # credit / debit
}


In [3]:
def validate_dataframe(df: pd.DataFrame):
    missing = REQUIRED_COLUMNS - set(df.columns)
    if missing:
        raise ValueError(f"Missing required columns: {missing}")


In [4]:
REQUIRED_COLUMNS = {"date", "category", "amount", "type"}

In [5]:
def validate_dataframe(df: pd.DataFrame):
    missing = REQUIRED_COLUMNS - set(df.columns)
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

In [6]:
def normalize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()

    df["amount"] = pd.to_numeric(df["amount"], errors="coerce").fillna(0)
    df["type"] = df["type"].str.lower().str.strip()
    df["category"] = df["category"].str.strip()

    return df


In [7]:
def calculate_total_revenue(df: pd.DataFrame) -> float:
    revenue_df = df[df["type"] == "credit"]
    return revenue_df["amount"].sum()


In [8]:
def calculate_total_expenses(df: pd.DataFrame) -> float:
    expense_df = df[df["type"] == "debit"]
    return expense_df["amount"].sum()


In [9]:
def calculate_net_cashflow(df: pd.DataFrame) -> float:
    revenue = calculate_total_revenue(df)
    expenses = calculate_total_expenses(df)
    return revenue - expenses


In [10]:
def category_breakdown(df: pd.DataFrame) -> Dict[str, float]:
    grouped = df.groupby("category")["amount"].sum()
    return grouped.to_dict()


In [11]:
def monthly_cashflow(df: pd.DataFrame) -> Dict[str, float]:
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df["month"] = df["date"].dt.to_period("M").astype(str)

    monthly = df.groupby("month").apply(
        lambda x: x[x["type"] == "credit"]["amount"].sum()
                - x[x["type"] == "debit"]["amount"].sum()
    )

    return monthly.to_dict()


In [12]:
def compute_financial_metrics(df: pd.DataFrame) -> Dict:
    validate_dataframe(df)
    df = normalize_dataframe(df)

    metrics = {
        "total_revenue": calculate_total_revenue(df),
        "total_expenses": calculate_total_expenses(df),
        "net_cashflow": calculate_net_cashflow(df),
        "category_breakdown": category_breakdown(df),
        "monthly_cashflow": monthly_cashflow(df)
    }

    return metrics


In [13]:
#test code 
import os
def parse_csv(file_path: str) -> pd.DataFrame:
    """
    Parse CSV financial data into a DataFrame
    """
    try:
        df = pd.read_csv(file_path)
        return df
    except Exception as e:
        raise ValueError(f"CSV parsing failed: {e}")
def parse_file(file_path: str) -> pd.DataFrame:
    """
    Detect file type and parse accordingly
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError("File does not exist")

    extension = file_path.split(".")[-1].lower()

    if extension == "csv":
        return parse_csv(file_path)


    else:
        raise ValueError("Unsupported file format")

# Using your already tested CSV
df = parse_file(r"E:\financial-health-ai\notebook\sample_finance.csv")

metrics = compute_financial_metrics(df)
metrics


{'total_revenue': np.float64(9000.0),
 'total_expenses': np.float64(1500.0),
 'net_cashflow': np.float64(7500.0),
 'category_breakdown': {'Expenses': 1500.0,
  'Receivables': 1500.0,
  'Revenue': 7500.0},
 'monthly_cashflow': {'2024-01': 7500.0}}

# STEP 3: Financial Risk Engine (Rule-Based)

Purpose:
- Convert calculated financial metrics into understandable risk signals
- Evaluate credit risk, cash flow risk, and financial stability

Key Principles:
- 100% deterministic logic
- No AI, no guessing, no hallucination
- Transparent rules (judge-friendly)
- Same logic works across industries

Input:
- Output dictionary from compute_financial_metrics()

Output:
- Risk levels + explanations (machine-readable)


In [14]:
RISK_THRESHOLDS = {
    "profit_margin": {
        "high": 0.20,
        "medium": 0.10
    },
    "cashflow": {
        "negative": 0
    },
    "expense_ratio": {
        "high": 0.80,
        "medium": 0.60
    }
}


In [15]:
#Profitability Risk Assessment
def assess_profitability_risk(total_revenue: float, total_expenses: float):
    if total_revenue == 0:
        return {
            "level": "High",
            "reason": "No revenue recorded"
        }

    profit_margin = (total_revenue - total_expenses) / total_revenue

    if profit_margin < RISK_THRESHOLDS["profit_margin"]["medium"]:
        level = "High"
    elif profit_margin < RISK_THRESHOLDS["profit_margin"]["high"]:
        level = "Medium"
    else:
        level = "Low"

    return {
        "level": level,
        "profit_margin": round(profit_margin, 2),
        "reason": f"Profit margin at {round(profit_margin * 100, 1)}%"
    }


In [16]:
#Cash Flow Risk Assessment
def assess_cashflow_risk(net_cashflow: float):
    if net_cashflow < RISK_THRESHOLDS["cashflow"]["negative"]:
        return {
            "level": "High",
            "reason": "Negative cash flow"
        }
    elif net_cashflow == 0:
        return {
            "level": "Medium",
            "reason": "Break-even cash flow"
        }
    else:
        return {
            "level": "Low",
            "reason": "Positive cash flow"
        }


In [17]:
#Expense Load Risk
def assess_expense_risk(total_revenue: float, total_expenses: float):
    if total_revenue == 0:
        return {
            "level": "High",
            "reason": "Expenses without revenue"
        }

    expense_ratio = total_expenses / total_revenue

    if expense_ratio > RISK_THRESHOLDS["expense_ratio"]["high"]:
        level = "High"
    elif expense_ratio > RISK_THRESHOLDS["expense_ratio"]["medium"]:
        level = "Medium"
    else:
        level = "Low"

    return {
        "level": level,
        "expense_ratio": round(expense_ratio, 2),
        "reason": f"Expenses are {round(expense_ratio * 100, 1)}% of revenue"
    }


In [18]:
def aggregate_risk_levels(risks: dict):
    levels = [r["level"] for r in risks.values()]

    if "High" in levels:
        return "High"
    elif "Medium" in levels:
        return "Medium"
    else:
        return "Low"


In [19]:
def evaluate_financial_risk(metrics: dict) -> dict:
    profitability_risk = assess_profitability_risk(
        metrics["total_revenue"],
        metrics["total_expenses"]
    )

    cashflow_risk = assess_cashflow_risk(
        metrics["net_cashflow"]
    )

    expense_risk = assess_expense_risk(
        metrics["total_revenue"],
        metrics["total_expenses"]
    )

    risks = {
        "profitability": profitability_risk,
        "cashflow": cashflow_risk,
        "expense_load": expense_risk
    }

    overall_risk = aggregate_risk_levels(risks)

    return {
        "overall_risk": overall_risk,
        "risk_breakdown": risks
    }


In [20]:
#test code 
# Step 1: Parse file
df = parse_file(r"E:\financial-health-ai\notebook\sample_finance.csv")

# Step 2: Compute metrics
metrics = compute_financial_metrics(df)

# Step 3: Evaluate risk
risk_report = evaluate_financial_risk(metrics)

risk_report


{'overall_risk': 'Low',
 'risk_breakdown': {'profitability': {'level': 'Low',
   'profit_margin': np.float64(0.83),
   'reason': 'Profit margin at 83.3%'},
  'cashflow': {'level': 'Low', 'reason': 'Positive cash flow'},
  'expense_load': {'level': 'Low',
   'expense_ratio': np.float64(0.17),
   'reason': 'Expenses are 16.7% of revenue'}}}

## STEP 4: AI Financial Analyst (Narrative & Recommendations)

In this step, we introduce AI in a **controlled and safe way**.

Key principles:
- AI does NOT compute numbers
- AI does NOT change data
- AI only explains, summarizes, and recommends
- Deterministic logic (metrics + risk engine) remains the source of truth

Input to AI:
- Computed financial metrics
- Detected risk flags
- High-level business context

Output from AI:
- Plain-English financial summary
- Risk explanations
- Actionable recommendations for SME owners


In [30]:
from typing import TypedDict, List
from langgraph.graph import StateGraph, START, END
from langchain.chat_models import init_chat_model
from langchain.messages import SystemMessage, HumanMessage
import os
from dotenv import load_dotenv

# -----------------------------
# Load environment variables
# -----------------------------
load_dotenv()

llm = init_chat_model(
    "google_genai:gemini-2.5-flash",
    api_key=os.getenv("GOOGLE_API_KEY"),
    max_output_tokens=400,   # üëà keep each node small
)

# -----------------------------
# Graph State
# -----------------------------
class FinancialAIState(TypedDict):
    metrics_context: str
    risk_context: str
    health_summary: str
    risk_explanation: str
    suggestions: str
    final_report: str


# -----------------------------
# Node 1: Health Summary
# -----------------------------
def health_summary_node(state: FinancialAIState):
    prompt = (
        "You are a financial analyst for Indian SMEs.\n"
        "Explain the overall financial health clearly and simply.\n\n"
        f"{state['metrics_context']}"
    )

    response = llm.invoke([
        SystemMessage(content="Explain only what is visible in the data."),
        HumanMessage(content=prompt)
    ])

    return {"health_summary": response.content}


# -----------------------------
# Node 2: Risk Explanation
# -----------------------------
def risk_explanation_node(state: FinancialAIState):
    prompt = (
        "Explain the following financial risks in simple language.\n"
        "Do not introduce new risks.\n\n"
        f"{state['risk_context']}"
    )

    response = llm.invoke([
        SystemMessage(content="Explain risks conservatively and clearly."),
        HumanMessage(content=prompt)
    ])

    return {"risk_explanation": response.content}


# -----------------------------
# Node 3: Improvement Suggestions
# -----------------------------
def suggestions_node(state: FinancialAIState):
    prompt = (
        "Based on the metrics and risks below, suggest practical improvements.\n"
        "Focus on cash flow, cost control, and credit readiness.\n\n"
        f"{state['metrics_context']}\n\n{state['risk_context']}"
    )

    response = llm.invoke([
        SystemMessage(content="Give realistic, SME-friendly advice."),
        HumanMessage(content=prompt)
    ])

    return {"suggestions": response.content}


# -----------------------------
# Node 4: Final Compiler (NO analysis)
# -----------------------------
def final_compiler_node(state: FinancialAIState):
    final_text = f"""
üìä OVERALL FINANCIAL HEALTH
{state['health_summary']}

‚ö†Ô∏è RISK ANALYSIS
{state['risk_explanation']}

‚úÖ IMPROVEMENT RECOMMENDATIONS
{state['suggestions']}
"""
    return {"final_report": final_text.strip()}


# -----------------------------
# Build LangGraph
# -----------------------------
graph = StateGraph(FinancialAIState)

graph.add_node("health_summary", health_summary_node)
graph.add_node("risk_explanation", risk_explanation_node)
graph.add_node("suggestions", suggestions_node)
graph.add_node("final_compiler", final_compiler_node)

# Parallel execution
graph.add_edge(START, "health_summary")
graph.add_edge(START, "risk_explanation")
graph.add_edge(START, "suggestions")

graph.add_edge("health_summary", "final_compiler")
graph.add_edge("risk_explanation", "final_compiler")
graph.add_edge("suggestions", "final_compiler")

graph.add_edge("final_compiler", END)

workflow = graph.compile()


In [31]:
state_input = {
    "metrics_context": build_ai_context(clean_metrics, []),
    "risk_context": "\n".join(risk_flags),
    "health_summary": "",
    "risk_explanation": "",
    "suggestions": "",
    "final_report": ""
}

final_state = workflow.invoke(state_input)

print("\n===== AI FINANCIAL SUMMARY =====\n")
print(final_state["final_report"])



===== AI FINANCIAL SUMMARY =====

üìä OVERALL FINANCIAL HEALTH
Based on the data provided:

The SME shows a **strong financial position

‚ö†Ô∏è RISK ANALYSIS
Here's an explanation of those financial risks in simple, clear, and

‚úÖ IMPROVEMENT RECOMMENDATIONS
This is an excellent starting point! Your business is showing strong financial health with high


In [32]:
# =============================
# STEP 4: LANGGRAPH AI TEST
# =============================

# Step 1: Parse file (already implemented)
df = parse_file(r"E:\financial-health-ai\notebook\sample_finance.csv")

# Step 2: Compute metrics
metrics = compute_financial_metrics(df)

# Step 3: Evaluate risk
risk_report = evaluate_financial_risk(metrics)

# -----------------------------
# Prepare AI-safe inputs
# -----------------------------
clean_metrics = normalize_metrics(metrics)
risk_flags = extract_risk_flags(risk_report)

metrics_context = build_ai_context(clean_metrics, [])
risk_context = "\n".join(risk_flags)

# -----------------------------
# DEBUG: Verify deterministic outputs
# -----------------------------
print("\n================ DEBUG =================")
print("RAW METRICS DICT:\n", metrics)
print("\nRISK REPORT:\n", risk_report)

print("\n================ AI INPUT =================")
print("METRICS CONTEXT:\n", metrics_context)
print("\nRISK CONTEXT:\n", risk_context)
print("=========================================\n")

# -----------------------------
# LangGraph State Input
# -----------------------------
state_input = {
    "metrics_context": metrics_context,
    "risk_context": risk_context,
    "health_summary": "",
    "risk_explanation": "",
    "suggestions": "",
    "final_report": ""
}

# -----------------------------
# Run LangGraph
# -----------------------------
final_state = workflow.invoke(state_input)

# -----------------------------
# DEBUG: Node-level outputs
# -----------------------------
print("\n=========== NODE OUTPUTS ===========")
print("\nHEALTH SUMMARY:\n", final_state["health_summary"])
print("\nRISK EXPLANATION:\n", final_state["risk_explanation"])
print("\nSUGGESTIONS:\n", final_state["suggestions"])
print("===================================")

# -----------------------------
# Final AI Report
# -----------------------------
print("\n=========== FINAL REPORT ===========\n")
print(final_state["final_report"])



RAW METRICS DICT:
 {'total_revenue': np.float64(9000.0), 'total_expenses': np.float64(1500.0), 'net_cashflow': np.float64(7500.0), 'category_breakdown': {'Expenses': 1500.0, 'Receivables': 1500.0, 'Revenue': 7500.0}, 'monthly_cashflow': {'2024-01': 7500.0}}

RISK REPORT:
 {'overall_risk': 'Low', 'risk_breakdown': {'profitability': {'level': 'Low', 'profit_margin': np.float64(0.83), 'reason': 'Profit margin at 83.3%'}, 'cashflow': {'level': 'Low', 'reason': 'Positive cash flow'}, 'expense_load': {'level': 'Low', 'expense_ratio': np.float64(0.17), 'reason': 'Expenses are 16.7% of revenue'}}}

METRICS CONTEXT:
 FINANCIAL METRICS:
- total_revenue: 9000.0
- total_expenses: 1500.0
- net_cashflow: 7500.0
- category_breakdown: {'Expenses': 1500.0, 'Receivables': 1500.0, 'Revenue': 7500.0}
- monthly_cashflow: {'2024-01': 7500.0}

DETECTED RISK FLAGS:
- No major risks detected

RISK CONTEXT:
 Profitability risk: Low (Profit margin at 83.3%)
Cashflow risk: Low (Positive cash flow)
Expense_load r