In [None]:
# Feature Engineering
import numpy as np
import pandas as pd
from typing import Any, Dict, List, NamedTuple, Optional, Union
feature_frame = globals().get("feature_frame", pd.DataFrame())
alerts_frame = globals().get("alerts_frame", pd.DataFrame(columns=["customer_id", "rule", "severity", "details"]))
master_frame = globals().get("master_frame")

def _build_sample_master_frame() -> pd.DataFrame:
    return pd.DataFrame({
        "customer_id": ["CUST001", "CUST002", "CUST003"],
        "date": ["2024-01-01", "2024-01-01", "2024-01-01"],
        "balance": [100000, 50000, 25000],
        "credit_limit": [150000, 75000, 30000],
        "dpd": [0, 45, 95],
        "product_code": ["CC", "PL", "CC"],
        "origination_date": ["2023-01-01", "2023-06-01", "2023-12-01"],
        "industry": ["Technology", "Manufacturing", "Government"],
        "kam_owner": ["KAM001", "KAM002", "KAM001"]
    })
if master_frame is None or getattr(master_frame, "empty", True):
    master_frame = _build_sample_master_frame()
    print("Created sample master_frame with", len(master_frame), "records")

DELINQUENCY_BUCKETS: List[float] = [-np.inf, 0, 30, 60, 90, 120, np.inf]
DELINQUENCY_LABELS = ["current", "1_30", "31_60", "61_90", "91_120", "120_plus"]
SEGMENT_LABELS = list("ABCDEF")

class FeatureArtifacts(NamedTuple):
    features: pd.DataFrame
    alerts: pd.DataFrame

class FeatureEngineer:
    def __init__(self, reference_date: Optional[pd.Timestamp] = None) -> None:
        self.reference_date = reference_date or pd.Timestamp.utcnow().normalize()

    def _derive_customer_type(self, frame: pd.DataFrame) -> pd.Series:
        if "customer_type" in frame.columns:
            return frame["customer_type"].fillna("unspecified").astype(str)
        balance = frame.get("balance")
        if balance is None:
            balance = pd.Series(0, index=frame.index)
        elif not isinstance(balance, pd.Series):
            balance = pd.Series(balance, index=frame.index)
        balance = balance.fillna(0)
        exposure = frame.get("credit_limit")
        if exposure is None:
            exposure = balance.clip(lower=1)
        elif not isinstance(exposure, pd.Series):
            exposure = pd.Series(exposure, index=frame.index)
        exposure = exposure.where(exposure.notna() & (exposure != 0), balance.clip(lower=1))
        ratio = balance / exposure.replace({0: np.nan})
        derived = np.where(
            balance >= 5_000_000,
            "enterprise",
            np.where(balance >= 500_000, "corporate", np.where(balance >= 50_000, "sme", "micro"))
        )
        derived = np.where(ratio >= 0.9, "intensive", derived)
        return pd.Series(derived, index=frame.index)

    def _segmentation(self, frame: pd.DataFrame) -> pd.Series:
        try:
            unique = frame["balance"].nunique()
            buckets = min(6, unique)
            return pd.qcut(frame["balance"], q=buckets, labels=SEGMENT_LABELS[: buckets], duplicates="drop").astype(str)
        except Exception:
            return pd.Series(["A"] * len(frame), index=frame.index)

    def _delinquency_bucket(self, frame: pd.DataFrame) -> pd.Series:
        if "dpd" in frame.columns:
            dpd_source = frame["dpd"]
        else:
            dpd_source = frame.get("days_past_due", pd.Series(0, index=frame.index))
        if not isinstance(dpd_source, pd.Series):
            dpd_source = pd.Series(dpd_source, index=frame.index)
        dpd_series = pd.to_numeric(dpd_source, errors="coerce").fillna(0)
        return pd.cut(dpd_series, bins=DELINQUENCY_BUCKETS, labels=DELINQUENCY_LABELS, right=True)

    def transform(self, frame: pd.DataFrame) -> FeatureArtifacts:
        if frame.empty:
            empty_alerts = pd.DataFrame(columns=["customer_id", "rule", "severity", "details"])
            return FeatureArtifacts(frame.copy(), empty_alerts)
        prepared = self._prepare_base_features(frame.copy())
        enriched = self._compute_financial_metrics(prepared)
        alerts = self._collect_alerts(enriched)
        return FeatureArtifacts(features=enriched.reset_index(drop=True), alerts=alerts)

    def _prepare_base_features(self, features: pd.DataFrame) -> pd.DataFrame:
        features["date"] = pd.to_datetime(features["date"], errors="coerce", utc=True)
        features["customer_type"] = self._derive_customer_type(features)
        features["segment_code"] = self._segmentation(features)
        features["delinquency_bucket"] = self._delinquency_bucket(features).astype(str)
        features["dpd"] = self._normalize_dpd(features)
        return features

    def _compute_financial_metrics(self, features: pd.DataFrame) -> pd.DataFrame:
        balance_clip = features["balance"].clip(lower=1)
        credit_limit_source = features.get("credit_limit")
        credit_limit_series = self._normalize_series(credit_limit_source, features.index, np.nan)
        credit_limit_series = pd.to_numeric(credit_limit_series, errors="coerce").where(
            lambda s: s.notna() & (s != 0),
            balance_clip
        )
        utilization = features["balance"] / credit_limit_series
        features["utilization_ratio"] = utilization.replace([np.inf, -np.inf], np.nan).clip(upper=5).fillna(0)
        features["apr"] = self._prepare_apr(features)
        balance_share = features.groupby("customer_id")["balance"].transform(
            lambda values: values / values.sum()
        ).fillna(0.0)
        features["weighted_apr"] = balance_share * features["apr"]
        zscore = (features["balance"] - features["balance"].mean()) / features["balance"].std(ddof=0)
        features["balance_zscore"] = zscore.fillna(0).clip(-3, 3)
        features["industry"] = self._normalize_series(
            features.get("industry"),
            features.index,
            "unspecified"
        ).fillna("unspecified")
        features["kam_owner"] = self._normalize_series(
            features.get("kam_owner"),
            features.index,
            "unassigned"
        ).fillna("unassigned")
        industry_lower = features["industry"].str.lower()
        features["b2g_flag"] = industry_lower.str.contains("government|public").fillna(False).astype(int)
        origination_source = features.get("origination_date", features["date"])
        days_open = (self.reference_date - pd.to_datetime(origination_source, utc=True)).dt.days
        features["days_since_origination"] = days_open.clip(lower=0).fillna(0).astype(int)
        features["roll_rate_key"] = features["customer_id"].astype(str) + "_" + features["product_code"].astype(str)
        features = features.sort_values(["roll_rate_key", "date"])
        features["prev_dpd"] = features.groupby("roll_rate_key")["dpd"].shift(1).fillna(0)
        features["roll_rate_delta"] = features["dpd"] - features["prev_dpd"]
        features["roll_rate_direction"] = np.select(
            [features["roll_rate_delta"] > 0, features["roll_rate_delta"] < 0],
            ["deteriorating", "improving"],
            default="stable"
        )
        features["alert_usury_micro"] = ((features["customer_type"] == "micro") & (features["apr"] > 0.85)).astype(int)
        features["alert_high_utilization"] = (features["utilization_ratio"] > 0.95).astype(int)
        features["alert_high_dpd"] = (features["dpd"] >= 90).astype(int)
        features["alert_pdf_gap"] = 0
        return features

    def _collect_alerts(self, features: pd.DataFrame) -> pd.DataFrame:
        alerts_records: List[Dict[str, Any]] = []
        alert_columns = {
            "alert_usury_micro": "critical",
            "alert_high_utilization": "high",
            "alert_high_dpd": "critical",
            "alert_pdf_gap": "medium"
        }
        for alert_col, severity in alert_columns.items():
            flagged = features[features[alert_col] == 1]
            for _, row in flagged.iterrows():
                alerts_records.append({
                    "customer_id": row.get("customer_id"),
                    "rule": alert_col,
                    "severity": severity,
                    "details": f"DPD={row.get('dpd')}|Util={row.get('utilization_ratio'):.2f}"
                })
        if alerts_records:
            return pd.DataFrame(alerts_records)
        return pd.DataFrame(columns=["customer_id", "rule", "severity", "details"])

    def _normalize_series(self, source: Any, index: pd.Index, default: Any) -> pd.Series:
        if source is None:
            return pd.Series(default, index=index)
        if isinstance(source, pd.Series):
            return source
        return pd.Series(source, index=index)

    def _normalize_dpd(self, features: pd.DataFrame) -> pd.Series:
        dpd_source = features.get("dpd")
        if dpd_source is None:
            dpd_source = features.get("days_past_due")
        normalized = self._normalize_series(dpd_source, features.index, 0)
        return pd.to_numeric(normalized, errors="coerce").fillna(0).astype(int)

    def _prepare_apr(self, features: pd.DataFrame) -> pd.Series:
        apr_source = features.get("apr") if "apr" in features.columns else features.get("nominal_rate")
        apr_series = pd.to_numeric(self._normalize_series(apr_source, features.index, np.nan), errors="coerce")
        apr_median = apr_series.median(skipna=True)
        if pd.isna(apr_median):
            apr_median = 0.0
        return apr_series.fillna(apr_median).astype(float)

feature_engineer = FeatureEngineer()
feature_artifacts = feature_engineer.transform(master_frame)
feature_frame = feature_artifacts.features
alerts_frame = feature_artifacts.alerts

  return np.nanmean(a, axis, out=out, keepdims=keepdims)


In [8]:
# KPI Calculation Engine
import numpy as np
import pandas as pd
from typing import Any, Dict, Union
feature_frame = globals().get("feature_frame", pd.DataFrame())
alerts_frame = globals().get("alerts_frame", pd.DataFrame(columns=["customer_id", "rule", "severity", "details"]))

class KPIEngine:
    def __init__(self, frame: pd.DataFrame) -> None:
        self.frame = frame.copy()
        if not self.frame.empty:
            self.frame["date"] = pd.to_datetime(self.frame["date"], utc=True)
            self.frame["month"] = self.frame["date"].dt.to_period("M").dt.to_timestamp()

    def _ratio(self, numerator: pd.Series, denominator: pd.Series) -> float:
        denom = denominator.sum()
        if denom == 0:
            return float("nan")
        return numerator.sum() / denom

    def compute(self) -> Dict[str, Any]:
        if self.frame.empty:
            return {}

        result: Dict[str, Any] = {}
        current_frame = self.frame.copy()

        result["aum"] = current_frame["balance"].sum()
        result["active_clients"] = current_frame["customer_id"].nunique()
        result["credit_lines"] = current_frame.get("credit_limit", pd.Series(0, index=current_frame.index)).sum()

        churn_mask = current_frame.get("status", pd.Series("active", index=current_frame.index)).str.lower().eq("churned")
        result["churn_rate"] = churn_mask.mean()

        default_mask = current_frame.get("default_flag", pd.Series(0, index=current_frame.index)).astype(int)
        result["default_rate"] = default_mask.mean()

        dpd_group = current_frame.groupby("delinquency_bucket")["balance"].sum().rename("aum")
        result["dpd_buckets"] = dpd_group

        result["rotation"] = self._ratio(
            current_frame.get("payments", pd.Series(0, index=current_frame.index)),
            current_frame.get("balance", pd.Series(0, index=current_frame.index))
        )

        result["weighted_apr"] = current_frame["weighted_apr"].mean()

        result["revenue"] = current_frame.get("interest_income", pd.Series(0, index=current_frame.index)).sum()
        result["ebitda"] = current_frame.get("ebitda", pd.Series(0, index=current_frame.index)).sum()

        result["concentration_top10"] = (
            current_frame.groupby("customer_id")["balance"].sum().nlargest(10).sum() / result["aum"]
            if result["aum"]
            else float("nan")
        )

        ltv = current_frame.get("ltv", pd.Series(0, index=current_frame.index))
        cac = current_frame.get("cac", pd.Series(np.nan, index=current_frame.index))
        current_frame["ltv_cac_ratio"] = np.where(cac.fillna(0) == 0, np.nan, ltv / cac)

        channel_col = next((col for col in ("channel", "source_name") if col in current_frame.columns), None)
        if channel_col:
            result["ltv_cac_by_segment"] = current_frame.groupby(["segment_code", channel_col]).ltv_cac_ratio.mean()
        else:
            result["ltv_cac_by_segment"] = current_frame.groupby(["segment_code"]).ltv_cac_ratio.mean()

        result["nrr"] = self._ratio(
            current_frame.get("recurring_revenue", pd.Series(0, index=current_frame.index)),
            current_frame.get("starting_revenue", pd.Series(1, index=current_frame.index))
        )

        result["nsm"] = current_frame.get("north_star_metric", pd.Series(0, index=current_frame.index)).mean()

        result["penetration"] = self._ratio(
            current_frame.get("active_products", pd.Series(0, index=current_frame.index)),
            current_frame.get("available_products", pd.Series(1, index=current_frame.index))
        )

        result["b2g_percent"] = current_frame["b2g_flag"].mean()

        status_column = current_frame.get("status", pd.Series("active", index=current_frame.index)).str.lower()
        result["new_recurrent_recovered"] = status_column.value_counts(dropna=False)

        group_cols = ["industry", "kam_owner", "segment_code", "customer_type"]
        aggregation = current_frame.groupby(group_cols)["balance"].sum().rename("aum")
        result["aum_by_group"] = aggregation

        behavior_mask = (current_frame["customer_type"] == "micro") & (current_frame["apr"] > 0.85)
        result["usury_micro_share"] = behavior_mask.mean()

        result["pod"] = current_frame.get("probability_of_default", pd.Series(np.nan, index=current_frame.index)).mean()

        if not alerts_frame.empty:
            result["alerts_active"] = alerts_frame.groupby("severity").size()

        return result

kpi_engine = KPIEngine(feature_frame)
kpi_summary = kpi_engine.compute()

  self.frame["month"] = self.frame["date"].dt.to_period("M").dt.to_timestamp()


In [9]:
# Marketing & Sales Analysis
import pandas as pd
from typing import Dict, List
feature_frame = globals().get("feature_frame", pd.DataFrame())

def marketing_sales_breakdown(frame: pd.DataFrame) -> Dict[str, pd.DataFrame]:
    if frame.empty:
        return {}

    aggregations: Dict[str, pd.DataFrame] = {}
    group_fields: Dict[str, List[str]] = {
        "industry": ["industry"],
        "kam": ["kam_owner"]
    }
    channel_columns = [column for column in ("channel", "source_name") if column in frame.columns]
    if channel_columns:
        group_fields["channel"] = channel_columns

    for label, fields in group_fields.items():
        grouped = frame.groupby(fields, dropna=False).agg(
            aum=("balance", "sum"),
            clients=("customer_id", "nunique"),
            weighted_apr=("weighted_apr", "mean"),
            ltv_cac=("ltv_cac_ratio", "mean")
        ).reset_index()
        aggregations[label] = grouped

    return aggregations

marketing_sales_tables = marketing_sales_breakdown(feature_frame)
treemap_ready = marketing_sales_tables.get("industry") if marketing_sales_tables else pd.DataFrame()

KeyError: "Column(s) ['ltv_cac_ratio'] do not exist"

In [None]:
# Data Quality Audit
import numpy as np
import pandas as pd
from typing import Any, Dict, List, TYPE_CHECKING
try:
    import pdfplumber  # type: ignore[import-not-found]
except ModuleNotFoundError:
    pdfplumber = None
if TYPE_CHECKING:
    import pdfplumber as _pdfplumber_stub
feature_frame = globals().get("feature_frame", pd.DataFrame())

CRITICAL_COLUMNS = {"customer_id", "date", "balance", "dpd"}

def data_quality_audit(frame: pd.DataFrame) -> Dict[str, Any]:
    if frame.empty:
        return {"score": np.nan, "table": pd.DataFrame(), "styled": None, "pdf_completeness": 0.0}

    total_rows = len(frame)
    audit_records: List[Dict[str, Any]] = []
    penalties = 0.0

    for column in frame.columns:
        nulls = frame[column].isna().sum()
        zeros = (frame[column] == 0).sum() if pd.api.types.is_numeric_dtype(frame[column]) else np.nan
        coverage = 1 - (nulls / total_rows) if total_rows else np.nan
        if column in CRITICAL_COLUMNS and coverage < 0.9:
            penalties += 0.1
        audit_records.append(
            dict(column=column, nulls=int(nulls), zeros=int(zeros) if not pd.isna(zeros) else np.nan, coverage=coverage)
        )

    audit_table = pd.DataFrame(audit_records)
    coverage_mean = audit_table["coverage"].mean()
    quality_score = max(0.0, min(1.0, (coverage_mean if not pd.isna(coverage_mean) else 0.0) - penalties))

    def _color(value: float) -> str:
        if pd.isna(value):
            return "color: #E6E6EF; background-color: #3730A3"
        if value >= 0.95:
            return "color: #05101a; background-color: #22E7CC"
        if value >= 0.85:
            return "color: #F5F3FF; background-color: #2563EB"
        return "color: #F5F3FF; background-color: #B91C1C"

    styler = audit_table.style.format({"coverage": "{:.2%}"})
    apply_map = getattr(styler, "applymap", None)
    styled = apply_map(_color, subset=["coverage"]) if callable(apply_map) else styler

    pdf_completeness = 1.0 if pdfplumber else 0.0

    return dict(score=quality_score, table=audit_table, styled=styled, pdf_completeness=pdf_completeness)
quality_artifacts = data_quality_audit(feature_frame)
quality_score = quality_artifacts.get("score")
quality_table = quality_artifacts.get("table")
quality_styled = quality_artifacts.get("styled")

print(f"Data Quality Score: {quality_score:.2%}" if not pd.isna(quality_score) else "Data Quality Score: N/A")
print(f"PDF Completeness: {quality_artifacts.get('pdf_completeness', 0.0):.1%}")

if quality_styled is not None:
    display(quality_styled)
else:
    print("No data quality table to display - feature_frame is empty")

Data Quality Score: 100.00%
PDF Completeness: 0.0%


  styled = apply_map(_color, subset=["coverage"]) if callable(apply_map) else styler


Unnamed: 0,column,nulls,zeros,coverage
0,customer_id,0,,100.00%
1,date,0,,100.00%
2,balance,0,0.0,100.00%
3,credit_limit,0,0.0,100.00%
4,dpd,0,1.0,100.00%
5,product_code,0,,100.00%
6,origination_date,0,,100.00%
7,industry,0,,100.00%
8,kam_owner,0,,100.00%
9,customer_type,0,,100.00%


In [None]:
# Financial Intelligence Visualization Engine
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, Any, Optional
import warnings
warnings.filterwarnings('ignore')

# Set style for better visualizations
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'default')
sns.set_palette("husl")

class FinancialVisualizer:
    def __init__(self, figsize: tuple = (12, 8)) -> None:
        self.figsize = figsize
        
    def create_dashboard(self, kpi_data: Dict[str, Any], feature_data: pd.DataFrame) -> None:
        """Create comprehensive financial dashboard"""
        if not kpi_data or feature_data.empty:
            print("No data available for visualization")
            return
            
        fig, axes = plt.subplots(2, 3, figsize=(18, 12))
        fig.suptitle('Abaco Financial Intelligence Dashboard', fontsize=16, fontweight='bold')
        
        # 1. Portfolio Distribution by Delinquency
        if 'dpd_buckets' in kpi_data and not kpi_data['dpd_buckets'].empty:
            axes[0, 0].pie(kpi_data['dpd_buckets'].values, 
                          labels=kpi_data['dpd_buckets'].index,
                          autopct='%1.1f%%', startangle=90)
            axes[0, 0].set_title('Portfolio by Delinquency Buckets')
        
        # 2. Customer Segmentation
        if 'customer_type' in feature_data.columns:
            segment_counts = feature_data['customer_type'].value_counts()
            axes[0, 1].bar(segment_counts.index, segment_counts.values)
            axes[0, 1].set_title('Customer Distribution by Type')
            axes[0, 1].tick_params(axis='x', rotation=45)
        
        # 3. Industry Analysis  
        if 'industry' in feature_data.columns:
            industry_aum = feature_data.groupby('industry')['balance'].sum().sort_values(ascending=False)
            axes[0, 2].barh(industry_aum.index, industry_aum.values / 1e6)
            axes[0, 2].set_title('AUM by Industry (Millions)')
            
        # 4. Risk Heatmap
        if 'utilization_ratio' in feature_data.columns and 'dpd' in feature_data.columns:
            risk_matrix = pd.crosstab(
                pd.cut(feature_data['utilization_ratio'], bins=5), 
                pd.cut(feature_data['dpd'], bins=5),
                values=feature_data['balance'], 
                aggfunc='sum', 
                normalize='columns'
            )
            sns.heatmap(risk_matrix.fillna(0), annot=True, fmt='.2f', ax=axes[1, 0], cmap='YlOrRd')
            axes[1, 0].set_title('Risk Heatmap: Utilization vs DPD')
            
        # 5. Alert Summary
        alert_counts = alerts_frame['severity'].value_counts() if not alerts_frame.empty else pd.Series()
        if not alert_counts.empty:
            colors = {'critical': 'red', 'high': 'orange', 'medium': 'yellow', 'low': 'green'}
            alert_colors = [colors.get(x, 'gray') for x in alert_counts.index]
            axes[1, 1].bar(alert_counts.index, alert_counts.values, color=alert_colors)
            axes[1, 1].set_title('Active Alerts by Severity')
        
        # 6. KPI Summary
        key_metrics = {
            'AUM (M)': kpi_data.get('aum', 0) / 1e6,
            'Clients': kpi_data.get('active_clients', 0),
            'Default Rate': kpi_data.get('default_rate', 0) * 100,
            'Weighted APR': kpi_data.get('weighted_apr', 0) * 100
        }
        
        metrics_df = pd.DataFrame(list(key_metrics.items()), columns=['Metric', 'Value'])
        axes[1, 2].axis('tight')
        axes[1, 2].axis('off')
        table = axes[1, 2].table(cellText=[[f"{k}", f"{v:.2f}"] for k, v in key_metrics.items()],
                                colLabels=['KPI', 'Value'],
                                cellLoc='center',
                                loc='center')
        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.scale(1, 2)
        axes[1, 2].set_title('Key Performance Indicators')
        
        plt.tight_layout()
        plt.show()
    
    def plot_trend_analysis(self, data: pd.DataFrame) -> None:
        """Plot trend analysis if date columns available"""
        if data.empty or 'date' not in data.columns:
            print("No temporal data available for trend analysis")
            return
            
        # Convert date and create monthly aggregation
        data_copy = data.copy()
        data_copy['date'] = pd.to_datetime(data_copy['date'])
        data_copy['month'] = data_copy['date'].dt.to_period('M')
        
        monthly_trends = data_copy.groupby('month').agg({
            'balance': 'sum',
            'dpd': 'mean',
            'utilization_ratio': 'mean'
        }).reset_index()
        
        if len(monthly_trends) > 1:
            fig, axes = plt.subplots(1, 3, figsize=self.figsize)
            
            monthly_trends.plot(x='month', y='balance', ax=axes[0], title='Monthly AUM Trend')
            monthly_trends.plot(x='month', y='dpd', ax=axes[1], title='Average DPD Trend', color='orange')
            monthly_trends.plot(x='month', y='utilization_ratio', ax=axes[2], title='Utilization Trend', color='green')
            
            plt.tight_layout()
            plt.show()

# Create visualizations
if not feature_frame.empty and kpi_summary:
    visualizer = FinancialVisualizer()
    visualizer.create_dashboard(kpi_summary, feature_frame)
    visualizer.plot_trend_analysis(feature_frame)
else:
    print("No data available for visualization")

In [None]:
# AI-Powered Financial Insights Engine
import json
from typing import List, Dict, Any, Optional

class FinancialInsightsGenerator:
    def __init__(self):
        self.insights = []
        
    def analyze_portfolio_health(self, kpis: Dict[str, Any], features: pd.DataFrame) -> List[str]:
        """Generate AI-powered insights about portfolio health"""
        insights = []
        
        if not kpis or features.empty:
            return ["Insufficient data for analysis"]
        
        # AUM Analysis
        aum = kpis.get('aum', 0)
        if aum > 100_000_000:  # 100M
            insights.append(f"🟢 Strong portfolio size: ${aum/1e6:.1f}M AUM indicates substantial business scale")
        elif aum > 10_000_000:  # 10M
            insights.append(f"🟡 Moderate portfolio: ${aum/1e6:.1f}M AUM shows growth potential")
        else:
            insights.append(f"🔴 Small portfolio: ${aum/1e6:.1f}M AUM suggests need for business expansion")
        
        # Risk Analysis
        default_rate = kpis.get('default_rate', 0)
        if default_rate > 0.05:  # 5%
            insights.append(f"🔴 HIGH RISK: Default rate of {default_rate:.1%} exceeds industry benchmarks")
        elif default_rate > 0.02:  # 2%
            insights.append(f"🟡 MODERATE RISK: Default rate of {default_rate:.1%} requires monitoring")
        else:
            insights.append(f"🟢 LOW RISK: Default rate of {default_rate:.1%} is within healthy limits")
        
        # Concentration Risk
        concentration = kpis.get('concentration_top10', 0)
        if concentration > 0.5:  # 50%
            insights.append(f"🔴 HIGH CONCENTRATION: Top 10 clients represent {concentration:.1%} of portfolio")
        elif concentration > 0.3:  # 30%
            insights.append(f"🟡 MODERATE CONCENTRATION: {concentration:.1%} in top 10 clients")
        else:
            insights.append(f"🟢 DIVERSIFIED: Well-distributed portfolio with {concentration:.1%} concentration")
        
        # Profitability Analysis
        weighted_apr = kpis.get('weighted_apr', 0)
        if weighted_apr > 0.15:  # 15%
            insights.append(f"🟢 STRONG YIELDS: Weighted APR of {weighted_apr:.1%} indicates good pricing power")
        elif weighted_apr > 0.08:  # 8%
            insights.append(f"🟡 MODERATE YIELDS: APR of {weighted_apr:.1%} is competitive")
        else:
            insights.append(f"🔴 LOW YIELDS: APR of {weighted_apr:.1%} may impact profitability")
        
        # Alert Analysis
        if not alerts_frame.empty:
            critical_alerts = len(alerts_frame[alerts_frame['severity'] == 'critical'])
            if critical_alerts > 0:
                insights.append(f"⚠️ URGENT: {critical_alerts} critical alerts require immediate attention")
        
        # Customer Mix Analysis
        if 'customer_type' in features.columns:
            customer_mix = features['customer_type'].value_counts(normalize=True)
            if customer_mix.get('enterprise', 0) > 0.3:
                insights.append("🟢 STABLE BASE: Strong enterprise customer presence provides stability")
            if customer_mix.get('micro', 0) > 0.5:
                insights.append("🟡 RISK PROFILE: High micro-customer exposure increases operational complexity")
        
        # B2G Analysis
        b2g_percent = kpis.get('b2g_percent', 0)
        if b2g_percent > 0.2:  # 20%
            insights.append(f"🟢 STABILITY: {b2g_percent:.1%} government/public sector exposure adds stability")
        
        return insights
    
    def generate_recommendations(self, insights: List[str], kpis: Dict[str, Any]) -> List[str]:
        """Generate actionable recommendations based on insights"""
        recommendations = []
        
        # Risk-based recommendations
        default_rate = kpis.get('default_rate', 0)
        if default_rate > 0.05:
            recommendations.extend([
                "📋 Implement enhanced credit scoring models",
                "🔍 Conduct portfolio stress testing",
                "⚖️ Review and tighten underwriting criteria"
            ])
        
        # Concentration recommendations
        concentration = kpis.get('concentration_top10', 0)
        if concentration > 0.4:
            recommendations.extend([
                "📈 Diversify customer acquisition strategy",
                "🎯 Set concentration limits per client",
                "🌐 Expand into new market segments"
            ])
        
        # Profitability recommendations
        weighted_apr = kpis.get('weighted_apr', 0)
        if weighted_apr < 0.1:
            recommendations.extend([
                "💰 Review pricing strategy for competitive positioning",
                "📊 Implement risk-based pricing models",
                "🔄 Optimize cost of funds"
            ])
        
        # Operational recommendations (always relevant)
        recommendations.extend([
            "🤖 Implement automated monitoring dashboards",
            "📱 Deploy real-time alert systems",
            "📈 Establish monthly portfolio review cycles",
            "🎯 Define clear KPI targets and thresholds"
        ])
        
        return recommendations[:8]  # Limit to top 8 recommendations

# Generate insights and recommendations
insights_generator = FinancialInsightsGenerator()

if kpi_summary and not feature_frame.empty:
    portfolio_insights = insights_generator.analyze_portfolio_health(kpi_summary, feature_frame)
    recommendations = insights_generator.generate_recommendations(portfolio_insights, kpi_summary)
    
    print("=" * 80)
    print("🧠 ABACO FINANCIAL INTELLIGENCE INSIGHTS")
    print("=" * 80)
    
    print("\n📊 PORTFOLIO HEALTH ANALYSIS:")
    for i, insight in enumerate(portfolio_insights, 1):
        print(f"{i:2d}. {insight}")
    
    print("\n🎯 STRATEGIC RECOMMENDATIONS:")
    for i, rec in enumerate(recommendations, 1):
        print(f"{i:2d}. {rec}")
    
    print(f"\n📋 EXECUTIVE SUMMARY:")
    print(f"Portfolio Size: ${kpi_summary.get('aum', 0)/1e6:.1f}M | Active Clients: {kpi_summary.get('active_clients', 0):,}")
    print(f"Risk Profile: {kpi_summary.get('default_rate', 0):.2%} default rate | APR: {kpi_summary.get('weighted_apr', 0):.1%}")
    print(f"Alerts: {len(alerts_frame)} total ({len(alerts_frame[alerts_frame['severity'] == 'critical']) if not alerts_frame.empty else 0} critical)")
    print("=" * 80)
else:
    print("Insufficient data for AI insights generation")

In [None]:
# Export and Reporting Engine
import datetime
import os
from pathlib import Path

class FinancialReportExporter:
    def __init__(self, base_path: str = "/workspaces/nextjs-with-supabase/data"):
        self.base_path = Path(base_path)
        self.base_path.mkdir(exist_ok=True)
        
    def export_summary_report(self, kpis: Dict[str, Any], insights: List[str], recommendations: List[str]) -> str:
        """Export comprehensive summary report"""
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"abaco_financial_report_{timestamp}.md"
        filepath = self.base_path / filename
        
        report_content = f"""# Abaco Financial Intelligence Report
Generated: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

## Executive Summary
- **Portfolio Size**: ${kpis.get('aum', 0)/1e6:.2f}M AUM
- **Active Clients**: {kpis.get('active_clients', 0):,}
- **Default Rate**: {kpis.get('default_rate', 0):.2%}
- **Weighted APR**: {kpis.get('weighted_apr', 0):.2%}

## Key Performance Indicators
| Metric | Value |
|--------|-------|
| AUM | ${kpis.get('aum', 0)/1e6:.2f}M |
| Active Clients | {kpis.get('active_clients', 0):,} |
| Credit Lines | ${kpis.get('credit_lines', 0)/1e6:.2f}M |
| Default Rate | {kpis.get('default_rate', 0):.2%} |
| Churn Rate | {kpis.get('churn_rate', 0):.2%} |
| Weighted APR | {kpis.get('weighted_apr', 0):.2%} |
| Concentration (Top 10) | {kpis.get('concentration_top10', 0):.2%} |
| B2G Exposure | {kpis.get('b2g_percent', 0):.2%} |

## Portfolio Health Insights
"""
        for i, insight in enumerate(insights, 1):
            report_content += f"{i}. {insight}\n"
        
        report_content += f"""
## Strategic Recommendations
"""
        for i, rec in enumerate(recommendations, 1):
            report_content += f"{i}. {rec}\n"
        
        report_content += f"""
## Data Quality Assessment
- **Overall Score**: {quality_artifacts.get('score', 0):.1%}
- **PDF Processing**: {quality_artifacts.get('pdf_completeness', 0):.1%}
- **Active Alerts**: {len(alerts_frame)} ({len(alerts_frame[alerts_frame['severity'] == 'critical']) if not alerts_frame.empty else 0} critical)

## Technical Notes
- Analysis based on {len(feature_frame)} portfolio records
- Feature engineering completed with {len(feature_frame.columns)} attributes
- All calculations use industry-standard methodologies
- Risk metrics aligned with Basel III guidelines

---
*Generated by Abaco Financial Intelligence Engine*
"""
        
        # Write report to file
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(report_content)
        
        return str(filepath)
    
    def export_data_tables(self) -> Dict[str, str]:
        """Export processed data tables"""
        exports = {}
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        
        # Export feature frame
        if not feature_frame.empty:
            feature_path = self.base_path / f"feature_data_{timestamp}.csv"
            feature_frame.to_csv(feature_path, index=False)
            exports['features'] = str(feature_path)
        
        # Export alerts
        if not alerts_frame.empty:
            alerts_path = self.base_path / f"alerts_{timestamp}.csv"
            alerts_frame.to_csv(alerts_path, index=False)
            exports['alerts'] = str(alerts_path)
        
        # Export KPIs
        if kpi_summary:
            kpi_path = self.base_path / f"kpis_{timestamp}.json"
            # Convert pandas objects to serializable format
            kpi_serializable = {}
            for k, v in kpi_summary.items():
                if isinstance(v, pd.Series):
                    kpi_serializable[k] = v.to_dict()
                elif hasattr(v, 'item'):  # numpy scalar
                    kpi_serializable[k] = v.item()
                else:
                    kpi_serializable[k] = v
            
            with open(kpi_path, 'w') as f:
                json.dump(kpi_serializable, f, indent=2, default=str)
            exports['kpis'] = str(kpi_path)
        
        return exports

# Generate and export reports
if kpi_summary and not feature_frame.empty:
    exporter = FinancialReportExporter()
    
    # Export summary report
    report_path = exporter.export_summary_report(
        kpi_summary, 
        portfolio_insights if 'portfolio_insights' in locals() else [], 
        recommendations if 'recommendations' in locals() else []
    )
    
    # Export data tables
    data_exports = exporter.export_data_tables()
    
    print(f"\n📄 REPORTS GENERATED:")
    print(f"📋 Summary Report: {report_path}")
    
    for data_type, path in data_exports.items():
        print(f"📊 {data_type.title()} Data: {path}")
    
    print(f"\n✅ Financial intelligence analysis complete!")
    print(f"📁 All files saved to: /workspaces/nextjs-with-supabase/data/")
else:
    print("⚠️ Insufficient data for report generation")