In [0]:
from pyspark.sql.functions import count, sum, abs, round

# Portability volume per business classification.
# NOTE: `count`, `sum`, `abs` and `round` below are the Spark column functions
# imported from pyspark.sql.functions, not the Python builtins.
volumetric_analysis = (
    spark.table("cvm_p210.gold_cvm210_analytics")
    # Keep only the rows flagged as portability movements.
    .where("is_portabilidade = 'Sim'")
    .groupBy("classificacao_negocio")
    .agg(
        # Counts non-null CNPJ_FUNDO rows.
        # NOTE(review): if the table holds more than one row per fund this is
        # a row count rather than a distinct-fund count -- confirm the grain.
        count("CNPJ_FUNDO").alias("fund_count"),
        # Absolute value so inflows and outflows both add to the moved volume.
        round(sum(abs("fluxo_liquido")), 2).alias("total_volume_moved"),
    )
    # Largest moved volume first.
    .sort("total_volume_moved", ascending=False)
)

# NOTE(review): the header names January/2026 but no date filter is applied
# here -- presumably the table already holds only that period; confirm.
print("📊 Executive Summary: Portability Movements (January/2026)")
display(volumetric_analysis)

📊 Executive Summary: Portability Movements (January/2026)


business_classification,fund_count,total_volume_moved
Inbound Portability (Success),671,50816910970.77
Defensive Portability (Loss),516,38006682987.62


In [0]:
# ==========================================================================================================
# STRATEGIC NOTE - PORTABILITY INTELLIGENCE (CVM 210)
# ==========================================================================================================
# CONTEXT: Although CVM does not provide direct identification of origin/destination Payment Institutions (PIs)
# due to banking secrecy, this pipeline reconstructs portability logic through anomalous net flows.
#
# IMPORTANCE OF THE SOLUTION:
# 1. SHIELDING (RETENTION): Early identification of 'Outbound Portability' is vital for the survival 
#    of the asset manager. Losing net assets (PL) directly impacts the Management Fee and fund health.
#    The data generated here allows the specialist team to act preventatively to retain the investor.
#
# 2. PROSPECTING (ATTACK): Identifying inflows allows the CRM and Specialist team to act at the
#    moment of highest investor confidence, consolidating the relationship and growing 'Share of Wallet'.
#
# 3. CRM INTEGRATION: This code is not just a report, it is a task engine for the commercial team.


# LIMITATION: Ideally we would have the client's agency (branch) and account number, so that each client
# who requested portability could be routed directly to their investment specialist.
# ==========================================================================================================


from pyspark.sql.functions import col, abs

# 1. Selecting the at-risk rows: everything classified as defensive
#    portability (loss). No magnitude threshold is applied here; the rows are
#    only ordered so that the most negative net flow comes first.
df_outflow_risk = (
    spark.table("cvm_p210.gold_cvm210_analytics")
    .where("classificacao_negocio = 'Portabilidade Defensiva (Perda)'")
    .sort(col("fluxo_liquido").asc())  # ascending net flow => largest loss on top
)

# 2. Professional Email Body Simulation for the Specialist
def generate_commercial_email_body(fund_list):
    """Return the simulated alert e-mail text sent to the investment specialist.

    Args:
        fund_list: Pre-formatted text describing the at-risk funds. It is
            interpolated as-is under the "AT-RISK FUNDS DATA" heading.

    Returns:
        The full message as a single string: a leading newline, every template
        line indented by four spaces, and a trailing four-space line.
    """
    # The template is spelled out line by line so that the significant
    # whitespace (blank lines, trailing spaces) is explicit in the literals.
    body_lines = [
        "",
        "    SUBJECT: [URGENT] CVM 210 Monitoring - Asset Outflow Alert",
        "    ",
        "    Dear Investment Specialist,",
        "",
        "    Automated portability monitoring has identified ATYPICAL OUTFLOW flows in the latest reporting periods. ",
        "    This behavior suggests portability movements (CVM 210) that could severely impact the managed Net Assets (PL).",
        "",
        "    AT-RISK FUNDS DATA:",
        f"    {fund_list}",
        "",
        "    RECOMMENDED ACTION:",
        "    - Prioritize contact with the shareholders of these funds via CRM.",
        "    - Evaluate if the outflow is linked to recent performance or competing market movements.",
        "    - Activate retention protocol (Capital Shielding) to mitigate further redemptions.",
        "",
        "    Proactivity at this stage is our main tool to ensure the stability of the assets under management.",
        "",
        "    Sincerely,",
        "    Data Intelligence Bot - Databricks",
        "    ",
    ]
    return "\n".join(body_lines)

# Collecting the top 3 worst cases for the example.
# df_outflow_risk is already ordered most-negative-first, so limit(3) keeps
# the three largest losses.
import builtins  # local import: the bare name `abs` is shadowed in this notebook

top_risks = df_outflow_risk.limit(3).toPandas()

# `abs` was imported from pyspark.sql.functions, which only accepts a Column or
# a column name; calling it on a plain number taken from the pandas row fails.
# builtins.abs is the numeric absolute value, which `:,.2f` can format.
list_str = "".join(
    f"- Fund: {row['CNPJ_FUNDO']} | Estimated Loss: $ {builtins.abs(row['fluxo_liquido']):,.2f}\n"
    for _, row in top_risks.iterrows()
)

print(generate_commercial_email_body(list_str))