In [6]:
import pandas as pd
import numpy as np
from pathlib import Path

# Seed NumPy's global generator so every draw below is repeatable.
# NOTE: the order of the np.random calls in this cell determines the values;
# do not reorder them.
np.random.seed(42)

# --- Simulation dimensions ---
n_customers = 500
months = pd.date_range(start="2024-01-01", periods=12, freq="MS")  # 12 month-start dates

# --- Categorical pools sampled per customer ---
# Industry labels (22 entries)
industries = [
    "Manufacturing",
    "Logistics",
    "Retail",
    "Construction",
    "Tech SaaS",
    "Automotive",
    "Agriculture",
    "Energy",
    "Telecom",
    "Healthcare",
    "Pharmaceuticals",
    "Consumer Goods",
    "Financial Services",
    "Real Estate",
    "Hospitality",
    "Transportation",
    "Chemicals",
    "Metals & Mining",
    "Media & Entertainment",
    "Utilities",
    "Food & Beverage",
    "Professional Services",
]

# European country labels (16 entries)
countries = [
    "Germany",
    "France",
    "Netherlands",
    "Italy",
    "Spain",
    "Poland",
    "Belgium",
    "Austria",
    "Sweden",
    "Norway",
    "Denmark",
    "Finland",
    "Portugal",
    "Ireland",
    "Czech Republic",
    "Switzerland",
]

# --- Per-customer baseline attributes (one value per customer) ---
customer_ids = [f"C{1000+i}" for i in range(n_customers)]  # "C1000", "C1001", ...
base_industries = np.random.choice(industries, size=n_customers)
base_countries = np.random.choice(countries, size=n_customers)

# Revenue level (M€): lognormal draw
base_revenue = np.random.lognormal(mean=3.5, sigma=0.8, size=n_customers)

# EBITDA margin: normal draw, clamped to [0.01, 0.40]
base_ebitda_margin = np.random.normal(loc=0.15, scale=0.07, size=n_customers).clip(0.01, 0.40)

# Loan amount: 5%–35% of the customer's base revenue
base_loan_amount = np.random.uniform(low=0.05, high=0.35, size=n_customers) * base_revenue

# Collateral coverage ratio: uniform on [0.5, 1.3)
base_collateral_ratio = np.random.uniform(low=0.5, high=1.3, size=n_customers)

# PD in %: Beta(2, 15) scaled by 100, clamped to [0.5, 30]
base_pd = (100 * np.random.beta(a=2, b=15, size=n_customers)).clip(0.5, 30.0)
# LGD in %: uniform on [20, 75)
base_lgd = np.random.uniform(low=20, high=75, size=n_customers)

# Debt-to-equity: normal draw, clamped to [0.1, 6.0]
base_de_ratio = np.random.normal(loc=1.8, scale=0.9, size=n_customers).clip(0.1, 6.0)
# Interest rate in %: uniform on [3.0, 9.5)
base_interest_rate = np.random.uniform(low=3.0, high=9.5, size=n_customers)

def _clip_scalar(value, low, high):
    """Clamp a scalar to [low, high] and return it as a plain Python float."""
    return float(np.clip(value, low, high))


# Build a customer x month panel: one dict per (customer, month), collected in
# `rows` and converted to a DataFrame once at the end.
#
# Changes versus the earlier version of this cell:
#   * The starting rating is drawn with randint(1, 11). randint's upper bound is
#     exclusive, so randint(1, 10) could never start a customer at rating 10.
#     This changes the generated dataset.
#   * The monthly PD is stored in `pd_pct` instead of `pd`, so the pandas alias
#     is no longer overwritten and does not have to be re-imported afterwards.
#   * The covenant flag computed before the month loop was always overwritten
#     before being read, so it has been dropped.
rows = []

for i, cust_id in enumerate(customer_ids):
    # Attributes that stay constant for the customer across all months.
    industry = base_industries[i]
    country = base_countries[i]
    loan_amount = base_loan_amount[i]
    collateral_ratio = base_collateral_ratio[i]
    pd_level = base_pd[i]
    lgd_level = base_lgd[i]
    de_level = base_de_ratio[i]
    base_ir = base_interest_rate[i]

    # State that evolves month to month.
    revenue_level = base_revenue[i]
    ebitda_margin = base_ebitda_margin[i]

    # Random starting rating on the 1 (best) .. 10 (worst) scale.
    rating = np.random.randint(1, 11)
    # Starting days past due: centred on 10 * (rating - 3), floored at 0.
    dpd = max(0, int(np.random.normal(10 * (rating - 3), 15)))

    # Approx interest expense ~ loan * rate, floored at 0.1 so the coverage
    # ratio below never divides by a near-zero number. Neither input changes
    # within the month loop, so it is computed once per customer.
    approx_int_expense = max(0.1, loan_amount * base_ir / 100)

    for m in months:
        # Revenue evolves month to month: about +0.3% growth with noise,
        # floored at 1.0.
        revenue_growth = np.random.normal(0.003, 0.04)
        revenue_level = max(1.0, revenue_level * (1 + revenue_growth))

        # EBITDA margin drifts with small noise, clamped to [0.02, 0.45].
        ebitda_margin = _clip_scalar(
            ebitda_margin + np.random.normal(0.0, 0.01), 0.02, 0.45
        )
        ebitda = revenue_level * ebitda_margin

        # Cash metrics: OCF is 70%–110% of EBITDA; FCF subtracts a random
        # amount of up to half of EBITDA.
        ocf = ebitda * np.random.uniform(0.7, 1.1)
        fcf = ocf - np.random.uniform(0.0, ebitda * 0.5)

        # Leverage signal: noise around the customer's debt-to-equity level.
        net_debt_ebitda = _clip_scalar(np.random.normal(de_level, 0.7), 0.1, 10.0)

        # Interest coverage: noise around EBITDA / interest expense.
        interest_coverage = _clip_scalar(
            np.random.normal(ebitda / approx_int_expense, 1.0), 0.1, 15.0
        )

        # PD/LGD dynamics: base level plus a shock; PD is also tilted by the
        # current rating relative to 5.
        pd_shock = np.random.normal(0.0, 1.0)
        pd_pct = _clip_scalar(pd_level + pd_shock + 0.05 * (rating - 5), 0.3, 40.0)
        lgd = _clip_scalar(lgd_level + np.random.normal(0.0, 3.0), 10.0, 90.0)

        # DPD dynamics: expected change is (rating - 5) days; never below 0.
        dpd_change = int(np.random.normal(1.0 * (rating - 5), 10))
        dpd = max(0, dpd + dpd_change)

        # Rating migration rules (very simple): downgrade on severe
        # delinquency or high PD, upgrade when fully current with low PD.
        if dpd > 90 or pd_pct > 25:
            rating = min(10, rating + 1)
        elif dpd == 0 and pd_pct < 5 and rating > 1:
            rating = rating - 1

        # IFRS9-style Stage
        if dpd <= 30 and rating <= 6:
            stage = 1
        elif dpd <= 90:
            stage = 2
        else:
            stage = 3

        # Collateral value: loan * coverage ratio with a monthly multiplier
        # drawn from [0.9, 1.05).
        collateral_value = loan_amount * collateral_ratio * np.random.uniform(0.9, 1.05)
        # ROIC: mean of 7 / 6 / 5 for stage 1 / 2 / 3, clamped to [-5, 20].
        roic = _clip_scalar(np.random.normal(6.0 + (2 - stage), 2.0), -5.0, 20.0)

        # Covenant breach logic
        covenant_breach_flag = (
            "Y" if (dpd > 60 or interest_coverage < 1.0 or rating >= 8) else "N"
        )

        rows.append({
            "CustomerID": cust_id,
            "Month": m.strftime("%Y-%m-%d"),
            "Industry": industry,
            "Country": country,
            "Revenue_MEUR": round(revenue_level, 2),
            "EBITDA_MEUR": round(ebitda, 2),
            "EBITDA_Margin": round(ebitda_margin, 4),
            "Operating_Cash_Flow_MEUR": round(ocf, 2),
            "Free_Cash_Flow_MEUR": round(fcf, 2),
            "Loan_Amount_MEUR": round(loan_amount, 2),
            "Collateral_Value_MEUR": round(collateral_value, 2),
            "PD_pct": round(pd_pct, 2),
            "LGD_pct": round(lgd, 2),
            "Interest_Rate_pct": round(base_ir, 2),
            "Days_Past_Due": dpd,
            "Risk_Rating_1to10": rating,
            "Net_Debt_to_EBITDA": round(net_debt_ebitda, 2),
            "Debt_to_Equity": round(de_level, 2),
            "Interest_Coverage": round(interest_coverage, 2),
            "ROIC_pct": round(roic, 2),
            "Stage": stage,
            "Covenant_Breach": covenant_breach_flag
        })

# Build the frame in one shot from the collected records.
df = pd.DataFrame(rows)


In [9]:
# Destination of the exported dataset: fixed folder joined with the file name.
output_path = Path(r"C:\Users\Thanh Le\Python").joinpath(
    "synthetic_corporate_loan_portfolio_500_customers.csv"
)

# Write the panel without the positional index, then report where it went.
df.to_csv(path_or_buf=output_path, index=False)
print(f"Saved dataset to: {output_path}")



Saved dataset to: C:\Users\Thanh Le\Python\synthetic_corporate_loan_portfolio_500_customers.csv
