In [48]:
import numpy as np
import pandas as pd
from scipy.stats import zscore

In [49]:
def load_data(data_dir=r'/Users/kangshiyu/Desktop/financial_data'):
    """Read the three 2023-12-31 statement CSVs and return them as DataFrames.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory that contains ``balance_sheet_20231231.csv``,
        ``income_statement_20231231.csv`` and
        ``cashflow_statement_20231231.csv``. Defaults to the location the
        notebook was originally written against, so ``load_data()`` keeps
        working unchanged; pass another directory to run elsewhere.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(bs, ins, cf)`` - balance sheet, income statement and cash-flow
        statement, in that order.

    Notes
    -----
    Column headers are normalised here (surrounding whitespace stripped,
    embedded newlines removed) so that every caller gets clean headers, not
    only code that happens to run a separate clean-up step afterwards.
    """
    # Function-scope import keeps this cell self-contained.
    from pathlib import Path

    base_dir = Path(data_dir)
    file_names = (
        'balance_sheet_20231231.csv',
        'income_statement_20231231.csv',
        'cashflow_statement_20231231.csv',
    )

    frames = []
    for file_name in file_names:
        frame = pd.read_csv(base_dir / file_name)
        # Exact-name column selection downstream fails on stray whitespace.
        frame.columns = [str(c).strip().replace('\n', '') for c in frame.columns]
        frames.append(frame)

    bs, ins, cf = frames
    return bs, ins, cf

In [50]:
# Load the raw statements in this cell so it also runs on a fresh kernel:
# bs / ins / cf are otherwise only bound by the final cell of the notebook.
bs, ins, cf = load_data()

# Normalise the headers in place: strip surrounding whitespace and drop
# embedded newlines so the exact-name column selections can match.
# NOTE: the final cell calls load_data() again, so this clean-up applies only
# to the frames bound here, not to the ones that cell loads.
for df in [bs, ins, cf]:
    df.columns = [c.strip().replace('\n', '') for c in df.columns]
In [51]:
def _pick_and_rename(frame, name_map):
    """Return a copy of ``frame`` limited to the keys of ``name_map``, relabelled to its values."""
    picked = frame[list(name_map)].copy()
    picked.columns = list(name_map.values())
    return picked


def clean_data(bs, ins, cf):
    """Select, rename and merge the three statement tables into one frame.

    Parameters
    ----------
    bs, ins, cf : pandas.DataFrame
        Balance sheet, income statement and cash-flow statement carrying the
        Chinese column headers listed in the maps below.

    Returns
    -------
    pandas.DataFrame
        One row per balance-sheet row (left joins on ``stock_code``) with
        English column names. Because both the balance sheet and the income
        statement contribute a ``company_name`` column, the merged frame
        holds ``company_name_x`` (balance sheet) and ``company_name_y``
        (income statement). +/-inf is converted to NaN and every NaN is then
        replaced by 0.
    """
    balance_map = {
        "股票代码": "stock_code",
        "股票简称": "company_name",
        "资产-货币资金": "cash",
        "资产-应收账款": "accounts_receivable",
        "资产-存货": "inventory",
        "资产-总资产": "total_assets",
        "负债-应付账款": "accounts_payable",
        "负债-预收账款": "advance_receipts",
        "负债-总负债": "total_liabilities",
        "资产负债率": "debt_asset_ratio",
        "股东权益合计": "total_equity",
    }
    income_map = {
        "股票代码": "stock_code",
        "股票简称": "company_name",
        "营业总收入": "revenue",
        "净利润": "net_profit",
        "营业总支出-营业总支出": "total_expenses",
        "营业利润": "operating_profit",
        "利润总额": "total_profit",
    }
    cashflow_map = {
        "股票代码": "stock_code",
        "经营性现金流-现金流量净额": "operating_cash_flow",
        "经营性现金流-净现金流占比": "ocf_ratio",
        "净现金流-同比增长": "ocf_growth",
    }

    balance = _pick_and_rename(bs, balance_map)
    income = _pick_and_rename(ins, income_map)
    cashflow = _pick_and_rename(cf, cashflow_map)

    # The balance sheet drives the row set; the other two are left-joined onto it.
    merged = (
        balance
        .merge(income, on="stock_code", how="left")
        .merge(cashflow, on="stock_code", how="left")
    )

    # Infinities become NaN first so that a single fill handles both cases.
    return merged.replace([np.inf, -np.inf], np.nan).fillna(0)

In [53]:
def _safe_ratio(numerator, denominator):
    """Element-wise ``numerator / denominator`` that yields NaN where the denominator is 0."""
    return numerator / denominator.where(denominator != 0)


def create_features(df):
    """Derive ratio features from the merged financial-statement frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the numeric columns ``total_liabilities``, ``total_assets``,
        ``cash``, ``accounts_receivable``, ``inventory``, ``accounts_payable``,
        ``advance_receipts``, ``revenue``, ``total_expenses``, ``net_profit``,
        ``operating_profit``, ``total_profit``, ``operating_cash_flow``,
        ``ocf_ratio`` and ``ocf_growth``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the ratio columns added. ``ocf_ratio`` and
        ``ocf_growth`` are passed through unchanged. Any ratio whose
        denominator is 0 is reported as 0, and remaining NaN / +/-inf values
        anywhere in the frame are replaced by 0.

    Raises
    ------
    KeyError
        If a required column is missing.

    Notes
    -----
    Every ratio goes through the same zero-denominator rule. Adding a small
    epsilon to some denominators instead would turn "undefined" into values
    around 1e9 times the numerator, which survive the inf/NaN clean-up and
    would dominate any later standardisation.
    """
    f = df.copy()

    # The two pass-through features are not otherwise touched below, so check
    # them explicitly to fail early with the same exception type.
    missing = [c for c in ("ocf_ratio", "ocf_growth") if c not in f.columns]
    if missing:
        raise KeyError(f"missing required column(s): {missing}")

    # Balance sheet
    f["leverage"] = _safe_ratio(f["total_liabilities"], f["total_assets"])
    f["cash_to_debt"] = _safe_ratio(f["cash"], f["total_liabilities"])
    f["operating_asset_ratio"] = _safe_ratio(
        f["accounts_receivable"] + f["inventory"], f["total_assets"]
    )
    f["operating_liability_ratio"] = _safe_ratio(
        f["accounts_payable"] + f["advance_receipts"], f["total_liabilities"]
    )

    # Income statement
    f["gross_margin"] = _safe_ratio(f["revenue"] - f["total_expenses"], f["revenue"])
    f["net_profit_ratio"] = _safe_ratio(f["net_profit"], f["revenue"])
    f["operating_profit_ratio"] = _safe_ratio(f["operating_profit"], f["revenue"])
    f["profit_to_total_profit"] = _safe_ratio(f["net_profit"], f["total_profit"])

    # Cash flow statement
    f["cash_profit_ratio"] = _safe_ratio(f["operating_cash_flow"], f["net_profit"])
    f["cashflow_to_debt"] = _safe_ratio(f["operating_cash_flow"], f["total_liabilities"])

    # Undefined ratios (NaN from the zero-denominator rule) and any stray
    # infinities are reported as 0.
    f = f.replace([np.inf, -np.inf], np.nan).fillna(0)

    return f

In [54]:
def build_risk_score_z(df):
    """Turn the ratio features into 0-100 component scores and an overall risk score.

    Each feature is standardised with ``scipy.stats.zscore``; features listed
    in ``low_risk`` are negated so that a larger component score always means
    more risk. Every component is then min-max scaled to 0-100 and
    ``risk_score`` is the row-wise mean of the components. All scores are
    rounded to 3 decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``stock_code``, a company-name column (either
        ``company_name_x``, as produced by merging two tables that both carry
        ``company_name``, or plain ``company_name``) and the twelve feature
        columns named in ``high_risk`` / ``low_risk`` below.

    Returns
    -------
    pandas.DataFrame
        ``stock_code``, the company-name column under the name it had in
        ``df``, ``risk_score`` and the twelve ``<feature>_z`` components.

    Raises
    ------
    KeyError
        If a feature column, ``stock_code`` or the company-name column is missing.

    Notes
    -----
    A feature that is constant across all rows has no defined z-score; its
    component column is NaN and is skipped by the row-wise mean.
    """
    r = df.copy()
    # NOTE(review): which features count as risk-increasing is a modelling
    # choice (e.g. ocf_growth is treated as risk-increasing) - confirm.
    high_risk = [
        "leverage", "operating_asset_ratio", "operating_liability_ratio", "ocf_growth"]
    low_risk = [
        "cash_to_debt", "gross_margin", "net_profit_ratio", "operating_profit_ratio",
        "profit_to_total_profit", "cash_profit_ratio", "cashflow_to_debt", "ocf_ratio"]

    for col in high_risk:
        r[col + "_z"] = zscore(r[col])
    for col in low_risk:
        r[col + "_z"] = -zscore(r[col])

    # Explicit list rather than "every column ending in _z": an unrelated
    # column that happens to carry that suffix must not leak into the score.
    score_cols = [col + "_z" for col in high_risk + low_risk]

    col_min = r[score_cols].min()
    col_max = r[score_cols].max()
    r[score_cols] = (r[score_cols] - col_min) / (col_max - col_min) * 100
    r["risk_score"] = r[score_cols].mean(axis=1)

    r[score_cols + ["risk_score"]] = r[score_cols + ["risk_score"]].round(3)

    # Prefer the merge-suffixed name, but also accept an unsuffixed column.
    name_col = next(
        (c for c in ("company_name_x", "company_name") if c in r.columns), None
    )
    if name_col is None:
        raise KeyError("expected a 'company_name_x' or 'company_name' column")

    return r[["stock_code", name_col, "risk_score"] + score_cols]
    

In [55]:
# End-to-end run: load the CSVs, merge/clean them, derive the ratio features,
# score every company and write the result to disk.
# The names bound here (bs, ins, cf, df, df_feat, risk_result) are module-level,
# so they stay available to other cells after this one has run.
if __name__ == "__main__":
    bs, ins, cf = load_data()
    df = clean_data(bs, ins, cf)
    df_feat = create_features(df)
    risk_result = build_risk_score_z(df_feat)

    # index=False: write only the data columns, not the DataFrame's row index.
    # NOTE(review): the output path is an absolute, user-specific location.
    risk_result.to_csv("/Users/kangshiyu/Desktop/corporate_riskscore.csv", index=False)

    print("Risk scoring completed. File saved as corporate_riskscore.csv")

Risk scoring completed. File saved as corporate_riskscore.csv
