In [1]:
from data import loader, exporter
from constant import *
import pandas as pd

In [3]:
import pandas as pd

def process_finalcase_data(df):
    """Aggregate final-case records into one feature row per customer.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``CUST_NO``, ``FINALDATE``, ``DATA_DAT``,
        ``UNPERFMONEY`` and ``EXECMONEY``. The two date columns are parsed
        with the ``%Y%m%d`` format; values that fail to parse become ``NaT``.
        The input frame is NOT modified.

    Returns
    -------
    pandas.DataFrame
        Exactly one row per distinct ``CUST_NO`` with the columns:

        * ``CUST_NO``
        * ``FINALCASE_COUNT`` - number of rows for the customer
        * ``UNPERFMONEY_TOTAL`` / ``UNPERFMONEY_AVG`` / ``UNPERFMONEY_MAX``
          - sum / mean / max of ``UNPERFMONEY``
        * ``DAYS_SINCE_LAST_FINALCASE`` - days between the customer's latest
          ``DATA_DAT`` and latest ``FINALDATE``
        * ``UNPERFMONEY_RATIO_AVG`` - mean of ``UNPERFMONEY / EXECMONEY``
          over the rows where ``EXECMONEY`` is non-zero
    """
    # Work on a private copy so the caller's DataFrame keeps its original
    # date columns and does not gain a helper ratio column.
    work = df[['CUST_NO', 'UNPERFMONEY', 'EXECMONEY']].copy()

    # 1. Parse the date fields (expected in %Y%m%d form).
    work['FINALDATE'] = pd.to_datetime(df['FINALDATE'], format='%Y%m%d', errors='coerce')
    work['DATA_DAT'] = pd.to_datetime(df['DATA_DAT'], format='%Y%m%d', errors='coerce')

    # 5. Per-row unperformed ratio (UNPERFMONEY / EXECMONEY). A zero
    # denominator is masked to NaN so it yields NaN instead of +/-inf;
    # otherwise a single zero EXECMONEY would turn the customer's mean
    # ratio into inf.
    nonzero_exec = work['EXECMONEY'].where(work['EXECMONEY'] != 0)
    work['UNPERFMONEY_RATIO'] = work['UNPERFMONEY'] / nonzero_exec

    per_customer = work.groupby('CUST_NO')
    unperformed = per_customer['UNPERFMONEY']

    # 4. Use ONE reference date per customer (the latest DATA_DAT).
    # Joining on every distinct (CUST_NO, DATA_DAT) pair would emit several
    # rows for a customer that appears under more than one DATA_DAT.
    days_since_last = (
        per_customer['DATA_DAT'].max() - per_customer['FINALDATE'].max()
    ).dt.days

    # All series below share the same CUST_NO index, so they align row by
    # row; the dict order fixes the output column order.
    features = pd.DataFrame({
        # 2. Total number of final cases per customer.
        'FINALCASE_COUNT': per_customer.size(),
        # 3. Sum / mean / max of the unperformed amount.
        'UNPERFMONEY_TOTAL': unperformed.sum(),
        'UNPERFMONEY_AVG': unperformed.mean(),
        'UNPERFMONEY_MAX': unperformed.max(),
        'DAYS_SINCE_LAST_FINALCASE': days_since_last,
        'UNPERFMONEY_RATIO_AVG': per_customer['UNPERFMONEY_RATIO'].mean(),
    })

    return features.reset_index()


# Load the raw 'XW_ENTINFO_FINALCASE' data through the project loader
# (presumably several source files concatenated into one DataFrame - confirm
# against loader.to_concat_df).
df_finalcase = loader.to_concat_df('XW_ENTINFO_FINALCASE')
# Build the aggregated feature table from the raw final-case records.
df_finalcase_features = process_finalcase_data(df_finalcase)

# Hand the feature table to the project exporter under the name 'finalcase'
# (the storage location/format is decided by exporter.export_df_to_preprocess).
exporter.export_df_to_preprocess('finalcase', df_finalcase_features)
# Last expression of the notebook cell: shows the first rows as the cell output.
df_finalcase_features.head()


Unnamed: 0,CUST_NO,FINALCASE_COUNT,UNPERFMONEY_TOTAL,UNPERFMONEY_AVG,UNPERFMONEY_MAX,DAYS_SINCE_LAST_FINALCASE,UNPERFMONEY_RATIO_AVG
0,015cadd272042dc8f25e5015707ddebb,1,0.0,0.0,0.0,2780,
1,02df5bca12bbfa30b03c34d8b9bd4fa8,1,102.39,102.39,102.39,2958,1.009763
2,04e0fc38822587669d4c1031f8c770cd,1,0.0,0.0,0.0,2165,0.0
3,054db2322e9ab7b15eba0f67908ffde6,1,0.0,0.0,0.0,4617,0.0
4,061264b7227fa718e6e9fbe0ebb5b910,2,251.9,125.95,147.81,5,1.0
