In [2]:
from data import loader, exporter
from constant import *
import pandas as pd

In [9]:
def process_fncl_tr_dtal_data(df):
    """Aggregate transaction-detail rows into one feature row per customer.

    Parameters
    ----------
    df : pandas.DataFrame
        Transaction-level rows. Columns read: ``CUST_NO``, ``RMB_TR_AMT``,
        ``CPT_CUST_NO`` and ``CHANL_CD``. The frame is only read, never
        modified. ``TR_DAT`` is not touched: none of the features below
        depend on the transaction date, so callers that need a parsed date
        column should convert it themselves.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``CUST_NO`` (sorted by ``CUST_NO``, fresh
        integer index) with the columns, in this order:
        ``CUST_NO``, ``TRANSACTION_COUNT``, ``TOTAL_AMOUNT``, ``AVG_AMOUNT``,
        ``MAX_AMOUNT``, ``COUNTERPART_DIVERSITY``, ``CHANNEL_DIVERSITY``.

    Raises
    ------
    KeyError
        If any of the required columns is absent from ``df``.
    """
    required_columns = ['CUST_NO', 'RMB_TR_AMT', 'CPT_CUST_NO', 'CHANL_CD']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"missing required column(s): {missing_columns}")

    # A single grouped pass yields every feature. All features share the same
    # grouping key, so this is equivalent to computing them separately and
    # left-merging the results on CUST_NO.
    per_customer = df.groupby('CUST_NO').agg(
        # Number of transaction rows per customer; 'size' counts every row,
        # including rows whose amount is missing.
        TRANSACTION_COUNT=('RMB_TR_AMT', 'size'),
        # Total, average and largest transaction amount.
        TOTAL_AMOUNT=('RMB_TR_AMT', 'sum'),
        AVG_AMOUNT=('RMB_TR_AMT', 'mean'),
        MAX_AMOUNT=('RMB_TR_AMT', 'max'),
        # Distinct counterparties / channels; nunique ignores missing values.
        COUNTERPART_DIVERSITY=('CPT_CUST_NO', 'nunique'),
        CHANNEL_DIVERSITY=('CHANL_CD', 'nunique'),
    )

    # Move CUST_NO from the index back to a regular leading column.
    return per_customer.reset_index()


# Load the raw 'XW_ENTINFO_FNCL_TR_DTAL' data through the project loader.
# NOTE(review): presumably returns a single concatenated DataFrame of
# transaction rows — confirm against data.loader.to_concat_df.
df_fncl_tr_dtal = loader.to_concat_df('XW_ENTINFO_FNCL_TR_DTAL')
# Reduce the transaction rows to one feature row per CUST_NO.
df_fncl_tr_dtal_features = process_fncl_tr_dtal_data(df_fncl_tr_dtal)

# Hand the feature table to the project exporter under the name 'fncl_tr_dtal'.
# NOTE(review): destination and file format are decided inside
# exporter.export_df_to_preprocess and are not visible from this cell.
exporter.export_df_to_preprocess('fncl_tr_dtal', df_fncl_tr_dtal_features)
# Bare last expression so the notebook renders the feature table as cell output.
df_fncl_tr_dtal_features

Unnamed: 0,CUST_NO,TRANSACTION_COUNT,TOTAL_AMOUNT,AVG_AMOUNT,MAX_AMOUNT,COUNTERPART_DIVERSITY,CHANNEL_DIVERSITY
0,000034607497713173a75a0d9910cb52,134,3451.4,25.756716,161.27,2,5
1,0000dfb26b1e1fb3d96c92eb3f00a3d4,15,801.46,53.430667,178.33,1,6
2,0001f54424332282c4ee1dd10ef43e67,25,974.06,38.9624,128.0,1,4
3,0001f87942183f01cee6998d65092fe9,34,1683.04,49.501176,93.52,4,3
4,00021b96bf933024181336f4862bd1cb,98,4487.88,45.794694,122.41,5,6
