In [6]:
from data import loader, exporter
from constant import *
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder

In [13]:
# Maps each short table key to the name of the source table it stands for.
# Insertion order is preserved, so iterating this mapping visits the tables
# in the order listed here.
file_names = dict(
    [
        ("alter", "XW_ENTINFO_ALTER"),
        ("basic", "XW_ENTINFO_BASIC"),
        ("person", "XW_ENTINFO_PERSON"),
        ("yrpinfo", "XW_ENTINFO_YRPINFO"),
        ("punished", "XW_ENTINFO_PUNISHED"),
        ("punishbreak", "XW_ENTINFO_PUNISHBREAK"),
        ("finalcase", "XW_ENTINFO_FINALCASE"),
        ("shareholder", "XW_ENTINFO_SHAREHOLDER"),
        ("taxdeclare", "XW_ENTINFO_TAXDECLARE"),
        ("fncl_tr_dtal", "XW_ENTINFO_FNCL_TR_DTAL"),
        ("target", "XW_ENTINFO_TARGET"),
    ]
)


def load_and_merge_data(file_names, loader, data_dir=None):
    """Left-join every per-table CSV onto the de-duplicated target table.

    ``target.csv`` is always read first and reduced to one row per
    ``CUST_NO``. Every other entry of ``file_names`` is then read from
    ``<data_dir>/<key>.csv`` and left-joined on ``CUST_NO``, in the
    iteration order of ``file_names``.

    Args:
        file_names: Mapping of short table key -> source table name. The key
            is used as the CSV file stem; the value is only used in the
            progress message. The ``'target'`` key is skipped in the loop.
        loader: Object exposing ``to_df(path)``, which must return a
            ``pandas.DataFrame`` containing a ``CUST_NO`` column.
        data_dir: Directory holding the CSV files. When ``None`` (the
            default) the module-level ``dir_preprocess`` is used, which keeps
            existing two-argument calls working. (``dir_preprocess`` is
            presumably provided by ``from constant import *`` -- confirm.)

    Returns:
        pandas.DataFrame: the target rows with the columns of every other
        table attached.

    Warns:
        UserWarning: if a join increases the row count, i.e. the joined
        table carries repeated ``CUST_NO`` values and has multiplied target
        rows. The merged result is still returned unchanged.
    """
    import warnings  # local import so the block stays self-contained

    if data_dir is None:
        # Resolved at call time so an explicit data_dir never touches the global.
        data_dir = dir_preprocess

    # Keep a single row per CUST_NO. drop_duplicates already returns a new
    # frame, so no additional copy is needed before merging.
    merged_df = loader.to_df(
        os.path.join(data_dir, 'target.csv')
    ).drop_duplicates(subset='CUST_NO')

    # Load the remaining tables one by one and merge them in.
    for key, file_name in file_names.items():
        if key == 'target':  # the target table is the join base, not a feature table
            continue

        print(f"Merging {file_name}...")
        df = loader.to_df(os.path.join(data_dir, f'{key}.csv'))

        rows_before = len(merged_df)
        # Left join: every target row is kept, unmatched columns become NaN.
        merged_df = pd.merge(merged_df, df, on=['CUST_NO'], how='left')

        # A left join never drops rows, so growth means duplicated keys on the right.
        if len(merged_df) > rows_before:
            warnings.warn(
                f"Merging '{key}' increased the row count from {rows_before} "
                f"to {len(merged_df)}; '{key}.csv' probably contains repeated "
                f"CUST_NO values.",
                stacklevel=2,
            )

    return merged_df


# Build the wide table: target rows with every other table left-joined on CUST_NO.
merged_data = load_and_merge_data(file_names, loader)

# Hand the merged frame to the project exporter under the tag 'v1'.
# NOTE(review): presumably this writes into the preprocess directory -- confirm in data.exporter.
exporter.export_df_to_preprocess('v1',merged_data)

# Bare last expression so the notebook renders the merged frame as the cell output.
merged_data

Merging XW_ENTINFO_ALTER...
Merging XW_ENTINFO_BASIC...
Merging XW_ENTINFO_PERSON...
Merging XW_ENTINFO_YRPINFO...
Merging XW_ENTINFO_PUNISHED...
Merging XW_ENTINFO_PUNISHBREAK...
Merging XW_ENTINFO_FINALCASE...
Merging XW_ENTINFO_SHAREHOLDER...
Merging XW_ENTINFO_TAXDECLARE...
Merging XW_ENTINFO_FNCL_TR_DTAL...


Unnamed: 0,CUST_NO,SRC,FLAG,ALTER_COUNT,DAYS_SINCE_LAST_ALTER,ALTER_TYPE_DIVERSITY,INVESTOR_CHANGE_COUNT,LEGAL_REP_CHANGE_COUNT,REGCAP,LIFE_SPAN,...,MAX_SALES,TOTAL_TAX,AVG_TAX,MAX_TAX,TRANSACTION_COUNT,TOTAL_AMOUNT,AVG_AMOUNT,MAX_AMOUNT,COUNTERPART_DIVERSITY,CHANNEL_DIVERSITY
0,158a8d99bec2a2b652a6de45a2b52ec9,train,0.0,4.0,855.0,3.0,0.0,0.0,275.77,20.010959,...,77.05,59.82,14.955,30.0,34.0,1002.70,29.491176,106.75,2.0,5.0
1,b1d244a25a82adb7beafe33fe971402c,train,0.0,1.0,1309.0,1.0,0.0,0.0,218.88,,...,,,,,8.0,101.32,12.665000,16.03,1.0,1.0
2,85b1ab1270516d2ebe21ed00c6abbf27,train,0.0,2.0,1073.0,2.0,0.0,0.0,275.77,30.016438,...,,,,,72.0,2583.23,35.878194,85.69,6.0,7.0
3,ef194610bdbecdea9af3cc23bceba8b2,train,0.0,9.0,1486.0,9.0,0.0,0.0,341.56,,...,,,,,185.0,13126.58,70.954486,397.73,5.0,7.0
4,1220f9592fdd0b3fa9bbbd90e6d69d84,train,0.0,5.0,281.0,5.0,1.0,0.0,59.41,,...,,,,,20.0,303.76,15.188000,47.16,2.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59074,c3e7a6f3977f5e8faab6f12e2774f623,test,,7.0,1260.0,6.0,0.0,0.0,275.77,,...,,,,,37.0,1114.25,30.114865,161.37,3.0,4.0
59075,362971a7ef62e55ff7aad8109ab93829,test,,,,,,,128.00,,...,,,,,37.0,1015.50,27.445946,128.00,3.0,5.0
59076,458746be9ea05dd028cb3679090e4584,test,,2.0,2598.0,2.0,0.0,0.0,161.27,,...,,,,,401.0,10843.12,27.040200,101.59,40.0,6.0
59077,60452ece68500add14a81ac11184bb7a,test,,8.0,2228.0,8.0,0.0,0.0,218.88,,...,,,,,61.0,1148.93,18.834918,53.00,5.0,5.0
