In [None]:
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import gc
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

sns.set()
sns.set_style('whitegrid')
sns.set_color_codes()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [226]:
train_trans = pd.read_csv("/content/drive/MyDrive/프로젝트/train_transaction.csv")
train_idf = pd.read_csv("/content/drive/MyDrive/프로젝트/train_identity.csv")
test_trans = pd.read_csv("/content/drive/MyDrive/프로젝트/test_transaction.csv")
test_idf = pd.read_csv("/content/drive/MyDrive/프로젝트/test_identity.csv")

In [227]:
train_idf.columns = [col.replace('-', '_') if 'id' in col else col for col in train_idf.columns]
test_idf.columns = [col.replace('-', '_') if 'id' in col else col for col in test_idf.columns]

In [228]:
filtered_cols = [
    'TransactionID', 'isFraud', 'TransactionDT', 'TransactionAmt', 'ProductCD', 'card1', 'card2',
    'card3', 'card4', 'card5', 'card6', 'addr1', 'addr2', 'dist1', 'dist2', 'P_emaildomain',
    'R_emaildomain', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12',
    'C13', 'C14', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12',
    'D13', 'D14', 'D15', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9',
    'V1', 'V3', 'V4', 'V6', 'V8', 'V11', 'V13', 'V14', 'V17', 'V20', 'V23', 'V26', 'V27', 'V30',
    'V36', 'V37', 'V40', 'V41', 'V44', 'V47', 'V48', 'V54', 'V56', 'V59', 'V62', 'V65', 'V67', 'V68', 'V70',
    'V76', 'V78', 'V80', 'V82', 'V86', 'V88', 'V89', 'V91', 'V96', 'V98', 'V99', 'V104', 'V107', 'V108',
    'V111', 'V115', 'V117', 'V120', 'V121', 'V123', 'V124', 'V127', 'V129', 'V130', 'V136', 'V138', 'V139',
    'V142', 'V147', 'V156', 'V162', 'V165', 'V160', 'V166', 'V178', 'V176', 'V173', 'V182', 'V187', 'V203',
    'V205', 'V207', 'V215', 'V169', 'V171', 'V175', 'V180', 'V185', 'V188', 'V198', 'V210', 'V209', 'V218',
    'V223', 'V224', 'V226', 'V228', 'V229', 'V235', 'V240', 'V258', 'V257', 'V253', 'V252', 'V260', 'V261',
    'V264', 'V266', 'V267', 'V274', 'V277', 'V220', 'V221', 'V234', 'V238', 'V250', 'V271', 'V294', 'V284',
    'V285', 'V286', 'V291', 'V297', 'V303', 'V305', 'V307', 'V309', 'V310', 'V320', 'V281', 'V283', 'V289',
    'V296', 'V301', 'V314', 'V332', 'V325', 'V335', 'V338'
]
train_trans = train_trans[filtered_cols]
filtered_cols2 = [
    'TransactionID', 'TransactionDT', 'TransactionAmt', 'ProductCD', 'card1', 'card2',
    'card3', 'card4', 'card5', 'card6', 'addr1', 'addr2', 'dist1', 'dist2', 'P_emaildomain',
    'R_emaildomain', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12',
    'C13', 'C14', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12',
    'D13', 'D14', 'D15', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9',
    'V1', 'V3', 'V4', 'V6', 'V8', 'V11', 'V13', 'V14', 'V17', 'V20', 'V23', 'V26', 'V27', 'V30',
    'V36', 'V37', 'V40', 'V41', 'V44', 'V47', 'V48', 'V54', 'V56', 'V59', 'V62', 'V65', 'V67', 'V68', 'V70',
    'V76', 'V78', 'V80', 'V82', 'V86', 'V88', 'V89', 'V91', 'V96', 'V98', 'V99', 'V104', 'V107', 'V108',
    'V111', 'V115', 'V117', 'V120', 'V121', 'V123', 'V124', 'V127', 'V129', 'V130', 'V136', 'V138', 'V139',
    'V142', 'V147', 'V156', 'V162', 'V165', 'V160', 'V166', 'V178', 'V176', 'V173', 'V182', 'V187', 'V203',
    'V205', 'V207', 'V215', 'V169', 'V171', 'V175', 'V180', 'V185', 'V188', 'V198', 'V210', 'V209', 'V218',
    'V223', 'V224', 'V226', 'V228', 'V229', 'V235', 'V240', 'V258', 'V257', 'V253', 'V252', 'V260', 'V261',
    'V264', 'V266', 'V267', 'V274', 'V277', 'V220', 'V221', 'V234', 'V238', 'V250', 'V271', 'V294', 'V284',
    'V285', 'V286', 'V291', 'V297', 'V303', 'V305', 'V307', 'V309', 'V310', 'V320', 'V281', 'V283', 'V289',
    'V296', 'V301', 'V314', 'V332', 'V325', 'V335', 'V338'
]
test_trans = test_trans[filtered_cols2]

In [229]:
train_trans['TransactionDay'] = train_trans['TransactionDT'] // (24 * 60 * 60)
test_trans['TransactionDay'] = test_trans['TransactionDT'] // (24 * 60 * 60)

In [230]:
train_trans['TransactionDT2'] = train_trans['TransactionDT']
test_trans['TransactionDT2'] = test_trans['TransactionDT']

In [232]:
for col in train_trans.columns:
    for i in range(1, 16):
      if i != 9:
        train_trans[f'D{i}n'] = train_trans['TransactionDay'] - train_trans[f'D{i}']
        test_trans[f'D{i}n'] = test_trans['TransactionDay'] - test_trans[f'D{i}']

In [233]:
train_trans['uid'] = train_trans.card1.astype(str) + '_' + train_trans.addr1.astype(str) + '_' + train_trans.D1n.astype(str)

In [234]:
test_trans['uid'] = train_trans.card1.astype(str) + '_' + train_trans.addr1.astype(str) + '_' + train_trans.D1n.astype(str)

In [235]:
import datetime
import pandas as pd

START_DATE = datetime.datetime.strptime('2017-11-30', '%Y-%m-%d')

# TransactionDT가 이미 datetime 유형인 경우
train_trans['TransactionDT'] = pd.to_datetime(train_trans['TransactionDT'])
test_trans['TransactionDT'] = pd.to_datetime(test_trans['TransactionDT'])

# 년도와 월을 이용해 DT_M 계산
train_trans['DT_M'] = (train_trans['TransactionDT'].dt.year - 2017) * 12 + train_trans['TransactionDT'].dt.month
test_trans['DT_M'] = (test_trans['TransactionDT'].dt.year - 2017) * 12 + test_trans['TransactionDT'].dt.month

# TransactionDTday 계산
train_trans['TransactionDTday'] = train_trans['TransactionDT'].dt.day
test_trans['TransactionDTday'] = test_trans['TransactionDT'].dt.day

# 확인
print(train_trans[['TransactionDT', 'DT_M', 'TransactionDTday']].head())
print(test_trans[['TransactionDT', 'DT_M', 'TransactionDTday']].head())


                  TransactionDT  DT_M  TransactionDTday
0 1970-01-01 00:00:00.000086400  -563                 1
1 1970-01-01 00:00:00.000086401  -563                 1
2 1970-01-01 00:00:00.000086469  -563                 1
3 1970-01-01 00:00:00.000086499  -563                 1
4 1970-01-01 00:00:00.000086506  -563                 1
                  TransactionDT  DT_M  TransactionDTday
0 1970-01-01 00:00:00.018403224  -563                 1
1 1970-01-01 00:00:00.018403263  -563                 1
2 1970-01-01 00:00:00.018403310  -563                 1
3 1970-01-01 00:00:00.018403310  -563                 1
4 1970-01-01 00:00:00.018403317  -563                 1


In [236]:
train_trans['TransactionDT2'] = pd.to_datetime(train_trans['TransactionDT2'], unit='s', origin=START_DATE)
test_trans['TransactionDT2'] = pd.to_datetime(test_trans['TransactionDT2'], unit='s', origin=START_DATE)

client_activity = train_trans.groupby('uid')['TransactionDT2'].agg(['min', 'max'])
client_activity['duration'] = (client_activity['max'] - client_activity['min']).dt.days

# 활동 기간 피처 추가
train_trans = train_trans.merge(client_activity['duration'], on='uid', how='left')
test_trans = test_trans.merge(client_activity['duration'], on='uid', how='left')
train_trans.rename(columns={'duration': 'client_activity_duration'}, inplace=True)
test_trans.rename(columns={'duration': 'client_activity_duration'}, inplace=True)

In [251]:
def encode_AG(main_columns, uids, aggregations=['mean'], train_df = train_trans, test_df = test_trans,
              fillna=True, usena=False):
    # AGGREGATION OF MAIN WITH UID FOR GIVEN STATISTICS
    for main_column in main_columns:
        for col in uids:
            for agg_type in aggregations:
                new_col_name = main_column+'_'+col+'_'+agg_type
                temp_df = pd.concat([train_df[[col, main_column]], test_df[[col,main_column]]])
                if usena: temp_df.loc[temp_df[main_column]==-1,main_column] = np.nan
                temp_df = temp_df.groupby([col])[main_column].agg([agg_type]).reset_index().rename(
                                                        columns={agg_type: new_col_name})

                temp_df.index = list(temp_df[col])
                temp_df = temp_df[new_col_name].to_dict()

                train_df[new_col_name] = train_df[col].map(temp_df).astype('float32')
                test_df[new_col_name]  = test_df[col].map(temp_df).astype('float32')

                if fillna:
                    train_df[new_col_name].fillna(-1,inplace=True)
                    test_df[new_col_name].fillna(-1,inplace=True)

                print("'"+new_col_name+"'",', ',end='')

In [238]:
def encode_AG2(main_columns, uids, train_df=train_trans, test_df=test_trans):
    for main_column in main_columns:
        for col in uids:
            comb = pd.concat([train_df[[col, main_column]],test_df[[col, main_column]]],axis=0)
            mp = comb.groupby(col)[main_column].agg(['nunique'])['nunique'].to_dict()
            train_df[col+'_'+main_column+'_ct'] = train_df[col].map(mp).astype('float32')
            test_df[col+'_'+main_column+'_ct'] = test_df[col].map(mp).astype('float32')
            print(col+'_'+main_column+'_ct, ',end='')

In [239]:
def encode_FE(df1, df2, cols):
    for col in cols:
        df = pd.concat([df1[col],df2[col]])
        vc = df.value_counts(dropna=True, normalize=True).to_dict()
        vc[-1] = -1
        nm = col+'_FE'
        df1[nm] = df1[col].map(vc)
        df1[nm] = df1[nm].astype('float32')
        df2[nm] = df2[col].map(vc)
        df2[nm] = df2[nm].astype('float32')
        print(nm,', ',end='')

In [240]:
def encode_CB(col1,col2,df1=train_trans,df2=test_trans):
    nm = col1+'_'+col2
    df1[nm] = df1[col1].astype(str)+'_'+df1[col2].astype(str)
    df2[nm] = df2[col1].astype(str)+'_'+df2[col2].astype(str)
    print(nm,', ',end='')

In [241]:
def encode_LE(columns, train_df, test_df, verbose=True):
    for col in columns:
        df_comb = pd.concat([train_df[col], test_df[col]], axis=0)
        df_comb, _ = df_comb.factorize(sort=True)
        nm = col
        if df_comb.max() > 32000:
            train_df[nm] = df_comb[:len(train_df)].astype('int32')
            test_df[nm] = df_comb[len(train_df):].astype('int32')
        else:
            train_df[nm] = df_comb[:len(train_df)].astype('int16')
            test_df[nm] = df_comb[len(train_df):].astype('int16')
        del df_comb
        gc.collect()
        if verbose:
            print(nm, ', ', end='')

In [242]:
encode_AG2(['TransactionAmt', 'D4n', 'D10n', 'D15n'], ['uid'])

uid_TransactionAmt_ct, uid_D4n_ct, uid_D10n_ct, uid_D15n_ct, 

In [243]:
encode_AG(['D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15'], ['uid'], aggregations=['mean', 'std'])

'D1_uid_mean' , 'D1_uid_std' , 'D2_uid_mean' , 'D2_uid_std' , 'D3_uid_mean' , 'D3_uid_std' , 'D4_uid_mean' , 'D4_uid_std' , 'D5_uid_mean' , 'D5_uid_std' , 'D6_uid_mean' , 'D6_uid_std' , 'D7_uid_mean' , 'D7_uid_std' , 'D8_uid_mean' , 'D8_uid_std' , 'D10_uid_mean' , 'D10_uid_std' , 'D11_uid_mean' , 'D11_uid_std' , 'D12_uid_mean' , 'D12_uid_std' , 'D13_uid_mean' , 'D13_uid_std' , 'D14_uid_mean' , 'D14_uid_std' , 'D15_uid_mean' , 'D15_uid_std' , 

In [244]:
encode_AG(['D1n', 'D2n', 'D3n', 'D4n', 'D5n', 'D6n', 'D7n', 'D8n', 'D10n', 'D11n', 'D12n', 'D13n', 'D14n', 'D15n'], ['uid'], aggregations=['mean', 'std'])

'D1n_uid_mean' , 'D1n_uid_std' , 'D2n_uid_mean' , 'D2n_uid_std' , 'D3n_uid_mean' , 'D3n_uid_std' , 'D4n_uid_mean' , 'D4n_uid_std' , 'D5n_uid_mean' , 'D5n_uid_std' , 'D6n_uid_mean' , 'D6n_uid_std' , 'D7n_uid_mean' , 'D7n_uid_std' , 'D8n_uid_mean' , 'D8n_uid_std' , 'D10n_uid_mean' , 'D10n_uid_std' , 'D11n_uid_mean' , 'D11n_uid_std' , 'D12n_uid_mean' , 'D12n_uid_std' , 'D13n_uid_mean' , 'D13n_uid_std' , 'D14n_uid_mean' , 'D14n_uid_std' , 'D15n_uid_mean' , 'D15n_uid_std' , 

In [245]:
encode_AG2(['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10'], ['uid'])
encode_AG(['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10'], ['uid'], aggregations=['mean', 'std'])

uid_C1_ct, uid_C2_ct, uid_C3_ct, uid_C4_ct, uid_C5_ct, uid_C6_ct, uid_C7_ct, uid_C8_ct, uid_C9_ct, uid_C10_ct, 'C1_uid_mean' , 'C1_uid_std' , 'C2_uid_mean' , 'C2_uid_std' , 'C3_uid_mean' , 'C3_uid_std' , 'C4_uid_mean' , 'C4_uid_std' , 'C5_uid_mean' , 'C5_uid_std' , 'C6_uid_mean' , 'C6_uid_std' , 'C7_uid_mean' , 'C7_uid_std' , 'C8_uid_mean' , 'C8_uid_std' , 'C9_uid_mean' , 'C9_uid_std' , 'C10_uid_mean' , 'C10_uid_std' , 

In [246]:
# 거리 정보 관련 피처 생성
train_trans['log_dist1'] = np.log1p(train_trans['dist1'])
test_trans['log_dist1'] = np.log1p(test_trans['dist1'])
train_trans['log_dist2'] = np.log1p(train_trans['dist2'])
test_trans['log_dist2'] = np.log1p(test_trans['dist2'])
encode_AG(['dist1', 'dist2'], ['uid'], aggregations=['mean', 'std'])

'dist1_uid_mean' , 'dist1_uid_std' , 'dist2_uid_mean' , 'dist2_uid_std' , 

In [247]:
# 거래 금액 관련 피처 생성
train_trans['log_TransactionAmt'] = np.log1p(train_trans['TransactionAmt'])
test_trans['log_TransactionAmt'] = np.log1p(test_trans['TransactionAmt'])
encode_FE(train_trans, test_trans, ['TransactionAmt'])

TransactionAmt_FE , 

In [250]:
train_trans.uid

0          13926_315.0_-13.0
1             2755_325.0_1.0
2             4663_330.0_1.0
3         18132_476.0_-111.0
4             4497_420.0_1.0
                 ...        
590535      6550_272.0_153.0
590536     10444_204.0_182.0
590537     12037_231.0_182.0
590538      7826_387.0_160.0
590539     15066_299.0_182.0
Name: uid, Length: 590540, dtype: object

In [252]:
encode_AG(['log_dist1', 'log_TransactionAmt'], 'uid', train_, test_)

KeyError: "['u'] not in index"

TransactionAmt (거래 금액)
- 추천 인코딩: 로그 변환, 빈도 인코딩
- 이유: 거래 금액의 분포를 정규화하고, 특정 금액의 빈도 정보를 얻기 위해.
- 가설: 특정 금액대에서 사기 거래의 비율이 높을 수 있습니다.

In [253]:
encode_CB('P_emaildomain', 'R_emaildomain', train_trans, test_trans)
encode_FE(train_trans, test_trans, ['P_emaildomain', 'R_emaildomain'])

P_emaildomain_R_emaildomain , P_emaildomain_FE , R_emaildomain_FE , 

In [254]:
id_feature = [ c for c in train_trans.columns if c.find('id_') !=-1]
v_feature = [ c for c in train_trans.columns if c.find('V') !=-1]
card_feature = [ c for c in train_trans.columns if c.find('card') !=-1]
C_feature = [ c for c in train_trans.columns if c.find('C') !=-1 and c != 'ProductCD']
D_feature = [ c for c in train_trans.columns if c.find('n') == -1 and c.find('D') !=-1 and c not in ['ProductCD','TransactionID','TransactionDT','DeviceType','DeviceInfo','TransactionDay', 'DT_M']]
Dn_feature = [ c for c in train_trans.columns if c.find('n') != -1 and c.find('D') !=-1 and c not in ['ProductCD','TransactionID','TransactionDT','DeviceType','DeviceInfo','TransactionDay', 'DT_M']]
M_feature = [ c for c in train_trans.columns if c.find('M') !=-1]

In [255]:
mapping = {'T': 1, 'F': 0, 'M0': 0, 'M1': 1, 'M2': 2}
for col in M_feature:
    train_trans[col] = train_trans[col].map(mapping)
    test_trans[col] = test_trans[col].map(mapping)

In [249]:
def encode_LE(columns, train_df, test_df, verbose=True):
    for col in columns:
        df_comb = pd.concat([train_df[col], test_df[col]], axis=0)
        df_comb, _ = df_comb.factorize(sort=True)
        nm = col
        if df_comb.max() > 32000:
            train_df[nm] = df_comb[:len(train_df)].astype('int32')
            test_df[nm] = df_comb[len(train_df):].astype('int32')
        else:
            train_df[nm] = df_comb[:len(train_df)].astype('int16')
            test_df[nm] = df_comb[len(train_df):].astype('int16')
        del df_comb
        gc.collect()
        if verbose:
            print(nm, ', ', end='')

In [256]:
train_trans.head(1)

Unnamed: 0,TransactionID,isFraud,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,addr2,dist1,dist2,P_emaildomain,R_emaildomain,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15,M1,M2,M3,M4,M5,M6,M7,M8,M9,V1,V3,V4,V6,V8,V11,V13,V14,V17,V20,V23,V26,V27,V30,V36,V37,V40,V41,V44,V47,V48,V54,V56,V59,V62,V65,V67,V68,V70,V76,V78,V80,V82,V86,V88,V89,V91,V96,V98,V99,V104,V107,V108,V111,V115,V117,V120,V121,V123,V124,V127,V129,V130,V136,V138,V139,V142,V147,V156,V162,V165,V160,V166,V178,V176,V173,V182,V187,V203,V205,V207,V215,V169,V171,V175,V180,V185,V188,V198,V210,V209,V218,V223,V224,V226,V228,V229,V235,V240,V258,V257,V253,V252,V260,V261,V264,V266,V267,V274,V277,V220,V221,V234,V238,V250,V271,V294,V284,V285,V286,V291,V297,V303,V305,V307,V309,V310,V320,V281,V283,V289,V296,V301,V314,V332,V325,V335,V338,TransactionDay,TransactionDT2,D1n,D2n,D3n,D4n,D5n,D6n,D7n,D8n,D10n,D11n,D12n,D13n,D14n,D15n,uid,DT_M,TransactionDTday,client_activity_duration,uid_TransactionAmt_ct,uid_D4n_ct,uid_D10n_ct,uid_D15n_ct,D1_uid_mean,D1_uid_std,D2_uid_mean,D2_uid_std,D3_uid_mean,D3_uid_std,D4_uid_mean,D4_uid_std,D5_uid_mean,D5_uid_std,D6_uid_mean,D6_uid_std,D7_uid_mean,D7_uid_std,D8_uid_mean,D8_uid_std,D10_uid_mean,D10_uid_std,D11_uid_mean,D11_uid_std,D12_uid_mean,D12_uid_std,D13_uid_mean,D13_uid_std,D14_uid_mean,D14_uid_std,D15_uid_mean,D15_uid_std,D1n_uid_mean,D1n_uid_std,D2n_uid_mean,D2n_uid_std,D3n_uid_mean,D3n_uid_std,D4n_uid_mean,D4n_uid_std,D5n_uid_mean,D5n_uid_std,D6n_uid_mean,D6n_uid_std,D7n_uid_mean,D7n_uid_std,D8n_uid_mean,D8n_uid_std,D10n_uid_mean,D10n_uid_std,D11n_uid_mean,D11n_uid_std,D12n_uid_mean,D12n_uid_std,D13n_uid_mean,D13n_uid_std,D14n_uid_mean,D14n_uid_std,D15n_uid_mean,D15n_uid_std,uid_C1_ct,uid_C2_ct,uid_C3_ct,uid_C4_ct,uid_C5_ct,uid_C6_ct,uid_C7_ct,uid_C8_ct,uid_C9_ct,uid_C10_ct,C1_uid_mean,C1_uid_std,C2_uid_mean,C2_uid_std,C3_uid_mean,C3_uid_std,C4_uid_mean,C4_uid_std,C5_uid_mean,C5_uid_std,C6_uid_mean,C6_uid_std,C7_uid_mean,C7_uid_std,C8_uid_mean,C8_uid_std,C9_uid_mean,C9_uid_std,C10_uid_mean,C10_uid_std,log_dist1,log_dist2,dist1_uid_mean,dist1_uid_std,dist2_uid_mean,dist2_uid_std,log_TransactionAmt,TransactionAmt_FE,P_emaildomain_R_emaildomain,P_emaildomain_FE,R_emaildomain_FE
0,2987000,0,1970-01-01 00:00:00.000086400,68.5,W,13926,,150.0,discover,142.0,credit,315.0,87.0,19.0,,,,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0,14.0,,13.0,,,,,,,13.0,13.0,,,,0.0,1.0,1.0,1.0,2.0,0.0,1.0,,,,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,,,,,,,,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,117.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,117.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,1,2017-12-01,-13.0,,-12.0,,,,,,-12.0,-12.0,,,,1.0,13926_315.0_-13.0,,1,0,2.0,1.0,2.0,2.0,216.5,286.378235,419.0,-1.0,20.0,9.899495,398.0,-1.0,27.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,215.5,286.378235,108.0,134.350296,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,204.5,289.206665,-109.5,136.471603,-206.0,-1.0,87.0,140.007141,-185.0,-1.0,186.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-108.5,136.471603,-1.0,15.556349,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-97.5,139.300034,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,3.5,3.535534,3.5,3.535534,0.0,0.0,0.0,0.0,1.5,2.12132,2.5,2.12132,0.0,0.0,0.0,0.0,3.5,3.535534,0.0,0.0,2.995732,,10.0,12.727922,-1.0,-1.0,4.241327,0.001198,nan_nan,,


In [257]:
drop_cols = ['TransactionID', 'P_emaildomain', 'R_emaildomain', 'card1', 'card2', 'card3', 'card4', 'card5', 'card6', 'TransactionDT', 'TransactionDT2', 'DT_M', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'addr1', 'addr2', 'dist1', 'dist2']
train_ = train_trans.drop(columns=drop_cols)
test_ = test_trans.drop(columns=drop_cols)

In [208]:
train_trans['uid']

0          13926_315.0_-13.0
1             2755_325.0_1.0
2             4663_330.0_1.0
3         18132_476.0_-111.0
4             4497_420.0_1.0
                 ...        
590535      6550_272.0_153.0
590536     10444_204.0_182.0
590537     12037_231.0_182.0
590538      7826_387.0_160.0
590539     15066_299.0_182.0
Name: uid, Length: 590540, dtype: object

In [258]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

label_cols = ['ProductCD']

encode_LE(label_cols, train_, test_)

ProductCD , 

In [259]:
def encode_LE2(col, df1, df2, verbose=True):
    df_combined = pd.concat([df1[col], df2[col]], axis=0)
    df_combined, _ = df_combined.factorize(sort=True)
    df1[col] = df_combined[:len(df1)]
    df2[col] = df_combined[len(df1):]
    if verbose:
        print(col, ', ', end='')

In [260]:
encode_LE2('P_emaildomain_R_emaildomain', train_, test_)

P_emaildomain_R_emaildomain , 

In [261]:
train_['P_emaildomain_R_emaildomain']

0         573
1         316
2         632
3         826
4         316
         ... 
590535    573
590536    316
590537    316
590538     80
590539    316
Name: P_emaildomain_R_emaildomain, Length: 590540, dtype: int64

In [223]:
train_.head(1)

Unnamed: 0,isFraud,TransactionAmt,ProductCD,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,M1,M2,M3,M4,M5,M6,M7,M8,M9,V1,V3,V4,V6,V8,V11,V13,V14,V17,V20,V23,V26,V27,V30,V36,V37,V40,V41,V44,V47,V48,V54,V56,V59,V62,V65,V67,V68,V70,V76,V78,V80,V82,V86,V88,V89,V91,V96,V98,V99,V104,V107,V108,V111,V115,V117,V120,V121,V123,V124,V127,V129,V130,V136,V138,V139,V142,V147,V156,V162,V165,V160,V166,V178,V176,V173,V182,V187,V203,V205,V207,V215,V169,V171,V175,V180,V185,V188,V198,V210,V209,V218,V223,V224,V226,V228,V229,V235,V240,V258,V257,V253,V252,V260,V261,V264,V266,V267,V274,V277,V220,V221,V234,V238,V250,V271,V294,V284,V285,V286,V291,V297,V303,V305,V307,V309,V310,V320,V281,V283,V289,V296,V301,V314,V332,V325,V335,V338,TransactionDay,log_dist1,log_dist2,P_emaildomain_R_emaildomain,P_emaildomain_FE,R_emaildomain_FE,log_TransactionAmt,TransactionAmt_FE,D1n,D2n,D3n,D4n,D5n,D6n,D7n,D8n,D10n,D11n,D12n,D13n,D14n,D15n,uid,TransactionDTday,client_activity_duration,C1_TransactionAmt_mean,C1_TransactionAmt_sum,C2_TransactionAmt_mean,C2_TransactionAmt_sum,C3_TransactionAmt_mean,C3_TransactionAmt_sum,C4_TransactionAmt_mean,C4_TransactionAmt_sum,C5_TransactionAmt_mean,C5_TransactionAmt_sum,C6_TransactionAmt_mean,C6_TransactionAmt_sum,C7_TransactionAmt_mean,C7_TransactionAmt_sum,C8_TransactionAmt_mean,C8_TransactionAmt_sum,C9_TransactionAmt_mean,C9_TransactionAmt_sum,C10_TransactionAmt_mean,C10_TransactionAmt_sum,C11_TransactionAmt_mean,C11_TransactionAmt_sum,C12_TransactionAmt_mean,C12_TransactionAmt_sum,C13_TransactionAmt_mean,C13_TransactionAmt_sum,C14_TransactionAmt_mean,C14_TransactionAmt_sum,C1_TransactionAmt_mean_uid,C2_TransactionAmt_mean_uid,C3_TransactionAmt_mean_uid,C4_TransactionAmt_mean_uid,C5_TransactionAmt_mean_uid,C6_TransactionAmt_mean_uid,C7_TransactionAmt_mean_uid,C8_TransactionAmt_mean_uid,C9_TransactionAmt_mean_uid,C10_TransactionAmt_mean_uid,C11_TransactionAmt_mean_uid,C12_TransactionAmt_mean_uid,C13_TransactionAmt_mean_uid,C14_TransactionAmt_mean_uid,C1_TransactionAmt_sum_uid,C2_TransactionAmt_sum_uid,C3_TransactionAmt_sum_uid,C4_TransactionAmt_sum_uid,C5_TransactionAmt_sum_uid,C6_TransactionAmt_sum_uid,C7_TransactionAmt_sum_uid,C8_TransactionAmt_sum_uid,C9_TransactionAmt_sum_uid,C10_TransactionAmt_sum_uid,C11_TransactionAmt_sum_uid,C12_TransactionAmt_sum_uid,C13_TransactionAmt_sum_uid,C14_TransactionAmt_sum_uid
0,0,68.5,4,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0,1.0,,,,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,,,,,,,,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,117.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,117.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,1,2.995732,,573,,,4.241327,0.001198,-13.0,,-12.0,,,,,,-12.0,-12.0,,,,1.0,13926_315.0_-13.0,1,0,150.040883,47531600.0,154.905039,48990420.0,135.154332,79485750.0,150.920681,68198490.0,117.311315,43690840.0,147.91546,50520820.0,146.897158,76848070.0,151.679092,67901720.0,172.718073,39541730.0,150.40526,68200060.0,120.33687,10791450.0,148.241751,72519720.0,150.907173,30081380.0,142.848178,45738420.0,150.040883,154.905039,135.154332,150.920681,117.311315,147.91546,146.897158,151.679092,172.718073,150.40526,120.33687,148.241751,150.907173,142.848178,47531600.0,48990420.0,79485750.0,68198490.0,43690840.0,50520820.0,76848070.0,67901720.0,39541730.0,68200060.0,10791450.0,72519720.0,30081380.0,45738420.0


In [267]:
# 각 D 컬럼별로 'TransactionAmt'의 평균과 총합 계산 함수
def add_transaction_amt_features2(df):
    for i in range(1, 16):
        if i != 9:
            col = f'D{i}n'
            df[f'{col}_TransactionAmt_mean'] = df.groupby(col)['TransactionAmt'].transform('mean')
            df[f'{col}_TransactionAmt_sum'] = df.groupby(col)['TransactionAmt'].transform('sum')
    return df

In [268]:
train_ = add_transaction_amt_features2(train_)
test_ = add_transaction_amt_features2(test_)

In [269]:
# 각 C 컬럼별로 'TransactionAmt'의 평균과 총합 계산 함수
def add_transaction_amt_features(df):
    for col in [f'C{i}' for i in range(1, 15)]:
        df[f'{col}_TransactionAmt_mean'] = df.groupby(col)['TransactionAmt'].transform('mean')
        df[f'{col}_TransactionAmt_sum'] = df.groupby(col)['TransactionAmt'].transform('sum')
    return df

# train_와 test_에 동일한 과정 적용
train_ = add_transaction_amt_features(train_)
test_ = add_transaction_amt_features(test_)

# 중첩된 그룹화 예시: 'uid'로 그룹화하여 새로운 통계 계산 함수
def add_grouped_stats(df):
    grouped_stats = df.groupby('uid').agg(
        {f'{col}_TransactionAmt_mean': 'mean' for col in [f'C{i}' for i in range(1, 15)]} |
        {f'{col}_TransactionAmt_sum': 'sum' for col in [f'C{i}' for i in range(1, 15)]}
    ).reset_index()
    return grouped_stats

# train_와 test_에 동일한 과정 적용
train_grouped_stats = add_grouped_stats(train_)
test_grouped_stats = add_grouped_stats(test_)

# train_와 test_에 다시 병합
train_ = train_.merge(train_grouped_stats, on='uid', how='left', suffixes=('', '_uid'))
test_ = test_.merge(test_grouped_stats, on='uid', how='left', suffixes=('', '_uid'))

In [270]:
train_.drop(columns = 'uid', inplace = True)
test_.drop(columns = 'uid', inplace = True)

In [271]:
# NaN 값을 -999로 채우기 (모든 열에 대해 동일하게 처리)
train_.fillna(-999, inplace=True)
test_.fillna(-999, inplace=True)

In [273]:
from sklearn.preprocessing import StandardScaler

# 정규화할 컬럼 리스트
normalize_cols = [
    'C1_TransactionAmt_sum', 'C2_TransactionAmt_mean', 'C2_TransactionAmt_sum', 'C3_TransactionAmt_mean',
    'C3_TransactionAmt_sum', 'C4_TransactionAmt_mean', 'C4_TransactionAmt_sum', 'C5_TransactionAmt_mean',
    'C5_TransactionAmt_sum', 'C6_TransactionAmt_mean', 'C6_TransactionAmt_sum', 'C7_TransactionAmt_mean',
    'C7_TransactionAmt_sum', 'C8_TransactionAmt_mean', 'C8_TransactionAmt_sum', 'C9_TransactionAmt_mean',
    'C9_TransactionAmt_sum', 'C10_TransactionAmt_mean', 'C10_TransactionAmt_sum', 'C11_TransactionAmt_mean',
    'C11_TransactionAmt_sum', 'C12_TransactionAmt_mean', 'C12_TransactionAmt_sum', 'C13_TransactionAmt_mean',
    'C13_TransactionAmt_sum', 'C14_TransactionAmt_mean', 'C14_TransactionAmt_sum', 'C1_TransactionAmt_mean_uid',
    'C2_TransactionAmt_mean_uid', 'C3_TransactionAmt_mean_uid', 'C4_TransactionAmt_mean_uid', 'C5_TransactionAmt_mean_uid',
    'C6_TransactionAmt_mean_uid', 'C7_TransactionAmt_mean_uid', 'C8_TransactionAmt_mean_uid', 'C9_TransactionAmt_mean_uid',
    'C10_TransactionAmt_mean_uid', 'C11_TransactionAmt_mean_uid', 'C12_TransactionAmt_mean_uid', 'C13_TransactionAmt_mean_uid',
    'C14_TransactionAmt_mean_uid', 'C1_TransactionAmt_sum_uid', 'C2_TransactionAmt_sum_uid', 'C3_TransactionAmt_sum_uid',
    'C4_TransactionAmt_sum_uid', 'C5_TransactionAmt_sum_uid', 'C6_TransactionAmt_sum_uid', 'C7_TransactionAmt_sum_uid',
    'C8_TransactionAmt_sum_uid', 'C9_TransactionAmt_sum_uid', 'C10_TransactionAmt_sum_uid', 'C11_TransactionAmt_sum_uid',
    'C12_TransactionAmt_sum_uid', 'C13_TransactionAmt_sum_uid', 'C14_TransactionAmt_sum_uid'
]

# 스케일링 수행
scaler = StandardScaler()

train_[normalize_cols] = scaler.fit_transform(train_[normalize_cols])
test_[normalize_cols] = scaler.transform(test_[normalize_cols])

In [276]:
X = train_.drop(columns = ['isFraud','C1_TransactionAmt_mean_uid'])
y = train_.isFraud

In [277]:
from sklearn.model_selection import train_test_split

# train, valid split
X_tr, X_v, y_tr, y_v = train_test_split(X, y, test_size=0.25,
random_state=2024,
stratify=y)


import lightgbm as lgb

# 데이터 셋 생성
train_data = lgb.Dataset(X_tr, label=y_tr)
valid_data = lgb.Dataset(X_v, label=y_v)

params = {
'objective': 'binary',
'metric': 'auc'
}

# 모델 학습
callbacks = [lgb.early_stopping(stopping_rounds=100),
lgb.log_evaluation(period=100)]
clf = lgb.train(params, train_data,
num_boost_round = 10000,
valid_sets = [train_data, valid_data],
callbacks=callbacks)

# test 제출
preds = clf.predict(test_)
result = pd.read_csv("/content/drive/MyDrive/프로젝트/sample_submission.csv")
result ['isFraud'] = preds
result .to_csv('result.csv', index=False)

LightGBMError: Feature (C2_TransactionAmt_mean_uid) appears more than one time.

In [None]:
# 중요도 추출
feature_importances = clf.feature_importance(importance_type='split')  # 'split' or 'gain'
feature_names = clf.feature_name()

# 중요도 데이터프레임 생성
importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': feature_importances
})

# 중요도 기준으로 정렬
importance_df = importance_df.sort_values(by='importance', ascending=False).reset_index(drop=True)

# 상위 N개의 특성 선택
top_n = 20
top_features = importance_df.head(top_n)

# 중요도 시각화
plt.figure(figsize=(12, 8))
plt.barh(top_features['feature'], top_features['importance'])
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title(f'Top {top_n} Feature Importances')
plt.gca().invert_yaxis()
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.show()

In [162]:
# 중요도가 낮은 피처 제거
# 여기서 10을 임계값으로 설정합니다. 임계값은 원하는대로 조정 가능합니다.
threshold = 10
low_importance_features = importance_df[importance_df['importance'] < threshold]['feature'].tolist()

# 중요도가 낮은 피처 제거
X = X.drop(columns=low_importance_features)
test_ = test_.drop(columns=low_importance_features)

# 새로운 데이터셋으로 모델 학습 및 평가
X_tr, X_v, y_tr, y_v = train_test_split(X, y, test_size=0.25, random_state=2024, stratify=y)
train_data = lgb.Dataset(X_tr, label=y_tr)
valid_data = lgb.Dataset(X_v, label=y_v)

clf = lgb.train(params, train_data, num_boost_round=10000, valid_sets=[train_data, valid_data], callbacks=callbacks)

# test 제출
preds = clf.predict(test_)
result = pd.read_csv("/content/drive/MyDrive/프로젝트/sample_submission.csv")
result['isFraud'] = preds
result.to_csv('result.csv', index=False)

Unnamed: 0,isFraud,TransactionAmt,ProductCD,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,M1,M2,M3,M4,M5,M6,M7,M8,M9,V1,V3,V4,V6,V8,V11,V13,V14,V17,V20,V23,V26,V27,V30,V36,V37,V40,V41,V44,V47,V48,V54,V56,V59,V62,V65,V67,V68,V70,V76,V78,V80,V82,V86,V88,V89,V91,V96,V98,V99,V104,V107,V108,V111,V115,V117,V120,V121,V123,V124,V127,V129,V130,V136,V138,V139,V142,V147,V156,V162,V165,V160,V166,V178,V176,V173,V182,V187,V203,V205,V207,V215,V169,V171,V175,V180,V185,V188,V198,V210,V209,V218,V223,V224,V226,V228,V229,V235,V240,V258,V257,V253,V252,V260,V261,V264,V266,V267,V274,V277,V220,V221,V234,V238,V250,V271,V294,V284,V285,V286,V291,V297,V303,V305,V307,V309,V310,V320,V281,V283,V289,V296,V301,V314,V332,V325,V335,V338,TransactionDay,D1n,D2n,D3n,D4n,D5n,D6n,D7n,D8n,D10n,D11n,D12n,D13n,D14n,D15n,P_emaildomain_R_emaildomain,P_emaildomain_FE,R_emaildomain_FE,TransactionDTday,client_activity_duration,log_dist1,log_dist2,log_TransactionAmt,TransactionAmt_FE,C1_TransactionAmt_mean,C1_TransactionAmt_sum,C2_TransactionAmt_mean,C2_TransactionAmt_sum,C3_TransactionAmt_mean,C3_TransactionAmt_sum,C4_TransactionAmt_mean,C4_TransactionAmt_sum,C5_TransactionAmt_mean,C5_TransactionAmt_sum,C6_TransactionAmt_mean,C6_TransactionAmt_sum,C7_TransactionAmt_mean,C7_TransactionAmt_sum,C8_TransactionAmt_mean,C8_TransactionAmt_sum,C9_TransactionAmt_mean,C9_TransactionAmt_sum,C10_TransactionAmt_mean,C10_TransactionAmt_sum,C11_TransactionAmt_mean,C11_TransactionAmt_sum,C12_TransactionAmt_mean,C12_TransactionAmt_sum,C13_TransactionAmt_mean,C13_TransactionAmt_sum,C14_TransactionAmt_mean,C14_TransactionAmt_sum,C1_TransactionAmt_mean_uid,C2_TransactionAmt_mean_uid,C3_TransactionAmt_mean_uid,C4_TransactionAmt_mean_uid,C5_TransactionAmt_mean_uid,C6_TransactionAmt_mean_uid,C7_TransactionAmt_mean_uid,C8_TransactionAmt_mean_uid,C9_TransactionAmt_mean_uid,C10_TransactionAmt_mean_uid,C11_TransactionAmt_mean_uid,C12_TransactionAmt_mean_uid,C13_TransactionAmt_mean_uid,C14_TransactionAmt_mean_uid,C1_TransactionAmt_sum_uid,C2_TransactionAmt_sum_uid,C3_TransactionAmt_sum_uid,C4_TransactionAmt_sum_uid,C5_TransactionAmt_sum_uid,C6_TransactionAmt_sum_uid,C7_TransactionAmt_sum_uid,C8_TransactionAmt_sum_uid,C9_TransactionAmt_sum_uid,C10_TransactionAmt_sum_uid,C11_TransactionAmt_sum_uid,C12_TransactionAmt_sum_uid,C13_TransactionAmt_sum_uid,C14_TransactionAmt_sum_uid
0,0,68.5,4,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,,,,,,,,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,117.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,117.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,1,-13.0,,-12.0,,,,,,-12.0,-12.0,,,,1.0,573,,,1,0,2.995732,,4.241327,0.001198,150.040883,0.911565,0.767217,0.915675,0.053204,0.064266,0.535447,0.552748,-0.457821,0.676504,0.670533,0.84698,0.358233,0.358905,0.544378,0.563549,1.197454,1.142667,0.52083,0.548531,-0.65603,-1.177947,0.444589,0.454782,0.527804,1.375472,0.270325,0.907878,0.796869,0.869849,0.058782,0.558255,-0.512157,0.758161,0.359412,0.574598,1.341678,0.557014,-0.738201,0.475437,0.609681,0.319121,-0.412622,-0.260886,-0.177086,-0.382323,-0.144249,-0.278255,-0.151071,-0.382944,-0.159654,-0.392133,-0.375995,-0.137683,-0.234281,-0.38581
1,0,29.0,4,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,,,,,,,,,,,,,,,,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,1,1.0,,,1.0,,,,,1.0,,,,,1.0,316,0.466807,,1,0,,,3.401197,0.017956,150.040883,0.911565,0.767217,0.915675,0.053204,0.064266,0.535447,0.552748,-0.457821,0.676504,0.670533,0.84698,0.358233,0.358905,0.544378,0.563549,-0.922365,-0.244083,0.52083,0.548531,0.630591,0.713476,0.444589,0.454782,0.527804,1.375472,0.270325,0.907878,0.796869,0.869849,0.058782,0.558255,-0.512157,0.758161,0.359412,0.574598,-1.033457,0.557014,0.709577,0.475437,0.609681,0.319121,-0.412622,-0.260886,-0.177086,-0.382323,-0.144249,-0.278255,-0.151071,-0.382944,-0.174219,-0.392133,-0.326321,-0.137683,-0.234281,-0.38581
2,0,59.0,4,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,1,1.0,,,1.0,,,,,1.0,-314.0,,,,-314.0,632,0.010641,,1,168,5.66296,,4.094345,0.052812,150.040883,0.911565,0.767217,0.915675,0.053204,0.064266,0.535447,0.552748,-0.457821,0.676504,0.670533,0.84698,0.358233,0.358905,0.544378,0.563549,1.197454,1.142667,0.52083,0.548531,0.630591,0.713476,0.444589,0.454782,0.527804,1.375472,0.270325,0.907878,0.796869,0.367886,0.058782,0.558255,-0.512157,0.204784,0.359412,0.574598,1.341678,0.557014,0.709577,0.475437,-0.011495,0.319121,-0.13025,-0.167765,-0.135208,-0.219214,-0.10224,-0.198752,-0.10892,-0.220106,-0.073527,-0.229704,-0.143322,-0.094913,-0.177667,-0.135971
3,0,50.0,4,2.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,1.0,0.0,25.0,1.0,,,,,,,,,,,,,,,,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,48.0,0.0,10.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1758.0,0.0,354.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,38.0,0.0,10.0,0.0,1.0,0.0,0.0,1.0,1758.0,0.0,354.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,1,-111.0,-111.0,1.0,-93.0,1.0,,,,-83.0,,,,,-110.0,826,0.195788,,1,60,,,3.931826,0.028689,135.411956,-0.709971,-1.456387,-1.295486,0.053204,0.064266,0.535447,0.552748,-0.457821,0.676504,-1.527013,-1.341119,0.358233,0.358905,0.544378,0.563549,1.197454,1.142667,0.52083,0.548531,0.630591,0.713476,0.444589,0.454782,-0.903175,-0.898412,0.270325,0.907878,0.020423,-1.651209,0.058782,0.558255,-0.512157,-1.726568,0.359412,0.574598,1.341678,0.557014,0.709577,0.475437,-1.270734,0.319121,1.859916,-0.15158,0.981542,4.130367,1.018019,-0.160748,1.015112,4.122232,2.223203,4.10172,4.736663,1.045599,-0.302021,6.526395
4,0,50.0,1,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5155.0,169690.796875,2840.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,,,,,,,,,,,,,,316,0.466807,,1,0,,,3.931826,0.028689,150.040883,0.911565,0.767217,0.915675,0.053204,0.064266,0.535447,0.552748,-0.457821,0.676504,0.670533,0.84698,0.358233,0.358905,-1.588766,-1.680611,-0.922365,-0.244083,-1.520408,-1.726913,0.630591,0.713476,0.444589,0.454782,0.527804,1.375472,0.270325,0.907878,0.796869,0.869849,0.058782,0.558255,-0.512157,0.758161,0.359412,-1.676964,-1.033457,-1.626036,0.709577,0.475437,0.609681,0.319121,-0.412622,-0.260886,-0.177086,-0.382323,-0.144249,-0.278255,-0.151071,-0.429969,-0.174219,-0.439103,-0.326321,-0.137683,-0.234281,-0.38581
