# 제품 이상여부 판별 프로젝트

## 1. 데이터 불러오기


### 필수 라이브러리


In [1]:
import os
from pprint import pprint

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

### 데이터 읽어오기


In [2]:
# Directory that holds the input CSV files.
ROOT_DIR = "data"
# Seed constant; not used in the cells shown here (presumably passed as
# random_state to later modeling steps -- TODO confirm).
RANDOM_STATE = 110

# Load data. Both paths are built from ROOT_DIR so that moving the data
# directory only requires changing the constant above.
train = pd.read_csv(os.path.join(ROOT_DIR, "train_adv.csv"))
test = pd.read_csv(os.path.join(ROOT_DIR, "test_adv.csv"))

### 필요하거나 묶일 수 있는 변수 가져오기

In [3]:
# Display the loaded training frame as the cell output.
train

Unnamed: 0,Wip Line_Dam,Process Desc._Dam,Equipment_Dam,Model.Suffix_Dam,Workorder_Dam,Insp. Seq No._Dam,Insp Judge Code_Dam,CURE END POSITION X Collect Result_Dam,CURE END POSITION Z Collect Result_Dam,CURE END POSITION Θ Collect Result_Dam,...,Head Clean Position Y Collect Result_Fill2,Head Clean Position Z Collect Result_Fill2,Head Purge Position X Collect Result_Fill2,Head Purge Position Y Collect Result_Fill2,Head Purge Position Z Collect Result_Fill2,Machine Tact time Collect Result_Fill2,PalletID Collect Result_Fill2,Production Qty Collect Result_Fill2,Receip No Collect Result_Fill2,target
0,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XA938-1,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,114.612,19.9,7.0,127,1,Normal
1,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,3KPM0016-2,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,85.000,19.6,7.0,185,1,Normal
2,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,4E1X9167-1,1,OK,1000.0,12.5,90,...,50,91.8,270.0,50,114.612,19.8,10.0,73,1,Normal
3,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,3K1X0057-1,1,OK,1000.0,12.5,90,...,50,91.8,270.0,50,85.000,19.9,12.0,268,1,Normal
4,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3HPM0007-1,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,85.000,19.7,8.0,121,1,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3J1XF434-2,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,85.000,19.2,1.0,318,1,Normal
40502,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,4E1XC796-1,1,OK,1000.0,12.5,90,...,50,91.8,270.0,50,114.612,20.5,14.0,197,1,Normal
40503,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,4C1XD438-1,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,85.000,19.7,1.0,27,1,Normal
40504,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,3I1XA258-1,1,OK,1000.0,12.5,90,...,50,91.8,270.0,50,85.000,20.1,13.0,117,1,Normal


In [4]:
# Run this cell only once: every slice overwrites the equipment column with
# the cut-out character, so a second run would slice the already-cut value.
equipment_slices = (
    ('Equipment_Dam', 15, 16),
    ('Equipment_Fill1', 17, 18),
    ('Equipment_Fill2', 17, 18),
)

for frame in (train, test):
    for column, start, stop in equipment_slices:
        frame[column] = frame[column].str.slice(start, stop)

In [5]:
# Create the frames that collect the engineered variables, seeded with the
# 'target' column (train) and the 'Set ID' column (test).
use_train = train['target'].to_frame()
use_test = test['Set ID'].to_frame()

In [6]:
# Abnormal when the value recorded for Dam, Fill1 and Fill2 is not the same.
def inconsistant(data, columnname, iwantthiscolumnsname, is_train=True, source=None):
    """Flag rows whose Dam / Fill1 / Fill2 values for one attribute disagree.

    The flag column is cumulative: a row already holding 1 keeps it, so the
    function can be called repeatedly with different attributes.

    Args:
        data: Frame that holds the flag column; modified in place.
        columnname: Attribute base name; the compared columns are
            ``columnname + '_Dam'``, ``'_Fill1'`` and ``'_Fill2'``.
        iwantthiscolumnsname: Name of the existing 0/1 flag column in ``data``.
        is_train: When ``source`` is not given, read from the notebook-level
            ``train`` frame if True, otherwise from ``test``.
        source: Optional frame to read the compared columns from. Defaults to
            the notebook-level frame selected by ``is_train``.

    Returns:
        None. ``data[iwantthiscolumnsname]`` is overwritten with 0/1 values.
    """
    if source is None:
        # Backward-compatible default: use the notebook-level frames.
        source = train if is_train else test

    dam = source[columnname + '_Dam']
    fill1 = source[columnname + '_Fill1']
    fill2 = source[columnname + '_Fill2']

    cri = [
        dam != fill1,
        dam != fill2,
        # Previously this compared the Fill1 column with itself, so the
        # Fill1-vs-Fill2 pair was never actually checked.
        fill1 != fill2,
        # Keep rows that an earlier call already flagged.
        data[iwantthiscolumnsname] == 1,
    ]
    con = [1] * len(cri)

    data[iwantthiscolumnsname] = np.select(cri, con, default=0)

In [7]:
# Inconsistency flag: every row starts as consistent (0).
for frame in (use_train, use_test):
    frame['inconsistant'] = 0

# Attributes whose Dam / Fill1 / Fill2 values are compared.
columnname = [
    'Equipment',
    'Receip No Collect Result',
    'Production Qty Collect Result',
    'PalletID Collect Result',
]

# Apply the check attribute by attribute, train first and then test.
for attribute in columnname:
    for frame, from_train in ((use_train, True), (use_test, False)):
        inconsistant(frame, attribute, 'inconsistant', from_train)

#### 좌표 대소 비교 (크다: 1, 같다: 0, 작다: -1)
비교 쌍: dam–fill1 / dam–fill2 / fill1–fill2

아래 순서 관계를 만족하면 AbNormal로 판단한다.
- X stage1: fill2 < dam < fill1 or dam < fill1 < fill2
- X stage2: 구분 불가
- X stage3: 구분 불가
- Y stage1: fill2 < dam < fill1 or fill2 < fill1 < dam
- Y stage2: fill2 < dam < fill1
- Y stage3: fill2 < fill1 < dam or fill2 < dam < fill1
- Z stage1: fill1 < fill2 < dam
- Z stage2: 구분 불가
- Z stage3: fill1 < fill2 < dam

In [8]:
# A row is marked abnormal (place = 1) when its head coordinates satisfy any
# of the orderings built below; otherwise place = 0.
def _place_conditions(frame):
    """Build the boolean head-coordinate ordering conditions for ``frame``."""

    def coords(axis, stage):
        # Dam, Fill1 and Fill2 columns of one axis at one stage.
        prefix = 'HEAD NORMAL COORDINATE ' + axis + ' AXIS(' + stage + ') Collect Result_'
        return frame[prefix + 'Dam'], frame[prefix + 'Fill1'], frame[prefix + 'Fill2']

    x1_dam, x1_fill1, x1_fill2 = coords('X', 'Stage1')
    y1_dam, y1_fill1, y1_fill2 = coords('Y', 'Stage1')
    y2_dam, y2_fill1, y2_fill2 = coords('Y', 'Stage2')
    y3_dam, y3_fill1, y3_fill2 = coords('Y', 'Stage3')
    z1_dam, z1_fill1, z1_fill2 = coords('Z', 'Stage1')
    z3_dam, z3_fill1, z3_fill2 = coords('Z', 'Stage3')

    return [
        # X stage1: fill2 < dam < fill1
        (x1_dam > x1_fill2) & (x1_fill1 > x1_dam),
        # X stage1: dam < fill1 < fill2
        (x1_dam < x1_fill1) & (x1_fill2 > x1_fill1),
        # Y stage1: fill2 < dam < fill1
        (y1_dam > y1_fill2) & (y1_fill1 > y1_dam),
        # Y stage1: fill2 < fill1 < dam
        (y1_dam > y1_fill1) & (y1_fill1 > y1_fill2),
        # Y stage2: fill2 < dam < fill1
        (y2_dam < y2_fill1) & (y2_dam > y2_fill2),
        # Y stage3: fill2 < fill1 < dam
        (y3_dam > y3_fill1) & (y3_fill1 > y3_fill2),
        # Y stage3: fill2 < dam < fill1
        (y3_dam > y3_fill2) & (y3_fill1 > y3_dam),
        # Z stage1: fill1 < fill2 < dam
        (z1_dam > z1_fill2) & (z1_fill2 > z1_fill1),
        # Z stage3: fill1 < fill2 < dam
        (z3_dam > z3_fill2) & (z3_fill2 > z3_fill1),
    ]


cri = _place_conditions(train)
cri2 = _place_conditions(test)
# Every condition maps to the same choice value, 1.
con = [1] * len(cri)
use_train['place'] = np.select(cri, con, default = 0)
use_test['place'] = np.select(cri2, con, default = 0)

In [9]:
# Euclidean distance between two points given as parallel lists of columns.
def euclide_distance(data, first_dot, second_dot):
    """Return the row-wise Euclidean distance between two column groups.

    Args:
        data: DataFrame holding the coordinate columns.
        first_dot: Column names of the first point, one per axis.
        second_dot: Column names of the second point, paired with
            ``first_dot`` by position (``zip`` stops at the shorter list).

    Returns:
        A float Series indexed like ``data``; all zeros when no column
        pairs are given.
    """
    # Accumulate in float. The previous version added float data with ``+=``
    # into an integer NumPy array, which is dtype-unsafe.
    # NOTE(review): depending on the NumPy/pandas versions that in-place add
    # appears to either fail on the cast or drop fractional parts -- confirm.
    squared_sum = pd.Series(0.0, index=data.index, dtype='float64')

    # Add up the squared per-axis differences.
    for first, second in zip(first_dot, second_dot):
        squared_sum = squared_sum + (data[first] - data[second]) ** 2

    return squared_sum ** 0.5

In [10]:
# Stage1 head coordinates: Dam vs Fill1
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage1) Collect Result_Dam'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage1) Collect Result_Fill1'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_dam_fill1_stage1'] = euclide_distance(raw, first_dot, second_dot)

In [11]:
# Stage1 head coordinates: Dam vs Fill2
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage1) Collect Result_Dam'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage1) Collect Result_Fill2'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_dam_fill2_stage1'] = euclide_distance(raw, first_dot, second_dot)

In [12]:
# Stage1 head coordinates: Fill1 vs Fill2
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage1) Collect Result_Fill1'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage1) Collect Result_Fill2'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_fill1_fill2_stage1'] = euclide_distance(raw, first_dot, second_dot)

In [13]:
# Target counts per distinct Dam-Fill2 Stage1 distance, shown from row position 60 onward.
pd.crosstab(
    index = use_train['distance_diff_dam_fill2_stage1'],
    columns = use_train['target'],
).reset_index().iloc[60:]

target,distance_diff_dam_fill2_stage1,AbNormal,Normal
60,292.160166,44,1196
61,292.212068,90,2187
62,292.504517,126,2272
63,292.901939,15,431
64,293.290061,71,721
65,293.318019,40,431
66,293.430452,8,391
67,293.443322,1,35
68,293.443837,11,396
69,293.80514,647,6624


In [14]:
# Stage2 head coordinates: Dam vs Fill1
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage2) Collect Result_Dam'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage2) Collect Result_Fill1'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_dam_fill1_stage2'] = euclide_distance(raw, first_dot, second_dot)

In [15]:
# Stage2 head coordinates: Dam vs Fill2
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage2) Collect Result_Dam'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage2) Collect Result_Fill2'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_dam_fill2_stage2'] = euclide_distance(raw, first_dot, second_dot)

In [16]:
# Stage2 head coordinates: Fill1 vs Fill2
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage2) Collect Result_Fill1'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage2) Collect Result_Fill2'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_fill1_fill2_stage2'] = euclide_distance(raw, first_dot, second_dot)

In [17]:
# Target counts per distinct Fill1-Fill2 Stage2 distance.
pd.crosstab(
    index = use_train['distance_diff_fill1_fill2_stage2'],
    columns = use_train['target'],
).reset_index()

target,distance_diff_fill1_fill2_stage2,AbNormal,Normal
0,1.793801,1,13
1,1.844647,1,5
2,2.04267,2,233
3,2.069396,106,1869
4,2.22991,5,121
5,2.686192,24,501
6,2.692582,333,7458
7,2.706012,5,177
8,2.71737,5,263
9,2.733249,21,474


In [18]:
# Stage3 head coordinates: Dam vs Fill1
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage3) Collect Result_Dam'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage3) Collect Result_Fill1'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_dam_fill1_stage3'] = euclide_distance(raw, first_dot, second_dot)

In [19]:
# Stage3 head coordinates: Dam vs Fill2
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage3) Collect Result_Dam'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage3) Collect Result_Fill2'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_dam_fill2_stage3'] = euclide_distance(raw, first_dot, second_dot)

In [20]:
# Stage3 head coordinates: Fill1 vs Fill2
first_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage3) Collect Result_Fill1'
    for axis in ('X', 'Y', 'Z')
]
second_dot = [
    'HEAD NORMAL COORDINATE ' + axis + ' AXIS(Stage3) Collect Result_Fill2'
    for axis in ('X', 'Y', 'Z')
]

# Positional difference expressed as a Euclidean distance.
for features, raw in ((use_train, train), (use_test, test)):
    features['distance_diff_fill1_fill2_stage3'] = euclide_distance(raw, first_dot, second_dot)

In [21]:
# Target counts per distinct Dam-Fill1 Stage3 distance, shown from row position 60 onward.
pd.crosstab(
    index = use_train['distance_diff_dam_fill1_stage3'],
    columns = use_train['target'],
).reset_index().iloc[60:]

target,distance_diff_dam_fill1_stage3,AbNormal,Normal
60,399.308959,21,359
61,399.323782,31,701
62,399.33347,2,0
63,399.445707,53,1002
64,399.45348,0,28
65,399.596988,0,13
66,399.679683,19,470
67,399.739293,54,619
68,399.748749,21,393
69,399.75823,58,425


In [22]:
# Group the nine variables using Dam's CURE END POSITION X as the key:
# 1 when the value equals 1000, otherwise 0.
cure_end_x = 'CURE END POSITION X Collect Result_Dam'

for features, raw in ((use_train, train), (use_test, test)):
    features['dam_cure_position'] = np.where(raw[cure_end_x] == 1000, 1, 0)

In [25]:
# Display the engineered training variables as the cell output.
use_train

Unnamed: 0,target,inconsistant,place,distance_diff_dam_fill1_stage1,distance_diff_dam_fill2_stage1,distance_diff_fill1_fill2_stage1,distance_diff_dam_fill1_stage2,distance_diff_dam_fill2_stage2,distance_diff_fill1_fill2_stage2,distance_diff_dam_fill1_stage3,distance_diff_dam_fill2_stage3,distance_diff_fill1_fill2_stage3,dam_cure_position
0,Normal,0,0,293.089406,290.017682,3.559831,55.778940,54.729298,2.069396,55.336155,53.983850,2.638636,0
1,Normal,0,0,295.391778,292.212068,3.839271,65.446944,63.810364,2.692582,65.584392,63.811382,3.054505,0
2,Normal,0,0,678.135530,155.016931,532.900861,68.686733,75.482309,41.734006,400.031995,154.742525,537.901522,1
3,Normal,0,0,678.310093,156.759633,532.701267,64.496996,74.377903,41.043026,400.073271,156.216460,537.002421,1
4,Normal,0,0,296.919939,293.805140,3.651027,67.159586,65.574538,2.692582,66.687030,65.203221,2.782086,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,Normal,0,0,295.391778,292.212068,3.839271,65.446944,63.810364,2.692582,65.584392,63.811382,3.054505,0
40502,Normal,0,0,676.689546,150.936175,533.300770,75.986788,82.951666,41.110962,400.442695,149.383697,536.700178,1
40503,Normal,0,0,294.232079,290.988189,4.068249,61.610358,59.838168,2.733249,61.268966,59.126698,3.364617,0
40504,Normal,0,0,678.522441,157.717667,532.701427,66.672502,76.232949,41.027430,399.739293,157.809577,537.002579,1


In [28]:
# distance = 1 when any of the Euclidean distance variables exceeds 800, else 0.
# The distance columns are selected by their 'distance_diff_' name prefix
# instead of by column position, so the result does not depend on the order
# in which the earlier cells added their columns.
for frame in (use_train, use_test):
    distance_columns = [
        column for column in frame.columns
        if str(column).startswith('distance_diff_')
    ]
    frame['distance'] = np.where(
        (frame[distance_columns] > 800).any(axis = 1), 1, 0
    )

In [30]:
# Target counts for every combination of the three 0/1 flags.
pd.crosstab(
    index = [use_train['inconsistant'], use_train['place'], use_train['distance']],
    columns = use_train['target'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,target,AbNormal,Normal
inconsistant,place,distance,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,0,2256,38156
1,0,0,60,0
1,1,1,34,0


In [None]:
# Save the engineered variables next to the input data. Paths are built from
# ROOT_DIR (the same constant used for loading) rather than a hard-coded
# './data' prefix.
use_train.to_csv(os.path.join(ROOT_DIR, 'train_simple_variable.csv'), index = False)
use_test.to_csv(os.path.join(ROOT_DIR, 'test_simple_variable.csv'), index = False)