# 제품 이상여부 판별 프로젝트

## 1. 데이터 불러오기


### 필수 라이브러리


In [1]:
import os
from pprint import pprint

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

### 데이터 읽어오기


In [2]:
# Directory that holds the input CSV files.
ROOT_DIR = "data"
# Fixed seed constant (not used by the cells visible here).
RANDOM_STATE = 110

# Load data. Both paths are built from ROOT_DIR so that changing it moves
# the train and test files together instead of leaving one hard-coded.
train = pd.read_csv(os.path.join(ROOT_DIR, "train_adv.csv"))
test = pd.read_csv(os.path.join(ROOT_DIR, "test_adv.csv"))

### 필요하거나 묶일 수 있는 변수 가져오기

In [3]:
# Display the loaded training frame (notebook cell output).
train

Unnamed: 0,Wip Line_Dam,Process Desc._Dam,Equipment_Dam,Model.Suffix_Dam,Workorder_Dam,Insp. Seq No._Dam,Insp Judge Code_Dam,CURE END POSITION X Collect Result_Dam,CURE END POSITION Z Collect Result_Dam,CURE END POSITION Θ Collect Result_Dam,...,Head Clean Position Y Collect Result_Fill2,Head Clean Position Z Collect Result_Fill2,Head Purge Position X Collect Result_Fill2,Head Purge Position Y Collect Result_Fill2,Head Purge Position Z Collect Result_Fill2,Machine Tact time Collect Result_Fill2,PalletID Collect Result_Fill2,Production Qty Collect Result_Fill2,Receip No Collect Result_Fill2,target
0,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,4F1XA938-1,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,114.612,19.9,7.0,127,1,Normal
1,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334505,3KPM0016-2,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,85.000,19.6,7.0,185,1,Normal
2,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,4E1X9167-1,1,OK,1000.0,12.5,90,...,50,91.8,270.0,50,114.612,19.8,10.0,73,1,Normal
3,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,3K1X0057-1,1,OK,1000.0,12.5,90,...,50,91.8,270.0,50,85.000,19.9,12.0,268,1,Normal
4,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3HPM0007-1,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,85.000,19.7,8.0,121,1,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,3J1XF434-2,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,85.000,19.2,1.0,318,1,Normal
40502,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,4E1XC796-1,1,OK,1000.0,12.5,90,...,50,91.8,270.0,50,114.612,20.5,14.0,197,1,Normal
40503,IVI-OB6,Dam Dispenser,Dam dispenser #1,AJX75334501,4C1XD438-1,1,OK,240.0,2.5,-90,...,50,91.8,270.0,50,85.000,19.7,1.0,27,1,Normal
40504,IVI-OB6,Dam Dispenser,Dam dispenser #2,AJX75334501,3I1XA258-1,1,OK,1000.0,12.5,90,...,50,91.8,270.0,50,85.000,20.1,13.0,117,1,Normal


In [4]:
# Run this cell only once: every column is overwritten with its own slice,
# so a second run would slice the already-shortened strings.
# Each equipment label is reduced to one character: position 15 for the Dam
# column and position 17 for the Fill1/Fill2 columns (for a label such as
# 'Dam dispenser #1' that character is the digit after '#').
equipment_char_position = {
    'Equipment_Dam': 15,
    'Equipment_Fill1': 17,
    'Equipment_Fill2': 17,
}

for frame in (train, test):
    for equipment_col, start in equipment_char_position.items():
        frame[equipment_col] = frame[equipment_col].str.slice(start, start + 1)

In [5]:
# Create the working feature frames, seeded with the label column for train
# and the row identifier column for test.
use_train = train['target'].to_frame()
use_test = test['Set ID'].to_frame()

In [6]:
# Flag rows whose Dam, Fill1 and Fill2 values disagree (treated as Abnormal).
def inconsistant(data, columnname, iwantthiscolumnsname, is_train=True, source=None):
    """Set a 0/1 flag on rows whose value differs between process stages.

    For the measurement ``columnname`` the three columns
    ``columnname + '_Dam'``, ``columnname + '_Fill1'`` and
    ``columnname + '_Fill2'`` are compared pairwise. A row is flagged (1)
    when any pair differs, or when ``data[iwantthiscolumnsname]`` is already
    1, so repeated calls for different measurements accumulate into one flag.
    Comparison uses ``!=``, so a missing value (NaN) counts as a mismatch.

    Args:
        data: Frame that holds the flag column; modified in place. Rows are
            matched to ``source`` by position, so both must have the same
            number of rows.
        columnname: Measurement name without the stage suffix.
        iwantthiscolumnsname: Name of the existing flag column in ``data``
            to read and overwrite.
        is_train: When ``source`` is not given, selects the notebook-level
            ``train`` (True) or ``test`` (False) frame as the raw data.
        source: Optional raw frame to compare instead of the notebook-level
            ``train``/``test``.

    Returns:
        None. The result is written to ``data[iwantthiscolumnsname]``.
    """
    if source is None:
        # Backward-compatible default: fall back to the notebook globals.
        source = train if is_train else test

    dam = source[columnname + '_Dam']
    fill1 = source[columnname + '_Fill1']
    fill2 = source[columnname + '_Fill2']

    # Work on plain arrays so rows are combined by position, not by index.
    flagged = (
        (dam != fill1).to_numpy()
        | (dam != fill2).to_numpy()
        # The original compared Fill1 with itself here; Fill2 is the intended peer.
        | (fill1 != fill2).to_numpy()
        # Keep rows that an earlier call already flagged.
        | (data[iwantthiscolumnsname] == 1).to_numpy()
    )

    data[iwantthiscolumnsname] = np.where(flagged, 1, 0)

In [7]:
# Inconsistency flag: every row starts at 0.
for features in (use_train, use_test):
    features['inconsistant'] = 0

# Measurement names (without the _Dam/_Fill1/_Fill2 suffix) to check.
columnname = [
    'Equipment',
    'Receip No Collect Result',
    'Production Qty Collect Result',
    'PalletID Collect Result',
]

# Apply each check to the train frame, then to the test frame; all checks
# write to the same 'inconsistant' column.
for base_name in columnname:
    for features, from_train in ((use_train, True), (use_test, False)):
        inconsistant(features, base_name, 'inconsistant', from_train)

In [8]:
# Binary feature keyed on the Dam CURE END POSITION X value: 1 when it equals
# 1000, otherwise 0. (Per the original note, this one flag stands in for a
# group of nine related variables.)
for features, raw in ((use_train, train), (use_test, test)):
    at_1000 = raw['CURE END POSITION X Collect Result_Dam'] == 1000
    features['dam_cure_position'] = np.where(at_1000, 1, 0)

In [9]:
# Model suffix: copied unchanged from the 'Model.Suffix_Dam' column.
for features, raw in ((use_train, train), (use_test, test)):
    features['model_suffix'] = raw['Model.Suffix_Dam']

In [10]:
# Work order: copied unchanged from the 'Workorder_Dam' column.
for features, raw in ((use_train, train), (use_test, test)):
    features['workorder'] = raw['Workorder_Dam']

In [11]:
# Cure speed: copied unchanged from the 'CURE SPEED Collect Result_Dam' column.
for features, raw in ((use_train, train), (use_test, test)):
    features['cure_speed_dam'] = raw['CURE SPEED Collect Result_Dam']

In [12]:
# Discharge-related measurements of the Dam stage, copied under shorter
# feature names. The mapping is ordered: new feature name -> raw column.
dam_discharge_map = {
    'discharged_speed_dam': 'DISCHARGED SPEED OF RESIN Collect Result_Dam',
    'cid_time_dam': 'DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam',
    'seq_time_dam': 'DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam',
    'cluster_time_dam': 'DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam',
    'cid_volume_dam': 'Dispense Volume(Stage1) Collect Result_Dam',
    'Seq_volume_dam': 'Dispense Volume(Stage2) Collect Result_Dam',
    'cluster_volume_dam': 'Dispense Volume(Stage3) Collect Result_Dam',
}

# `col` keeps the list of new feature names, in mapping order.
col = list(dam_discharge_map)
dam_discharge_sources = list(dam_discharge_map.values())

use_train[col] = train[dam_discharge_sources]
use_test[col] = test[dam_discharge_sources]

In [13]:
# Display the test feature frame built so far (notebook cell output).
use_test

Unnamed: 0,Set ID,inconsistant,dam_cure_position,model_suffix,workorder,cure_speed_dam,discharged_speed_dam,cid_time_dam,seq_time_dam,cluster_time_dam,cid_volume_dam,Seq_volume_dam,cluster_volume_dam
0,0001be084fbc4aaa9d921f39e595961b,0,1,AJX75334501,3J1XF767-1,70,10,17.0,4.9,17.0,1.19,0.34,1.19
1,0005bbd180064abd99e63f9ed3e1ac80,0,1,AJX75334501,4B1XD472-2,70,16,14.2,8.3,14.2,0.99,0.58,0.99
2,000948934c4140d883d670adcb609584,0,0,AJX75334501,3H1XE355-1,70,10,9.7,4.9,9.7,0.67,0.34,0.67
3,000a6bfd02874c6296dc7b2e9c5678a7,0,1,AJX75334501,3L1XA128-1,70,10,21.3,10.6,21.3,1.49,0.74,1.49
4,0018e78ce91343678716e2ea27a51c95,0,0,AJX75334501,4A1XA639-1,70,16,13.2,7.5,13.2,0.92,0.52,0.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17356,ffea508b59934d689b540f95eb3fa730,0,1,AJX75334501,3K1XB597-1,70,10,21.3,10.6,21.3,1.49,0.74,1.49
17357,ffed8923c8a448a98afc641b770be153,0,1,AJX75334501,4A1XB974-1,70,16,13.2,7.6,13.2,0.92,0.53,0.92
17358,fff1e73734da40adbe805359b3efb462,0,0,AJX75334501,3L1XA998-1,70,16,13.2,6.6,13.2,1.45,0.72,1.45
17359,fff8e38bdd09470baf95f71e92075dec,0,0,AJX75334501,3F1XC376-1,70,10,9.7,3.9,9.7,0.67,0.27,0.67
