## 1. Load library

In [33]:
# basic library
import pandas as pd
import random
import os
import numpy as np
import warnings
import time
import datetime

# option library
warnings.filterwarnings('ignore')

# preprocessing library
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

# model library
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold,StratifiedKFold
import lightgbm as lgb

# metrics library(평가지표)
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import classification_report

# validation library(검증)
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate

# optuna library
import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import train_test_split

In [2]:
# Seed 고정
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(37) 

## 2. Load Data

In [3]:
train_df = pd.read_csv('../train.csv')
test_df = pd.read_csv('../test.csv')

* Columns
 - Y_class : 제품 품질 상태(Target)
   + 0 : 적정 기준 미달 (부적합)
   + 1 : 적합
   + 2 : 적정 기준 초과 (부적합)
 - Y_Quality : 제품 품질 관련 정량적 수치
 - TIMESTAMP : 제품이 공정에 들어간 시각
 - LINE : 제품이 들어간 공정 LINE 종류  ('T050304','T050307','T100304','T010306','T010305')
 - PRODUCT_CODE : 제품의 CODE 번호('A_31', 'T_31', 'O_31')
 - X_1 ~ X_2875 : 공정 과정에서 추출되어 비식별화된 변수

## 3. 데이터 확인

In [4]:
# train데이터 
train_df

Unnamed: 0,PRODUCT_ID,Y_Class,Y_Quality,TIMESTAMP,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,TRAIN_000,1,0.533433,2022-06-13 5:14,T050304,A_31,,,,,...,39.34,40.89,32.56,34.09,77.77,,,,,
1,TRAIN_001,2,0.541819,2022-06-13 5:22,T050307,A_31,,,,,...,38.89,42.82,43.92,35.34,72.55,,,,,
2,TRAIN_002,1,0.531267,2022-06-13 5:30,T050304,A_31,,,,,...,39.19,36.65,42.47,36.53,78.35,,,,,
3,TRAIN_003,2,0.537325,2022-06-13 5:39,T050307,A_31,,,,,...,37.74,39.17,52.17,30.58,71.78,,,,,
4,TRAIN_004,1,0.531590,2022-06-13 5:47,T050304,A_31,,,,,...,38.70,41.89,46.93,33.09,76.97,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,TRAIN_593,1,0.526546,2022-09-08 14:30,T100306,T_31,2.0,95.0,0.0,45.0,...,,,,,,,,,,
594,TRAIN_594,0,0.524022,2022-09-08 22:38,T050304,A_31,,,,,...,49.47,53.07,50.89,55.10,66.49,1.0,,,,
595,TRAIN_595,0,0.521289,2022-09-08 22:47,T050304,A_31,,,,,...,,,,,,1.0,,,,
596,TRAIN_596,1,0.531375,2022-09-08 14:38,T100304,O_31,40.0,94.0,0.0,45.0,...,,,,,,,,,,


In [5]:
#test 데이터
test_df

Unnamed: 0,PRODUCT_ID,TIMESTAMP,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,TEST_000,2022-09-09 2:01,T100306,T_31,2.0,94.0,0.0,45.0,10.0,0.0,...,,,,,,,,,,
1,TEST_001,2022-09-09 2:09,T100304,T_31,2.0,93.0,0.0,45.0,11.0,0.0,...,,,,,,,,,,
2,TEST_002,2022-09-09 8:42,T100304,T_31,2.0,95.0,0.0,45.0,11.0,0.0,...,,,,,,,,,,
3,TEST_003,2022-09-09 10:56,T010305,A_31,,,,,,,...,,,,,,,,,,
4,TEST_004,2022-09-09 11:04,T010306,A_31,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,TEST_305,2022-11-05 11:18,T100306,T_31,2.0,91.0,0.0,45.0,10.0,0.0,...,,,,,,,,,,
306,TEST_306,2022-11-05 16:39,T100304,T_31,2.0,96.0,0.0,45.0,11.0,0.0,...,,,,,,,,,,
307,TEST_307,2022-11-05 16:47,T100306,T_31,2.0,91.0,0.0,45.0,10.0,0.0,...,,,,,,,,,,
308,TEST_308,2022-11-05 20:53,T100306,T_31,2.0,95.0,0.0,45.0,10.0,0.0,...,,,,,,,,,,


> #### 데이터 공정라인과 제품번호 확인

In [6]:
qual_col = ['LINE', 'PRODUCT_CODE']

for i in qual_col:
    print(np.unique(test_x[i]))

NameError: name 'test_x' is not defined

## 4. Data PreProcessing

In [7]:
# test data와 train data 에서 분류할 colums LINE,PRODUCT_CODE,X데이터만 남김
test_x = test_df.drop(columns = ['PRODUCT_ID','TIMESTAMP'])
train_x = train_df.drop(columns = ['PRODUCT_ID','TIMESTAMP','Y_Class','Y_Quality'])

In [8]:
# Train데이터에서 분류된 Y_Class train_y
train_y = train_df['Y_Class']

In [9]:
test_x

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,T100306,T_31,2.0,94.0,0.0,45.0,10.0,0.0,51.0,10.0,...,,,,,,,,,,
1,T100304,T_31,2.0,93.0,0.0,45.0,11.0,0.0,45.0,10.0,...,,,,,,,,,,
2,T100304,T_31,2.0,95.0,0.0,45.0,11.0,0.0,45.0,10.0,...,,,,,,,,,,
3,T010305,A_31,,,,,,,,,...,,,,,,,,,,
4,T010306,A_31,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,T100306,T_31,2.0,91.0,0.0,45.0,10.0,0.0,51.0,10.0,...,,,,,,,,,,
306,T100304,T_31,2.0,96.0,0.0,45.0,11.0,0.0,45.0,10.0,...,,,,,,,,,,
307,T100306,T_31,2.0,91.0,0.0,45.0,10.0,0.0,50.0,10.0,...,,,,,,,,,,
308,T100306,T_31,2.0,95.0,0.0,45.0,10.0,0.0,51.0,10.0,...,,,,,,,,,,


In [10]:
train_x

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,T050304,A_31,,,,,,,,,...,39.34,40.89,32.56,34.09,77.77,,,,,
1,T050307,A_31,,,,,,,,,...,38.89,42.82,43.92,35.34,72.55,,,,,
2,T050304,A_31,,,,,,,,,...,39.19,36.65,42.47,36.53,78.35,,,,,
3,T050307,A_31,,,,,,,,,...,37.74,39.17,52.17,30.58,71.78,,,,,
4,T050304,A_31,,,,,,,,,...,38.70,41.89,46.93,33.09,76.97,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,T100306,T_31,2.0,95.0,0.0,45.0,10.0,0.0,50.0,10.0,...,,,,,,,,,,
594,T050304,A_31,,,,,,,,,...,49.47,53.07,50.89,55.10,66.49,1.0,,,,
595,T050304,A_31,,,,,,,,,...,,,,,,1.0,,,,
596,T100304,O_31,40.0,94.0,0.0,45.0,11.0,0.0,45.0,10.0,...,,,,,,,,,,


In [11]:
train_y

0      1
1      2
2      1
3      2
4      1
      ..
593    1
594    0
595    0
596    1
597    1
Name: Y_Class, Length: 598, dtype: int64

#### Label Encoding

In [12]:
qual_col = ['LINE','PRODUCT_CODE'] # 변환 할 피처

In [13]:
for i in qual_col:
    le = LabelEncoder()
    le = le.fit(train_x[i])
    train_x[i] = le.transform(train_x[i])
    
    for label in np.unique(test_x[i]): 
        if label not in le.classes_: 
            le.classes_ = np.append(le.classes_, label)
    test_x[i] = le.transform(test_x[i]) 

- qualitative to quantitative : 정성적 데이터(비정형데이터, 문자나 언어의 텍스트 파일)을 정량적 데이터로 수치화하는 전처리 작업
- le.fit() : 학습 시 고유 피처들을 학습하여 고유번호를 지정
- le.transform() : 각 피처값을 해당하는 학습된 고유값으로 변환
- .classes_ : 고유번호 0번부터 고유값에 대한 원본 데이터 값을 가지고 있음
- np.append(대상 어레이,추가할 값, 파라미터축)

In [14]:
train_x

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,2,0,,,,,,,,,...,39.34,40.89,32.56,34.09,77.77,,,,,
1,3,0,,,,,,,,,...,38.89,42.82,43.92,35.34,72.55,,,,,
2,2,0,,,,,,,,,...,39.19,36.65,42.47,36.53,78.35,,,,,
3,3,0,,,,,,,,,...,37.74,39.17,52.17,30.58,71.78,,,,,
4,2,0,,,,,,,,,...,38.70,41.89,46.93,33.09,76.97,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,5,2,2.0,95.0,0.0,45.0,10.0,0.0,50.0,10.0,...,,,,,,,,,,
594,2,0,,,,,,,,,...,49.47,53.07,50.89,55.10,66.49,1.0,,,,
595,2,0,,,,,,,,,...,,,,,,1.0,,,,
596,4,1,40.0,94.0,0.0,45.0,11.0,0.0,45.0,10.0,...,,,,,,,,,,


In [15]:
test_x

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,5,2,2.0,94.0,0.0,45.0,10.0,0.0,51.0,10.0,...,,,,,,,,,,
1,4,2,2.0,93.0,0.0,45.0,11.0,0.0,45.0,10.0,...,,,,,,,,,,
2,4,2,2.0,95.0,0.0,45.0,11.0,0.0,45.0,10.0,...,,,,,,,,,,
3,0,0,,,,,,,,,...,,,,,,,,,,
4,1,0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,5,2,2.0,91.0,0.0,45.0,10.0,0.0,51.0,10.0,...,,,,,,,,,,
306,4,2,2.0,96.0,0.0,45.0,11.0,0.0,45.0,10.0,...,,,,,,,,,,
307,5,2,2.0,91.0,0.0,45.0,10.0,0.0,50.0,10.0,...,,,,,,,,,,
308,5,2,2.0,95.0,0.0,45.0,10.0,0.0,51.0,10.0,...,,,,,,,,,,


#### 정규화

In [16]:
columns_x = train_x.columns.str.contains('X')
print(columns_x)

[False False  True ...  True  True  True]


In [17]:
x_col = train_x.columns[train_x.columns.str.contains('X')].tolist()
print(x_col[:10])

['X_1', 'X_2', 'X_3', 'X_4', 'X_5', 'X_6', 'X_7', 'X_8', 'X_9', 'X_10']


- .columns.str.contains("a") : a라는 문자가 들어있는 문자열 모두 찾음

In [18]:
train_x[x_col]

Unnamed: 0,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,X_9,X_10,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,,,,,,,,,,,...,39.34,40.89,32.56,34.09,77.77,,,,,
1,,,,,,,,,,,...,38.89,42.82,43.92,35.34,72.55,,,,,
2,,,,,,,,,,,...,39.19,36.65,42.47,36.53,78.35,,,,,
3,,,,,,,,,,,...,37.74,39.17,52.17,30.58,71.78,,,,,
4,,,,,,,,,,,...,38.70,41.89,46.93,33.09,76.97,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,2.0,95.0,0.0,45.0,10.0,0.0,50.0,10.0,52.0,2.0,...,,,,,,,,,,
594,,,,,,,,,,,...,49.47,53.07,50.89,55.10,66.49,1.0,,,,
595,,,,,,,,,,,...,,,,,,1.0,,,,
596,40.0,94.0,0.0,45.0,11.0,0.0,45.0,10.0,31.0,2.0,...,,,,,,,,,,


In [19]:
scaler = MinMaxScaler()
scaler.fit(train_x[x_col])
  
train_x[x_col] = scaler.transform(train_x[x_col])
test_x[x_col] = scaler.transform(test_x[x_col])
  
train_x.head()

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,2,0,,,,,,,,,...,0.256757,0.248647,0.0,0.122283,0.890487,,,,,
1,3,0,,,,,,,,,...,0.240754,0.300866,0.407899,0.164742,0.60177,,,,,
2,2,0,,,,,,,,,...,0.251422,0.133929,0.355835,0.205163,0.922566,,,,,
3,3,0,,,,,,,,,...,0.199858,0.20211,0.704129,0.003057,0.559181,,,,,
4,2,0,,,,,,,,,...,0.233997,0.275703,0.515978,0.088315,0.846239,,,,,


#### 결측값(NaN) 평균으로 채우기

In [20]:
train_x = train_x.fillna(train_x.mean())# mean 연산시 nan값은 분석 대상에서 제외
test_x = test_x.fillna(train_x.mean())
# train_x = train_x.fillna(0)# mean 연산시 nan값은 분석 대상에서 제외
# test_x = test_x.fillna(0)

In [21]:
train_x

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.256757,0.248647,0.000000,0.122283,0.890487,0.0,,,,
1,3,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.240754,0.300866,0.407899,0.164742,0.601770,0.0,,,,
2,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.251422,0.133929,0.355835,0.205163,0.922566,0.0,,,,
3,3,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.199858,0.202110,0.704129,0.003057,0.559181,0.0,,,,
4,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.233997,0.275703,0.515978,0.088315,0.846239,0.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,5,2,0.009804,0.533333,0.0,0.0,0.00000,0.0,0.294118,0.000000,...,0.664555,0.592741,0.612072,0.719083,0.275426,0.0,,,,
594,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.616999,0.578193,0.658169,0.835938,0.266593,0.0,,,,
595,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.664555,0.592741,0.612072,0.719083,0.275426,0.0,,,,
596,4,1,0.382353,0.466667,0.0,0.0,1.00000,0.0,0.000000,0.000000,...,0.664555,0.592741,0.612072,0.719083,0.275426,0.0,,,,


#### 전체 결측값(Null)인 X변수 Drop하기

In [22]:
# 결측값 확인
train_x.isnull()

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True
594,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True
595,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True
596,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,True,True


In [23]:
train_x.isnull().sum()

LINE              0
PRODUCT_CODE      0
X_1               0
X_2               0
X_3               0
               ... 
X_2871            0
X_2872          598
X_2873          598
X_2874          598
X_2875          598
Length: 2877, dtype: int64

In [24]:
# 결측값있는 열 전체 삭제
train_x = train_x.dropna(axis =1)
test_x = test_x.dropna(axis=1)

In [25]:
train_x

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2862,X_2863,X_2864,X_2865,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871
0,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.918723,0.100000,0.113966,0.119565,0.256757,0.248647,0.000000,0.122283,0.890487,0.0
1,3,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.869376,0.100000,0.107939,0.119565,0.240754,0.300866,0.407899,0.164742,0.601770,0.0
2,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.577649,0.100000,0.103476,0.119565,0.251422,0.133929,0.355835,0.205163,0.922566,0.0
3,3,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.582003,0.110000,0.123562,0.119565,0.199858,0.202110,0.704129,0.003057,0.559181,0.0
4,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.825835,0.100000,0.103810,0.108696,0.233997,0.275703,0.515978,0.088315,0.846239,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,5,2,0.009804,0.533333,0.0,0.0,0.00000,0.0,0.294118,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
594,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.624093,0.110000,0.130258,0.119565,0.616999,0.578193,0.658169,0.835938,0.266593,0.0
595,2,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.448476,0.100000,0.100229,0.108696,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
596,4,1,0.382353,0.466667,0.0,0.0,1.00000,0.0,0.000000,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0


In [26]:
test_x

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2862,X_2863,X_2864,X_2865,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871
0,5,2,0.009804,0.466667,0.0,0.0,0.00000,0.0,0.352941,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
1,4,2,0.009804,0.400000,0.0,0.0,1.00000,0.0,0.000000,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
2,4,2,0.009804,0.533333,0.0,0.0,1.00000,0.0,0.000000,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
3,0,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.843251,0.940000,0.930103,0.880435,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
4,1,0,0.013821,0.541547,0.0,0.0,0.39255,0.0,0.223664,0.048711,...,0.783745,0.920000,0.919557,0.880435,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,5,2,0.009804,0.266667,0.0,0.0,0.00000,0.0,0.352941,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
306,4,2,0.009804,0.600000,0.0,0.0,1.00000,0.0,0.000000,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
307,5,2,0.009804,0.266667,0.0,0.0,0.00000,0.0,0.294118,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0
308,5,2,0.009804,0.533333,0.0,0.0,0.00000,0.0,0.352941,0.000000,...,0.545584,0.505582,0.520356,0.500698,0.664555,0.592741,0.612072,0.719083,0.275426,0.0


## 5. Modeling

#### RandomForestClassifier

In [27]:
train_x.shape, train_y.shape, test_x.shape

((598, 2795), (598,), (310, 2795))

In [28]:
rf_train_xx,rf_valid_xx,rf_train_yy,rf_valid_yy = train_test_split(train_x,train_y,test_size = 0.2, shuffle = True, random_state=42)

In [90]:
params = {'max_depth': 5, 'min_samples_leaf': 7, 'min_samples_split': 6, 'n_estimators': 2000}
rf_cls = RandomForestClassifier(max_depth = 5,n_estimators=500, min_samples_leaf=5,oob_score = True, min_samples_split=7, max_features=2795).fit(rf_train_xx,rf_train_yy)

In [86]:
rf_cls.predict(rf_valid_xx)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
       1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2,
       1, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 1], dtype=int64)

In [91]:
print(rf_cls.oob_score_)

0.7531380753138075


In [88]:
print(rf_cls.score(rf_valid_xx,rf_valid_yy))

0.75


In [106]:
rf_cls = RandomForestClassifier(max_depth = 4,n_estimators=500, min_samples_leaf=5,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7531380753138075 0.7166666666666667


In [107]:
rf_cls = RandomForestClassifier(max_depth = 5,n_estimators=500, min_samples_leaf=4,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7594142259414226 0.725


In [108]:
rf_cls = RandomForestClassifier(max_depth = 5,n_estimators=2000, min_samples_leaf=5,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7573221757322176 0.7333333333333333


In [110]:
rf_cls = RandomForestClassifier(max_depth = 5,n_estimators= 2000, min_samples_leaf=4,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7594142259414226 0.725


In [111]:
rf_cls = RandomForestClassifier(max_depth = 5,n_estimators=1000, min_samples_leaf=5,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7510460251046025 0.7333333333333333


In [113]:
rf_cls = RandomForestClassifier(n_estimators=1000, min_samples_leaf=5,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 2 1 1 1 2 1 1 2 1 1 1 1 1 1 2 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7510460251046025 0.7166666666666667


In [112]:
rf_cls = RandomForestClassifier(max_depth = 15,n_estimators=1000, min_samples_leaf=5,oob_score = True, min_samples_split=9, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 2 1 1 1 2 1 1 2 1 1 1 1 1 1 2 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7552301255230126 0.7166666666666667


In [105]:
rf_cls = RandomForestClassifier(max_depth = 5,n_estimators=500, min_samples_leaf=5,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7594142259414226 0.7166666666666667


* * *

In [114]:
rf_cls = RandomForestClassifier(n_estimators=1000, min_samples_leaf=5,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 0 1 0 1 1 1]
0.7510460251046025 0.7333333333333333


In [29]:
rf_cls = RandomForestClassifier(n_estimators=500, min_samples_leaf=5,oob_score = True, min_samples_split=7, max_features=2795,random_state=42).fit(rf_train_xx,rf_train_yy)
print(rf_cls.predict(rf_valid_xx))
print(rf_cls.oob_score_,rf_cls.score(rf_valid_xx,rf_valid_yy))

[1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 2 1 1 1 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 0 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 0 1
 1 2 1 0 1 0 1 1 1]
0.7531380753138075 0.7166666666666667


In [115]:
rf_pred=rf_cls.predict(test_x)

In [116]:
submit = pd.read_csv('../sample_submission.csv')
submit['Y_Class'] = rf_pred
submit.to_csv('./ffinal_rf_pred.csv',index=False)

#### RandomForestClassifier

In [78]:
from optuna.samplers import TPESampler# objective와 study를 정의하고, n_trails 파라미터를 조정하여 몇 회의 trial 수행할지 설정하는 방식
def objective(trial,X,y,cv,scoring):
    param = {
        'objective': 'multiclass',
        'verbose': -1, 
        'learning_rate': trial.suggest_loguniform("learning_rate", 0.1, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 500, 2000),
        'subsample' : 0.7,
        'colsample_bytree' : 0.7,
        'num_leaves' : 31,
        'max_depth' : -1
    }

    model_lgb = lgb.LGBMClassifier(**param)## *arg는 튜플형식가변인자, **kwargs 딕셔너리 키워드 
    scores = cross_validate(model_lgb, X,y,cv = cv, scoring = scoring)#eval_set 검증데이터 셋 지정, early_stoping_rounds : 검증데이터n개를 학습기를 통해 가중치 부여하며 오류개선 -> n_estimators횟수반복 ->loss나 성능지표가 100회 동안 향상되지 않으면 조기종료
    accuracy = scores["test_score"].mean()
    return accuracy

In [79]:
sampler = TPESampler(seed = 10)
#study_name='lgbm_papameter_opt',
study_lgb = optuna.create_study(direction='maximize',study_name="lgbm_parameter_opt") #>> study 정의 minimize: logloss를 최소화 시켜주는 방향으로 튜닝하고 싶을경우. accuracy, roc-auc 같은 경우는 최대화 시켜주는 방향으로 maximize로 설정
study_lgb.set_user_attr("verbose", True)
kf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 42)

# func = lambda trial: objective(trial, train_x, train_y, cv = kf , scoring = "accuracy")
study_lgb.optimize(lambda trial: objective(trial, train_x, train_y, cv = kf , scoring = "accuracy"), n_trials = 10) # optimize 최적함수 파라미터 찾는것. 즉 최적화

[I 2024-05-01 17:02:45,931] A new study created in memory with name: lgbm_parameter_opt
[I 2024-05-01 17:03:12,376] Trial 0 finished with value: 0.7624509803921569 and parameters: {'learning_rate': 0.17347246719186413, 'n_estimators': 1515}. Best is trial 0 with value: 0.7624509803921569.
[I 2024-05-01 17:03:43,654] Trial 1 finished with value: 0.7641316526610644 and parameters: {'learning_rate': 0.10216914281216279, 'n_estimators': 1264}. Best is trial 1 with value: 0.7641316526610644.
[I 2024-05-01 17:04:03,880] Trial 2 finished with value: 0.7557563025210084 and parameters: {'learning_rate': 0.23977046437086363, 'n_estimators': 1102}. Best is trial 1 with value: 0.7641316526610644.
[I 2024-05-01 17:04:29,838] Trial 3 finished with value: 0.7607422969187676 and parameters: {'learning_rate': 0.26381140948762577, 'n_estimators': 1558}. Best is trial 1 with value: 0.7641316526610644.
[I 2024-05-01 17:04:52,259] Trial 4 finished with value: 0.7657422969187675 and parameters: {'learning_r

In [80]:
print(f"accuracy_score : {(study_lgb.best_value)*100}%")
print(f"best Trial:score{study_lgb.best_trial.value}")
for key, value in study_lgb.best_trial.params.items():
    print(f"\t{key} : {value}")

accuracy_score : 76.9075630252101%
best Trial:score0.7690756302521009
	learning_rate : 0.17098992291874773
	n_estimators : 1525


In [82]:
sampler = TPESampler(seed = 10)
#study_name='lgbm_papameter_opt',
study_lgb = optuna.create_study(direction='maximize',study_name="lgbm_parameter_opt") #>> study 정의 minimize: logloss를 최소화 시켜주는 방향으로 튜닝하고 싶을경우. accuracy, roc-auc 같은 경우는 최대화 시켜주는 방향으로 maximize로 설정
study_lgb.set_user_attr("verbose", True)
kf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 42)

# func = lambda trial: objective(trial, train_x, train_y, cv = kf , scoring = "accuracy")
study_lgb.optimize(lambda trial: objective(trial, train_x, train_y, cv = kf , scoring = "accuracy"), n_trials = 10) # optimize 최적함수 파라미터 찾는것. 즉 최적화

[I 2024-05-01 17:17:03,389] A new study created in memory with name: lgbm_parameter_opt
[I 2024-05-01 17:17:32,017] Trial 0 finished with value: 0.7691176470588236 and parameters: {'learning_rate': 0.17385153349534938, 'n_estimators': 1860}. Best is trial 0 with value: 0.7691176470588236.
[I 2024-05-01 17:17:59,598] Trial 1 finished with value: 0.7741036414565826 and parameters: {'learning_rate': 0.13276227318892456, 'n_estimators': 1429}. Best is trial 1 with value: 0.7741036414565826.
[I 2024-05-01 17:18:29,857] Trial 2 finished with value: 0.7640896358543418 and parameters: {'learning_rate': 0.1306125282207722, 'n_estimators': 1668}. Best is trial 1 with value: 0.7741036414565826.
[I 2024-05-01 17:18:54,412] Trial 3 finished with value: 0.7691036414565826 and parameters: {'learning_rate': 0.27479486139487713, 'n_estimators': 1402}. Best is trial 1 with value: 0.7741036414565826.
[I 2024-05-01 17:19:17,907] Trial 4 finished with value: 0.767436974789916 and parameters: {'learning_rat

In [83]:
print(f"accuracy_score : {(study_lgb.best_value)*100}%")
print(f"best Trial:score{study_lgb.best_trial.value}")
for key, value in study_lgb.best_trial.params.items():
    print(f"\t{key} : {value}")

accuracy_score : 77.41036414565826%
best Trial:score0.7741036414565826
	learning_rate : 0.13276227318892456
	n_estimators : 1429


In [86]:
trial_params = study_lgb.best_trial.params

In [85]:
a={'subsample' : 0.7,
        'colsample_bytree' : 0.7,
        'num_leaves' : 31,
        'max_depth' : -1}

In [87]:
trial_params.update(a)
trial_params

{'learning_rate': 0.13276227318892456,
 'n_estimators': 1429,
 'subsample': 0.7,
 'colsample_bytree': 0.7,
 'num_leaves': 31,
 'max_depth': -1}

In [100]:
tt_params = {'learning_rate': 0.2005982042454876,
 'n_estimators': 1623,
 'subsample': 0.7,
 'colsample_bytree': 0.7,
 'num_leaves': 31,
 'max_depth': -1}

In [101]:
#oof 4번학습 결과 도출
folds = StratifiedKFold(n_splits=10, shuffle=True, random_state= 42)

test_preds = np.zeros((test_x.shape[0],3))
oof_preds = np.zeros((train_x.shape[0],3))# 학습된 모델의 valitation set 을 예측하여 결과 확률을 담을 array 생성// np.zeros : 0으로 채워진 [shape] 생성

final_lgb_model = lgb.LGBMClassifier(**tt_params, random_state=42)#optuna로 찾은 최적 하이퍼파라미터 적용
for fold_idx, (train_idx, valid_idx) in enumerate(folds.split(train_x,train_y)):#train_x 데이터를 count idx, 학습 검증데이터 분류 idx
    print('##### interation', fold_idx, ' 시작')
    train_xxx = train_x.iloc[train_idx, :]#분류된 train_idx 값에 맞는 데이터 저장
    train_yyy = train_y.iloc[train_idx]
    valid_xxx = train_x.iloc[valid_idx, :]
    valid_yyy = train_y.iloc[valid_idx]
    final_lgb_model.fit(train_xxx, train_yyy, eval_set=(valid_xxx, valid_yyy), verbose=200, early_stopping_rounds = 50)
#     oof_preds[valid_idx] = final_lgb_model.predict_proba(valid_xxx, num_iteration=final_lgb_model.best_iteration_)
#     test_preds += final_lgb_model.predict_proba(test_x, num_iteration = final_lgb_model.best_iteration_)/folds.n_splits
    oof_preds[valid_idx] = final_lgb_model.predict_proba(valid_xxx, num_iteration=final_lgb_model.best_iteration_)
    test_preds += final_lgb_model.predict_proba(test_x, num_iteration = final_lgb_model.best_iteration_)/folds.n_splits

##### interation 0  시작
##### interation 1  시작
##### interation 2  시작
##### interation 3  시작
##### interation 4  시작
##### interation 5  시작
##### interation 6  시작
##### interation 7  시작
##### interation 8  시작
##### interation 9  시작


In [102]:
oof_preds

array([[0.09701797, 0.84967826, 0.05330376],
       [0.00461673, 0.04255622, 0.95282705],
       [0.08535724, 0.8145318 , 0.10011096],
       ...,
       [0.78754339, 0.13360472, 0.0788519 ],
       [0.11389229, 0.70869101, 0.1774167 ],
       [0.08904625, 0.86702169, 0.04393206]])

In [103]:
test_preds

array([[0.07410409, 0.84189811, 0.0839978 ],
       [0.10764823, 0.68440007, 0.20795171],
       [0.09607336, 0.76153637, 0.14239027],
       [0.14315532, 0.78016473, 0.07667995],
       [0.11350663, 0.63802402, 0.24846935],
       [0.10311272, 0.57854884, 0.31833844],
       [0.11585087, 0.76173625, 0.12241288],
       [0.438658  , 0.4734785 , 0.0878635 ],
       [0.4033046 , 0.49504716, 0.10164824],
       [0.08381622, 0.55501149, 0.36117228],
       [0.1075339 , 0.80946265, 0.08300345],
       [0.06775311, 0.79588661, 0.13636028],
       [0.06424132, 0.48435858, 0.4514001 ],
       [0.55267436, 0.17618256, 0.27114308],
       [0.75870685, 0.18323774, 0.05805541],
       [0.14203113, 0.76257552, 0.09539334],
       [0.09359328, 0.74302381, 0.16338291],
       [0.06358563, 0.81764541, 0.11876896],
       [0.04983112, 0.84243304, 0.10773584],
       [0.08168278, 0.67889066, 0.23942656],
       [0.07559537, 0.84934139, 0.07506324],
       [0.06320408, 0.73293803, 0.20385789],
       [0.

In [104]:
lgbm_preds= []
for i in range(len(test_preds)):
     lgbm_preds.append(np.argmax(test_preds[i]))
lgbm_pred = np.array(lgbm_preds)
print(lgbm_pred)

[1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
 0 1 0 1 0 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 0 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 1
 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 0 1 1 1 1 1 1 1 0 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [106]:
submit = pd.read_csv('../sample_submission.csv')
submit['Y_Class'] = lgbm_pred
submit.to_csv('./ffinal_lgbm_pred.csv',index=False)

In [97]:
lgb_m=lgb.LGBMClassifier(random_state=42)
lgb_m.fit(train_x,train_y)
t_l=lgb_m.predict(test_x)
t_l


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,