# Pandas 데모 - Explarotary Data Analysis

## 라이브러리 import 및 설정

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import lightgbm as lgb
from matplotlib import pyplot as plt
from matplotlib import rcParams
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import seaborn as sns
import warnings
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV

In [3]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import math
from sklearn.metrics import confusion_matrix

from sklearn.preprocessing import StandardScaler, RobustScaler # 데이터 표준화

# lightgbm 모델
from lightgbm import LGBMClassifier, plot_importance

# normalization
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

#feature Selection
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.feature_selection import f_classif
import itertools

In [4]:
rcParams['figure.figsize'] = (18, 9)
plt.style.use('fivethirtyeight')
pd.set_option('max_columns', 100)
pd.set_option("display.precision", 4)
warnings.simplefilter('ignore')

## 데이터 다운로드

데이터는 [Dacon 단국대 소/중 데이터 분석 AI 경진대회 웹사이트](https://www.dacon.io/competitions/official/235638/data/)에서 다운로드 받아 `../input` 폴더에 저장.

In [5]:
!ls -alF ../input/

total 93780
drwxr-xr-x 2 swcu swcu     4096 Sep 28 05:26 ./
drwxr-xr-x 7 swcu swcu     4096 Sep 28 05:31 ../
-rw-rw-r-- 1 swcu swcu   800010 Jul 26 04:35 sample_submission.csv
-rw-rw-r-- 1 swcu swcu 18936246 Jul 26 04:35 test.csv
-rw-rw-r-- 1 swcu swcu 76278443 Jul 26 04:35 train.csv


In [6]:
data_dir = Path('../input/')
trn_file = data_dir / 'train.csv'
tst_file = data_dir / 'test.csv'
# feature_file = data_dir / 'feature.csv'
sample_file = data_dir / 'sample_submission.csv'
submission_dir = Path('../submission')
target_col = 'class'
seed = 42

In [7]:
algo_name = 'lgb'
feature_name = 'feature'
model_name = f'{algo_name}_{feature_name}'

#feature_file = feature_dir / f'{feature_name}.csv'
submission_file = submission_dir / f'{model_name}.csv'

## EDA

In [8]:
trn = pd.read_csv(trn_file, index_col=0)
print(trn.shape)
trn.head()

(320000, 19)


Unnamed: 0_level_0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,23.264,20.3368,19.0095,17.6724,16.9396,-8.1086e-05,23.1243,20.2578,18.9551,17.6321,16.9089,18,18,1.1898,1.1907,1.189,1.1894,1.1902,0
1,15.0521,14.062,13.4524,13.2684,13.1689,0.0045061,14.9664,14.0045,13.4114,13.2363,13.1347,1,1,1.2533,1.2578,1.2488,1.251,1.2555,1
2,16.7864,15.8254,15.5363,15.3935,15.35,0.00047198,16.6076,15.6866,15.44,15.3217,15.2961,2,2,1.0225,1.0241,1.021,1.0217,1.0233,0
3,25.6606,21.1887,20.2212,19.8949,19.6346,5.8143e-06,25.3536,20.9947,20.0873,19.7947,19.5552,4,3,1.2054,1.2061,1.2049,1.2051,1.2057,0
4,24.4534,20.6992,19.0424,18.3242,17.9826,-3.3247e-05,23.7714,20.4338,18.863,18.1903,17.8759,13,12,1.1939,1.1943,1.1937,1.1938,1.1941,0


## 데이터 개요

In [9]:
trn.describe()

Unnamed: 0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class
count,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0,320000.0
mean,19.8524,18.3768,17.7358,17.1742,16.8752,0.058308,19.6289,18.2271,17.6154,17.1523,16.8693,6.3327,6.1388,1.1758,1.1765,1.1751,1.1754,1.1761,1.1163
std,1.9455,31.1108,1.4662,52.5615,57.1463,0.57546,1.9236,17.7866,1.473,25.0758,35.4365,8.8811,8.5516,0.1163,0.1181,0.1147,0.1155,0.1172,0.9235
min,-17.6875,-17555.3656,7.7314,-23955.8178,-23955.8178,-166.05,-42.8375,-9999.0,-16.3802,-9999.0,-9999.0,1.0,1.0,1.0001,1.0,1.0002,1.0001,1.0,0.0
25%,18.7242,17.4749,16.8772,16.5239,16.2889,3.558e-05,18.5636,17.3484,16.7871,16.453,16.2341,1.0,1.0,1.0882,1.0884,1.0878,1.088,1.0883,0.0
50%,19.4194,18.1405,17.5258,17.1497,16.9176,0.047129,19.2646,18.0224,17.4434,17.0874,16.8694,2.0,2.0,1.1793,1.1792,1.1794,1.1794,1.1793,1.0
75%,20.4321,19.0732,18.4281,18.0074,17.7289,0.094597,20.1976,18.8884,18.2911,17.907,17.6557,5.0,5.0,1.2275,1.226,1.2292,1.2286,1.2268,2.0
max,58.6233,46.3383,72.0097,50.1641,52.6127,62.323,30.7779,30.6132,31.9572,32.0162,30.7813,44.0,42.0,2.0508,2.0803,2.0221,2.0363,2.0654,2.0


In [10]:
trn.dtypes

u            float64
g            float64
r            float64
i            float64
z            float64
redshift     float64
dered_u      float64
dered_g      float64
dered_r      float64
dered_i      float64
dered_z      float64
nObserve       int64
nDetect        int64
airmass_u    float64
airmass_g    float64
airmass_r    float64
airmass_i    float64
airmass_z    float64
class          int64
dtype: object

In [11]:
for i in trn.columns:
    print(i, len(trn.loc[trn[i]<0]))


u 3
g 4
r 0
i 4
z 9
redshift 70876
dered_u 8
dered_g 9
dered_r 6
dered_i 7
dered_z 8
nObserve 0
nDetect 0
airmass_u 0
airmass_g 0
airmass_r 0
airmass_i 0
airmass_z 0
class 0


In [12]:
col = ['u', 'g', 'r', 'i', 'z', 'dered_u', 'dered_g', 
      'dered_r', 'dered_i', 'dered_z']
trn1 = trn.copy()
for i in col:
    trn1 = trn1[trn1[i] >=0]

trn1

Unnamed: 0_level_0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,23.2640,20.3368,19.0095,17.6724,16.9396,-8.1086e-05,23.1243,20.2578,18.9551,17.6321,16.9089,18,18,1.1898,1.1907,1.1890,1.1894,1.1902,0
1,15.0521,14.0620,13.4524,13.2684,13.1689,4.5061e-03,14.9664,14.0045,13.4114,13.2363,13.1347,1,1,1.2533,1.2578,1.2488,1.2510,1.2555,1
2,16.7864,15.8254,15.5363,15.3935,15.3500,4.7198e-04,16.6076,15.6866,15.4400,15.3217,15.2961,2,2,1.0225,1.0241,1.0210,1.0217,1.0233,0
3,25.6606,21.1887,20.2212,19.8949,19.6346,5.8143e-06,25.3536,20.9947,20.0873,19.7947,19.5552,4,3,1.2054,1.2061,1.2049,1.2051,1.2057,0
4,24.4534,20.6992,19.0424,18.3242,17.9826,-3.3247e-05,23.7714,20.4338,18.8630,18.1903,17.8759,13,12,1.1939,1.1943,1.1937,1.1938,1.1941,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319995,18.3656,17.2409,16.7739,16.4850,16.3404,5.1988e-02,18.1328,17.0406,16.6374,16.3861,16.2874,2,2,1.0930,1.0888,1.0973,1.0951,1.0909,2
319996,18.6856,17.6658,17.1904,16.8169,16.6367,8.7261e-02,18.5207,17.5131,17.0871,16.7436,16.6017,2,2,1.0517,1.0508,1.0526,1.0522,1.0513,1
319997,21.1890,18.4234,17.2956,16.9011,16.6841,-1.0148e-05,20.9890,18.3076,17.2152,16.8403,16.6355,31,30,1.1933,1.1941,1.1925,1.1929,1.1937,0
319998,18.8472,17.3186,16.5057,16.1806,15.9116,-4.5209e-05,16.3900,15.4058,15.1823,15.1969,15.1793,1,1,1.1725,1.1726,1.1725,1.1725,1.1725,0


In [13]:
trn1.describe()

Unnamed: 0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class
count,319978.0,319978.0,319978.0,319978.0,319978.0,319980.0,319978.0,319978.0,319978.0,319978.0,319978.0,319978.0,319978.0,319978.0,319978.0,319978.0,319978.0,319978.0,319978.0
mean,19.8528,18.432,17.7358,17.3043,17.0613,0.058309,19.6301,18.2593,17.6161,17.2154,16.9947,6.3328,6.139,1.1758,1.1765,1.1751,1.1754,1.1761,1.1163
std,1.9428,1.6591,1.4662,1.3198,1.3333,0.57548,1.9072,1.6566,1.4665,1.3249,1.3233,8.8814,8.5519,0.1163,0.1181,0.1147,0.1155,0.1172,0.9235
min,2.2651,1.2383,7.7314,2.7492,5.1281,-166.05,0.7538,3.0828,6.0403,6.8561,7.2567,1.0,1.0,1.0001,1.0,1.0002,1.0001,1.0,0.0
25%,18.7242,17.4749,16.8772,16.5239,16.289,3.558e-05,18.5637,17.3484,16.7873,16.4531,16.2342,1.0,1.0,1.0882,1.0884,1.0878,1.088,1.0883,0.0
50%,19.4195,18.1405,17.5258,17.1498,16.9177,0.047133,19.2646,18.0224,17.4434,17.0874,16.8694,2.0,2.0,1.1793,1.1792,1.1794,1.1794,1.1793,1.0
75%,20.432,19.0729,18.4281,18.0075,17.729,0.0946,20.1976,18.8885,18.2912,17.907,17.6556,5.0,5.0,1.2275,1.226,1.2292,1.2286,1.2268,2.0
max,58.6233,46.3383,72.0097,50.1641,52.6127,62.323,30.7779,30.6132,31.9572,32.0162,30.7813,44.0,42.0,2.0508,2.0803,2.0221,2.0363,2.0654,2.0


In [14]:
tst = pd.read_csv(tst_file, index_col = 0)
tst

Unnamed: 0_level_0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
320000,19.1431,18.0172,17.4733,17.1144,16.9263,1.2130e-01,19.0750,17.9550,17.4308,17.0838,16.9087,1,1,1.1563,1.1515,1.1613,1.1588,1.1539
320001,18.7409,17.1705,16.3691,15.9865,15.7307,-6.6907e-05,15.0703,14.3098,14.3901,14.5160,14.6372,2,2,1.0076,1.0074,1.0080,1.0078,1.0075
320002,19.2980,17.8803,17.2221,16.8096,16.6352,1.1001e-01,19.1004,17.7726,17.1460,16.7497,16.5689,1,1,1.3809,1.3905,1.3715,1.3761,1.3856
320003,18.0775,17.1569,16.9041,16.8118,16.7955,2.9697e-05,17.9729,17.0735,16.8470,16.7700,16.7668,1,1,1.1408,1.1408,1.1409,1.1408,1.1408
320004,17.8325,16.7646,16.2135,15.8722,15.6621,6.8471e-02,17.7545,16.6794,16.1563,15.8315,15.6433,2,2,1.1110,1.1077,1.1145,1.1127,1.1093
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399995,19.6734,18.6617,18.2708,18.1225,18.0627,-3.1726e-04,19.3361,18.3933,18.0860,17.9861,17.9670,17,17,1.1858,1.1863,1.1855,1.1857,1.1861
399996,21.5229,20.7283,20.6087,21.0049,20.8458,6.5455e-02,21.3993,20.6399,20.5451,20.9507,20.7827,8,4,1.2850,1.2908,1.2793,1.2821,1.2878
399997,19.3991,18.2411,18.1605,18.1978,18.2018,-8.4180e-04,18.9136,17.8753,17.9047,18.0043,18.0400,1,1,1.1885,1.1886,1.1885,1.1885,1.1885
399998,18.8997,17.6009,17.1329,16.8223,16.5946,4.2817e-02,18.7715,17.4880,17.0557,16.7660,16.5585,17,17,1.1804,1.1813,1.1797,1.1800,1.1809


In [15]:
df = pd.concat([trn1, tst])
df

Unnamed: 0_level_0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,23.2640,20.3368,19.0095,17.6724,16.9396,-8.1086e-05,23.1243,20.2578,18.9551,17.6321,16.9089,18,18,1.1898,1.1907,1.1890,1.1894,1.1902,0.0
1,15.0521,14.0620,13.4524,13.2684,13.1689,4.5061e-03,14.9664,14.0045,13.4114,13.2363,13.1347,1,1,1.2533,1.2578,1.2488,1.2510,1.2555,1.0
2,16.7864,15.8254,15.5363,15.3935,15.3500,4.7198e-04,16.6076,15.6866,15.4400,15.3217,15.2961,2,2,1.0225,1.0241,1.0210,1.0217,1.0233,0.0
3,25.6606,21.1887,20.2212,19.8949,19.6346,5.8143e-06,25.3536,20.9947,20.0873,19.7947,19.5552,4,3,1.2054,1.2061,1.2049,1.2051,1.2057,0.0
4,24.4534,20.6992,19.0424,18.3242,17.9826,-3.3247e-05,23.7714,20.4338,18.8630,18.1903,17.8759,13,12,1.1939,1.1943,1.1937,1.1938,1.1941,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399995,19.6734,18.6617,18.2708,18.1225,18.0627,-3.1726e-04,19.3361,18.3933,18.0860,17.9861,17.9670,17,17,1.1858,1.1863,1.1855,1.1857,1.1861,
399996,21.5229,20.7283,20.6087,21.0049,20.8458,6.5455e-02,21.3993,20.6399,20.5451,20.9507,20.7827,8,4,1.2850,1.2908,1.2793,1.2821,1.2878,
399997,19.3991,18.2411,18.1605,18.1978,18.2018,-8.4180e-04,18.9136,17.8753,17.9047,18.0043,18.0400,1,1,1.1885,1.1886,1.1885,1.1885,1.1885,
399998,18.8997,17.6009,17.1329,16.8223,16.5946,4.2817e-02,18.7715,17.4880,17.0557,16.7660,16.5585,17,17,1.1804,1.1813,1.1797,1.1800,1.1809,


# Feature Engineering

In [16]:
target_col = 'class'
feature_col = [x for x in trn1.columns if trn1[x].dtype in [np.int64, np.float64] and x !=target_col]
len(feature_col)

18

In [17]:
oneChar_col = ['u','g','r','i','z']
dered_col = [c for c in df.columns if c.find('dered') != -1]
airmass_col = [c for c in df.columns if c.find('airmass') != -1]
print(dered_col)
print(airmass_col)

['dered_u', 'dered_g', 'dered_r', 'dered_i', 'dered_z']
['airmass_u', 'airmass_g', 'airmass_r', 'airmass_i', 'airmass_z']


In [18]:
#모든 조합 차이 feature
diff_feature = []
for c1, c2 in itertools.combinations(feature_col,2):
    new_c = f'{c1}_{c2}_diff'
    df[new_c] = df[c1] - df[c2]
    diff_feature.append(new_c)
# 모든 조합 나누기    
divide_feature = []
for c1, c2 in itertools.combinations(feature_col,2):
    new_c = f'{c1}_{c2}_divide'
    df[new_c] = df[c1] / df[c2]
    divide_feature.append(new_c)

df.columns.shape

(325,)

## Adding PCA 

In [19]:
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import KernelPCA



In [20]:
#default pca - 
temp = df.columns
temp = temp.drop('class')
pca = PCA(n_components=0.95)# 주성분 개수 설정 
X_pca = pca.fit_transform(df[temp])
print('eigen_value :', pca.explained_variance_)
print('explained variance ratio :', pca.explained_variance_ratio_)


df['PCA'] = X_pca
df['PCA'].shape

eigen_value : [1.11660542e+16]
explained variance ratio : [0.99948469]


(399978,)

In [21]:
X_pca.shape

(399978, 1)

In [22]:
# incrementalPCA - difference is just dividing n_batches for memory 
n_batches = 100
inc_pca = IncrementalPCA(n_components=18)#temp.size*0.5)
for batch_x in np.array_split(df[temp], n_batches):
    print(".", end="") # not shown in the book
    inc_pca.partial_fit(batch_x)
    
X_inc = inc_pca.transform(df[temp])


# print('eigen_value :', inc_pca.explained_variance_)
# print('explained variance ratio :', inc_pca.explained_variance_ratio_)

# X_inc

X_inc_t = X_inc.transpose()

....................................................................................................

In [23]:
for i , ele in enumerate(X_inc_t):
    incPCAn = 'incPCA'+str(i)
    df[incPCAn] = ele
df

Unnamed: 0_level_0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class,u_g_diff,u_r_diff,u_i_diff,u_z_diff,u_redshift_diff,u_dered_u_diff,u_dered_g_diff,u_dered_r_diff,u_dered_i_diff,u_dered_z_diff,u_nObserve_diff,u_nDetect_diff,u_airmass_u_diff,u_airmass_g_diff,u_airmass_r_diff,u_airmass_i_diff,u_airmass_z_diff,g_r_diff,g_i_diff,g_z_diff,g_redshift_diff,g_dered_u_diff,g_dered_g_diff,g_dered_r_diff,g_dered_i_diff,g_dered_z_diff,g_nObserve_diff,g_nDetect_diff,g_airmass_u_diff,g_airmass_g_diff,g_airmass_r_diff,...,dered_i_airmass_r_divide,dered_i_airmass_i_divide,dered_i_airmass_z_divide,dered_z_nObserve_divide,dered_z_nDetect_divide,dered_z_airmass_u_divide,dered_z_airmass_g_divide,dered_z_airmass_r_divide,dered_z_airmass_i_divide,dered_z_airmass_z_divide,nObserve_nDetect_divide,nObserve_airmass_u_divide,nObserve_airmass_g_divide,nObserve_airmass_r_divide,nObserve_airmass_i_divide,nObserve_airmass_z_divide,nDetect_airmass_u_divide,nDetect_airmass_g_divide,nDetect_airmass_r_divide,nDetect_airmass_i_divide,nDetect_airmass_z_divide,airmass_u_airmass_g_divide,airmass_u_airmass_r_divide,airmass_u_airmass_i_divide,airmass_u_airmass_z_divide,airmass_g_airmass_r_divide,airmass_g_airmass_i_divide,airmass_g_airmass_z_divide,airmass_r_airmass_i_divide,airmass_r_airmass_z_divide,airmass_i_airmass_z_divide,PCA,incPCA0,incPCA1,incPCA2,incPCA3,incPCA4,incPCA5,incPCA6,incPCA7,incPCA8,incPCA9,incPCA10,incPCA11,incPCA12,incPCA13,incPCA14,incPCA15,incPCA16,incPCA17
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
0,23.2640,20.3368,19.0095,17.6724,16.9396,-8.1086e-05,23.1243,20.2578,18.9551,17.6321,16.9089,18,18,1.1898,1.1907,1.1890,1.1894,1.1902,0.0,2.9272,4.2545,5.5915,6.3243,23.2640,0.1397,3.0062,4.3088,5.6318,6.3550,5.2640,5.2640,22.0742,22.0733,22.0750,22.0746,22.0738,1.3273,2.6643,3.3972,20.3369,-2.7875,0.0790,1.3817,2.7047,3.4278,2.3368,2.3368,19.1470,19.1461,19.1478,...,14.8296,14.8249,14.8143,0.9394,0.9394,14.2120,14.2011,14.2214,14.2169,14.2067,1.0000,15.1291,15.1174,15.1390,15.1343,15.1234,15.1291,15.1174,15.1390,15.1343,15.1234,0.9992,1.0007,1.0003,0.9996,1.0014,1.0011,1.0004,0.9997,0.9990,0.9993,4.1012e+05,-4.1012e+05,-44588.1645,700.0365,-705.3298,-2043.2987,3.8336,-80.0839,-3.4657,17.4298,12.8590,6.3963,-1.8975,0.1039,-1.1943,-0.1168,-0.3289,-1.2392,0.8164
1,15.0521,14.0620,13.4524,13.2684,13.1689,4.5061e-03,14.9664,14.0045,13.4114,13.2363,13.1347,1,1,1.2533,1.2578,1.2488,1.2510,1.2555,1.0,0.9902,1.5997,1.7837,1.8832,15.0476,0.0857,1.0476,1.6408,1.8159,1.9175,14.0521,14.0521,13.7989,13.7943,13.8034,13.8012,13.7966,0.6096,0.7935,0.8930,14.0575,-0.9045,0.0574,0.6506,0.8257,0.9273,13.0620,13.0620,12.8087,12.8041,12.8132,...,10.5995,10.5806,10.5423,13.1347,13.1347,10.4805,10.4423,10.5182,10.4994,10.4614,1.0000,0.7979,0.7950,0.8008,0.7994,0.7965,0.7979,0.7950,0.8008,0.7994,0.7965,0.9964,1.0036,1.0018,0.9982,1.0073,1.0055,1.0018,0.9982,0.9946,0.9964,-1.3453e+05,1.3453e+05,1329.3993,234.1073,-139.3329,141.9778,0.8354,26.8378,4.3943,-49.9491,3.9271,6.1358,0.9277,-2.3727,0.1881,-0.6354,-0.4376,1.1678,-0.0957
2,16.7864,15.8254,15.5363,15.3935,15.3500,4.7198e-04,16.6076,15.6866,15.4400,15.3217,15.2961,2,2,1.0225,1.0241,1.0210,1.0217,1.0233,0.0,0.9610,1.2501,1.3928,1.4364,16.7859,0.1787,1.0998,1.3463,1.4647,1.4903,14.7864,14.7864,15.7639,15.7623,15.7654,15.7647,15.7631,0.2891,0.4319,0.4754,15.8250,-0.7822,0.1388,0.3854,0.5037,0.5293,13.8254,13.8254,14.8029,14.8013,14.8044,...,15.0068,14.9959,14.9730,7.6480,7.6480,14.9595,14.9360,14.9817,14.9708,14.9479,1.0000,1.9560,1.9529,1.9589,1.9575,1.9545,1.9560,1.9529,1.9589,1.9575,1.9545,0.9984,1.0015,1.0008,0.9992,1.0031,1.0023,1.0008,0.9993,0.9977,0.9985,-2.0247e+05,2.0247e+05,1399.9650,333.2776,117.0187,206.6743,-0.3252,15.6501,-15.6121,-13.6220,-9.5625,8.5752,0.3751,-0.9788,0.0770,-0.7371,0.0022,0.2375,0.3560
3,25.6606,21.1887,20.2212,19.8949,19.6346,5.8143e-06,25.3536,20.9947,20.0873,19.7947,19.5552,4,3,1.2054,1.2061,1.2049,1.2051,1.2057,0.0,4.4719,5.4395,5.7657,6.0260,25.6606,0.3070,4.6660,5.5734,5.8660,6.1055,21.6606,22.6606,24.4552,24.4546,24.4558,24.4555,24.4549,0.9676,1.2938,1.5541,21.1887,-4.1649,0.1941,1.1015,1.3941,1.6335,17.1887,18.1887,19.9833,19.9827,19.9839,...,16.4288,16.4255,16.4174,4.8888,6.5184,16.2230,16.2141,16.2301,16.2267,16.2188,1.3333,3.3184,3.3166,3.3198,3.3192,3.3175,2.4888,2.4874,2.4899,2.4894,2.4882,0.9995,1.0004,1.0002,0.9997,1.0010,1.0008,1.0003,0.9998,0.9993,0.9995,-8.3498e+06,8.3498e+06,540693.7038,-255606.5334,48177.6402,58140.3174,-1.5553,19.1608,-19.4878,36.2563,12.6515,-3.8705,4.3914,10.1283,-5.9023,0.3165,-0.6332,-3.9224,-0.1401
4,24.4534,20.6992,19.0424,18.3242,17.9826,-3.3247e-05,23.7714,20.4338,18.8630,18.1903,17.8759,13,12,1.1939,1.1943,1.1937,1.1938,1.1941,0.0,3.7543,5.4111,6.1293,6.4708,24.4535,0.6820,4.0196,5.5904,6.2631,6.5775,11.4534,12.4534,23.2595,23.2591,23.2597,23.2596,23.2593,1.6568,2.3750,2.7165,20.6992,-3.0722,0.2653,1.8362,2.5089,2.8233,7.6992,8.6992,19.5052,19.5049,19.5054,...,15.2381,15.2370,15.2335,1.3751,1.4897,14.9721,14.9679,14.9747,14.9736,14.9702,1.0833,10.8883,10.8852,10.8902,10.8894,10.8869,10.0507,10.0479,10.0525,10.0517,10.0494,0.9997,1.0002,1.0001,0.9999,1.0005,1.0004,1.0002,0.9999,0.9997,0.9998,1.2285e+06,-1.2285e+06,-112285.4841,28776.9020,2431.2498,-1802.2420,2.0952,-45.3906,-15.2844,24.0859,13.5414,3.5764,2.3899,5.9035,-2.4057,0.1657,1.1459,-0.3625,-0.5340
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399995,19.6734,18.6617,18.2708,18.1225,18.0627,-3.1726e-04,19.3361,18.3933,18.0860,17.9861,17.9670,17,17,1.1858,1.1863,1.1855,1.1857,1.1861,,1.0117,1.4026,1.5509,1.6107,19.6737,0.3372,1.2801,1.5874,1.6873,1.7064,2.6734,2.6734,18.4875,18.4871,18.4879,18.4877,18.4873,0.3909,0.5392,0.5990,18.6620,-0.6745,0.2684,0.5757,0.6756,0.6947,1.6617,1.6617,17.4758,17.4753,17.4762,...,15.1718,15.1698,15.1645,1.0569,1.0569,15.1512,15.1451,15.1557,15.1537,15.1484,1.0000,14.3358,14.3300,14.3400,14.3381,14.3331,14.3358,14.3300,14.3400,14.3381,14.3331,0.9996,1.0003,1.0002,0.9998,1.0007,1.0006,1.0002,0.9999,0.9995,0.9997,3.1910e+03,-3.1910e+03,976.2744,-108.3388,-523.7569,224.4434,2.1475,-76.7207,-6.6688,2.6309,-7.0843,-3.4028,-2.0577,0.6967,0.7779,-0.0225,-0.5576,-0.0965,-0.0123
399996,21.5229,20.7283,20.6087,21.0049,20.8458,6.5455e-02,21.3993,20.6399,20.5451,20.9507,20.7827,8,4,1.2850,1.2908,1.2793,1.2821,1.2878,,0.7946,0.9142,0.5180,0.6771,21.4574,0.1235,0.8829,0.9778,0.5722,0.7402,13.5229,17.5229,20.2379,20.2321,20.2435,20.2407,20.2350,0.1196,-0.2766,-0.1175,20.6628,-0.6711,0.0884,0.1832,-0.2224,-0.0544,12.7283,16.7283,19.4433,19.4375,19.4490,...,16.3763,16.3407,16.2681,2.5978,5.1957,16.1738,16.1010,16.2450,16.2096,16.1377,2.0000,6.2259,6.1978,6.2533,6.2397,6.2120,3.1129,3.0989,3.1266,3.1198,3.1060,0.9955,1.0044,1.0022,0.9978,1.0089,1.0067,1.0023,0.9978,0.9934,0.9956,-1.2839e+05,1.2839e+05,1177.6816,235.3707,-134.0418,158.5533,-2.2023,-0.1329,-23.9801,21.9871,-6.7453,-20.9497,12.4153,0.8315,-1.5234,0.4739,0.6165,-1.5697,-1.4322
399997,19.3991,18.2411,18.1605,18.1978,18.2018,-8.4180e-04,18.9136,17.8753,17.9047,18.0043,18.0400,1,1,1.1885,1.1886,1.1885,1.1885,1.1885,,1.1580,1.2386,1.2013,1.1972,19.3999,0.4855,1.5237,1.4944,1.3948,1.3591,18.3991,18.3991,18.2106,18.2105,18.2105,18.2106,18.2106,0.0806,0.0433,0.0392,18.2419,-0.6725,0.3657,0.3364,0.2368,0.2011,17.2411,17.2411,17.0526,17.0525,17.0526,...,15.1485,15.1490,15.1487,18.0400,18.0400,15.1791,15.1780,15.1786,15.1791,15.1788,1.0000,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.9999,1.0000,1.0000,1.0000,1.0000,1.0001,1.0001,1.0000,1.0000,1.0000,-7.8679e+04,7.8679e+04,1575.7238,325.5438,-441.0233,68.7280,-3.0811,47.3562,22.8207,-3.3286,-6.1932,-7.7851,-0.4672,2.6724,0.6055,-0.0312,0.6407,-1.0401,0.1324
399998,18.8997,17.6009,17.1329,16.8223,16.5946,4.2817e-02,18.7715,17.4880,17.0557,16.7660,16.5585,17,17,1.1804,1.1813,1.1797,1.1800,1.1809,,1.2989,1.7669,2.0774,2.3052,18.8569,0.1282,1.4118,1.8441,2.1338,2.3412,1.8997,1.8997,17.7193,17.7184,17.7201,17.7197,17.7189,0.4680,0.7785,1.0063,17.5581,-1.1707,0.1129,0.5452,0.8349,1.0424,0.6009,0.6009,16.4204,16.4195,16.4212,...,14.2126,14.2081,14.1981,0.9740,0.9740,14.0275,14.0168,14.0367,14.0323,14.0224,1.0000,14.4015,14.3906,14.4110,14.4065,14.3963,14.4015,14.3906,14.4110,14.4065,14.3963,0.9992,1.0007,1.0003,0.9996,1.0014,1.0011,1.0004,0.9997,0.9990,0.9993,-1.2858e+05,1.2858e+05,1207.3018,235.8713,-136.2204,161.1962,3.0984,-79.9384,-6.7238,-7.4420,-3.2682,1.9715,-1.7284,-0.6124,-0.0696,-0.0560,-0.6277,-0.4468,0.1871


In [24]:
# from sklearn.model_selection import StratifiedShuffleSplit

# split = StratifiedShuffleSplit(n_splits=10, random_state=seed)

In [25]:
# trn2 = df.iloc[:319978]
# tst2 = df.iloc[319978:]
# tst2.drop('class', axis =1 , inplace = True)
# trn2.shape, tst2.shape

In [26]:
# trn2['class']

In [27]:
# for trn_idx, test_idx in split.split(trn2[temp], trn2['class']):
#     print (len(trn_idx), len(test_idx))
    
    

# testing = trn2[temp].iloc[test_idx]
    
# rbf_pca = KernelPCA(n_components = 100, kernel="rbf", gamma=0.0433)
# X_rbf = rbf_pca.fit_transform(testing)    

In [28]:
# #커널을 이용해 데이터를 저차원에서 고차원으로 매핑시켜 비선형 데이터셋에 SVM을 적용시키는 Kernel SVM, 
# #같은 기법을 PCA에 적용해 비선형 투영으로 차원을 축소할 수 있는데, 이것을 Kernel PCA(KPCA)라고 한다.



# rbf_pca = KernelPCA(n_components = 100, kernel="rbf", gamma=0.0433)
# X_rbf = rbf_pca.fit_transform(df[temp])

# print('eigen_value :', X_rbf.explained_variance_)
# print('explained variance ratio :', X_rbf.explained_variance_ratio_)
# print(X_rbf)

# lin_pca = KernelPCA(n_components = 0.95, kernel="linear") #fit_inverse_transform=True)
# X_lin = lin_pca.fit_transform(df[temp])

# print('eigen_value :', X_lin.explained_variance_)
# print('explained variance ratio :', X_lin.explained_variance_ratio_)
# print(X_lin)


# sig_pca = KernelPCA(n_components = 0.95, kernel="sigmoid", gamma=0.001, coef0=1)# fit_inverse_transform=True)
# X_sig = sig_pca.fit_transform(df[temp])

# print('eigen_value :', X_sig.explained_variance_)
# print('explained variance ratio :', X_sig.explained_variance_ratio_)
# print(X_sig)









In [29]:
df

Unnamed: 0_level_0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class,u_g_diff,u_r_diff,u_i_diff,u_z_diff,u_redshift_diff,u_dered_u_diff,u_dered_g_diff,u_dered_r_diff,u_dered_i_diff,u_dered_z_diff,u_nObserve_diff,u_nDetect_diff,u_airmass_u_diff,u_airmass_g_diff,u_airmass_r_diff,u_airmass_i_diff,u_airmass_z_diff,g_r_diff,g_i_diff,g_z_diff,g_redshift_diff,g_dered_u_diff,g_dered_g_diff,g_dered_r_diff,g_dered_i_diff,g_dered_z_diff,g_nObserve_diff,g_nDetect_diff,g_airmass_u_diff,g_airmass_g_diff,g_airmass_r_diff,...,dered_i_airmass_r_divide,dered_i_airmass_i_divide,dered_i_airmass_z_divide,dered_z_nObserve_divide,dered_z_nDetect_divide,dered_z_airmass_u_divide,dered_z_airmass_g_divide,dered_z_airmass_r_divide,dered_z_airmass_i_divide,dered_z_airmass_z_divide,nObserve_nDetect_divide,nObserve_airmass_u_divide,nObserve_airmass_g_divide,nObserve_airmass_r_divide,nObserve_airmass_i_divide,nObserve_airmass_z_divide,nDetect_airmass_u_divide,nDetect_airmass_g_divide,nDetect_airmass_r_divide,nDetect_airmass_i_divide,nDetect_airmass_z_divide,airmass_u_airmass_g_divide,airmass_u_airmass_r_divide,airmass_u_airmass_i_divide,airmass_u_airmass_z_divide,airmass_g_airmass_r_divide,airmass_g_airmass_i_divide,airmass_g_airmass_z_divide,airmass_r_airmass_i_divide,airmass_r_airmass_z_divide,airmass_i_airmass_z_divide,PCA,incPCA0,incPCA1,incPCA2,incPCA3,incPCA4,incPCA5,incPCA6,incPCA7,incPCA8,incPCA9,incPCA10,incPCA11,incPCA12,incPCA13,incPCA14,incPCA15,incPCA16,incPCA17
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
0,23.2640,20.3368,19.0095,17.6724,16.9396,-8.1086e-05,23.1243,20.2578,18.9551,17.6321,16.9089,18,18,1.1898,1.1907,1.1890,1.1894,1.1902,0.0,2.9272,4.2545,5.5915,6.3243,23.2640,0.1397,3.0062,4.3088,5.6318,6.3550,5.2640,5.2640,22.0742,22.0733,22.0750,22.0746,22.0738,1.3273,2.6643,3.3972,20.3369,-2.7875,0.0790,1.3817,2.7047,3.4278,2.3368,2.3368,19.1470,19.1461,19.1478,...,14.8296,14.8249,14.8143,0.9394,0.9394,14.2120,14.2011,14.2214,14.2169,14.2067,1.0000,15.1291,15.1174,15.1390,15.1343,15.1234,15.1291,15.1174,15.1390,15.1343,15.1234,0.9992,1.0007,1.0003,0.9996,1.0014,1.0011,1.0004,0.9997,0.9990,0.9993,4.1012e+05,-4.1012e+05,-44588.1645,700.0365,-705.3298,-2043.2987,3.8336,-80.0839,-3.4657,17.4298,12.8590,6.3963,-1.8975,0.1039,-1.1943,-0.1168,-0.3289,-1.2392,0.8164
1,15.0521,14.0620,13.4524,13.2684,13.1689,4.5061e-03,14.9664,14.0045,13.4114,13.2363,13.1347,1,1,1.2533,1.2578,1.2488,1.2510,1.2555,1.0,0.9902,1.5997,1.7837,1.8832,15.0476,0.0857,1.0476,1.6408,1.8159,1.9175,14.0521,14.0521,13.7989,13.7943,13.8034,13.8012,13.7966,0.6096,0.7935,0.8930,14.0575,-0.9045,0.0574,0.6506,0.8257,0.9273,13.0620,13.0620,12.8087,12.8041,12.8132,...,10.5995,10.5806,10.5423,13.1347,13.1347,10.4805,10.4423,10.5182,10.4994,10.4614,1.0000,0.7979,0.7950,0.8008,0.7994,0.7965,0.7979,0.7950,0.8008,0.7994,0.7965,0.9964,1.0036,1.0018,0.9982,1.0073,1.0055,1.0018,0.9982,0.9946,0.9964,-1.3453e+05,1.3453e+05,1329.3993,234.1073,-139.3329,141.9778,0.8354,26.8378,4.3943,-49.9491,3.9271,6.1358,0.9277,-2.3727,0.1881,-0.6354,-0.4376,1.1678,-0.0957
2,16.7864,15.8254,15.5363,15.3935,15.3500,4.7198e-04,16.6076,15.6866,15.4400,15.3217,15.2961,2,2,1.0225,1.0241,1.0210,1.0217,1.0233,0.0,0.9610,1.2501,1.3928,1.4364,16.7859,0.1787,1.0998,1.3463,1.4647,1.4903,14.7864,14.7864,15.7639,15.7623,15.7654,15.7647,15.7631,0.2891,0.4319,0.4754,15.8250,-0.7822,0.1388,0.3854,0.5037,0.5293,13.8254,13.8254,14.8029,14.8013,14.8044,...,15.0068,14.9959,14.9730,7.6480,7.6480,14.9595,14.9360,14.9817,14.9708,14.9479,1.0000,1.9560,1.9529,1.9589,1.9575,1.9545,1.9560,1.9529,1.9589,1.9575,1.9545,0.9984,1.0015,1.0008,0.9992,1.0031,1.0023,1.0008,0.9993,0.9977,0.9985,-2.0247e+05,2.0247e+05,1399.9650,333.2776,117.0187,206.6743,-0.3252,15.6501,-15.6121,-13.6220,-9.5625,8.5752,0.3751,-0.9788,0.0770,-0.7371,0.0022,0.2375,0.3560
3,25.6606,21.1887,20.2212,19.8949,19.6346,5.8143e-06,25.3536,20.9947,20.0873,19.7947,19.5552,4,3,1.2054,1.2061,1.2049,1.2051,1.2057,0.0,4.4719,5.4395,5.7657,6.0260,25.6606,0.3070,4.6660,5.5734,5.8660,6.1055,21.6606,22.6606,24.4552,24.4546,24.4558,24.4555,24.4549,0.9676,1.2938,1.5541,21.1887,-4.1649,0.1941,1.1015,1.3941,1.6335,17.1887,18.1887,19.9833,19.9827,19.9839,...,16.4288,16.4255,16.4174,4.8888,6.5184,16.2230,16.2141,16.2301,16.2267,16.2188,1.3333,3.3184,3.3166,3.3198,3.3192,3.3175,2.4888,2.4874,2.4899,2.4894,2.4882,0.9995,1.0004,1.0002,0.9997,1.0010,1.0008,1.0003,0.9998,0.9993,0.9995,-8.3498e+06,8.3498e+06,540693.7038,-255606.5334,48177.6402,58140.3174,-1.5553,19.1608,-19.4878,36.2563,12.6515,-3.8705,4.3914,10.1283,-5.9023,0.3165,-0.6332,-3.9224,-0.1401
4,24.4534,20.6992,19.0424,18.3242,17.9826,-3.3247e-05,23.7714,20.4338,18.8630,18.1903,17.8759,13,12,1.1939,1.1943,1.1937,1.1938,1.1941,0.0,3.7543,5.4111,6.1293,6.4708,24.4535,0.6820,4.0196,5.5904,6.2631,6.5775,11.4534,12.4534,23.2595,23.2591,23.2597,23.2596,23.2593,1.6568,2.3750,2.7165,20.6992,-3.0722,0.2653,1.8362,2.5089,2.8233,7.6992,8.6992,19.5052,19.5049,19.5054,...,15.2381,15.2370,15.2335,1.3751,1.4897,14.9721,14.9679,14.9747,14.9736,14.9702,1.0833,10.8883,10.8852,10.8902,10.8894,10.8869,10.0507,10.0479,10.0525,10.0517,10.0494,0.9997,1.0002,1.0001,0.9999,1.0005,1.0004,1.0002,0.9999,0.9997,0.9998,1.2285e+06,-1.2285e+06,-112285.4841,28776.9020,2431.2498,-1802.2420,2.0952,-45.3906,-15.2844,24.0859,13.5414,3.5764,2.3899,5.9035,-2.4057,0.1657,1.1459,-0.3625,-0.5340
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399995,19.6734,18.6617,18.2708,18.1225,18.0627,-3.1726e-04,19.3361,18.3933,18.0860,17.9861,17.9670,17,17,1.1858,1.1863,1.1855,1.1857,1.1861,,1.0117,1.4026,1.5509,1.6107,19.6737,0.3372,1.2801,1.5874,1.6873,1.7064,2.6734,2.6734,18.4875,18.4871,18.4879,18.4877,18.4873,0.3909,0.5392,0.5990,18.6620,-0.6745,0.2684,0.5757,0.6756,0.6947,1.6617,1.6617,17.4758,17.4753,17.4762,...,15.1718,15.1698,15.1645,1.0569,1.0569,15.1512,15.1451,15.1557,15.1537,15.1484,1.0000,14.3358,14.3300,14.3400,14.3381,14.3331,14.3358,14.3300,14.3400,14.3381,14.3331,0.9996,1.0003,1.0002,0.9998,1.0007,1.0006,1.0002,0.9999,0.9995,0.9997,3.1910e+03,-3.1910e+03,976.2744,-108.3388,-523.7569,224.4434,2.1475,-76.7207,-6.6688,2.6309,-7.0843,-3.4028,-2.0577,0.6967,0.7779,-0.0225,-0.5576,-0.0965,-0.0123
399996,21.5229,20.7283,20.6087,21.0049,20.8458,6.5455e-02,21.3993,20.6399,20.5451,20.9507,20.7827,8,4,1.2850,1.2908,1.2793,1.2821,1.2878,,0.7946,0.9142,0.5180,0.6771,21.4574,0.1235,0.8829,0.9778,0.5722,0.7402,13.5229,17.5229,20.2379,20.2321,20.2435,20.2407,20.2350,0.1196,-0.2766,-0.1175,20.6628,-0.6711,0.0884,0.1832,-0.2224,-0.0544,12.7283,16.7283,19.4433,19.4375,19.4490,...,16.3763,16.3407,16.2681,2.5978,5.1957,16.1738,16.1010,16.2450,16.2096,16.1377,2.0000,6.2259,6.1978,6.2533,6.2397,6.2120,3.1129,3.0989,3.1266,3.1198,3.1060,0.9955,1.0044,1.0022,0.9978,1.0089,1.0067,1.0023,0.9978,0.9934,0.9956,-1.2839e+05,1.2839e+05,1177.6816,235.3707,-134.0418,158.5533,-2.2023,-0.1329,-23.9801,21.9871,-6.7453,-20.9497,12.4153,0.8315,-1.5234,0.4739,0.6165,-1.5697,-1.4322
399997,19.3991,18.2411,18.1605,18.1978,18.2018,-8.4180e-04,18.9136,17.8753,17.9047,18.0043,18.0400,1,1,1.1885,1.1886,1.1885,1.1885,1.1885,,1.1580,1.2386,1.2013,1.1972,19.3999,0.4855,1.5237,1.4944,1.3948,1.3591,18.3991,18.3991,18.2106,18.2105,18.2105,18.2106,18.2106,0.0806,0.0433,0.0392,18.2419,-0.6725,0.3657,0.3364,0.2368,0.2011,17.2411,17.2411,17.0526,17.0525,17.0526,...,15.1485,15.1490,15.1487,18.0400,18.0400,15.1791,15.1780,15.1786,15.1791,15.1788,1.0000,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.8414,0.9999,1.0000,1.0000,1.0000,1.0000,1.0001,1.0001,1.0000,1.0000,1.0000,-7.8679e+04,7.8679e+04,1575.7238,325.5438,-441.0233,68.7280,-3.0811,47.3562,22.8207,-3.3286,-6.1932,-7.7851,-0.4672,2.6724,0.6055,-0.0312,0.6407,-1.0401,0.1324
399998,18.8997,17.6009,17.1329,16.8223,16.5946,4.2817e-02,18.7715,17.4880,17.0557,16.7660,16.5585,17,17,1.1804,1.1813,1.1797,1.1800,1.1809,,1.2989,1.7669,2.0774,2.3052,18.8569,0.1282,1.4118,1.8441,2.1338,2.3412,1.8997,1.8997,17.7193,17.7184,17.7201,17.7197,17.7189,0.4680,0.7785,1.0063,17.5581,-1.1707,0.1129,0.5452,0.8349,1.0424,0.6009,0.6009,16.4204,16.4195,16.4212,...,14.2126,14.2081,14.1981,0.9740,0.9740,14.0275,14.0168,14.0367,14.0323,14.0224,1.0000,14.4015,14.3906,14.4110,14.4065,14.3963,14.4015,14.3906,14.4110,14.4065,14.3963,0.9992,1.0007,1.0003,0.9996,1.0014,1.0011,1.0004,0.9997,0.9990,0.9993,-1.2858e+05,1.2858e+05,1207.3018,235.8713,-136.2204,161.1962,3.0984,-79.9384,-6.7238,-7.4420,-3.2682,1.9715,-1.7284,-0.6124,-0.0696,-0.0560,-0.6277,-0.4468,0.1871


In [30]:
# zip 함수를 이용한 row별, 시리즈 그룹별 max, min, max-min, std, sum 을 feature로 생성
# 그 외에도 mean, skew 등을 사용해 볼 수도 있긴 함 

for prefix, g in zip(['one','dered','airmass'], [oneChar_col, dered_col, airmass_col]):
    
    df[f'{prefix}_max'] = df[g].max(axis=1)
    
    df[f'{prefix}_min'] = df[g].min(axis=1)
    
    df[f'{prefix}_diff'] = df[f'{prefix}_max'] - df[f'{prefix}_min']
    
    df[f'{prefix}_std'] = df[g].std(axis=1)
    
    df[f'{prefix}_sum'] = df[g].sum(axis=1)
    
    

In [31]:
# 각 그룹별 max-max, min-min, sum-sum, std-std을 feature로 생성

for c in itertools.combinations(['one','dered','airmass'],2):
    df[f'{c[0]}_{c[1]}_max_diff'] = df[f'{c[0]}_max'] - df[f'{c[1]}_max']
    
    df[f'{c[0]}_{c[1]}_min_diff'] = df[f'{c[0]}_min'] - df[f'{c[1]}_min']
    
    df[f'{c[0]}_{c[1]}_sum_diff'] = df[f'{c[0]}_sum'] - df[f'{c[1]}_sum']
    
    df[f'{c[0]}_{c[1]}_std_diff'] = df[f'{c[0]}_std'] - df[f'{c[1]}_std']

In [32]:
df

Unnamed: 0_level_0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class,u_g_diff,u_r_diff,u_i_diff,u_z_diff,u_redshift_diff,u_dered_u_diff,u_dered_g_diff,u_dered_r_diff,u_dered_i_diff,u_dered_z_diff,u_nObserve_diff,u_nDetect_diff,u_airmass_u_diff,u_airmass_g_diff,u_airmass_r_diff,u_airmass_i_diff,u_airmass_z_diff,g_r_diff,g_i_diff,g_z_diff,g_redshift_diff,g_dered_u_diff,g_dered_g_diff,g_dered_r_diff,g_dered_i_diff,g_dered_z_diff,g_nObserve_diff,g_nDetect_diff,g_airmass_u_diff,g_airmass_g_diff,g_airmass_r_diff,...,airmass_g_airmass_z_divide,airmass_r_airmass_i_divide,airmass_r_airmass_z_divide,airmass_i_airmass_z_divide,PCA,incPCA0,incPCA1,incPCA2,incPCA3,incPCA4,incPCA5,incPCA6,incPCA7,incPCA8,incPCA9,incPCA10,incPCA11,incPCA12,incPCA13,incPCA14,incPCA15,incPCA16,incPCA17,one_max,one_min,one_diff,one_std,one_sum,dered_max,dered_min,dered_diff,dered_std,dered_sum,airmass_max,airmass_min,airmass_diff,airmass_std,airmass_sum,one_dered_max_diff,one_dered_min_diff,one_dered_sum_diff,one_dered_std_diff,one_airmass_max_diff,one_airmass_min_diff,one_airmass_sum_diff,one_airmass_std_diff,dered_airmass_max_diff,dered_airmass_min_diff,dered_airmass_sum_diff,dered_airmass_std_diff
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
0,23.2640,20.3368,19.0095,17.6724,16.9396,-8.1086e-05,23.1243,20.2578,18.9551,17.6321,16.9089,18,18,1.1898,1.1907,1.1890,1.1894,1.1902,0.0,2.9272,4.2545,5.5915,6.3243,23.2640,0.1397,3.0062,4.3088,5.6318,6.3550,5.2640,5.2640,22.0742,22.0733,22.0750,22.0746,22.0738,1.3273,2.6643,3.3972,20.3369,-2.7875,0.0790,1.3817,2.7047,3.4278,2.3368,2.3368,19.1470,19.1461,19.1478,...,1.0004,0.9997,0.9990,0.9993,4.1012e+05,-4.1012e+05,-44588.1645,700.0365,-705.3298,-2043.2987,3.8336,-80.0839,-3.4657,17.4298,12.8590,6.3963,-1.8975,0.1039,-1.1943,-0.1168,-0.3289,-1.2392,0.8164,23.2640,16.9396,6.3243,2.4994,97.2223,23.1243,16.9089,6.2153,2.4562,96.8782,1.1907,1.1890,1.7020e-03,6.7348e-04,5.9490,0.1397,0.0307,0.3441,0.0432,22.0733,15.7506,91.2733,2.4987,21.9336,15.7200,90.9292,2.4555
1,15.0521,14.0620,13.4524,13.2684,13.1689,4.5061e-03,14.9664,14.0045,13.4114,13.2363,13.1347,1,1,1.2533,1.2578,1.2488,1.2510,1.2555,1.0,0.9902,1.5997,1.7837,1.8832,15.0476,0.0857,1.0476,1.6408,1.8159,1.9175,14.0521,14.0521,13.7989,13.7943,13.8034,13.8012,13.7966,0.6096,0.7935,0.8930,14.0575,-0.9045,0.0574,0.6506,0.8257,0.9273,13.0620,13.0620,12.8087,12.8041,12.8132,...,1.0018,0.9982,0.9946,0.9964,-1.3453e+05,1.3453e+05,1329.3993,234.1073,-139.3329,141.9778,0.8354,26.8378,4.3943,-49.9491,3.9271,6.1358,0.9277,-2.3727,0.1881,-0.6354,-0.4376,1.1678,-0.0957,15.0521,13.1689,1.8832,0.7807,69.0039,14.9664,13.1347,1.8317,0.7586,68.7533,1.2578,1.2488,9.0740e-03,3.5867e-03,6.2664,0.0857,0.0343,0.2507,0.0222,13.7943,11.9202,62.7375,0.7771,13.7086,11.8859,62.4869,0.7550
2,16.7864,15.8254,15.5363,15.3935,15.3500,4.7198e-04,16.6076,15.6866,15.4400,15.3217,15.2961,2,2,1.0225,1.0241,1.0210,1.0217,1.0233,0.0,0.9610,1.2501,1.3928,1.4364,16.7859,0.1787,1.0998,1.3463,1.4647,1.4903,14.7864,14.7864,15.7639,15.7623,15.7654,15.7647,15.7631,0.2891,0.4319,0.4754,15.8250,-0.7822,0.1388,0.3854,0.5037,0.5293,13.8254,13.8254,14.8029,14.8013,14.8044,...,1.0008,0.9993,0.9977,0.9985,-2.0247e+05,2.0247e+05,1399.9650,333.2776,117.0187,206.6743,-0.3252,15.6501,-15.6121,-13.6220,-9.5625,8.5752,0.3751,-0.9788,0.0770,-0.7371,0.0022,0.2375,0.3560,16.7864,15.3500,1.4364,0.5934,78.8917,16.6076,15.2961,1.3116,0.5462,78.3521,1.0241,1.0210,3.1220e-03,1.2343e-03,5.1126,0.1787,0.0540,0.5396,0.0472,15.7623,14.3290,73.7791,0.5922,15.5835,14.2751,73.2395,0.5450
3,25.6606,21.1887,20.2212,19.8949,19.6346,5.8143e-06,25.3536,20.9947,20.0873,19.7947,19.5552,4,3,1.2054,1.2061,1.2049,1.2051,1.2057,0.0,4.4719,5.4395,5.7657,6.0260,25.6606,0.3070,4.6660,5.5734,5.8660,6.1055,21.6606,22.6606,24.4552,24.4546,24.4558,24.4555,24.4549,0.9676,1.2938,1.5541,21.1887,-4.1649,0.1941,1.1015,1.3941,1.6335,17.1887,18.1887,19.9833,19.9827,19.9839,...,1.0003,0.9998,0.9993,0.9995,-8.3498e+06,8.3498e+06,540693.7038,-255606.5334,48177.6402,58140.3174,-1.5553,19.1608,-19.4878,36.2563,12.6515,-3.8705,4.3914,10.1283,-5.9023,0.3165,-0.6332,-3.9224,-0.1401,25.6606,19.6346,6.0260,2.4968,106.6001,25.3536,19.5552,5.7985,2.4085,105.7854,1.2061,1.2049,1.1840e-03,4.6906e-04,6.0272,0.3070,0.0795,0.8147,0.0883,24.4546,18.4298,100.5730,2.4964,24.1476,18.3503,99.7582,2.4081
4,24.4534,20.6992,19.0424,18.3242,17.9826,-3.3247e-05,23.7714,20.4338,18.8630,18.1903,17.8759,13,12,1.1939,1.1943,1.1937,1.1938,1.1941,0.0,3.7543,5.4111,6.1293,6.4708,24.4535,0.6820,4.0196,5.5904,6.2631,6.5775,11.4534,12.4534,23.2595,23.2591,23.2597,23.2596,23.2593,1.6568,2.3750,2.7165,20.6992,-3.0722,0.2653,1.8362,2.5089,2.8233,7.6992,8.6992,19.5052,19.5049,19.5054,...,1.0002,0.9999,0.9997,0.9998,1.2285e+06,-1.2285e+06,-112285.4841,28776.9020,2431.2498,-1802.2420,2.0952,-45.3906,-15.2844,24.0859,13.5414,3.5764,2.3899,5.9035,-2.4057,0.1657,1.1459,-0.3625,-0.5340,24.4534,17.9826,6.4708,2.6489,100.5018,23.7714,17.8759,5.8955,2.4156,99.1344,1.1943,1.1937,5.4700e-04,2.1830e-04,5.9699,0.6820,0.1067,1.3673,0.2333,23.2591,16.7889,94.5319,2.6487,22.5771,16.6822,93.1645,2.4154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399995,19.6734,18.6617,18.2708,18.1225,18.0627,-3.1726e-04,19.3361,18.3933,18.0860,17.9861,17.9670,17,17,1.1858,1.1863,1.1855,1.1857,1.1861,,1.0117,1.4026,1.5509,1.6107,19.6737,0.3372,1.2801,1.5874,1.6873,1.7064,2.6734,2.6734,18.4875,18.4871,18.4879,18.4877,18.4873,0.3909,0.5392,0.5990,18.6620,-0.6745,0.2684,0.5757,0.6756,0.6947,1.6617,1.6617,17.4758,17.4753,17.4762,...,1.0002,0.9999,0.9995,0.9997,3.1910e+03,-3.1910e+03,976.2744,-108.3388,-523.7569,224.4434,2.1475,-76.7207,-6.6688,2.6309,-7.0843,-3.4028,-2.0577,0.6967,0.7779,-0.0225,-0.5576,-0.0965,-0.0123,19.6734,18.0627,1.6107,0.6656,92.7910,19.3361,17.9670,1.3692,0.5751,91.7685,1.1863,1.1855,8.2900e-04,3.2915e-04,5.9294,0.3372,0.0957,1.0225,0.0905,18.4871,16.8772,86.8616,0.6653,18.1498,16.7815,85.8392,0.5748
399996,21.5229,20.7283,20.6087,21.0049,20.8458,6.5455e-02,21.3993,20.6399,20.5451,20.9507,20.7827,8,4,1.2850,1.2908,1.2793,1.2821,1.2878,,0.7946,0.9142,0.5180,0.6771,21.4574,0.1235,0.8829,0.9778,0.5722,0.7402,13.5229,17.5229,20.2379,20.2321,20.2435,20.2407,20.2350,0.1196,-0.2766,-0.1175,20.6628,-0.6711,0.0884,0.1832,-0.2224,-0.0544,12.7283,16.7283,19.4433,19.4375,19.4490,...,1.0023,0.9978,0.9934,0.9956,-1.2839e+05,1.2839e+05,1177.6816,235.3707,-134.0418,158.5533,-2.2023,-0.1329,-23.9801,21.9871,-6.7453,-20.9497,12.4153,0.8315,-1.5234,0.4739,0.6165,-1.5697,-1.4322,21.5229,20.6087,0.9142,0.3562,104.7105,21.3993,20.5451,0.8543,0.3364,104.3177,1.2908,1.2793,1.1443e-02,4.5229e-03,6.4250,0.1235,0.0636,0.3928,0.0198,20.2321,19.3294,98.2855,0.3516,20.1086,19.2657,97.8927,0.3319
399997,19.3991,18.2411,18.1605,18.1978,18.2018,-8.4180e-04,18.9136,17.8753,17.9047,18.0043,18.0400,1,1,1.1885,1.1886,1.1885,1.1885,1.1885,,1.1580,1.2386,1.2013,1.1972,19.3999,0.4855,1.5237,1.4944,1.3948,1.3591,18.3991,18.3991,18.2106,18.2105,18.2105,18.2106,18.2106,0.0806,0.0433,0.0392,18.2419,-0.6725,0.3657,0.3364,0.2368,0.2011,17.2411,17.2411,17.0526,17.0525,17.0526,...,1.0001,1.0000,1.0000,1.0000,-7.8679e+04,7.8679e+04,1575.7238,325.5438,-441.0233,68.7280,-3.0811,47.3562,22.8207,-3.3286,-6.1932,-7.7851,-0.4672,2.6724,0.6055,-0.0312,0.6407,-1.0401,0.1324,19.3991,18.1605,1.2386,0.5369,92.2002,18.9136,17.8753,1.0383,0.4336,90.7379,1.1886,1.1885,8.8000e-05,3.5436e-05,5.9425,0.4855,0.2852,1.4623,0.1033,18.2105,16.9720,86.2577,0.5368,17.7250,16.6869,84.7953,0.4336
399998,18.8997,17.6009,17.1329,16.8223,16.5946,4.2817e-02,18.7715,17.4880,17.0557,16.7660,16.5585,17,17,1.1804,1.1813,1.1797,1.1800,1.1809,,1.2989,1.7669,2.0774,2.3052,18.8569,0.1282,1.4118,1.8441,2.1338,2.3412,1.8997,1.8997,17.7193,17.7184,17.7201,17.7197,17.7189,0.4680,0.7785,1.0063,17.5581,-1.1707,0.1129,0.5452,0.8349,1.0424,0.6009,0.6009,16.4204,16.4195,16.4212,...,1.0004,0.9997,0.9990,0.9993,-1.2858e+05,1.2858e+05,1207.3018,235.8713,-136.2204,161.1962,3.0984,-79.9384,-6.7238,-7.4420,-3.2682,1.9715,-1.7284,-0.6124,-0.0696,-0.0560,-0.6277,-0.4468,0.1871,18.8997,16.5946,2.3052,0.9142,87.0504,18.7715,16.5585,2.2130,0.8791,86.6396,1.1813,1.1797,1.6750e-03,6.6268e-04,5.9023,0.1282,0.0361,0.4108,0.0350,17.7184,15.4149,81.1481,0.9135,17.5902,15.3788,80.7373,0.8785


# 훈련셋과 테스트셋 나누기

In [33]:
trn2 = df.iloc[:319978]
tst2 = df.iloc[319978:]
tst2.drop('class', axis =1 , inplace = True)
trn2.shape, tst2.shape

((319978, 371), (80000, 370))

# outlier

In [34]:
# 전체 데이터가 아닌 각 클래스별(0,1,2)로 이상치 처리
# Train_copy1 = Train_set.copy()

def remove_outlier(data, column):
    
    print(column)
    # 클래스 0처리
    column_data_0 = data[data['class']==0][column]
    quan_25_0 = np.percentile(column_data_0.values, 0.1) # 1분위수
    quan_75_0 = np.percentile(column_data_0.values, 99.9) # 3분위수
    iqr_0 = quan_75_0 - quan_25_0 # IQR 계산
    iqr_0 *= 1.5
    lowest_0 = quan_25_0 - iqr_0 # 최솟값 설정
    highest_0 = quan_75_0 + iqr_0 # 최댓값 설정
    outlier_index_0 = column_data_0[(column_data_0 < lowest_0) | (column_data_0 > highest_0)].index # 최솟값 ~ 최댓값 사이의 값만 출력
    print(len(outlier_index_0))
    
    # 클래스 1처리
    column_data_1 = data[data['class']==1][column]
    quan_25_1 = np.percentile(column_data_1.values, 0.1) # 1분위수
    quan_75_1 = np.percentile(column_data_1.values, 99.9) # 3분위수
    iqr_1 = quan_75_1 - quan_25_1 # IQR 계산
    iqr_1 *= 1.5
    lowest_1 = quan_25_1 - iqr_1 # 최솟값 설정
    highest_1 = quan_75_1 + iqr_1 # 최댓값 설정
    outlier_index_1 = column_data_1[(column_data_1 < lowest_1 )| (column_data_1 > highest_1)].index # 최솟값 ~ 최댓값 사이의 값만 출력
    print(len(outlier_index_1))
    
    # 클래스 2 처리
    column_data_2 = data[data['class']==2][column]
    quan_25_2 = np.percentile(column_data_2.values, 0.1) # 1분위수
    quan_75_2 = np.percentile(column_data_2.values, 99.9) # 3분위수
    iqr_2 = quan_75_2 - quan_25_2 # IQR 계산
    iqr_2 *= 1.5
    lowest_2 = quan_25_2 - iqr_2 # 최솟값 설정
    highest_2 = quan_75_2 + iqr_2 # 최댓값 설정
    outlier_index_2 = column_data_2[(column_data_2 < lowest_2 )| (column_data_2 > highest_2)].index # 최솟값 ~ 최댓값 사이의 값만 출력
    print(len(outlier_index_2))
    # outlier 행 제거
    data.drop(list(set(list(outlier_index_0) + list(outlier_index_1) + list(outlier_index_2))), axis = 0, inplace = True)
    print(data.shape)
    return data

In [35]:
col = list(trn2.columns)
col.remove('redshift')
col.remove('class')
# col

['u',
 'g',
 'r',
 'i',
 'z',
 'dered_u',
 'dered_g',
 'dered_r',
 'dered_i',
 'dered_z',
 'nObserve',
 'nDetect',
 'airmass_u',
 'airmass_g',
 'airmass_r',
 'airmass_i',
 'airmass_z',
 'u_g_diff',
 'u_r_diff',
 'u_i_diff',
 'u_z_diff',
 'u_redshift_diff',
 'u_dered_u_diff',
 'u_dered_g_diff',
 'u_dered_r_diff',
 'u_dered_i_diff',
 'u_dered_z_diff',
 'u_nObserve_diff',
 'u_nDetect_diff',
 'u_airmass_u_diff',
 'u_airmass_g_diff',
 'u_airmass_r_diff',
 'u_airmass_i_diff',
 'u_airmass_z_diff',
 'g_r_diff',
 'g_i_diff',
 'g_z_diff',
 'g_redshift_diff',
 'g_dered_u_diff',
 'g_dered_g_diff',
 'g_dered_r_diff',
 'g_dered_i_diff',
 'g_dered_z_diff',
 'g_nObserve_diff',
 'g_nDetect_diff',
 'g_airmass_u_diff',
 'g_airmass_g_diff',
 'g_airmass_r_diff',
 'g_airmass_i_diff',
 'g_airmass_z_diff',
 'r_i_diff',
 'r_z_diff',
 'r_redshift_diff',
 'r_dered_u_diff',
 'r_dered_g_diff',
 'r_dered_r_diff',
 'r_dered_i_diff',
 'r_dered_z_diff',
 'r_nObserve_diff',
 'r_nDetect_diff',
 'r_airmass_u_diff',
 'r_a

In [36]:
df2 = trn2.copy()
for i in col:
    
    ROT = remove_outlier(df2, i)
    
    df2 = ROT

df2

u
5
1
2
(319970, 371)
g
0
0
0
(319970, 371)
r
1
0
2
(319967, 371)
i
1
1
0
(319965, 371)
z
1
1
0
(319963, 371)
dered_u
0
0
0
(319963, 371)
dered_g
0
0
0
(319963, 371)
dered_r
0
0
0
(319963, 371)
dered_i
0
0
0
(319963, 371)
dered_z
0
0
0
(319963, 371)
nObserve
0
0
0
(319963, 371)
nDetect
0
0
0
(319963, 371)
airmass_u
0
0
0
(319963, 371)
airmass_g
0
0
0
(319963, 371)
airmass_r
0
0
0
(319963, 371)
airmass_i
0
0
0
(319963, 371)
airmass_z
0
0
0
(319963, 371)
u_g_diff
1
4
7
(319951, 371)
u_r_diff
0
1
4
(319946, 371)
u_i_diff
0
0
1
(319945, 371)
u_z_diff
0
1
1
(319943, 371)
u_redshift_diff
24
0
0
(319919, 371)
u_dered_u_diff
1
8
18
(319892, 371)
u_dered_g_diff
0
0
0
(319892, 371)
u_dered_r_diff
0
0
0
(319892, 371)
u_dered_i_diff
0
0
0
(319892, 371)
u_dered_z_diff
0
0
0
(319892, 371)
u_nObserve_diff
0
0
0
(319892, 371)
u_nDetect_diff
0
0
0
(319892, 371)
u_airmass_u_diff
0
0
0
(319892, 371)
u_airmass_g_diff
0
0
0
(319892, 371)
u_airmass_r_diff
0
0
0
(319892, 371)
u_airmass_i_diff
0
0
0
(319892, 

0
0
(319150, 371)
i_z_divide
1
0
3
(319146, 371)
i_redshift_divide
1
0
0
(319145, 371)
i_dered_u_divide
0
0
0
(319145, 371)
i_dered_g_divide
0
0
0
(319145, 371)
i_dered_r_divide
0
4
0
(319141, 371)
i_dered_i_divide
0
0
0
(319141, 371)
i_dered_z_divide
0
0
0
(319141, 371)
i_nObserve_divide
0
0
0
(319141, 371)
i_nDetect_divide
0
0
0
(319141, 371)
i_airmass_u_divide
0
0
0
(319141, 371)
i_airmass_g_divide
0
0
0
(319141, 371)
i_airmass_r_divide
0
0
0
(319141, 371)
i_airmass_i_divide
0
0
0
(319141, 371)
i_airmass_z_divide
0
0
0
(319141, 371)
z_redshift_divide
0
1
1
(319139, 371)
z_dered_u_divide
0
0
0
(319139, 371)
z_dered_g_divide
2
0
0
(319137, 371)
z_dered_r_divide
0
4
0
(319133, 371)
z_dered_i_divide
0
4
5
(319124, 371)
z_dered_z_divide
0
1
0
(319123, 371)
z_nObserve_divide
0
0
0
(319123, 371)
z_nDetect_divide
0
0
0
(319123, 371)
z_airmass_u_divide
0
0
0
(319123, 371)
z_airmass_g_divide
0
0
0
(319123, 371)
z_airmass_r_divide
0
0
0
(319123, 371)
z_airmass_i_divide
0
0
0
(319123, 371)
z_ai

Unnamed: 0_level_0,u,g,r,i,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,nDetect,airmass_u,airmass_g,airmass_r,airmass_i,airmass_z,class,u_g_diff,u_r_diff,u_i_diff,u_z_diff,u_redshift_diff,u_dered_u_diff,u_dered_g_diff,u_dered_r_diff,u_dered_i_diff,u_dered_z_diff,u_nObserve_diff,u_nDetect_diff,u_airmass_u_diff,u_airmass_g_diff,u_airmass_r_diff,u_airmass_i_diff,u_airmass_z_diff,g_r_diff,g_i_diff,g_z_diff,g_redshift_diff,g_dered_u_diff,g_dered_g_diff,g_dered_r_diff,g_dered_i_diff,g_dered_z_diff,g_nObserve_diff,g_nDetect_diff,g_airmass_u_diff,g_airmass_g_diff,g_airmass_r_diff,...,airmass_g_airmass_z_divide,airmass_r_airmass_i_divide,airmass_r_airmass_z_divide,airmass_i_airmass_z_divide,PCA,incPCA0,incPCA1,incPCA2,incPCA3,incPCA4,incPCA5,incPCA6,incPCA7,incPCA8,incPCA9,incPCA10,incPCA11,incPCA12,incPCA13,incPCA14,incPCA15,incPCA16,incPCA17,one_max,one_min,one_diff,one_std,one_sum,dered_max,dered_min,dered_diff,dered_std,dered_sum,airmass_max,airmass_min,airmass_diff,airmass_std,airmass_sum,one_dered_max_diff,one_dered_min_diff,one_dered_sum_diff,one_dered_std_diff,one_airmass_max_diff,one_airmass_min_diff,one_airmass_sum_diff,one_airmass_std_diff,dered_airmass_max_diff,dered_airmass_min_diff,dered_airmass_sum_diff,dered_airmass_std_diff
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
0,23.2640,20.3368,19.0095,17.6724,16.9396,-8.1086e-05,23.1243,20.2578,18.9551,17.6321,16.9089,18,18,1.1898,1.1907,1.1890,1.1894,1.1902,0.0,2.9272,4.2545,5.5915,6.3243,23.2640,0.1397,3.0062,4.3088,5.6318,6.3550,5.2640,5.2640,22.0742,22.0733,22.0750,22.0746,22.0738,1.3273,2.6643,3.3972,20.3369,-2.7875,0.0790,1.3817,2.7047,3.4278,2.3368,2.3368,19.1470,19.1461,19.1478,...,1.0004,0.9997,0.9990,0.9993,4.1012e+05,-4.1012e+05,-44588.1645,700.0365,-705.3298,-2043.2987,3.8336,-80.0839,-3.4657,17.4298,12.8590,6.3963,-1.8975,0.1039,-1.1943,-0.1168,-0.3289,-1.2392,0.8164,23.2640,16.9396,6.3243,2.4994,97.2223,23.1243,16.9089,6.2153,2.4562,96.8782,1.1907,1.1890,0.0017,6.7348e-04,5.9490,0.1397,0.0307,0.3441,0.0432,22.0733,15.7506,91.2733,2.4987,21.9336,15.7200,90.9292,2.4555
1,15.0521,14.0620,13.4524,13.2684,13.1689,4.5061e-03,14.9664,14.0045,13.4114,13.2363,13.1347,1,1,1.2533,1.2578,1.2488,1.2510,1.2555,1.0,0.9902,1.5997,1.7837,1.8832,15.0476,0.0857,1.0476,1.6408,1.8159,1.9175,14.0521,14.0521,13.7989,13.7943,13.8034,13.8012,13.7966,0.6096,0.7935,0.8930,14.0575,-0.9045,0.0574,0.6506,0.8257,0.9273,13.0620,13.0620,12.8087,12.8041,12.8132,...,1.0018,0.9982,0.9946,0.9964,-1.3453e+05,1.3453e+05,1329.3993,234.1073,-139.3329,141.9778,0.8354,26.8378,4.3943,-49.9491,3.9271,6.1358,0.9277,-2.3727,0.1881,-0.6354,-0.4376,1.1678,-0.0957,15.0521,13.1689,1.8832,0.7807,69.0039,14.9664,13.1347,1.8317,0.7586,68.7533,1.2578,1.2488,0.0091,3.5867e-03,6.2664,0.0857,0.0343,0.2507,0.0222,13.7943,11.9202,62.7375,0.7771,13.7086,11.8859,62.4869,0.7550
2,16.7864,15.8254,15.5363,15.3935,15.3500,4.7198e-04,16.6076,15.6866,15.4400,15.3217,15.2961,2,2,1.0225,1.0241,1.0210,1.0217,1.0233,0.0,0.9610,1.2501,1.3928,1.4364,16.7859,0.1787,1.0998,1.3463,1.4647,1.4903,14.7864,14.7864,15.7639,15.7623,15.7654,15.7647,15.7631,0.2891,0.4319,0.4754,15.8250,-0.7822,0.1388,0.3854,0.5037,0.5293,13.8254,13.8254,14.8029,14.8013,14.8044,...,1.0008,0.9993,0.9977,0.9985,-2.0247e+05,2.0247e+05,1399.9650,333.2776,117.0187,206.6743,-0.3252,15.6501,-15.6121,-13.6220,-9.5625,8.5752,0.3751,-0.9788,0.0770,-0.7371,0.0022,0.2375,0.3560,16.7864,15.3500,1.4364,0.5934,78.8917,16.6076,15.2961,1.3116,0.5462,78.3521,1.0241,1.0210,0.0031,1.2343e-03,5.1126,0.1787,0.0540,0.5396,0.0472,15.7623,14.3290,73.7791,0.5922,15.5835,14.2751,73.2395,0.5450
3,25.6606,21.1887,20.2212,19.8949,19.6346,5.8143e-06,25.3536,20.9947,20.0873,19.7947,19.5552,4,3,1.2054,1.2061,1.2049,1.2051,1.2057,0.0,4.4719,5.4395,5.7657,6.0260,25.6606,0.3070,4.6660,5.5734,5.8660,6.1055,21.6606,22.6606,24.4552,24.4546,24.4558,24.4555,24.4549,0.9676,1.2938,1.5541,21.1887,-4.1649,0.1941,1.1015,1.3941,1.6335,17.1887,18.1887,19.9833,19.9827,19.9839,...,1.0003,0.9998,0.9993,0.9995,-8.3498e+06,8.3498e+06,540693.7038,-255606.5334,48177.6402,58140.3174,-1.5553,19.1608,-19.4878,36.2563,12.6515,-3.8705,4.3914,10.1283,-5.9023,0.3165,-0.6332,-3.9224,-0.1401,25.6606,19.6346,6.0260,2.4968,106.6001,25.3536,19.5552,5.7985,2.4085,105.7854,1.2061,1.2049,0.0012,4.6906e-04,6.0272,0.3070,0.0795,0.8147,0.0883,24.4546,18.4298,100.5730,2.4964,24.1476,18.3503,99.7582,2.4081
4,24.4534,20.6992,19.0424,18.3242,17.9826,-3.3247e-05,23.7714,20.4338,18.8630,18.1903,17.8759,13,12,1.1939,1.1943,1.1937,1.1938,1.1941,0.0,3.7543,5.4111,6.1293,6.4708,24.4535,0.6820,4.0196,5.5904,6.2631,6.5775,11.4534,12.4534,23.2595,23.2591,23.2597,23.2596,23.2593,1.6568,2.3750,2.7165,20.6992,-3.0722,0.2653,1.8362,2.5089,2.8233,7.6992,8.6992,19.5052,19.5049,19.5054,...,1.0002,0.9999,0.9997,0.9998,1.2285e+06,-1.2285e+06,-112285.4841,28776.9020,2431.2498,-1802.2420,2.0952,-45.3906,-15.2844,24.0859,13.5414,3.5764,2.3899,5.9035,-2.4057,0.1657,1.1459,-0.3625,-0.5340,24.4534,17.9826,6.4708,2.6489,100.5018,23.7714,17.8759,5.8955,2.4156,99.1344,1.1943,1.1937,0.0005,2.1830e-04,5.9699,0.6820,0.1067,1.3673,0.2333,23.2591,16.7889,94.5319,2.6487,22.5771,16.6822,93.1645,2.4154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319995,18.3656,17.2409,16.7739,16.4850,16.3404,5.1988e-02,18.1328,17.0406,16.6374,16.3861,16.2874,2,2,1.0930,1.0888,1.0973,1.0951,1.0909,2.0,1.1248,1.5917,1.8807,2.0252,18.3137,0.2328,1.3250,1.7282,1.9796,2.0782,16.3656,16.3656,17.2726,17.2768,17.2683,17.2705,17.2747,0.4670,0.7559,0.9005,17.1889,-0.8919,0.2002,0.6034,0.8548,0.9535,15.2409,15.2409,16.1479,16.1521,16.1436,...,0.9981,1.0020,1.0059,1.0039,-1.2841e+05,1.2841e+05,1201.3652,236.2419,-137.9441,160.7124,-0.7656,20.2988,-13.5854,-5.0372,-4.9776,3.4179,0.0426,-0.3847,0.3102,-0.2283,0.1679,0.1391,0.2074,18.3656,16.3404,2.0252,0.8164,85.2058,18.1328,16.2874,1.8454,0.7496,84.4844,1.0973,1.0888,0.0085,3.3528e-03,5.4652,0.2328,0.0530,0.7214,0.0667,17.2683,15.2516,79.7406,0.8130,17.0355,15.1986,79.0192,0.7463
319996,18.6856,17.6658,17.1904,16.8169,16.6367,8.7261e-02,18.5207,17.5131,17.0871,16.7436,16.6017,2,2,1.0517,1.0508,1.0526,1.0522,1.0513,1.0,1.0198,1.4952,1.8688,2.0490,18.5984,0.1649,1.1725,1.5985,1.9420,2.0839,16.6856,16.6856,17.6340,17.6348,17.6330,17.6335,17.6344,0.4754,0.8489,1.0291,17.5785,-0.8549,0.1527,0.5787,0.9222,1.0641,15.6658,15.6658,16.6141,16.6150,16.6132,...,0.9996,1.0005,1.0013,1.0009,-1.2812e+05,1.2812e+05,1197.6386,236.8330,-138.8175,160.6590,-1.1503,21.6672,-12.4225,2.1796,-7.2285,4.7415,-0.1325,-1.0503,0.2701,-0.1605,0.1712,0.2074,0.3020,18.6856,16.6367,2.0490,0.8201,86.9954,18.5207,16.6017,1.9190,0.7710,86.4663,1.0526,1.0508,0.0018,7.1252e-04,5.2586,0.1649,0.0350,0.5291,0.0491,17.6330,15.5859,81.7368,0.8194,17.4681,15.5509,81.2077,0.7703
319997,21.1890,18.4234,17.2956,16.9011,16.6841,-1.0148e-05,20.9890,18.3076,17.2152,16.8403,16.6355,31,30,1.1933,1.1941,1.1925,1.1929,1.1937,0.0,2.7656,3.8934,4.2879,4.5048,21.1890,0.2000,2.8814,3.9738,4.3487,4.5534,-9.8110,-8.8110,19.9957,19.9948,19.9965,19.9961,19.9953,1.1277,1.5223,1.7392,18.4234,-2.5656,0.1158,1.2082,1.5831,1.7878,-12.5766,-11.5766,17.2301,17.2292,17.2308,...,1.0004,0.9997,0.9990,0.9993,3.8700e+06,-3.8700e+06,-229349.9152,66968.2494,-1450.2517,-3434.2567,6.0126,-158.0064,25.8490,-2.6939,3.5065,7.9574,0.2980,2.7889,-2.4996,-0.1132,-0.1236,-0.3659,-0.2731,21.1890,16.6841,4.5048,1.8531,90.4931,20.9890,16.6355,4.3534,1.7924,89.9876,1.1941,1.1925,0.0016,6.4536e-04,5.9665,0.2000,0.0486,0.5055,0.0607,19.9948,15.4916,84.5267,1.8524,19.7948,15.4430,84.0211,1.7918
319998,18.8472,17.3186,16.5057,16.1806,15.9116,-4.5209e-05,16.3900,15.4058,15.1823,15.1969,15.1793,1,1,1.1725,1.1726,1.1725,1.1725,1.1725,0.0,1.5286,2.3415,2.6666,2.9356,18.8472,2.4572,3.4414,3.6649,3.6503,3.6678,17.8472,17.8472,17.6747,17.6746,17.6747,17.6747,17.6747,0.8129,1.1380,1.4070,17.3187,0.9286,1.9128,2.1363,2.1217,2.1393,16.3186,16.3186,16.1461,16.1460,16.1461,...,1.0001,1.0000,1.0000,1.0000,7.1201e+05,-7.1201e+05,-25700.2755,1480.2958,-1465.5173,1714.2524,1.1539,37.9561,14.8542,-21.4989,-0.0263,2.4617,1.5093,5.2010,9.7316,-0.1775,0.3776,-0.7429,-0.5846,18.8472,15.9116,2.9356,1.1835,84.7637,16.3900,15.1793,1.2106,0.5226,77.3543,1.1726,1.1725,0.0001,4.3154e-05,5.8624,2.4572,0.7323,7.4094,0.6609,17.6746,14.7392,78.9013,1.1835,15.2174,14.0069,71.4919,0.5225


In [37]:
target_col = 'class'
feature_col = [x for x in df.columns if df[x].dtype in [np.int64, np.float64] and x !=target_col]
print(feature_col)
pred_col='pred'


['u', 'g', 'r', 'i', 'z', 'redshift', 'dered_u', 'dered_g', 'dered_r', 'dered_i', 'dered_z', 'nObserve', 'nDetect', 'airmass_u', 'airmass_g', 'airmass_r', 'airmass_i', 'airmass_z', 'u_g_diff', 'u_r_diff', 'u_i_diff', 'u_z_diff', 'u_redshift_diff', 'u_dered_u_diff', 'u_dered_g_diff', 'u_dered_r_diff', 'u_dered_i_diff', 'u_dered_z_diff', 'u_nObserve_diff', 'u_nDetect_diff', 'u_airmass_u_diff', 'u_airmass_g_diff', 'u_airmass_r_diff', 'u_airmass_i_diff', 'u_airmass_z_diff', 'g_r_diff', 'g_i_diff', 'g_z_diff', 'g_redshift_diff', 'g_dered_u_diff', 'g_dered_g_diff', 'g_dered_r_diff', 'g_dered_i_diff', 'g_dered_z_diff', 'g_nObserve_diff', 'g_nDetect_diff', 'g_airmass_u_diff', 'g_airmass_g_diff', 'g_airmass_r_diff', 'g_airmass_i_diff', 'g_airmass_z_diff', 'r_i_diff', 'r_z_diff', 'r_redshift_diff', 'r_dered_u_diff', 'r_dered_g_diff', 'r_dered_r_diff', 'r_dered_i_diff', 'r_dered_z_diff', 'r_nObserve_diff', 'r_nDetect_diff', 'r_airmass_u_diff', 'r_airmass_g_diff', 'r_airmass_r_diff', 'r_airmass_i_

In [41]:
from sklearn.feature_selection import SelectFromModel
#LGBM 모델을 기반으로 속성들 선택
select = SelectFromModel(LGBMClassifier(max_depth=27, learning_rate = 0.005, n_estimators = 1000, boosting_type='gbrt',
                                       ))#, threshold = 0.3)
select.fit(df2[feature_col], df2[target_col])

train_sel = select.transform(df2[feature_col])

test_sel = select.transform(tst2[feature_col])



In [42]:
# SelectFromModel?

## 모델링

In [43]:
df2.shape

(318740, 371)

In [44]:
train_sel.shape

(318740, 90)

In [45]:
y = df2[target_col]
df2.drop(target_col, axis=1, inplace=True)

feature_name = df2.columns.tolist()
print(y.shape, df2.shape, tst2.shape)

(318740,) (318740, 370) (80000, 370)


In [46]:
X_trn, X_val, y_trn, y_val = train_test_split(train_sel, y, test_size=.01, random_state=seed)
print(X_trn.shape, X_val.shape, y_trn.shape, y_val.shape)

clf = lgb.LGBMClassifier(objective='multiclass',
                         n_estimators=12000,
                         num_leaves=512,
                         learning_rate=0.001,
                         min_child_samples=10,
                         subsample=.5,
                         subsample_freq=1,
                         colsample_bytree=.8,
                         random_state=seed,
                         max_depth = 27,
                         n_jobs=-1)
                        #boosting_type = 'goss')
clf.fit(X_trn, y_trn,
        eval_set=[(X_val, y_val)],
        eval_metric='multiclass',
        early_stopping_rounds=100)

p_val = clf.predict(X_val)


print(f'{accuracy_score(y_val, p_val) * 100:.4f}%')

(315552, 90) (3188, 90) (315552,) (3188,)
[1]	valid_0's multi_logloss: 0.990721
Training until validation scores don't improve for 100 rounds
[2]	valid_0's multi_logloss: 0.989141
[3]	valid_0's multi_logloss: 0.987557
[4]	valid_0's multi_logloss: 0.985981
[5]	valid_0's multi_logloss: 0.984422
[6]	valid_0's multi_logloss: 0.982854
[7]	valid_0's multi_logloss: 0.981284
[8]	valid_0's multi_logloss: 0.979731
[9]	valid_0's multi_logloss: 0.978187
[10]	valid_0's multi_logloss: 0.976643
[11]	valid_0's multi_logloss: 0.97511
[12]	valid_0's multi_logloss: 0.973568
[13]	valid_0's multi_logloss: 0.972027
[14]	valid_0's multi_logloss: 0.970493
[15]	valid_0's multi_logloss: 0.968962
[16]	valid_0's multi_logloss: 0.96743
[17]	valid_0's multi_logloss: 0.965911
[18]	valid_0's multi_logloss: 0.964393
[19]	valid_0's multi_logloss: 0.96289
[20]	valid_0's multi_logloss: 0.961392
[21]	valid_0's multi_logloss: 0.959884
[22]	valid_0's multi_logloss: 0.958379
[23]	valid_0's multi_logloss: 0.956885
[24]	valid_

[207]	valid_0's multi_logloss: 0.737042
[208]	valid_0's multi_logloss: 0.736085
[209]	valid_0's multi_logloss: 0.735124
[210]	valid_0's multi_logloss: 0.734166
[211]	valid_0's multi_logloss: 0.733212
[212]	valid_0's multi_logloss: 0.732267
[213]	valid_0's multi_logloss: 0.731324
[214]	valid_0's multi_logloss: 0.730375
[215]	valid_0's multi_logloss: 0.729429
[216]	valid_0's multi_logloss: 0.728481
[217]	valid_0's multi_logloss: 0.727538
[218]	valid_0's multi_logloss: 0.726603
[219]	valid_0's multi_logloss: 0.725668
[220]	valid_0's multi_logloss: 0.724735
[221]	valid_0's multi_logloss: 0.723802
[222]	valid_0's multi_logloss: 0.72287
[223]	valid_0's multi_logloss: 0.721937
[224]	valid_0's multi_logloss: 0.721009
[225]	valid_0's multi_logloss: 0.72008
[226]	valid_0's multi_logloss: 0.719151
[227]	valid_0's multi_logloss: 0.718227
[228]	valid_0's multi_logloss: 0.717307
[229]	valid_0's multi_logloss: 0.71638
[230]	valid_0's multi_logloss: 0.715471
[231]	valid_0's multi_logloss: 0.714556
[23

[413]	valid_0's multi_logloss: 0.574898
[414]	valid_0's multi_logloss: 0.574252
[415]	valid_0's multi_logloss: 0.573605
[416]	valid_0's multi_logloss: 0.572967
[417]	valid_0's multi_logloss: 0.572329
[418]	valid_0's multi_logloss: 0.57169
[419]	valid_0's multi_logloss: 0.571049
[420]	valid_0's multi_logloss: 0.570417
[421]	valid_0's multi_logloss: 0.56979
[422]	valid_0's multi_logloss: 0.569159
[423]	valid_0's multi_logloss: 0.56853
[424]	valid_0's multi_logloss: 0.567902
[425]	valid_0's multi_logloss: 0.567273
[426]	valid_0's multi_logloss: 0.566641
[427]	valid_0's multi_logloss: 0.566011
[428]	valid_0's multi_logloss: 0.565381
[429]	valid_0's multi_logloss: 0.564755
[430]	valid_0's multi_logloss: 0.564129
[431]	valid_0's multi_logloss: 0.563501
[432]	valid_0's multi_logloss: 0.562875
[433]	valid_0's multi_logloss: 0.562264
[434]	valid_0's multi_logloss: 0.561643
[435]	valid_0's multi_logloss: 0.561023
[436]	valid_0's multi_logloss: 0.560405
[437]	valid_0's multi_logloss: 0.559788
[43

[620]	valid_0's multi_logloss: 0.463425
[621]	valid_0's multi_logloss: 0.462975
[622]	valid_0's multi_logloss: 0.462525
[623]	valid_0's multi_logloss: 0.462075
[624]	valid_0's multi_logloss: 0.461623
[625]	valid_0's multi_logloss: 0.461176
[626]	valid_0's multi_logloss: 0.46072
[627]	valid_0's multi_logloss: 0.460279
[628]	valid_0's multi_logloss: 0.459838
[629]	valid_0's multi_logloss: 0.4594
[630]	valid_0's multi_logloss: 0.458959
[631]	valid_0's multi_logloss: 0.458514
[632]	valid_0's multi_logloss: 0.458076
[633]	valid_0's multi_logloss: 0.457635
[634]	valid_0's multi_logloss: 0.457198
[635]	valid_0's multi_logloss: 0.45676
[636]	valid_0's multi_logloss: 0.456318
[637]	valid_0's multi_logloss: 0.455882
[638]	valid_0's multi_logloss: 0.455447
[639]	valid_0's multi_logloss: 0.455013
[640]	valid_0's multi_logloss: 0.454579
[641]	valid_0's multi_logloss: 0.454142
[642]	valid_0's multi_logloss: 0.45371
[643]	valid_0's multi_logloss: 0.453278
[644]	valid_0's multi_logloss: 0.452847
[645]

[826]	valid_0's multi_logloss: 0.384759
[827]	valid_0's multi_logloss: 0.384431
[828]	valid_0's multi_logloss: 0.384112
[829]	valid_0's multi_logloss: 0.383795
[830]	valid_0's multi_logloss: 0.383478
[831]	valid_0's multi_logloss: 0.383156
[832]	valid_0's multi_logloss: 0.382835
[833]	valid_0's multi_logloss: 0.382515
[834]	valid_0's multi_logloss: 0.382194
[835]	valid_0's multi_logloss: 0.381876
[836]	valid_0's multi_logloss: 0.38156
[837]	valid_0's multi_logloss: 0.381243
[838]	valid_0's multi_logloss: 0.380928
[839]	valid_0's multi_logloss: 0.380612
[840]	valid_0's multi_logloss: 0.380296
[841]	valid_0's multi_logloss: 0.37998
[842]	valid_0's multi_logloss: 0.379662
[843]	valid_0's multi_logloss: 0.379348
[844]	valid_0's multi_logloss: 0.379031
[845]	valid_0's multi_logloss: 0.378718
[846]	valid_0's multi_logloss: 0.378404
[847]	valid_0's multi_logloss: 0.378086
[848]	valid_0's multi_logloss: 0.37777
[849]	valid_0's multi_logloss: 0.377463
[850]	valid_0's multi_logloss: 0.37715
[851

[1032]	valid_0's multi_logloss: 0.327688
[1033]	valid_0's multi_logloss: 0.327452
[1034]	valid_0's multi_logloss: 0.327217
[1035]	valid_0's multi_logloss: 0.326981
[1036]	valid_0's multi_logloss: 0.32675
[1037]	valid_0's multi_logloss: 0.326514
[1038]	valid_0's multi_logloss: 0.326278
[1039]	valid_0's multi_logloss: 0.326043
[1040]	valid_0's multi_logloss: 0.325808
[1041]	valid_0's multi_logloss: 0.325576
[1042]	valid_0's multi_logloss: 0.325341
[1043]	valid_0's multi_logloss: 0.325109
[1044]	valid_0's multi_logloss: 0.324875
[1045]	valid_0's multi_logloss: 0.324642
[1046]	valid_0's multi_logloss: 0.324409
[1047]	valid_0's multi_logloss: 0.324177
[1048]	valid_0's multi_logloss: 0.323943
[1049]	valid_0's multi_logloss: 0.323713
[1050]	valid_0's multi_logloss: 0.323486
[1051]	valid_0's multi_logloss: 0.323253
[1052]	valid_0's multi_logloss: 0.323021
[1053]	valid_0's multi_logloss: 0.322789
[1054]	valid_0's multi_logloss: 0.322562
[1055]	valid_0's multi_logloss: 0.322331
[1056]	valid_0's 

[1234]	valid_0's multi_logloss: 0.28626
[1235]	valid_0's multi_logloss: 0.28608
[1236]	valid_0's multi_logloss: 0.285904
[1237]	valid_0's multi_logloss: 0.285723
[1238]	valid_0's multi_logloss: 0.285541
[1239]	valid_0's multi_logloss: 0.285362
[1240]	valid_0's multi_logloss: 0.285188
[1241]	valid_0's multi_logloss: 0.285009
[1242]	valid_0's multi_logloss: 0.284835
[1243]	valid_0's multi_logloss: 0.284659
[1244]	valid_0's multi_logloss: 0.284488
[1245]	valid_0's multi_logloss: 0.284312
[1246]	valid_0's multi_logloss: 0.28414
[1247]	valid_0's multi_logloss: 0.283968
[1248]	valid_0's multi_logloss: 0.28379
[1249]	valid_0's multi_logloss: 0.28362
[1250]	valid_0's multi_logloss: 0.283449
[1251]	valid_0's multi_logloss: 0.283276
[1252]	valid_0's multi_logloss: 0.283107
[1253]	valid_0's multi_logloss: 0.282934
[1254]	valid_0's multi_logloss: 0.282766
[1255]	valid_0's multi_logloss: 0.282601
[1256]	valid_0's multi_logloss: 0.282431
[1257]	valid_0's multi_logloss: 0.28226
[1258]	valid_0's multi

[1436]	valid_0's multi_logloss: 0.25521
[1437]	valid_0's multi_logloss: 0.255076
[1438]	valid_0's multi_logloss: 0.254941
[1439]	valid_0's multi_logloss: 0.254809
[1440]	valid_0's multi_logloss: 0.254673
[1441]	valid_0's multi_logloss: 0.254536
[1442]	valid_0's multi_logloss: 0.254406
[1443]	valid_0's multi_logloss: 0.25427
[1444]	valid_0's multi_logloss: 0.254135
[1445]	valid_0's multi_logloss: 0.254003
[1446]	valid_0's multi_logloss: 0.253873
[1447]	valid_0's multi_logloss: 0.253741
[1448]	valid_0's multi_logloss: 0.253609
[1449]	valid_0's multi_logloss: 0.253477
[1450]	valid_0's multi_logloss: 0.253344
[1451]	valid_0's multi_logloss: 0.253214
[1452]	valid_0's multi_logloss: 0.253081
[1453]	valid_0's multi_logloss: 0.252956
[1454]	valid_0's multi_logloss: 0.252825
[1455]	valid_0's multi_logloss: 0.252696
[1456]	valid_0's multi_logloss: 0.252566
[1457]	valid_0's multi_logloss: 0.252436
[1458]	valid_0's multi_logloss: 0.252313
[1459]	valid_0's multi_logloss: 0.252181
[1460]	valid_0's m

[1638]	valid_0's multi_logloss: 0.231683
[1639]	valid_0's multi_logloss: 0.231584
[1640]	valid_0's multi_logloss: 0.231484
[1641]	valid_0's multi_logloss: 0.231382
[1642]	valid_0's multi_logloss: 0.231278
[1643]	valid_0's multi_logloss: 0.231172
[1644]	valid_0's multi_logloss: 0.231069
[1645]	valid_0's multi_logloss: 0.230967
[1646]	valid_0's multi_logloss: 0.230868
[1647]	valid_0's multi_logloss: 0.23077
[1648]	valid_0's multi_logloss: 0.230675
[1649]	valid_0's multi_logloss: 0.230577
[1650]	valid_0's multi_logloss: 0.230478
[1651]	valid_0's multi_logloss: 0.230379
[1652]	valid_0's multi_logloss: 0.230283
[1653]	valid_0's multi_logloss: 0.230185
[1654]	valid_0's multi_logloss: 0.230083
[1655]	valid_0's multi_logloss: 0.229981
[1656]	valid_0's multi_logloss: 0.229878
[1657]	valid_0's multi_logloss: 0.229783
[1658]	valid_0's multi_logloss: 0.229683
[1659]	valid_0's multi_logloss: 0.229582
[1660]	valid_0's multi_logloss: 0.22948
[1661]	valid_0's multi_logloss: 0.229384
[1662]	valid_0's m

[1839]	valid_0's multi_logloss: 0.213803
[1840]	valid_0's multi_logloss: 0.213727
[1841]	valid_0's multi_logloss: 0.213651
[1842]	valid_0's multi_logloss: 0.213572
[1843]	valid_0's multi_logloss: 0.213495
[1844]	valid_0's multi_logloss: 0.213417
[1845]	valid_0's multi_logloss: 0.21334
[1846]	valid_0's multi_logloss: 0.213261
[1847]	valid_0's multi_logloss: 0.213186
[1848]	valid_0's multi_logloss: 0.213109
[1849]	valid_0's multi_logloss: 0.213035
[1850]	valid_0's multi_logloss: 0.212959
[1851]	valid_0's multi_logloss: 0.212885
[1852]	valid_0's multi_logloss: 0.21281
[1853]	valid_0's multi_logloss: 0.212731
[1854]	valid_0's multi_logloss: 0.212656
[1855]	valid_0's multi_logloss: 0.212578
[1856]	valid_0's multi_logloss: 0.212501
[1857]	valid_0's multi_logloss: 0.21243
[1858]	valid_0's multi_logloss: 0.212357
[1859]	valid_0's multi_logloss: 0.212282
[1860]	valid_0's multi_logloss: 0.212203
[1861]	valid_0's multi_logloss: 0.212129
[1862]	valid_0's multi_logloss: 0.212053
[1863]	valid_0's mu

[2041]	valid_0's multi_logloss: 0.199928
[2042]	valid_0's multi_logloss: 0.199864
[2043]	valid_0's multi_logloss: 0.199806
[2044]	valid_0's multi_logloss: 0.199742
[2045]	valid_0's multi_logloss: 0.199686
[2046]	valid_0's multi_logloss: 0.19963
[2047]	valid_0's multi_logloss: 0.199576
[2048]	valid_0's multi_logloss: 0.199514
[2049]	valid_0's multi_logloss: 0.199453
[2050]	valid_0's multi_logloss: 0.19939
[2051]	valid_0's multi_logloss: 0.199335
[2052]	valid_0's multi_logloss: 0.19928
[2053]	valid_0's multi_logloss: 0.199221
[2054]	valid_0's multi_logloss: 0.19916
[2055]	valid_0's multi_logloss: 0.199106
[2056]	valid_0's multi_logloss: 0.199049
[2057]	valid_0's multi_logloss: 0.198993
[2058]	valid_0's multi_logloss: 0.19893
[2059]	valid_0's multi_logloss: 0.198873
[2060]	valid_0's multi_logloss: 0.19881
[2061]	valid_0's multi_logloss: 0.198755
[2062]	valid_0's multi_logloss: 0.198699
[2063]	valid_0's multi_logloss: 0.19864
[2064]	valid_0's multi_logloss: 0.198582
[2065]	valid_0's multi_

[2243]	valid_0's multi_logloss: 0.189147
[2244]	valid_0's multi_logloss: 0.189097
[2245]	valid_0's multi_logloss: 0.189049
[2246]	valid_0's multi_logloss: 0.189006
[2247]	valid_0's multi_logloss: 0.188959
[2248]	valid_0's multi_logloss: 0.188912
[2249]	valid_0's multi_logloss: 0.188863
[2250]	valid_0's multi_logloss: 0.188822
[2251]	valid_0's multi_logloss: 0.188773
[2252]	valid_0's multi_logloss: 0.188726
[2253]	valid_0's multi_logloss: 0.188675
[2254]	valid_0's multi_logloss: 0.188627
[2255]	valid_0's multi_logloss: 0.18858
[2256]	valid_0's multi_logloss: 0.188535
[2257]	valid_0's multi_logloss: 0.188492
[2258]	valid_0's multi_logloss: 0.188443
[2259]	valid_0's multi_logloss: 0.188395
[2260]	valid_0's multi_logloss: 0.188348
[2261]	valid_0's multi_logloss: 0.188306
[2262]	valid_0's multi_logloss: 0.18826
[2263]	valid_0's multi_logloss: 0.188216
[2264]	valid_0's multi_logloss: 0.188172
[2265]	valid_0's multi_logloss: 0.188128
[2266]	valid_0's multi_logloss: 0.188078
[2267]	valid_0's m

[2444]	valid_0's multi_logloss: 0.180754
[2445]	valid_0's multi_logloss: 0.180715
[2446]	valid_0's multi_logloss: 0.18068
[2447]	valid_0's multi_logloss: 0.180648
[2448]	valid_0's multi_logloss: 0.18061
[2449]	valid_0's multi_logloss: 0.180574
[2450]	valid_0's multi_logloss: 0.180539
[2451]	valid_0's multi_logloss: 0.1805
[2452]	valid_0's multi_logloss: 0.180465
[2453]	valid_0's multi_logloss: 0.180429
[2454]	valid_0's multi_logloss: 0.180392
[2455]	valid_0's multi_logloss: 0.180355
[2456]	valid_0's multi_logloss: 0.18032
[2457]	valid_0's multi_logloss: 0.180283
[2458]	valid_0's multi_logloss: 0.180246
[2459]	valid_0's multi_logloss: 0.180207
[2460]	valid_0's multi_logloss: 0.180171
[2461]	valid_0's multi_logloss: 0.180137
[2462]	valid_0's multi_logloss: 0.180106
[2463]	valid_0's multi_logloss: 0.180072
[2464]	valid_0's multi_logloss: 0.180035
[2465]	valid_0's multi_logloss: 0.179994
[2466]	valid_0's multi_logloss: 0.179954
[2467]	valid_0's multi_logloss: 0.179917
[2468]	valid_0's mult

[2645]	valid_0's multi_logloss: 0.174137
[2646]	valid_0's multi_logloss: 0.174108
[2647]	valid_0's multi_logloss: 0.174079
[2648]	valid_0's multi_logloss: 0.174049
[2649]	valid_0's multi_logloss: 0.174021
[2650]	valid_0's multi_logloss: 0.173995
[2651]	valid_0's multi_logloss: 0.173968
[2652]	valid_0's multi_logloss: 0.173939
[2653]	valid_0's multi_logloss: 0.173909
[2654]	valid_0's multi_logloss: 0.173881
[2655]	valid_0's multi_logloss: 0.173856
[2656]	valid_0's multi_logloss: 0.173827
[2657]	valid_0's multi_logloss: 0.173797
[2658]	valid_0's multi_logloss: 0.17377
[2659]	valid_0's multi_logloss: 0.173743
[2660]	valid_0's multi_logloss: 0.173713
[2661]	valid_0's multi_logloss: 0.173682
[2662]	valid_0's multi_logloss: 0.173654
[2663]	valid_0's multi_logloss: 0.173626
[2664]	valid_0's multi_logloss: 0.173596
[2665]	valid_0's multi_logloss: 0.173569
[2666]	valid_0's multi_logloss: 0.173541
[2667]	valid_0's multi_logloss: 0.173517
[2668]	valid_0's multi_logloss: 0.17349
[2669]	valid_0's m

[2846]	valid_0's multi_logloss: 0.168963
[2847]	valid_0's multi_logloss: 0.168936
[2848]	valid_0's multi_logloss: 0.168915
[2849]	valid_0's multi_logloss: 0.168892
[2850]	valid_0's multi_logloss: 0.16887
[2851]	valid_0's multi_logloss: 0.168848
[2852]	valid_0's multi_logloss: 0.168827
[2853]	valid_0's multi_logloss: 0.168806
[2854]	valid_0's multi_logloss: 0.168784
[2855]	valid_0's multi_logloss: 0.168761
[2856]	valid_0's multi_logloss: 0.168739
[2857]	valid_0's multi_logloss: 0.168719
[2858]	valid_0's multi_logloss: 0.168698
[2859]	valid_0's multi_logloss: 0.168674
[2860]	valid_0's multi_logloss: 0.168651
[2861]	valid_0's multi_logloss: 0.168627
[2862]	valid_0's multi_logloss: 0.168601
[2863]	valid_0's multi_logloss: 0.168575
[2864]	valid_0's multi_logloss: 0.168552
[2865]	valid_0's multi_logloss: 0.16853
[2866]	valid_0's multi_logloss: 0.168509
[2867]	valid_0's multi_logloss: 0.168485
[2868]	valid_0's multi_logloss: 0.168459
[2869]	valid_0's multi_logloss: 0.168438
[2870]	valid_0's m

[3047]	valid_0's multi_logloss: 0.164796
[3048]	valid_0's multi_logloss: 0.164779
[3049]	valid_0's multi_logloss: 0.164757
[3050]	valid_0's multi_logloss: 0.164735
[3051]	valid_0's multi_logloss: 0.164719
[3052]	valid_0's multi_logloss: 0.164699
[3053]	valid_0's multi_logloss: 0.164679
[3054]	valid_0's multi_logloss: 0.16466
[3055]	valid_0's multi_logloss: 0.164648
[3056]	valid_0's multi_logloss: 0.164627
[3057]	valid_0's multi_logloss: 0.164608
[3058]	valid_0's multi_logloss: 0.164591
[3059]	valid_0's multi_logloss: 0.164576
[3060]	valid_0's multi_logloss: 0.164554
[3061]	valid_0's multi_logloss: 0.164535
[3062]	valid_0's multi_logloss: 0.164517
[3063]	valid_0's multi_logloss: 0.164501
[3064]	valid_0's multi_logloss: 0.16448
[3065]	valid_0's multi_logloss: 0.164462
[3066]	valid_0's multi_logloss: 0.164444
[3067]	valid_0's multi_logloss: 0.164427
[3068]	valid_0's multi_logloss: 0.164407
[3069]	valid_0's multi_logloss: 0.164387
[3070]	valid_0's multi_logloss: 0.164369
[3071]	valid_0's m

[3248]	valid_0's multi_logloss: 0.161476
[3249]	valid_0's multi_logloss: 0.161458
[3250]	valid_0's multi_logloss: 0.161443
[3251]	valid_0's multi_logloss: 0.161429
[3252]	valid_0's multi_logloss: 0.161415
[3253]	valid_0's multi_logloss: 0.161404
[3254]	valid_0's multi_logloss: 0.161388
[3255]	valid_0's multi_logloss: 0.161375
[3256]	valid_0's multi_logloss: 0.161359
[3257]	valid_0's multi_logloss: 0.161347
[3258]	valid_0's multi_logloss: 0.161333
[3259]	valid_0's multi_logloss: 0.161321
[3260]	valid_0's multi_logloss: 0.161306
[3261]	valid_0's multi_logloss: 0.16129
[3262]	valid_0's multi_logloss: 0.16128
[3263]	valid_0's multi_logloss: 0.161264
[3264]	valid_0's multi_logloss: 0.161249
[3265]	valid_0's multi_logloss: 0.161236
[3266]	valid_0's multi_logloss: 0.161223
[3267]	valid_0's multi_logloss: 0.161209
[3268]	valid_0's multi_logloss: 0.161193
[3269]	valid_0's multi_logloss: 0.161177
[3270]	valid_0's multi_logloss: 0.16116
[3271]	valid_0's multi_logloss: 0.161147
[3272]	valid_0's mu

[3450]	valid_0's multi_logloss: 0.158754
[3451]	valid_0's multi_logloss: 0.15874
[3452]	valid_0's multi_logloss: 0.158723
[3453]	valid_0's multi_logloss: 0.158716
[3454]	valid_0's multi_logloss: 0.158705
[3455]	valid_0's multi_logloss: 0.158692
[3456]	valid_0's multi_logloss: 0.158682
[3457]	valid_0's multi_logloss: 0.158668
[3458]	valid_0's multi_logloss: 0.158655
[3459]	valid_0's multi_logloss: 0.158642
[3460]	valid_0's multi_logloss: 0.158632
[3461]	valid_0's multi_logloss: 0.158621
[3462]	valid_0's multi_logloss: 0.158599
[3463]	valid_0's multi_logloss: 0.158589
[3464]	valid_0's multi_logloss: 0.15858
[3465]	valid_0's multi_logloss: 0.158563
[3466]	valid_0's multi_logloss: 0.158549
[3467]	valid_0's multi_logloss: 0.158538
[3468]	valid_0's multi_logloss: 0.158525
[3469]	valid_0's multi_logloss: 0.158512
[3470]	valid_0's multi_logloss: 0.158503
[3471]	valid_0's multi_logloss: 0.158495
[3472]	valid_0's multi_logloss: 0.158487
[3473]	valid_0's multi_logloss: 0.158475
[3474]	valid_0's m

[3651]	valid_0's multi_logloss: 0.156536
[3652]	valid_0's multi_logloss: 0.156524
[3653]	valid_0's multi_logloss: 0.156511
[3654]	valid_0's multi_logloss: 0.156501
[3655]	valid_0's multi_logloss: 0.156495
[3656]	valid_0's multi_logloss: 0.156488
[3657]	valid_0's multi_logloss: 0.156479
[3658]	valid_0's multi_logloss: 0.156473
[3659]	valid_0's multi_logloss: 0.156466
[3660]	valid_0's multi_logloss: 0.156456
[3661]	valid_0's multi_logloss: 0.156445
[3662]	valid_0's multi_logloss: 0.156437
[3663]	valid_0's multi_logloss: 0.156423
[3664]	valid_0's multi_logloss: 0.156411
[3665]	valid_0's multi_logloss: 0.156399
[3666]	valid_0's multi_logloss: 0.156386
[3667]	valid_0's multi_logloss: 0.156371
[3668]	valid_0's multi_logloss: 0.156361
[3669]	valid_0's multi_logloss: 0.15635
[3670]	valid_0's multi_logloss: 0.156341
[3671]	valid_0's multi_logloss: 0.156333
[3672]	valid_0's multi_logloss: 0.156321
[3673]	valid_0's multi_logloss: 0.156315
[3674]	valid_0's multi_logloss: 0.156304
[3675]	valid_0's 

[3853]	valid_0's multi_logloss: 0.154651
[3854]	valid_0's multi_logloss: 0.15464
[3855]	valid_0's multi_logloss: 0.154634
[3856]	valid_0's multi_logloss: 0.154623
[3857]	valid_0's multi_logloss: 0.154615
[3858]	valid_0's multi_logloss: 0.154604
[3859]	valid_0's multi_logloss: 0.154601
[3860]	valid_0's multi_logloss: 0.154591
[3861]	valid_0's multi_logloss: 0.154579
[3862]	valid_0's multi_logloss: 0.154574
[3863]	valid_0's multi_logloss: 0.154567
[3864]	valid_0's multi_logloss: 0.15456
[3865]	valid_0's multi_logloss: 0.154551
[3866]	valid_0's multi_logloss: 0.154543
[3867]	valid_0's multi_logloss: 0.154536
[3868]	valid_0's multi_logloss: 0.15453
[3869]	valid_0's multi_logloss: 0.154523
[3870]	valid_0's multi_logloss: 0.154514
[3871]	valid_0's multi_logloss: 0.154506
[3872]	valid_0's multi_logloss: 0.154495
[3873]	valid_0's multi_logloss: 0.15449
[3874]	valid_0's multi_logloss: 0.154484
[3875]	valid_0's multi_logloss: 0.154476
[3876]	valid_0's multi_logloss: 0.154467
[3877]	valid_0's mul

[4054]	valid_0's multi_logloss: 0.153167
[4055]	valid_0's multi_logloss: 0.153161
[4056]	valid_0's multi_logloss: 0.153152
[4057]	valid_0's multi_logloss: 0.153144
[4058]	valid_0's multi_logloss: 0.153137
[4059]	valid_0's multi_logloss: 0.153132
[4060]	valid_0's multi_logloss: 0.153126
[4061]	valid_0's multi_logloss: 0.153119
[4062]	valid_0's multi_logloss: 0.153115
[4063]	valid_0's multi_logloss: 0.153109
[4064]	valid_0's multi_logloss: 0.153103
[4065]	valid_0's multi_logloss: 0.153094
[4066]	valid_0's multi_logloss: 0.153085
[4067]	valid_0's multi_logloss: 0.153083
[4068]	valid_0's multi_logloss: 0.153077
[4069]	valid_0's multi_logloss: 0.153072
[4070]	valid_0's multi_logloss: 0.153065
[4071]	valid_0's multi_logloss: 0.153062
[4072]	valid_0's multi_logloss: 0.153059
[4073]	valid_0's multi_logloss: 0.15305
[4074]	valid_0's multi_logloss: 0.15304
[4075]	valid_0's multi_logloss: 0.153035
[4076]	valid_0's multi_logloss: 0.153028
[4077]	valid_0's multi_logloss: 0.153024
[4078]	valid_0's m

[4255]	valid_0's multi_logloss: 0.151909
[4256]	valid_0's multi_logloss: 0.151904
[4257]	valid_0's multi_logloss: 0.151895
[4258]	valid_0's multi_logloss: 0.151889
[4259]	valid_0's multi_logloss: 0.151885
[4260]	valid_0's multi_logloss: 0.15188
[4261]	valid_0's multi_logloss: 0.151875
[4262]	valid_0's multi_logloss: 0.151873
[4263]	valid_0's multi_logloss: 0.151866
[4264]	valid_0's multi_logloss: 0.15186
[4265]	valid_0's multi_logloss: 0.151854
[4266]	valid_0's multi_logloss: 0.151849
[4267]	valid_0's multi_logloss: 0.151843
[4268]	valid_0's multi_logloss: 0.15184
[4269]	valid_0's multi_logloss: 0.151835
[4270]	valid_0's multi_logloss: 0.151832
[4271]	valid_0's multi_logloss: 0.151826
[4272]	valid_0's multi_logloss: 0.151821
[4273]	valid_0's multi_logloss: 0.151814
[4274]	valid_0's multi_logloss: 0.151808
[4275]	valid_0's multi_logloss: 0.1518
[4276]	valid_0's multi_logloss: 0.151793
[4277]	valid_0's multi_logloss: 0.151791
[4278]	valid_0's multi_logloss: 0.151785
[4279]	valid_0's mult

[4457]	valid_0's multi_logloss: 0.15085
[4458]	valid_0's multi_logloss: 0.150843
[4459]	valid_0's multi_logloss: 0.150834
[4460]	valid_0's multi_logloss: 0.150827
[4461]	valid_0's multi_logloss: 0.150821
[4462]	valid_0's multi_logloss: 0.150817
[4463]	valid_0's multi_logloss: 0.150816
[4464]	valid_0's multi_logloss: 0.150812
[4465]	valid_0's multi_logloss: 0.150807
[4466]	valid_0's multi_logloss: 0.150801
[4467]	valid_0's multi_logloss: 0.150795
[4468]	valid_0's multi_logloss: 0.150792
[4469]	valid_0's multi_logloss: 0.150785
[4470]	valid_0's multi_logloss: 0.150777
[4471]	valid_0's multi_logloss: 0.150772
[4472]	valid_0's multi_logloss: 0.150768
[4473]	valid_0's multi_logloss: 0.150762
[4474]	valid_0's multi_logloss: 0.150762
[4475]	valid_0's multi_logloss: 0.150757
[4476]	valid_0's multi_logloss: 0.15075
[4477]	valid_0's multi_logloss: 0.150747
[4478]	valid_0's multi_logloss: 0.15074
[4479]	valid_0's multi_logloss: 0.150733
[4480]	valid_0's multi_logloss: 0.150734
[4481]	valid_0's mu

[4658]	valid_0's multi_logloss: 0.14999
[4659]	valid_0's multi_logloss: 0.149987
[4660]	valid_0's multi_logloss: 0.149984
[4661]	valid_0's multi_logloss: 0.149981
[4662]	valid_0's multi_logloss: 0.149976
[4663]	valid_0's multi_logloss: 0.149972
[4664]	valid_0's multi_logloss: 0.149971
[4665]	valid_0's multi_logloss: 0.149966
[4666]	valid_0's multi_logloss: 0.14996
[4667]	valid_0's multi_logloss: 0.149958
[4668]	valid_0's multi_logloss: 0.149956
[4669]	valid_0's multi_logloss: 0.14995
[4670]	valid_0's multi_logloss: 0.149947
[4671]	valid_0's multi_logloss: 0.149941
[4672]	valid_0's multi_logloss: 0.149937
[4673]	valid_0's multi_logloss: 0.149927
[4674]	valid_0's multi_logloss: 0.149922
[4675]	valid_0's multi_logloss: 0.149922
[4676]	valid_0's multi_logloss: 0.149919
[4677]	valid_0's multi_logloss: 0.149917
[4678]	valid_0's multi_logloss: 0.149913
[4679]	valid_0's multi_logloss: 0.149914
[4680]	valid_0's multi_logloss: 0.149911
[4681]	valid_0's multi_logloss: 0.149908
[4682]	valid_0's mu

[4860]	valid_0's multi_logloss: 0.149271
[4861]	valid_0's multi_logloss: 0.14927
[4862]	valid_0's multi_logloss: 0.149266
[4863]	valid_0's multi_logloss: 0.149262
[4864]	valid_0's multi_logloss: 0.14926
[4865]	valid_0's multi_logloss: 0.149257
[4866]	valid_0's multi_logloss: 0.149257
[4867]	valid_0's multi_logloss: 0.149252
[4868]	valid_0's multi_logloss: 0.149245
[4869]	valid_0's multi_logloss: 0.149243
[4870]	valid_0's multi_logloss: 0.149239
[4871]	valid_0's multi_logloss: 0.149238
[4872]	valid_0's multi_logloss: 0.149235
[4873]	valid_0's multi_logloss: 0.149231
[4874]	valid_0's multi_logloss: 0.149228
[4875]	valid_0's multi_logloss: 0.149223
[4876]	valid_0's multi_logloss: 0.149218
[4877]	valid_0's multi_logloss: 0.149213
[4878]	valid_0's multi_logloss: 0.149209
[4879]	valid_0's multi_logloss: 0.149205
[4880]	valid_0's multi_logloss: 0.149203
[4881]	valid_0's multi_logloss: 0.149199
[4882]	valid_0's multi_logloss: 0.149196
[4883]	valid_0's multi_logloss: 0.149193
[4884]	valid_0's m

[5061]	valid_0's multi_logloss: 0.148622
[5062]	valid_0's multi_logloss: 0.148618
[5063]	valid_0's multi_logloss: 0.148613
[5064]	valid_0's multi_logloss: 0.148611
[5065]	valid_0's multi_logloss: 0.148608
[5066]	valid_0's multi_logloss: 0.148605
[5067]	valid_0's multi_logloss: 0.148603
[5068]	valid_0's multi_logloss: 0.1486
[5069]	valid_0's multi_logloss: 0.148599
[5070]	valid_0's multi_logloss: 0.148594
[5071]	valid_0's multi_logloss: 0.148591
[5072]	valid_0's multi_logloss: 0.14859
[5073]	valid_0's multi_logloss: 0.148591
[5074]	valid_0's multi_logloss: 0.148586
[5075]	valid_0's multi_logloss: 0.148583
[5076]	valid_0's multi_logloss: 0.148581
[5077]	valid_0's multi_logloss: 0.148577
[5078]	valid_0's multi_logloss: 0.148573
[5079]	valid_0's multi_logloss: 0.14857
[5080]	valid_0's multi_logloss: 0.148567
[5081]	valid_0's multi_logloss: 0.14856
[5082]	valid_0's multi_logloss: 0.148554
[5083]	valid_0's multi_logloss: 0.148547
[5084]	valid_0's multi_logloss: 0.148545
[5085]	valid_0's mult

[5263]	valid_0's multi_logloss: 0.148075
[5264]	valid_0's multi_logloss: 0.148071
[5265]	valid_0's multi_logloss: 0.148067
[5266]	valid_0's multi_logloss: 0.148063
[5267]	valid_0's multi_logloss: 0.148058
[5268]	valid_0's multi_logloss: 0.148054
[5269]	valid_0's multi_logloss: 0.14805
[5270]	valid_0's multi_logloss: 0.14805
[5271]	valid_0's multi_logloss: 0.148044
[5272]	valid_0's multi_logloss: 0.148038
[5273]	valid_0's multi_logloss: 0.148034
[5274]	valid_0's multi_logloss: 0.148031
[5275]	valid_0's multi_logloss: 0.148029
[5276]	valid_0's multi_logloss: 0.148024
[5277]	valid_0's multi_logloss: 0.148018
[5278]	valid_0's multi_logloss: 0.148009
[5279]	valid_0's multi_logloss: 0.148009
[5280]	valid_0's multi_logloss: 0.148004
[5281]	valid_0's multi_logloss: 0.148
[5282]	valid_0's multi_logloss: 0.148
[5283]	valid_0's multi_logloss: 0.147996
[5284]	valid_0's multi_logloss: 0.147992
[5285]	valid_0's multi_logloss: 0.147991
[5286]	valid_0's multi_logloss: 0.147987
[5287]	valid_0's multi_l

[5464]	valid_0's multi_logloss: 0.147598
[5465]	valid_0's multi_logloss: 0.147596
[5466]	valid_0's multi_logloss: 0.147594
[5467]	valid_0's multi_logloss: 0.147592
[5468]	valid_0's multi_logloss: 0.147593
[5469]	valid_0's multi_logloss: 0.147591
[5470]	valid_0's multi_logloss: 0.14759
[5471]	valid_0's multi_logloss: 0.147589
[5472]	valid_0's multi_logloss: 0.147588
[5473]	valid_0's multi_logloss: 0.147586
[5474]	valid_0's multi_logloss: 0.147584
[5475]	valid_0's multi_logloss: 0.147582
[5476]	valid_0's multi_logloss: 0.147581
[5477]	valid_0's multi_logloss: 0.147583
[5478]	valid_0's multi_logloss: 0.147581
[5479]	valid_0's multi_logloss: 0.147583
[5480]	valid_0's multi_logloss: 0.147578
[5481]	valid_0's multi_logloss: 0.147577
[5482]	valid_0's multi_logloss: 0.147577
[5483]	valid_0's multi_logloss: 0.147577
[5484]	valid_0's multi_logloss: 0.147575
[5485]	valid_0's multi_logloss: 0.147573
[5486]	valid_0's multi_logloss: 0.147571
[5487]	valid_0's multi_logloss: 0.147564
[5488]	valid_0's 

[5665]	valid_0's multi_logloss: 0.147243
[5666]	valid_0's multi_logloss: 0.14724
[5667]	valid_0's multi_logloss: 0.147238
[5668]	valid_0's multi_logloss: 0.147234
[5669]	valid_0's multi_logloss: 0.147236
[5670]	valid_0's multi_logloss: 0.147236
[5671]	valid_0's multi_logloss: 0.147234
[5672]	valid_0's multi_logloss: 0.147235
[5673]	valid_0's multi_logloss: 0.147233
[5674]	valid_0's multi_logloss: 0.147232
[5675]	valid_0's multi_logloss: 0.147233
[5676]	valid_0's multi_logloss: 0.147228
[5677]	valid_0's multi_logloss: 0.147226
[5678]	valid_0's multi_logloss: 0.147221
[5679]	valid_0's multi_logloss: 0.147218
[5680]	valid_0's multi_logloss: 0.147217
[5681]	valid_0's multi_logloss: 0.147215
[5682]	valid_0's multi_logloss: 0.147216
[5683]	valid_0's multi_logloss: 0.147216
[5684]	valid_0's multi_logloss: 0.147213
[5685]	valid_0's multi_logloss: 0.147208
[5686]	valid_0's multi_logloss: 0.147209
[5687]	valid_0's multi_logloss: 0.147203
[5688]	valid_0's multi_logloss: 0.147202
[5689]	valid_0's 

[5867]	valid_0's multi_logloss: 0.146898
[5868]	valid_0's multi_logloss: 0.146895
[5869]	valid_0's multi_logloss: 0.146894
[5870]	valid_0's multi_logloss: 0.146894
[5871]	valid_0's multi_logloss: 0.146893
[5872]	valid_0's multi_logloss: 0.146888
[5873]	valid_0's multi_logloss: 0.146885
[5874]	valid_0's multi_logloss: 0.146888
[5875]	valid_0's multi_logloss: 0.146886
[5876]	valid_0's multi_logloss: 0.146884
[5877]	valid_0's multi_logloss: 0.146885
[5878]	valid_0's multi_logloss: 0.146884
[5879]	valid_0's multi_logloss: 0.146883
[5880]	valid_0's multi_logloss: 0.146879
[5881]	valid_0's multi_logloss: 0.146876
[5882]	valid_0's multi_logloss: 0.146873
[5883]	valid_0's multi_logloss: 0.146871
[5884]	valid_0's multi_logloss: 0.146869
[5885]	valid_0's multi_logloss: 0.146869
[5886]	valid_0's multi_logloss: 0.146866
[5887]	valid_0's multi_logloss: 0.146866
[5888]	valid_0's multi_logloss: 0.146863
[5889]	valid_0's multi_logloss: 0.146859
[5890]	valid_0's multi_logloss: 0.146855
[5891]	valid_0's

[6069]	valid_0's multi_logloss: 0.146554
[6070]	valid_0's multi_logloss: 0.146552
[6071]	valid_0's multi_logloss: 0.146552
[6072]	valid_0's multi_logloss: 0.146552
[6073]	valid_0's multi_logloss: 0.14655
[6074]	valid_0's multi_logloss: 0.146547
[6075]	valid_0's multi_logloss: 0.146545
[6076]	valid_0's multi_logloss: 0.146541
[6077]	valid_0's multi_logloss: 0.146539
[6078]	valid_0's multi_logloss: 0.14654
[6079]	valid_0's multi_logloss: 0.14654
[6080]	valid_0's multi_logloss: 0.14654
[6081]	valid_0's multi_logloss: 0.146537
[6082]	valid_0's multi_logloss: 0.146534
[6083]	valid_0's multi_logloss: 0.146532
[6084]	valid_0's multi_logloss: 0.146533
[6085]	valid_0's multi_logloss: 0.146533
[6086]	valid_0's multi_logloss: 0.146529
[6087]	valid_0's multi_logloss: 0.146526
[6088]	valid_0's multi_logloss: 0.146525
[6089]	valid_0's multi_logloss: 0.146526
[6090]	valid_0's multi_logloss: 0.146526
[6091]	valid_0's multi_logloss: 0.14653
[6092]	valid_0's multi_logloss: 0.146532
[6093]	valid_0's mult

[6270]	valid_0's multi_logloss: 0.146307
[6271]	valid_0's multi_logloss: 0.146304
[6272]	valid_0's multi_logloss: 0.146305
[6273]	valid_0's multi_logloss: 0.146304
[6274]	valid_0's multi_logloss: 0.146304
[6275]	valid_0's multi_logloss: 0.146303
[6276]	valid_0's multi_logloss: 0.146303
[6277]	valid_0's multi_logloss: 0.146305
[6278]	valid_0's multi_logloss: 0.146305
[6279]	valid_0's multi_logloss: 0.146306
[6280]	valid_0's multi_logloss: 0.146307
[6281]	valid_0's multi_logloss: 0.146301
[6282]	valid_0's multi_logloss: 0.146298
[6283]	valid_0's multi_logloss: 0.146299
[6284]	valid_0's multi_logloss: 0.146297
[6285]	valid_0's multi_logloss: 0.146297
[6286]	valid_0's multi_logloss: 0.146295
[6287]	valid_0's multi_logloss: 0.146293
[6288]	valid_0's multi_logloss: 0.146291
[6289]	valid_0's multi_logloss: 0.146293
[6290]	valid_0's multi_logloss: 0.146291
[6291]	valid_0's multi_logloss: 0.14629
[6292]	valid_0's multi_logloss: 0.146286
[6293]	valid_0's multi_logloss: 0.146285
[6294]	valid_0's 

[6472]	valid_0's multi_logloss: 0.146144
[6473]	valid_0's multi_logloss: 0.146142
[6474]	valid_0's multi_logloss: 0.146143
[6475]	valid_0's multi_logloss: 0.14614
[6476]	valid_0's multi_logloss: 0.146137
[6477]	valid_0's multi_logloss: 0.146136
[6478]	valid_0's multi_logloss: 0.146134
[6479]	valid_0's multi_logloss: 0.146135
[6480]	valid_0's multi_logloss: 0.146136
[6481]	valid_0's multi_logloss: 0.146137
[6482]	valid_0's multi_logloss: 0.146136
[6483]	valid_0's multi_logloss: 0.146138
[6484]	valid_0's multi_logloss: 0.146133
[6485]	valid_0's multi_logloss: 0.146132
[6486]	valid_0's multi_logloss: 0.146134
[6487]	valid_0's multi_logloss: 0.146136
[6488]	valid_0's multi_logloss: 0.146137
[6489]	valid_0's multi_logloss: 0.146134
[6490]	valid_0's multi_logloss: 0.146132
[6491]	valid_0's multi_logloss: 0.146132
[6492]	valid_0's multi_logloss: 0.146132
[6493]	valid_0's multi_logloss: 0.146132
[6494]	valid_0's multi_logloss: 0.146133
[6495]	valid_0's multi_logloss: 0.146129
[6496]	valid_0's 

[6673]	valid_0's multi_logloss: 0.145943
[6674]	valid_0's multi_logloss: 0.145947
[6675]	valid_0's multi_logloss: 0.145951
[6676]	valid_0's multi_logloss: 0.145949
[6677]	valid_0's multi_logloss: 0.145948
[6678]	valid_0's multi_logloss: 0.145942
[6679]	valid_0's multi_logloss: 0.14594
[6680]	valid_0's multi_logloss: 0.14594
[6681]	valid_0's multi_logloss: 0.14594
[6682]	valid_0's multi_logloss: 0.145942
[6683]	valid_0's multi_logloss: 0.145943
[6684]	valid_0's multi_logloss: 0.145944
[6685]	valid_0's multi_logloss: 0.145946
[6686]	valid_0's multi_logloss: 0.145947
[6687]	valid_0's multi_logloss: 0.145945
[6688]	valid_0's multi_logloss: 0.145947
[6689]	valid_0's multi_logloss: 0.145947
[6690]	valid_0's multi_logloss: 0.145946
[6691]	valid_0's multi_logloss: 0.145948
[6692]	valid_0's multi_logloss: 0.145949
[6693]	valid_0's multi_logloss: 0.145947
[6694]	valid_0's multi_logloss: 0.145946
[6695]	valid_0's multi_logloss: 0.145949
[6696]	valid_0's multi_logloss: 0.145946
[6697]	valid_0's mu

[6875]	valid_0's multi_logloss: 0.145841
[6876]	valid_0's multi_logloss: 0.145841
[6877]	valid_0's multi_logloss: 0.145842
[6878]	valid_0's multi_logloss: 0.14584
[6879]	valid_0's multi_logloss: 0.14584
[6880]	valid_0's multi_logloss: 0.145837
[6881]	valid_0's multi_logloss: 0.145836
[6882]	valid_0's multi_logloss: 0.145833
[6883]	valid_0's multi_logloss: 0.145832
[6884]	valid_0's multi_logloss: 0.14583
[6885]	valid_0's multi_logloss: 0.145825
[6886]	valid_0's multi_logloss: 0.145823
[6887]	valid_0's multi_logloss: 0.145823
[6888]	valid_0's multi_logloss: 0.145822
[6889]	valid_0's multi_logloss: 0.145824
[6890]	valid_0's multi_logloss: 0.145821
[6891]	valid_0's multi_logloss: 0.145822
[6892]	valid_0's multi_logloss: 0.145824
[6893]	valid_0's multi_logloss: 0.145825
[6894]	valid_0's multi_logloss: 0.145824
[6895]	valid_0's multi_logloss: 0.145822
[6896]	valid_0's multi_logloss: 0.145822
[6897]	valid_0's multi_logloss: 0.145821
[6898]	valid_0's multi_logloss: 0.14582
[6899]	valid_0's mul

[7077]	valid_0's multi_logloss: 0.145664
[7078]	valid_0's multi_logloss: 0.145661
[7079]	valid_0's multi_logloss: 0.145657
[7080]	valid_0's multi_logloss: 0.145657
[7081]	valid_0's multi_logloss: 0.145657
[7082]	valid_0's multi_logloss: 0.145658
[7083]	valid_0's multi_logloss: 0.145659
[7084]	valid_0's multi_logloss: 0.145656
[7085]	valid_0's multi_logloss: 0.145657
[7086]	valid_0's multi_logloss: 0.145657
[7087]	valid_0's multi_logloss: 0.145659
[7088]	valid_0's multi_logloss: 0.145659
[7089]	valid_0's multi_logloss: 0.145659
[7090]	valid_0's multi_logloss: 0.145658
[7091]	valid_0's multi_logloss: 0.145656
[7092]	valid_0's multi_logloss: 0.145658
[7093]	valid_0's multi_logloss: 0.145658
[7094]	valid_0's multi_logloss: 0.145661
[7095]	valid_0's multi_logloss: 0.145661
[7096]	valid_0's multi_logloss: 0.145659
[7097]	valid_0's multi_logloss: 0.145662
[7098]	valid_0's multi_logloss: 0.14566
[7099]	valid_0's multi_logloss: 0.145663
[7100]	valid_0's multi_logloss: 0.145661
[7101]	valid_0's 

[7279]	valid_0's multi_logloss: 0.145549
[7280]	valid_0's multi_logloss: 0.145546
[7281]	valid_0's multi_logloss: 0.145546
[7282]	valid_0's multi_logloss: 0.145545
[7283]	valid_0's multi_logloss: 0.145545
[7284]	valid_0's multi_logloss: 0.145544
[7285]	valid_0's multi_logloss: 0.145543
[7286]	valid_0's multi_logloss: 0.145541
[7287]	valid_0's multi_logloss: 0.145541
[7288]	valid_0's multi_logloss: 0.145543
[7289]	valid_0's multi_logloss: 0.145542
[7290]	valid_0's multi_logloss: 0.145541
[7291]	valid_0's multi_logloss: 0.145542
[7292]	valid_0's multi_logloss: 0.145539
[7293]	valid_0's multi_logloss: 0.145536
[7294]	valid_0's multi_logloss: 0.14554
[7295]	valid_0's multi_logloss: 0.145538
[7296]	valid_0's multi_logloss: 0.145537
[7297]	valid_0's multi_logloss: 0.145538
[7298]	valid_0's multi_logloss: 0.145535
[7299]	valid_0's multi_logloss: 0.145537
[7300]	valid_0's multi_logloss: 0.145539
[7301]	valid_0's multi_logloss: 0.145541
[7302]	valid_0's multi_logloss: 0.14554
[7303]	valid_0's m

In [47]:
print(f'{accuracy_score(y_val, p_val) * 100:.4f}%')
p_tst = clf.predict(test_sel)

93.8519%


In [48]:
#submission
Submission_set = pd.read_csv(sample_file, index_col=0)

In [49]:
Submission_set['class'] = p_tst
print(Submission_set.shape)
Submission_set.head()

(80000, 1)


Unnamed: 0_level_0,class
id,Unnamed: 1_level_1
320000,2.0
320001,0.0
320002,2.0
320003,0.0
320004,2.0


In [50]:
#class 값 카운팅
Submission_set[target_col].value_counts()

2.0    40580
0.0    29998
1.0     9422
Name: class, dtype: int64

In [51]:
#저장하기
Submission_set.to_csv(submission_file)
