In [36]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from tqdm import tqdm
import gc

from scipy import signal
import lightgbm as lgb
from sklearn.metrics import accuracy_score

pd.set_option('display.max_rows', 600)
pd.set_option('display.max_columns', 200)

import warnings
warnings.filterwarnings('ignore')

from keras.layers import * # Keras is the most friendly Neural Network library, this Kernel use a lot of layers classes
from keras.models import Model
from keras import backend as K # The backend give us access to tensorflow operations and allow us to create the Attention class
from keras import optimizers # Allow us to access the Adam class to modify some parameters
from sklearn.model_selection import GridSearchCV, StratifiedKFold # Used to use Kfold to train our model
from keras.callbacks import * # This object helps the model to train in a smarter way, avoiding overfitting

from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [2]:
X_train = pd.read_csv('../input/X_train.csv')
y_train = pd.read_csv('../input/y_train.csv')
X_test  = pd.read_csv('../input/X_test.csv')

In [3]:
targets = y_train['surface'].value_counts().index
print(targets)
print(y_train['surface'].value_counts())

Index(['concrete', 'soft_pvc', 'wood', 'tiled', 'fine_concrete',
       'hard_tiles_large_space', 'soft_tiles', 'carpet', 'hard_tiles'],
      dtype='object')
concrete                  779
soft_pvc                  732
wood                      607
tiled                     514
fine_concrete             363
hard_tiles_large_space    308
soft_tiles                297
carpet                    189
hard_tiles                 21
Name: surface, dtype: int64


In [4]:
target_to_id = {}
id_to_target = {}

for target in targets:
    if target not in target_to_id:
        new_id = len(target_to_id)
        target_to_id[target] = new_id
        id_to_target[new_id] = target

In [5]:
y_id = []
for i in range(len(y_train)):
    y_id.append(target_to_id[y_train['surface'].values[i]])
y_id = np.asarray(y_id)

In [6]:
class_num = len(targets)

In [7]:
y_id_one = np.zeros((len(y_id), class_num))
for i in range(len(y_id)):
    y_id_one[i, y_id[i]] = 1 

In [8]:
def norm_quat(df):
    norm = (df['orientation_X']**2 + df['orientation_Y']**2 + df['orientation_Z']**2 + df['orientation_W']**2)**0.5
    df['orientation_X'] /= norm
    df['orientation_Y'] /= norm
    df['orientation_Z'] /= norm
    df['orientation_W'] /= norm
    return df

In [9]:
import math

def quaternion_to_euler(x, y, z, w):
    t0 = +2.0 * (w * x + y * z)
    t1 = +1.0 - 2.0 * (x * x + y * y)
    X = math.atan2(t0, t1)

    t2 = +2.0 * (w * y - z * x)
    t2 = +1.0 if t2 > +1.0 else t2
    t2 = -1.0 if t2 < -1.0 else t2
    Y = math.asin(t2)

    t3 = +2.0 * (w * z + x * y)
    t4 = +1.0 - 2.0 * (y * y + z * z)
    Z = math.atan2(t3, t4)

    return X, Y, Z

In [10]:
X_train = norm_quat(X_train)
X_test = norm_quat(X_test)

In [11]:
def fe_step1 (df):
    """Quaternions to Euler Angles"""
    
    x = df['orientation_X'].values
    y = df['orientation_Y'].values
    z = df['orientation_Z'].values
    w = df['orientation_W'].values
    nx, ny, nz = np.zeros(len(x)), np.zeros(len(y)), np.zeros(len(z)),
    for i in tqdm(range(len(x))):
        xx, yy, zz = quaternion_to_euler(x[i], y[i], z[i], w[i])
        nx[i] = xx
        ny[i] = yy
        nz[i] = zz
    
    df['euler_X'] = nx
    df['euler_Y'] = ny
    df['euler_Z'] = nz
    return df

In [12]:
X_train = fe_step1(X_train)
X_test = fe_step1(X_test)

100%|██████████| 487680/487680 [00:01<00:00, 302695.50it/s]
100%|██████████| 488448/488448 [00:01<00:00, 299882.01it/s]


In [13]:
def highpass_filter(df):
    n_samples = 128
    sample_duration = 0.01
    sample_rate = n_samples * (1 / sample_duration)

    nyquist = 0.5 * sample_rate
    norm_low_cutoff = 500 / nyquist

    sos = signal.butter(10, Wn=[norm_low_cutoff], btype='highpass', output='sos')
    filtered_sig = signal.sosfilt(sos, df)
    
    return filtered_sig

In [14]:
def lowpass_filter(data, r=0.3):
    f_data = np.zeros_like(data)
    f_data[0] = data[0]
    for i in range(1, len(data)):
        if i % 128 == 0:
            f_data[i] = data[i]
        else:
            f_data[i] = r * data[i] + (1-r) * f_data[i-1]
    return f_data

In [15]:
for data in [X_train, X_test]:
    data['linear_acceleration_X'] = lowpass_filter(data['linear_acceleration_X'])
    data['linear_acceleration_Y'] = lowpass_filter(data['linear_acceleration_Y'])
    data['linear_acceleration_Z'] = lowpass_filter(data['linear_acceleration_Z'])
    data['angular_velocity_X'] = highpass_filter(data['angular_velocity_X'])
    data['angular_velocity_Y'] = highpass_filter(data['angular_velocity_Y'])
    data['angular_velocity_Z'] = highpass_filter(data['angular_velocity_Z'])

In [16]:
def mean_change_of_abs_change(x):
        return np.mean(np.diff(np.abs(np.diff(x))))

In [44]:
train = X_train.loc[:, 'orientation_X':'orientation_W']
test = X_test.loc[:, 'orientation_X':'orientation_W']

train.columns = ['X','Y','Z','W']
test.columns = ['X','Y','Z','W']

for i in range(4):
    for j in range(i+1, 4):
        train[train.columns[i]+'-'+train.columns[j]] = train.iloc[:, i] - train.iloc[:, j]
        test[test.columns[i]+'-'+test.columns[j]] = test.iloc[:, i] - test.iloc[:, j]
        
train = pd.concat([train, X_train.loc[:, 'euler_X':'euler_Z']], axis=1)
test = pd.concat([test, X_test.loc[:, 'euler_X':'euler_Z']], axis=1)

In [46]:
for data in [train, test]:
    data['euler_X-Y'] = data['euler_X']-data['euler_Y']
    data['euler_X-Z'] = data['euler_X']-data['euler_Z']
    data['euler_Y-Z'] = data['euler_Y']-data['euler_Z']
    
train = pd.concat([X_train['series_id'], train], axis=1)
test = pd.concat([X_test['series_id'], test], axis=1)

In [19]:
X_train.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z,euler_X,euler_Y,euler_Z
0,0_0,0,0,-0.758531,-0.634351,-0.10488,-0.10597,0.049,0.007993,0.000349,-0.74857,2.103,-9.7532,2.843273,-0.024668,1.396667
1,0_1,0,1,-0.758532,-0.634342,-0.1049,-0.106,-0.045981,0.001089,0.000993,-0.422014,1.92402,-9.65108,2.843201,-0.024662,1.396651
2,0_2,0,2,-0.758528,-0.634348,-0.10492,-0.10597,-0.060919,-0.010782,-0.005251,-0.374697,1.824474,-9.373766,2.843222,-0.024728,1.396677
3,0_3,0,3,-0.758516,-0.634357,-0.10495,-0.10597,-0.008044,-0.01407,-0.000219,-0.134236,1.606922,-9.590436,2.843183,-0.024769,1.396712
4,0_4,0,4,-0.758522,-0.634352,-0.10495,-0.10596,0.040927,-0.007411,0.010239,-0.246872,1.565515,-9.845605,2.843197,-0.024785,1.396698


In [20]:
def rotation_matrix(q0, q1, q2, q3):
    r00 = q0**2 - q1**2 - q2**2 + q3**2
    r01 = 2 * (q0*q3 + q1*q2)
    r02 = 2 * (q1*q3 - q0*q2)
    r10 = 2 * (q1*q2 - q0*q3)
    r11 = q0**2 - q1**2 + q2**2 - q3**2
    r12 = 2 * (q2*q3 + q0*q1)
    r20 = 2 * (q0*q2 + q1*q3)
    r21 = 2 * (-q0*q1 + q2*q3)
    r22 = q0**2 - q1**2 - q2**2 + q3**2
    R = np.array([[r00, r01, r02],[r10, r11, r12],[r20, r21, r22]])
    return R

In [21]:
def rotated_acceleration(df):
    
    df['rotated_acceleration_X'] = 0
    df['rotated_acceleration_Y'] = 0
    df['rotated_acceleration_Z'] = 0
    #df['rotated_angvel_X'] = 0
    #df['rotated_angvel_Y'] = 0
    #df['rotated_angvel_Z'] = 0
    
    q0_all = df['orientation_X'].values
    q1_all = df['orientation_Y'].values
    q2_all = df['orientation_Z'].values
    q3_all = df['orientation_W'].values
    la_X = df['linear_acceleration_X'].values
    la_Y = df['linear_acceleration_Y'].values
    la_Z = df['linear_acceleration_Z'].values
    #av_X = df['angular_velocity_X'].values
    #av_Y = df['angular_velocity_Y'].values
    #av_Z = df['angular_velocity_Z'].values
    
    for i in tqdm(range(len(df))):
        q0 = q0_all[i]
        q1 = q1_all[i]
        q2 = q2_all[i]
        q3 = q3_all[i]
        
        R = rotation_matrix(q0, q1, q2, q3)
        la = np.array([la_X[i], la_Y[i], la_Z[i]])
        #av = np.array([av_X[i], av_Y[i], av_Z[i]])
        rotated_la = np.dot(R.T, la)
        #rotated_av = np.dot(R.T, av)
        
        df.loc[i, 'rotated_acceleration_X'] = rotated_la[0]
        df.loc[i, 'rotated_acceleration_Y'] = rotated_la[1]
        df.loc[i, 'rotated_acceleration_Z'] = rotated_la[2]
        #df.loc[i, 'rotated_angvel_X'] = rotated_av[0]
        #df.loc[i, 'rotated_angvel_Y'] = rotated_av[1]
        #df.loc[i, 'rotated_angvel_Z'] = rotated_av[2]
    
    return df

In [22]:
X_train = rotated_acceleration(X_train)

100%|██████████| 487680/487680 [21:10<00:00, 383.74it/s]


In [23]:
X_test = rotated_acceleration(X_test)

100%|██████████| 488448/488448 [22:03<00:00, 368.93it/s]


In [24]:
X_train['rotated_acceleration_Y'] = X_train['rotated_acceleration_Y'] - 9.80665
X_test['rotated_acceleration_Y'] = X_test['rotated_acceleration_Y'] - 9.80665

from multiprocessing import Pool

def process_subtrain(arg_tuple):
    start, end, idx = arg_tuple
    X_tr = X_train.iloc[start:end, :]
    X_tr = rotated_acceleration(X_tr)
    return idx, X_tr

all_chunks = []

num_cores = 8 
total_size = len(X_train)
chunk_size = total_size/num_cores

for i in range(8):
    start_idx = int(i * chunk_size)
    end_idx = int(start_idx + chunk_size)
    chunk = (start_idx, end_idx, i)
    all_chunks.append(chunk)

pool = Pool()
results = pool.map(process_subtrain, all_chunks)    
results = sorted(results, key=lambda tup: tup[0])


X = np.concatenate([item[1] for item in results], axis=0)

train = pd.concat([train, X_train.loc[:, 'rotated_acceleration_X':'rotated_acceleration_Z']], axis=1)
test = pd.concat([test, X_test.loc[:, 'rotated_acceleration_X':'rotated_acceleration_Z']], axis=1)

In [48]:
def feat_eng(df):
    data = pd.DataFrame()
    #df['total_rotated_acce'] = (df['rotated_acceleration_X']**2 + df['rotated_acceleration_Y']**2 + df['rotated_acceleration_Z']**2)**0.5
    
    df_columns = df.columns[1:]
    for col in tqdm(df_columns):
        data[col + '_mean'] = df.groupby(['series_id'])[col].mean()
        #data[col + '_median'] = df.groupby(['series_id'])[col].median()
        data[col + '_max'] = df.groupby(['series_id'])[col].max()
        data[col + '_min'] = df.groupby(['series_id'])[col].min()
        data[col + '_std'] = df.groupby(['series_id'])[col].std()
        data[col + '_range'] = data[col + '_max'] - data[col + '_min']
        data[col + '_maxtoMin'] = data[col + '_max'] / data[col + '_min']
        
        data[col + '_mean_abs_chg'] = df.groupby(['series_id'])[col].apply(lambda x: np.mean(np.abs(np.diff(x))))
        data[col + '_mean_change_of_abs_change'] = df.groupby('series_id')[col].apply(mean_change_of_abs_change)
        data[col + '_abs_max'] = df.groupby(['series_id'])[col].apply(lambda x: np.max(np.abs(x)))
        data[col + '_abs_min'] = df.groupby(['series_id'])[col].apply(lambda x: np.min(np.abs(x)))
        data[col + '_abs_avg'] = (data[col + '_abs_min'] + data[col + '_abs_max'])/2
        
        
    #data['corr_linZangX'] = np.correlate(np.abs(tmp_df[:, 0]), np.abs(tmp_df[:, 1]))
    return data

In [49]:
train = feat_eng(train)
test = feat_eng(test)

100%|██████████| 16/16 [00:33<00:00,  2.07s/it]
100%|██████████| 16/16 [00:33<00:00,  2.07s/it]


In [50]:
data_col = train.columns

In [51]:
train.head()

Unnamed: 0_level_0,X_mean,X_max,X_min,X_std,X_range,X_maxtoMin,X_mean_abs_chg,X_mean_change_of_abs_change,X_abs_max,X_abs_min,X_abs_avg,Y_mean,Y_max,Y_min,Y_std,Y_range,Y_maxtoMin,Y_mean_abs_chg,Y_mean_change_of_abs_change,Y_abs_max,Y_abs_min,Y_abs_avg,Z_mean,Z_max,Z_min,Z_std,Z_range,Z_maxtoMin,Z_mean_abs_chg,Z_mean_change_of_abs_change,Z_abs_max,Z_abs_min,Z_abs_avg,W_mean,W_max,W_min,W_std,W_range,W_maxtoMin,W_mean_abs_chg,W_mean_change_of_abs_change,W_abs_max,W_abs_min,W_abs_avg,X-Y_mean,X-Y_max,X-Y_min,X-Y_std,X-Y_range,X-Y_maxtoMin,X-Y_mean_abs_chg,X-Y_mean_change_of_abs_change,X-Y_abs_max,X-Y_abs_min,X-Y_abs_avg,X-Z_mean,X-Z_max,X-Z_min,X-Z_std,X-Z_range,X-Z_maxtoMin,X-Z_mean_abs_chg,X-Z_mean_change_of_abs_change,X-Z_abs_max,X-Z_abs_min,X-Z_abs_avg,X-W_mean,X-W_max,X-W_min,X-W_std,X-W_range,X-W_maxtoMin,X-W_mean_abs_chg,X-W_mean_change_of_abs_change,X-W_abs_max,X-W_abs_min,X-W_abs_avg,Y-Z_mean,Y-Z_max,Y-Z_min,Y-Z_std,Y-Z_range,Y-Z_maxtoMin,Y-Z_mean_abs_chg,Y-Z_mean_change_of_abs_change,Y-Z_abs_max,Y-Z_abs_min,Y-Z_abs_avg,Y-W_mean,Y-W_max,Y-W_min,Y-W_std,Y-W_range,Y-W_maxtoMin,Y-W_mean_abs_chg,Y-W_mean_change_of_abs_change,Y-W_abs_max,Y-W_abs_min,Y-W_abs_avg,Z-W_mean,Z-W_max,Z-W_min,Z-W_std,Z-W_range,Z-W_maxtoMin,Z-W_mean_abs_chg,Z-W_mean_change_of_abs_change,Z-W_abs_max,Z-W_abs_min,Z-W_abs_avg,euler_X_mean,euler_X_max,euler_X_min,euler_X_std,euler_X_range,euler_X_maxtoMin,euler_X_mean_abs_chg,euler_X_mean_change_of_abs_change,euler_X_abs_max,euler_X_abs_min,euler_X_abs_avg,euler_Y_mean,euler_Y_max,euler_Y_min,euler_Y_std,euler_Y_range,euler_Y_maxtoMin,euler_Y_mean_abs_chg,euler_Y_mean_change_of_abs_change,euler_Y_abs_max,euler_Y_abs_min,euler_Y_abs_avg,euler_Z_mean,euler_Z_max,euler_Z_min,euler_Z_std,euler_Z_range,euler_Z_maxtoMin,euler_Z_mean_abs_chg,euler_Z_mean_change_of_abs_change,euler_Z_abs_max,euler_Z_abs_min,euler_Z_abs_avg,euler_X-Y_mean,euler_X-Y_max,euler_X-Y_min,euler_X-Y_std,euler_X-Y_range,euler_X-Y_maxtoMin,euler_X-Y_mean_abs_chg,euler_X-Y_mean_change_of_abs_change,euler_X-Y_abs_max,euler_X-Y_abs_min,euler_X-Y_abs_avg,euler_X-Z_mean,euler_X-Z_max,euler_X-Z_min,euler_X-Z_std,euler_X-Z_range,euler_X-Z_maxtoMin,euler_X-Z_mean_abs_chg,euler_X-Z_mean_change_of_abs_change,euler_X-Z_abs_max,euler_X-Z_abs_min,euler_X-Z_abs_avg,euler_Y-Z_mean,euler_Y-Z_max,euler_Y-Z_min,euler_Y-Z_std,euler_Y-Z_range,euler_Y-Z_maxtoMin,euler_Y-Z_mean_abs_chg,euler_Y-Z_mean_change_of_abs_change,euler_Y-Z_abs_max,euler_Y-Z_abs_min,euler_Y-Z_abs_avg
series_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1
0,-0.758666,-0.758219,-0.759527,0.000363,0.001308,0.998278,1.6e-05,2.093952e-07,0.759527,0.758219,0.758873,-0.634007,-0.633057,-0.63456,0.000471,0.001502,0.997632,1.8e-05,2.393871e-08,0.63456,0.633057,0.633808,-0.105474,-0.10461,-0.10614,0.000432,0.00153,0.985582,5.1e-05,8.154597e-08,0.10614,0.10461,0.105375,-0.10647,-0.10559,-0.10705,0.000389,0.00146,0.986358,4.2e-05,4.78427e-07,0.10705,0.10559,0.10632,-0.124658,-0.1237,-0.126469,0.000827,0.00277,0.978098,3.1e-05,2.333339e-07,0.126469,0.1237,0.125084,-0.653191,-0.652286,-0.654917,0.000619,0.002631,0.995983,6e-05,3.037773e-07,0.654917,0.652286,0.653602,-0.652196,-0.651518,-0.652918,0.00035,0.0014,0.997856,4.6e-05,7.006584e-07,0.652918,0.651518,0.652218,-0.528533,-0.527307,-0.529639,0.000648,0.002332,0.995597,5.6e-05,-9.038861e-08,0.529639,0.527307,0.528473,-0.527538,-0.52614,-0.528858,0.000803,0.002719,0.99486,5.6e-05,3.064924e-07,0.528858,0.52614,0.527499,0.000995,0.00229,0.00041,0.000416,0.00188,5.585338,4.6e-05,3.968811e-07,0.00229,0.00041,0.00135,2.841734,2.843933,2.84021,0.001001,0.003723,1.001311,0.000113,8.529782e-07,2.843933,2.84021,2.842072,-0.025037,-0.023562,-0.025795,0.000503,0.002233,0.913436,6.5e-05,5.642347e-07,0.025795,0.023562,0.024678,1.396035,1.397395,1.393224,0.001221,0.00417,1.002993,4.9e-05,2.346825e-07,1.397395,1.393224,1.395309,2.866771,2.868977,2.865416,0.00093,0.003561,1.001243,0.000112,1.417213e-06,2.868977,2.865416,2.867197,1.445699,1.449096,1.443602,0.001315,0.005494,1.003805,0.000126,1.332646e-06,1.449096,1.443602,1.446349,-1.421072,-1.416831,-1.422772,0.001519,0.00594,0.995825,9.9e-05,8.456646e-08,1.422772,1.416831,1.419801
1,-0.958606,-0.958368,-0.95896,0.000151,0.000592,0.999383,2.4e-05,-4.594215e-07,0.95896,0.958368,0.958664,0.241867,0.2427,0.24074,0.000499,0.00196,1.008142,7e-05,1.544648e-07,0.2427,0.24074,0.24172,0.03165,0.032341,0.030504,0.000508,0.001837,1.060226,4.8e-05,3.271199e-07,0.032341,0.030504,0.031422,-0.146875,-0.14587,-0.14809,0.000521,0.00222,0.985008,0.000103,-1.976156e-06,0.14809,0.14587,0.14698,-1.200473,-1.199637,-1.201135,0.000384,0.001498,0.998753,5.5e-05,4.107781e-07,1.201135,1.199637,1.200386,-0.990256,-0.98934,-0.990791,0.00039,0.001451,0.998536,4.3e-05,3.110039e-07,0.990791,0.98934,0.990065,-0.811731,-0.81031,-0.812798,0.000601,0.002488,0.996939,0.00012,-2.504587e-06,0.812798,0.81031,0.811554,0.210217,0.211105,0.209558,0.00039,0.001548,1.007385,7.7e-05,9.248902e-07,0.211105,0.209558,0.210331,0.388742,0.39,0.387319,0.000661,0.002681,1.006921,0.00012,-2.915365e-06,0.39,0.387319,0.38866,0.178526,0.180008,0.176543,0.000823,0.003465,1.019625,0.000129,-1.649036e-06,0.180008,0.176543,0.178276,2.840129,2.842328,2.837615,0.001117,0.004713,1.001661,0.000212,-3.708373e-06,2.842328,2.837615,2.839972,-0.010369,-0.009109,-0.012073,0.000822,0.002963,0.754541,8.7e-05,8.4248e-11,0.012073,0.009109,0.010591,-0.49273,-0.490164,-0.494641,0.001131,0.004478,0.990948,0.000147,1.136899e-07,0.494641,0.490164,0.492402,2.850497,2.854401,2.848084,0.001546,0.006317,1.002218,0.000226,-1.823962e-06,2.854401,2.848084,2.851242,3.332859,3.335664,3.329928,0.001539,0.005736,1.001723,0.000264,-1.914639e-06,3.335664,3.329928,3.332796,0.482362,0.485394,0.478307,0.001833,0.007087,1.014817,0.000174,-9.067669e-08,0.485394,0.478307,0.481851
2,-0.512057,-0.509443,-0.514337,0.001377,0.004895,0.990483,4.1e-05,-2.461925e-08,0.514337,0.509443,0.51189,-0.846171,-0.844896,-0.847794,0.000785,0.002899,0.996581,2.4e-05,-6.737182e-07,0.847794,0.844896,0.846345,-0.129371,-0.12852,-0.1303,0.000541,0.00178,0.986338,5.9e-05,-1.743085e-06,0.1303,0.12852,0.12941,-0.071082,-0.070378,-0.071535,0.000278,0.001157,0.983832,4.4e-05,-1.839622e-06,0.071535,0.070378,0.070957,0.334114,0.338352,0.330558,0.00216,0.007793,1.023577,6.4e-05,-6.983374e-07,0.338352,0.330558,0.334455,-0.382687,-0.379529,-0.385778,0.001808,0.006249,0.983801,8.1e-05,-9.613932e-07,0.385778,0.379529,0.382653,-0.440975,-0.438862,-0.443056,0.001309,0.004194,0.990535,5.7e-05,-1.057931e-06,0.443056,0.438862,0.440959,-0.7168,-0.71581,-0.718334,0.000596,0.002523,0.996487,6.6e-05,-2.635619e-06,0.718334,0.71581,0.717072,-0.775089,-0.773591,-0.777416,0.000939,0.003825,0.99508,5.7e-05,-2.732156e-06,0.777416,0.773591,0.775503,-0.058289,-0.057278,-0.059375,0.000509,0.002097,0.96468,5.3e-05,9.653731e-08,0.059375,0.057278,0.058326,2.845528,2.847297,2.843556,0.001087,0.003741,1.001315,0.000134,-4.923401e-06,2.847297,2.843556,2.845426,-0.012195,-0.011795,-0.012734,0.000173,0.000939,0.926271,6.7e-05,-2.344399e-07,0.012734,0.011795,0.012264,2.055021,2.061297,2.049675,0.00322,0.011622,1.00567,9.7e-05,-7.319409e-07,2.061297,2.049675,2.055486,2.857723,2.859675,2.855612,0.001083,0.004063,1.001423,0.000148,-5.946296e-06,2.859675,2.855612,2.857643,0.790507,0.79708,0.783851,0.003906,0.013229,1.016877,0.000174,-2.519893e-06,0.79708,0.783851,0.790466,-2.067216,-2.06147,-2.07387,0.003318,0.0124,0.994021,0.000132,-9.663808e-07,2.07387,2.06147,2.06767
3,-0.939169,-0.938839,-0.939682,0.000227,0.000843,0.999103,2.6e-05,-6.608851e-07,0.939682,0.938839,0.93926,0.31014,0.311469,0.309429,0.000453,0.002041,1.006595,3.6e-05,-1.585737e-08,0.311469,0.309429,0.310449,0.038955,0.039799,0.037922,0.000449,0.001877,1.049496,6.6e-05,-8.393125e-07,0.039799,0.037922,0.038861,-0.142319,-0.13934,-0.14437,0.001371,0.005029,0.965163,0.000195,-4.357886e-06,0.14437,0.13934,0.141855,-1.249309,-1.248535,-1.250488,0.000432,0.001952,0.998439,4.9e-05,-6.987718e-07,1.250488,1.248535,1.249511,-0.978124,-0.977423,-0.978674,0.000291,0.001251,0.998722,4.8e-05,-1.563981e-07,0.978674,0.977423,0.978049,-0.79685,-0.794479,-0.800342,0.001554,0.005863,0.992674,0.00022,-5.0408e-06,0.800342,0.794479,0.79741,0.271185,0.272896,0.270013,0.000697,0.002883,1.010676,8.5e-05,-8.551699e-07,0.272896,0.270013,0.271454,0.452459,0.454439,0.449411,0.001294,0.005028,1.011189,0.000176,-4.342029e-06,0.454439,0.449411,0.451925,0.181274,0.184089,0.177262,0.001804,0.006826,1.03851,0.000243,-5.197198e-06,0.184089,0.177262,0.180676,2.845777,2.852147,2.841355,0.002907,0.010792,1.003798,0.000403,-8.899584e-06,2.852147,2.841355,2.846751,-0.015107,-0.014645,-0.015697,0.000234,0.001053,0.932944,8.5e-05,-1.048903e-06,0.015697,0.014645,0.015171,-0.635656,-0.634289,-0.638338,0.000931,0.004048,0.993658,7.2e-05,1.306422e-07,0.638338,0.634289,0.636313,2.860885,2.867289,2.856275,0.002904,0.011014,1.003856,0.000389,-7.726782e-06,2.867289,2.856275,2.861782,3.481433,3.487844,3.476248,0.003228,0.011596,1.003336,0.000441,-8.741006e-06,3.487844,3.476248,3.482046,0.620549,0.62327,0.619112,0.00095,0.004158,1.006716,0.000122,-1.014224e-06,0.62327,0.619112,0.621191
4,-0.891301,-0.886729,-0.896891,0.002955,0.010162,0.98867,8e-05,4.880628e-08,0.896891,0.886729,0.89181,0.428144,0.4374,0.41646,0.006165,0.020939,1.050279,0.000165,2.523805e-07,0.4374,0.41646,0.42693,0.060056,0.061771,0.058247,0.000985,0.003524,1.060499,3.4e-05,3.374836e-08,0.061771,0.058247,0.060009,-0.13646,-0.13538,-0.13732,0.000541,0.001941,0.985868,4.8e-05,-2.334249e-07,0.13732,0.13538,0.13635,-1.319445,-1.313351,-1.324129,0.003211,0.010778,0.991861,8.5e-05,2.035742e-07,1.324129,1.313351,1.31874,-0.951357,-0.9485,-0.955138,0.001995,0.006638,0.993051,5.3e-05,1.505792e-08,0.955138,0.9485,0.951819,-0.75484,-0.750429,-0.759941,0.002556,0.009511,0.987484,8.7e-05,-1.846186e-07,0.759941,0.750429,0.755185,0.368088,0.375629,0.358213,0.005204,0.017415,1.048617,0.000137,2.186322e-07,0.375629,0.358213,0.366921,0.564605,0.573699,0.55341,0.005747,0.020289,1.036662,0.00016,1.895563e-08,0.573699,0.55341,0.563555,0.196517,0.198071,0.195197,0.000732,0.002874,1.014722,7e-05,-1.996765e-07,0.198071,0.195197,0.196634,2.842442,2.843737,2.841193,0.000742,0.002545,1.000896,0.000104,-4.21445e-07,2.843737,2.841193,2.842465,-0.009793,-0.009358,-0.010192,0.000241,0.000834,0.918147,2.8e-05,2.084157e-07,0.010192,0.009358,0.009775,-0.894147,-0.867984,-0.915038,0.013836,0.047054,0.948578,0.000371,5.328633e-07,0.915038,0.867984,0.891511,2.852235,2.853489,2.850834,0.000791,0.002655,1.000931,0.000101,-2.130293e-07,2.853489,2.850834,2.852162,3.73659,3.756366,3.710972,0.013859,0.045394,1.012232,0.000358,9.543084e-07,3.756366,3.710972,3.733669,0.884354,0.905351,0.858398,0.013888,0.046953,1.054698,0.00037,7.41279e-07,0.905351,0.858398,0.881874


In [52]:
for data in [train, test]:
    data.fillna(0,inplace=True)
    data.replace(-np.inf,0,inplace=True)
    data.replace(np.inf,0,inplace=True)

In [33]:
y_gr = y_train['group_id']

In [88]:
N_SPLITS=4

splits = list(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=2019).split(train, y_gr))

clf = []
val_acc = []
val_pred = []
val_y = []

lgb_params = {
               'feature_fraction': 0.8,
               'metric': 'multi_logloss',
               'nthread':8, 
               'learning_rate': 0.1, 
               'objective': 'multiclass',
               'num_class': 73,
               'num_leaves': 2**4,
               'verbose':0, 
               'seed':123
              }

for train_idx, val_idx in splits:
    X_tr, y_tr = np.asarray(train)[train_idx, :], y_gr[train_idx]
    X_val, y_val = np.asarray(train)[val_idx, :], y_gr[val_idx]
    
    model_lgb = lgb.train(lgb_params, lgb.Dataset(X_tr, label=y_tr), 500,\
                           valid_sets=lgb.Dataset(X_val, label=y_val), early_stopping_rounds=30)
    pred_lgb = model_lgb.predict(X_val)
    val_pred.append(pred_lgb)
    val_y.append(y_val)
    
    val_acc.append(accuracy_score(y_val, np.argmax(pred_lgb, axis=1)))

#pred_lgb = pd.DataFrame(pred_lgb, index=np.where(np.mean(adv_val, axis=0)>0.3))
#val_pred = pd.concat([val_pred, pred_lgb], axis=0)

    clf.append(model_lgb)

[1]	valid_0's multi_logloss: 2.61216
Training until validation scores don't improve for 30 rounds.
[2]	valid_0's multi_logloss: 2.33308
[3]	valid_0's multi_logloss: 2.1428
[4]	valid_0's multi_logloss: 1.99276
[5]	valid_0's multi_logloss: 1.86935
[6]	valid_0's multi_logloss: 1.76401
[7]	valid_0's multi_logloss: 1.67278
[8]	valid_0's multi_logloss: 1.59233
[9]	valid_0's multi_logloss: 1.52392
[10]	valid_0's multi_logloss: 1.46028
[11]	valid_0's multi_logloss: 1.40418
[12]	valid_0's multi_logloss: 1.35263
[13]	valid_0's multi_logloss: 1.30613
[14]	valid_0's multi_logloss: 1.26478
[15]	valid_0's multi_logloss: 1.22567
[16]	valid_0's multi_logloss: 1.19131
[17]	valid_0's multi_logloss: 1.15795
[18]	valid_0's multi_logloss: 1.12816
[19]	valid_0's multi_logloss: 1.10026
[20]	valid_0's multi_logloss: 1.07663
[21]	valid_0's multi_logloss: 1.05186
[22]	valid_0's multi_logloss: 1.03121
[23]	valid_0's multi_logloss: 1.01191
[24]	valid_0's multi_logloss: 0.992796
[25]	valid_0's multi_logloss: 0.974

[1]	valid_0's multi_logloss: 2.63501
Training until validation scores don't improve for 30 rounds.
[2]	valid_0's multi_logloss: 2.36883
[3]	valid_0's multi_logloss: 2.16968
[4]	valid_0's multi_logloss: 2.01905
[5]	valid_0's multi_logloss: 1.89268
[6]	valid_0's multi_logloss: 1.7891
[7]	valid_0's multi_logloss: 1.69712
[8]	valid_0's multi_logloss: 1.61554
[9]	valid_0's multi_logloss: 1.54716
[10]	valid_0's multi_logloss: 1.48692
[11]	valid_0's multi_logloss: 1.43058
[12]	valid_0's multi_logloss: 1.37905
[13]	valid_0's multi_logloss: 1.33328
[14]	valid_0's multi_logloss: 1.29112
[15]	valid_0's multi_logloss: 1.25271
[16]	valid_0's multi_logloss: 1.21929
[17]	valid_0's multi_logloss: 1.18885
[18]	valid_0's multi_logloss: 1.15799
[19]	valid_0's multi_logloss: 1.12811
[20]	valid_0's multi_logloss: 1.10078
[21]	valid_0's multi_logloss: 1.07483
[22]	valid_0's multi_logloss: 1.05193
[23]	valid_0's multi_logloss: 1.03071
[24]	valid_0's multi_logloss: 1.01152
[25]	valid_0's multi_logloss: 0.9924

[95]	valid_0's multi_logloss: 0.704738
[96]	valid_0's multi_logloss: 0.705433
[97]	valid_0's multi_logloss: 0.707329
[98]	valid_0's multi_logloss: 0.707893
[99]	valid_0's multi_logloss: 0.70973
[100]	valid_0's multi_logloss: 0.711295
[101]	valid_0's multi_logloss: 0.712052
[102]	valid_0's multi_logloss: 0.71261
[103]	valid_0's multi_logloss: 0.713793
[104]	valid_0's multi_logloss: 0.715572
[105]	valid_0's multi_logloss: 0.717781
[106]	valid_0's multi_logloss: 0.719854
[107]	valid_0's multi_logloss: 0.721311
[108]	valid_0's multi_logloss: 0.722432
[109]	valid_0's multi_logloss: 0.723538
Early stopping, best iteration is:
[79]	valid_0's multi_logloss: 0.692735


In [89]:
val_acc

[0.8113590263691683, 0.7739583333333333, 0.8044871794871795, 0.802801724137931]

In [55]:
feature_imp = np.zeros(train.shape[1])
for model in clf:
    feature_imp+=model.feature_importance(importance_type='gain')

pd.DataFrame(feature_imp/4, index=data_col).sort_values(by=0, ascending=False)

Unnamed: 0,0
euler_Y_mean_abs_chg,4224.489498
X-Z_mean,3373.631012
Z-W_min,3237.186289
X-Z_min,2890.458451
X-Y_max,2838.941982
Z_mean_abs_chg,2696.913772
Z_min,2695.329689
X_min,2609.048634
euler_Y_mean,2459.761239
X-Z_max,2359.875565


In [40]:
from sklearn.metrics import roc_auc_score

In [61]:
X_train_reserve = X_train.copy()
X_test_reserve = X_test.copy()

In [152]:
X_train_reserve.to_csv('./rotated_train.csv')
X_test_reserve.to_csv('./rotated_test.csv')

In [70]:
def feat_eng(df):
    data = pd.DataFrame()

    df['total_acce'] = (df['linear_acceleration_X']**2 + df['linear_acceleration_Y']**2 + df['linear_acceleration_Z']**2)**0.5
    df['total_rotated'] = (df['rotated_acceleration_X']**2 + df['rotated_acceleration_Y']**2 + df['rotated_acceleration_Z']**2)**0.5
    #df['total_ang_cos'] = (df['angular_velocity_X_cos']**2 + df['angular_velocity_Y_cos']**2 + df['angular_velocity_Z_cos']**2)**0.5
    
    df = df.drop(columns=['euler_X'], axis=0)
    df = df.drop(columns=['euler_Y'], axis=0)
    df = df.drop(columns=['euler_Z'], axis=0)
    #df['total_angu'] = (df['angular_velocity_X']**2 + df['angular_velocity_Y']**2 + df['angular_velocity_Z']**2)**0.5
    
    #df['total_eule'] = (df['euler_X']**2 + df['euler_Y']**2 + df['euler_Z']**2)**0.5
    #df['total_xyz'] = (df['orientation_X']**2 + df['orientation_Y']**2 + df['orientation_Z']**2)**0.5
    #df['total_acc/vel'] = df['total_acce']/df['total_angu']
    
    df_columns = df.columns[7:]
    for col in tqdm(df_columns):
        data[col + '_mean'] = df.groupby(['series_id'])[col].mean()
        data[col + '_median'] = df.groupby(['series_id'])[col].median()
        data[col + '_max'] = df.groupby(['series_id'])[col].max()
        data[col + '_min'] = df.groupby(['series_id'])[col].min()
        data[col + '_std'] = df.groupby(['series_id'])[col].std()
        data[col + '_range'] = data[col + '_max'] - data[col + '_min']
        data[col + '_maxtoMin'] = data[col + '_max'] / data[col + '_min']
        data[col + '_mean_abs_chg'] = df.groupby(['series_id'])[col].apply(lambda x: np.mean(np.abs(np.diff(x))))
        data[col + '_mean_change_of_abs_change'] = df.groupby('series_id')[col].apply(mean_change_of_abs_change)
        data[col + '_abs_max'] = df.groupby(['series_id'])[col].apply(lambda x: np.max(np.abs(x)))
        data[col + '_abs_min'] = df.groupby(['series_id'])[col].apply(lambda x: np.min(np.abs(x)))
        data[col + '_abs_avg'] = (data[col + '_abs_min'] + data[col + '_abs_max'])/2
        
        
    #data['corr_linZangX'] = np.correlate(np.abs(tmp_df[:, 0]), np.abs(tmp_df[:, 1]))
    return data

In [72]:
X_train.head()

Unnamed: 0_level_0,angular_velocity_X_mean,angular_velocity_X_median,angular_velocity_X_max,angular_velocity_X_min,angular_velocity_X_std,angular_velocity_X_range,angular_velocity_X_maxtoMin,angular_velocity_X_mean_abs_chg,angular_velocity_X_mean_change_of_abs_change,angular_velocity_X_abs_max,angular_velocity_X_abs_min,angular_velocity_X_abs_avg,angular_velocity_Y_mean,angular_velocity_Y_median,angular_velocity_Y_max,angular_velocity_Y_min,angular_velocity_Y_std,angular_velocity_Y_range,angular_velocity_Y_maxtoMin,angular_velocity_Y_mean_abs_chg,angular_velocity_Y_mean_change_of_abs_change,angular_velocity_Y_abs_max,angular_velocity_Y_abs_min,angular_velocity_Y_abs_avg,angular_velocity_Z_mean,angular_velocity_Z_median,angular_velocity_Z_max,angular_velocity_Z_min,angular_velocity_Z_std,angular_velocity_Z_range,angular_velocity_Z_maxtoMin,angular_velocity_Z_mean_abs_chg,angular_velocity_Z_mean_change_of_abs_change,angular_velocity_Z_abs_max,angular_velocity_Z_abs_min,angular_velocity_Z_abs_avg,linear_acceleration_X_mean,linear_acceleration_X_median,linear_acceleration_X_max,linear_acceleration_X_min,linear_acceleration_X_std,linear_acceleration_X_range,linear_acceleration_X_maxtoMin,linear_acceleration_X_mean_abs_chg,linear_acceleration_X_mean_change_of_abs_change,linear_acceleration_X_abs_max,linear_acceleration_X_abs_min,linear_acceleration_X_abs_avg,linear_acceleration_Y_mean,linear_acceleration_Y_median,linear_acceleration_Y_max,linear_acceleration_Y_min,linear_acceleration_Y_std,linear_acceleration_Y_range,linear_acceleration_Y_maxtoMin,linear_acceleration_Y_mean_abs_chg,linear_acceleration_Y_mean_change_of_abs_change,linear_acceleration_Y_abs_max,linear_acceleration_Y_abs_min,linear_acceleration_Y_abs_avg,linear_acceleration_Z_mean,linear_acceleration_Z_median,linear_acceleration_Z_max,linear_acceleration_Z_min,linear_acceleration_Z_std,linear_acceleration_Z_range,linear_acceleration_Z_maxtoMin,linear_acceleration_Z_mean_abs_chg,linear_acceleration_Z_mean_change_of_abs_change,linear_acceleration_Z_abs_max,linear_acceleration_Z_abs_min,linear_acceleration_Z_abs_avg,rotated_acceleration_X_mean,rotated_acceleration_X_median,rotated_acceleration_X_max,rotated_acceleration_X_min,rotated_acceleration_X_std,rotated_acceleration_X_range,rotated_acceleration_X_maxtoMin,rotated_acceleration_X_mean_abs_chg,rotated_acceleration_X_mean_change_of_abs_change,rotated_acceleration_X_abs_max,rotated_acceleration_X_abs_min,rotated_acceleration_X_abs_avg,rotated_acceleration_Y_mean,rotated_acceleration_Y_median,rotated_acceleration_Y_max,rotated_acceleration_Y_min,rotated_acceleration_Y_std,rotated_acceleration_Y_range,rotated_acceleration_Y_maxtoMin,rotated_acceleration_Y_mean_abs_chg,rotated_acceleration_Y_mean_change_of_abs_change,rotated_acceleration_Y_abs_max,rotated_acceleration_Y_abs_min,rotated_acceleration_Y_abs_avg,rotated_acceleration_Z_mean,rotated_acceleration_Z_median,rotated_acceleration_Z_max,rotated_acceleration_Z_min,rotated_acceleration_Z_std,rotated_acceleration_Z_range,rotated_acceleration_Z_maxtoMin,rotated_acceleration_Z_mean_abs_chg,rotated_acceleration_Z_mean_change_of_abs_change,rotated_acceleration_Z_abs_max,rotated_acceleration_Z_abs_min,rotated_acceleration_Z_abs_avg,total_acce_mean,total_acce_median,total_acce_max,total_acce_min,total_acce_std,total_acce_range,total_acce_maxtoMin,total_acce_mean_abs_chg,total_acce_mean_change_of_abs_change,total_acce_abs_max,total_acce_abs_min,total_acce_abs_avg,total_rotated_mean,total_rotated_median,total_rotated_max,total_rotated_min,total_rotated_std,total_rotated_range,total_rotated_maxtoMin,total_rotated_mean_abs_chg,total_rotated_mean_change_of_abs_change,total_rotated_abs_max,total_rotated_abs_min,total_rotated_abs_avg
series_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1
0,0.000606,-0.003281,0.097597,-0.100359,0.037477,0.197956,-0.972476,0.028929,-0.00054,0.100359,0.000107,0.050233,7.1e-05,-0.000564,0.04539,-0.063419,0.020777,0.10881,-0.71572,0.021811,0.000169,0.063419,0.000168,0.031794,-0.0004,-0.000551,0.032482,-0.024468,0.009399,0.05695,-1.327492,0.009038,9.3e-05,0.032482,7.7e-05,0.016279,0.261341,0.285042,1.648423,-1.268349,0.708672,2.916772,-1.299661,0.160546,-0.00225,1.648423,0.016849,0.832636,3.002412,3.392663,4.623803,0.54023,1.236915,4.083573,8.558946,0.173883,-0.000399,4.623803,0.54023,2.582017,-9.314346,-9.312408,-8.324252,-10.735116,0.473874,2.410865,0.775423,0.272874,0.001272,10.735116,8.324252,9.529684,-2.786008,-2.784012,-2.381788,-3.199514,0.188563,0.817726,0.744422,0.085946,-0.000212,3.199514,2.381788,2.790651,-0.457298,-0.454697,0.731032,-1.704916,0.459176,2.435948,-0.428779,0.244647,0.001634,1.704916,0.003739,0.854328,1.329938,1.747534,2.953737,-1.233324,1.243816,4.187061,-2.39494,0.197722,6e-05,2.953737,0.0299,1.491819,9.892753,9.941641,10.888026,8.659276,0.462628,2.22875,1.257383,0.24121,0.00069,10.888026,8.659276,9.773651,3.37356,3.349282,4.198141,2.658943,0.373286,1.539198,1.578876,0.089777,-0.000314,4.198141,2.658943,3.428542
1,-0.00011,-0.004165,0.179486,-0.198174,0.079273,0.37766,-0.905697,0.063771,-0.001595,0.198174,0.000626,0.0994,0.000108,-0.00141,0.113061,-0.131297,0.044342,0.244358,-0.861107,0.043876,0.00019,0.131297,5.3e-05,0.065675,0.000853,0.001625,0.093227,-0.090793,0.048279,0.184021,-1.026808,0.019224,-0.000366,0.093227,0.000149,0.046688,0.163601,0.005907,2.1848,-0.871621,0.626577,3.056421,-2.506593,0.265054,-0.003635,2.1848,0.003728,1.094264,2.772722,2.871693,5.489065,-0.283137,1.557537,5.772201,-19.386629,0.296476,-0.001489,5.489065,0.208964,2.849015,-9.382948,-9.333242,-7.274143,-13.10935,0.847922,5.835207,0.554882,0.527628,-0.006774,13.10935,7.274143,10.191747,0.640771,0.457868,2.743717,-1.006617,0.845558,3.750334,-2.725682,0.249657,-0.00461,2.743717,0.001901,1.372809,-11.693977,-11.459131,-8.766004,-15.946338,1.393617,7.180335,0.549719,0.433182,-0.003622,15.946338,8.766004,12.356171,-9.580283,-9.62191,-7.165724,-11.425438,0.891246,4.259714,0.627173,0.40463,-0.005102,11.425438,7.165724,9.295581,9.930341,9.926521,13.113474,8.015228,0.811051,5.098246,1.63607,0.46638,-0.006679,13.113474,8.015228,10.564351,15.20769,15.101457,19.710547,12.518797,1.049165,7.19175,1.574476,0.536155,-0.006686,19.710547,12.518797,16.114672
2,-0.000104,0.004176,0.093036,-0.125226,0.036439,0.218262,-0.742945,0.028808,0.000165,0.125226,0.000693,0.06296,-0.000265,0.000333,0.090551,-0.101291,0.035535,0.191842,-0.89396,0.03483,-0.000128,0.101291,4.9e-05,0.05067,-0.000197,0.000286,0.086293,-0.095494,0.032242,0.181786,-0.903647,0.01475,-2.6e-05,0.095494,0.000132,0.047813,0.17719,0.214934,1.052559,-1.108061,0.346165,2.16062,-0.949911,0.193576,4.9e-05,1.108061,0.001093,0.554577,2.897345,3.076755,5.655919,-0.462089,1.529911,6.118008,-12.239887,0.249995,-5.6e-05,5.655919,0.085409,2.870664,-9.329791,-9.340382,-5.7442,-10.710896,0.675885,4.966696,0.536295,0.2876,0.000202,10.710896,5.7442,8.227548,-2.016696,-1.99744,-1.098183,-2.840675,0.35468,1.742493,0.386592,0.134776,2.7e-05,2.840675,1.098183,1.969429,-3.122883,-3.219814,-0.556854,-6.446817,1.084209,5.889964,0.086377,0.327579,0.000164,6.446817,0.556854,3.501836,6.903779,7.032563,9.080385,4.360574,1.227132,4.71981,2.082383,0.203334,0.000118,9.080385,4.360574,6.72048,9.902159,9.945025,11.163415,6.794325,0.56143,4.36909,1.64305,0.240515,0.000238,11.163415,6.794325,8.97887,7.905883,8.046596,10.389493,5.222576,1.333328,5.166917,1.989343,0.217048,-0.000898,10.389493,5.222576,7.806034
3,-0.00098,-0.004165,0.410808,-0.398074,0.138277,0.808882,-1.03199,0.107176,-0.002837,0.410808,0.00046,0.205634,0.000937,-0.003439,0.125392,-0.154037,0.043004,0.279429,-0.814034,0.0423,-0.000254,0.154037,0.000146,0.077091,-0.000461,0.00122,0.06488,-0.0716,0.024167,0.13648,-0.906142,0.019724,-0.000494,0.0716,0.000571,0.036086,0.226659,0.161048,2.651966,-0.989965,0.686565,3.641932,-2.678848,0.36932,0.004151,2.651966,0.001294,1.32663,2.720704,3.166031,8.812608,-3.2518,3.64241,12.064407,-2.710071,0.555371,-0.003488,8.812608,0.018518,4.415563,-9.44036,-9.290498,-6.28389,-13.624317,1.300192,7.340427,0.461226,0.865445,-0.011674,13.624317,6.28389,9.954104,1.036367,1.131861,4.644175,-1.460856,1.112844,6.105031,-3.179078,0.364618,0.00066,4.644175,0.01367,2.328923,-13.050474,-12.409666,-7.298656,-19.902808,3.205823,12.604152,0.366715,0.821326,-0.010548,19.902808,7.298656,13.600732,-9.213461,-9.209364,-4.364744,-13.063699,1.952122,8.698955,0.334112,0.588761,-0.007304,13.063699,4.364744,8.714221,10.4994,10.662237,13.971006,7.221134,1.287646,6.749872,1.934738,0.746214,-0.008815,13.971006,7.221134,10.59607,16.361546,15.965073,22.117047,12.389549,1.952732,9.727498,1.785137,0.880926,-0.012699,22.117047,12.389549,17.253298
4,0.00039,-0.000655,0.079432,-0.081673,0.031496,0.161105,-0.972554,0.023166,-0.000116,0.081673,0.000981,0.041327,-0.000955,-0.002589,0.052542,-0.072168,0.017189,0.12471,-0.728048,0.010807,-0.000318,0.072168,7.2e-05,0.03612,0.000408,0.001068,0.072612,-0.041661,0.022075,0.114273,-1.742955,0.007394,-0.000696,0.072612,7.5e-05,0.036344,-0.071336,-0.027444,0.481878,-1.017074,0.338622,1.498953,-0.473789,0.073541,0.000354,1.017074,0.002765,0.50992,2.981143,3.207887,4.41396,0.574627,1.054487,3.839333,7.681434,0.128102,0.001003,4.41396,0.574627,2.494294,-9.358524,-9.358517,-8.515674,-10.188441,0.385351,1.672767,0.835817,0.210181,0.00031,10.188441,8.515674,9.352057,1.476937,1.561688,2.300707,0.745411,0.3638,1.555296,3.086493,0.073744,0.000412,2.300707,0.745411,1.523059,-15.036692,-14.968034,-13.517113,-17.030841,0.77925,3.513728,0.793684,0.194222,0.000159,17.030841,13.517113,15.273977,-8.178337,-8.493046,-6.270627,-9.284312,0.827988,3.013685,0.6754,0.13275,0.000925,9.284312,6.270627,7.777469,9.882643,9.930058,10.659495,8.868287,0.416354,1.791208,1.201979,0.192187,0.000683,10.659495,8.868287,9.763891,17.215018,17.188742,18.452243,16.13265,0.477049,2.319593,1.143783,0.211437,0.000303,18.452243,16.13265,17.292446


In [71]:
X_train = feat_eng(X_train)
X_test = feat_eng(X_test)


  0%|          | 0/11 [00:00<?, ?it/s][A
  9%|▉         | 1/11 [00:02<00:20,  2.07s/it][A
 18%|█▊        | 2/11 [00:04<00:18,  2.07s/it][A
 27%|██▋       | 3/11 [00:06<00:16,  2.07s/it][A
 36%|███▋      | 4/11 [00:08<00:14,  2.08s/it][A
 45%|████▌     | 5/11 [00:10<00:12,  2.08s/it][A
 55%|█████▍    | 6/11 [00:12<00:10,  2.07s/it][A
 64%|██████▎   | 7/11 [00:14<00:08,  2.08s/it][A
 73%|███████▎  | 8/11 [00:16<00:06,  2.09s/it][A
 82%|████████▏ | 9/11 [00:18<00:04,  2.08s/it][A
 91%|█████████ | 10/11 [00:20<00:02,  2.08s/it][A
100%|██████████| 11/11 [00:22<00:00,  2.07s/it][A
[A
  0%|          | 0/11 [00:00<?, ?it/s][A
  9%|▉         | 1/11 [00:02<00:21,  2.10s/it][A
 18%|█▊        | 2/11 [00:04<00:18,  2.09s/it][A
 27%|██▋       | 3/11 [00:06<00:16,  2.10s/it][A
 36%|███▋      | 4/11 [00:08<00:14,  2.13s/it][A
 45%|████▌     | 5/11 [00:10<00:12,  2.16s/it][A
 55%|█████▍    | 6/11 [00:12<00:10,  2.19s/it][A
 64%|██████▎   | 7/11 [00:15<00:08,  2.20s/it][A
 73%|████

In [73]:
y = np.concatenate((np.zeros(len(X_train)), np.ones(len(X_test))), axis=0)
X = np.concatenate((X_train, X_test), axis=0)

In [74]:
splits = list(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=2019).split(X, y))

clf_2 = []
val_roc = []
val_pred = pd.DataFrame()

lgb_params = {
               'feature_fraction': 0.8,
               'metric': 'binary_logloss',
               'nthread':8, 
               'learning_rate': 0.1, 
               'objective': 'binary',
               #'num_class': class_num,
               'num_leaves': 2**4,
               'verbose':0, 
               'seed':123
              }


for train_idx, val_idx in splits:
    X_tr, y_tr = X[train_idx, :], y[train_idx]
    X_val, y_val = X[val_idx, :], y[val_idx]
 
    model_lgb = lgb.train(lgb_params, lgb.Dataset(X_tr, label=y_tr), 500,\
                           valid_sets=lgb.Dataset(X_val, label=y_val), early_stopping_rounds=30)
    pred_lgb = model_lgb.predict(X_val)
    
    val_roc.append(roc_auc_score(y_val, (pred_lgb>0.5).astype(int)))
    
    pred_lgb = pd.DataFrame(pred_lgb, index=val_idx)
    val_pred = pd.concat([val_pred, pred_lgb], axis=0)
    
    clf_2.append(model_lgb)

[1]	valid_0's binary_logloss: 0.68334
Training until validation scores don't improve for 30 rounds.
[2]	valid_0's binary_logloss: 0.676336
[3]	valid_0's binary_logloss: 0.669112
[4]	valid_0's binary_logloss: 0.662645
[5]	valid_0's binary_logloss: 0.657857
[6]	valid_0's binary_logloss: 0.65382
[7]	valid_0's binary_logloss: 0.648721
[8]	valid_0's binary_logloss: 0.643948
[9]	valid_0's binary_logloss: 0.639753
[10]	valid_0's binary_logloss: 0.636565
[11]	valid_0's binary_logloss: 0.634422
[12]	valid_0's binary_logloss: 0.631367
[13]	valid_0's binary_logloss: 0.629279
[14]	valid_0's binary_logloss: 0.626843
[15]	valid_0's binary_logloss: 0.625203
[16]	valid_0's binary_logloss: 0.623948
[17]	valid_0's binary_logloss: 0.622189
[18]	valid_0's binary_logloss: 0.619698
[19]	valid_0's binary_logloss: 0.618736
[20]	valid_0's binary_logloss: 0.618589
[21]	valid_0's binary_logloss: 0.618156
[22]	valid_0's binary_logloss: 0.617454
[23]	valid_0's binary_logloss: 0.615696
[24]	valid_0's binary_logloss

[244]	valid_0's binary_logloss: 0.589481
[245]	valid_0's binary_logloss: 0.589458
[246]	valid_0's binary_logloss: 0.589428
[247]	valid_0's binary_logloss: 0.589406
[248]	valid_0's binary_logloss: 0.589521
[249]	valid_0's binary_logloss: 0.590175
[250]	valid_0's binary_logloss: 0.59002
[251]	valid_0's binary_logloss: 0.58989
[252]	valid_0's binary_logloss: 0.589826
[253]	valid_0's binary_logloss: 0.589901
[254]	valid_0's binary_logloss: 0.590144
Early stopping, best iteration is:
[224]	valid_0's binary_logloss: 0.588657
[1]	valid_0's binary_logloss: 0.682864
Training until validation scores don't improve for 30 rounds.
[2]	valid_0's binary_logloss: 0.675315
[3]	valid_0's binary_logloss: 0.668563
[4]	valid_0's binary_logloss: 0.662549
[5]	valid_0's binary_logloss: 0.657628
[6]	valid_0's binary_logloss: 0.654022
[7]	valid_0's binary_logloss: 0.650957
[8]	valid_0's binary_logloss: 0.647431
[9]	valid_0's binary_logloss: 0.643978
[10]	valid_0's binary_logloss: 0.640878
[11]	valid_0's binary_

[112]	valid_0's binary_logloss: 0.604223
[113]	valid_0's binary_logloss: 0.604268
[114]	valid_0's binary_logloss: 0.603257
[115]	valid_0's binary_logloss: 0.602546
[116]	valid_0's binary_logloss: 0.602824
[117]	valid_0's binary_logloss: 0.60275
[118]	valid_0's binary_logloss: 0.602784
[119]	valid_0's binary_logloss: 0.603102
[120]	valid_0's binary_logloss: 0.603076
[121]	valid_0's binary_logloss: 0.603562
[122]	valid_0's binary_logloss: 0.603468
[123]	valid_0's binary_logloss: 0.603964
[124]	valid_0's binary_logloss: 0.604026
[125]	valid_0's binary_logloss: 0.604208
[126]	valid_0's binary_logloss: 0.604041
[127]	valid_0's binary_logloss: 0.603874
[128]	valid_0's binary_logloss: 0.603798
[129]	valid_0's binary_logloss: 0.604355
[130]	valid_0's binary_logloss: 0.604712
[131]	valid_0's binary_logloss: 0.60483
Early stopping, best iteration is:
[101]	valid_0's binary_logloss: 0.60127
[1]	valid_0's binary_logloss: 0.685324
Training until validation scores don't improve for 30 rounds.
[2]	va

In [75]:
val_roc

[0.6759152934240542,
 0.6622741601606754,
 0.6616006465479274,
 0.6752495023166499]

In [79]:
feature_imp = np.zeros(X.shape[1])
for model in clf_2:
    feature_imp+=model.feature_importance(importance_type='gain')

pd.DataFrame(feature_imp, index=X_train.columns).sort_values(by=0, ascending=False)

Unnamed: 0,0
rotated_acceleration_Y_mean,2166.857557
rotated_acceleration_Y_median,2056.640193
rotated_acceleration_Z_max,1886.185429
rotated_acceleration_X_median,1777.885679
rotated_acceleration_Z_mean,1746.509181
total_rotated_mean_abs_chg,1366.213458
total_rotated_mean,1351.796233
rotated_acceleration_X_mean,1343.400647
rotated_acceleration_Z_mean_abs_chg,1341.344091
linear_acceleration_Y_mean,1227.917332


In [120]:
splits = list(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=2019).split(X_train, y_id))

clf_3 = []
val_acc = []
val_pred = pd.DataFrame()

lgb_params = {
               'feature_fraction': 0.8,
               'metric': 'multi_logloss',
               'nthread':8, 
               'learning_rate': 0.1, 
               'objective': 'multiclass',
               'num_class': class_num,
               'num_leaves': 2**4,
               'verbose':0, 
               'seed':123
              }

for train_idx, val_idx in splits:
    X_tr, y_tr = X_train.iloc[train_idx, :], y_id[train_idx]
    X_val, y_val = X_train.iloc[val_idx, :], y_id[val_idx]
    
    model_lgb = lgb.train(lgb_params, lgb.Dataset(X_tr, label=y_tr), 500,\
                           valid_sets=lgb.Dataset(X_val, label=y_val), early_stopping_rounds=30)
    pred_lgb = model_lgb.predict(X_val)

    val_acc.append(accuracy_score(y_val, np.argmax(pred_lgb, axis=1)))

#pred_lgb = pd.DataFrame(pred_lgb, index=np.where(np.mean(adv_val, axis=0)>0.3))
#val_pred = pd.concat([val_pred, pred_lgb], axis=0)

clf_3.append(model_lgb)

[1]	valid_0's multi_logloss: 1.84563
Training until validation scores don't improve for 30 rounds.
[2]	valid_0's multi_logloss: 1.71864
[3]	valid_0's multi_logloss: 1.62027
[4]	valid_0's multi_logloss: 1.53175
[5]	valid_0's multi_logloss: 1.45628
[6]	valid_0's multi_logloss: 1.389
[7]	valid_0's multi_logloss: 1.33392
[8]	valid_0's multi_logloss: 1.28629
[9]	valid_0's multi_logloss: 1.23916
[10]	valid_0's multi_logloss: 1.19933
[11]	valid_0's multi_logloss: 1.16176
[12]	valid_0's multi_logloss: 1.12726
[13]	valid_0's multi_logloss: 1.09842
[14]	valid_0's multi_logloss: 1.06967
[15]	valid_0's multi_logloss: 1.04272
[16]	valid_0's multi_logloss: 1.01995
[17]	valid_0's multi_logloss: 0.995429
[18]	valid_0's multi_logloss: 0.974075
[19]	valid_0's multi_logloss: 0.955057
[20]	valid_0's multi_logloss: 0.936545
[21]	valid_0's multi_logloss: 0.920179
[22]	valid_0's multi_logloss: 0.903914
[23]	valid_0's multi_logloss: 0.889527
[24]	valid_0's multi_logloss: 0.875519
[25]	valid_0's multi_logloss:

[87]	valid_0's multi_logloss: 0.671339
[88]	valid_0's multi_logloss: 0.67081
[89]	valid_0's multi_logloss: 0.670876
[90]	valid_0's multi_logloss: 0.670847
[91]	valid_0's multi_logloss: 0.670136
[92]	valid_0's multi_logloss: 0.670109
[93]	valid_0's multi_logloss: 0.670409
[94]	valid_0's multi_logloss: 0.670491
[95]	valid_0's multi_logloss: 0.669626
[96]	valid_0's multi_logloss: 0.668974
[97]	valid_0's multi_logloss: 0.669573
[98]	valid_0's multi_logloss: 0.669157
[99]	valid_0's multi_logloss: 0.669887
[100]	valid_0's multi_logloss: 0.669467
[101]	valid_0's multi_logloss: 0.669935
[102]	valid_0's multi_logloss: 0.668433
[103]	valid_0's multi_logloss: 0.669496
[104]	valid_0's multi_logloss: 0.669043
[105]	valid_0's multi_logloss: 0.668878
[106]	valid_0's multi_logloss: 0.668653
[107]	valid_0's multi_logloss: 0.668574
[108]	valid_0's multi_logloss: 0.668479
[109]	valid_0's multi_logloss: 0.668754
[110]	valid_0's multi_logloss: 0.668444
[111]	valid_0's multi_logloss: 0.668867
[112]	valid_0'

[34]	valid_0's multi_logloss: 0.77081
[35]	valid_0's multi_logloss: 0.763369
[36]	valid_0's multi_logloss: 0.75726
[37]	valid_0's multi_logloss: 0.751872
[38]	valid_0's multi_logloss: 0.746955
[39]	valid_0's multi_logloss: 0.740279
[40]	valid_0's multi_logloss: 0.73561
[41]	valid_0's multi_logloss: 0.731141
[42]	valid_0's multi_logloss: 0.727812
[43]	valid_0's multi_logloss: 0.723896
[44]	valid_0's multi_logloss: 0.722271
[45]	valid_0's multi_logloss: 0.719072
[46]	valid_0's multi_logloss: 0.715133
[47]	valid_0's multi_logloss: 0.711604
[48]	valid_0's multi_logloss: 0.707766
[49]	valid_0's multi_logloss: 0.7055
[50]	valid_0's multi_logloss: 0.703284
[51]	valid_0's multi_logloss: 0.699102
[52]	valid_0's multi_logloss: 0.697729
[53]	valid_0's multi_logloss: 0.694169
[54]	valid_0's multi_logloss: 0.692314
[55]	valid_0's multi_logloss: 0.689731
[56]	valid_0's multi_logloss: 0.687826
[57]	valid_0's multi_logloss: 0.686024
[58]	valid_0's multi_logloss: 0.683758
[59]	valid_0's multi_logloss: 

In [121]:
val_acc

[0.7771966527196653,
 0.7670514165792235,
 0.7468487394957983,
 0.7818756585879874]

In [122]:
feature_imp = np.zeros(X_train.shape[1])
for model in clf_3:
    feature_imp+=model.feature_importance(importance_type='gain')

pd.DataFrame(feature_imp, index=X_train.columns).sort_values(by=0, ascending=False)

Unnamed: 0,0
angular_velocity_Y_mean_abs_chg,4546.861339
rotated_acceleration_Z_mean_abs_chg,2537.041681
rotated_acceleration_Z_mean,2431.624621
rotated_acceleration_Z_abs_max,2071.468028
rotated_acceleration_X_mean_abs_chg,2029.43026
rotated_acceleration_Y_mean_abs_chg,1823.943768
rotated_acceleration_Y_mean,1502.416432
rotated_acceleration_Z_median,1488.764854
linear_acceleration_X_mean_abs_chg,1377.471178
linear_acceleration_Z_std,1317.577646


In [115]:
test_pred = []
for model in clf:
    pred_test = model.predict(test)
    test_pred.append(pred_test)

In [105]:
test_pred = (test_pred[0]+test_pred[1]+test_pred[2]+test_pred[3])/4

In [106]:
test_pr = pd.concat([pd.Series(np.argmax(test_pred, axis=1)), pd.Series(np.max(test_pred, axis=1))], axis=1)
test_pr.columns = ['group_id', 'prob']

In [107]:
test_pr

Unnamed: 0,group_id,prob
0,33,0.886673
1,31,0.750421
2,55,0.990812
3,15,0.517175
4,15,0.982744
5,62,0.996977
6,61,0.619200
7,61,0.425734
8,65,0.548020
9,65,0.590919


In [110]:
gr_sr = pd.DataFrame()
for i in range(73):
    gr = pd.DataFrame({'group_id':[i], 'surface':[y_train[y_train['group_id']==i]['surface'].iloc[0]]})
    gr_sr = pd.concat([gr_sr, gr], axis=0)

In [111]:
test_pr = test_pr.merge(gr_sr, how='left', on='group_id', sort=False)

In [125]:
test_gr_idx = test_pr[test_pr['prob']<=0.9].index

In [126]:
X_test.loc[test_gr_idx, :].head()

Unnamed: 0,angular_velocity_X_mean,angular_velocity_X_median,angular_velocity_X_max,angular_velocity_X_min,angular_velocity_X_std,angular_velocity_X_range,angular_velocity_X_maxtoMin,angular_velocity_X_mean_abs_chg,angular_velocity_X_mean_change_of_abs_change,angular_velocity_X_abs_max,angular_velocity_X_abs_min,angular_velocity_X_abs_avg,angular_velocity_Y_mean,angular_velocity_Y_median,angular_velocity_Y_max,angular_velocity_Y_min,angular_velocity_Y_std,angular_velocity_Y_range,angular_velocity_Y_maxtoMin,angular_velocity_Y_mean_abs_chg,angular_velocity_Y_mean_change_of_abs_change,angular_velocity_Y_abs_max,angular_velocity_Y_abs_min,angular_velocity_Y_abs_avg,angular_velocity_Z_mean,angular_velocity_Z_median,angular_velocity_Z_max,angular_velocity_Z_min,angular_velocity_Z_std,angular_velocity_Z_range,angular_velocity_Z_maxtoMin,angular_velocity_Z_mean_abs_chg,angular_velocity_Z_mean_change_of_abs_change,angular_velocity_Z_abs_max,angular_velocity_Z_abs_min,angular_velocity_Z_abs_avg,linear_acceleration_X_mean,linear_acceleration_X_median,linear_acceleration_X_max,linear_acceleration_X_min,linear_acceleration_X_std,linear_acceleration_X_range,linear_acceleration_X_maxtoMin,linear_acceleration_X_mean_abs_chg,linear_acceleration_X_mean_change_of_abs_change,linear_acceleration_X_abs_max,linear_acceleration_X_abs_min,linear_acceleration_X_abs_avg,linear_acceleration_Y_mean,linear_acceleration_Y_median,linear_acceleration_Y_max,linear_acceleration_Y_min,linear_acceleration_Y_std,linear_acceleration_Y_range,linear_acceleration_Y_maxtoMin,linear_acceleration_Y_mean_abs_chg,linear_acceleration_Y_mean_change_of_abs_change,linear_acceleration_Y_abs_max,linear_acceleration_Y_abs_min,linear_acceleration_Y_abs_avg,linear_acceleration_Z_mean,linear_acceleration_Z_median,linear_acceleration_Z_max,linear_acceleration_Z_min,linear_acceleration_Z_std,linear_acceleration_Z_range,linear_acceleration_Z_maxtoMin,linear_acceleration_Z_mean_abs_chg,linear_acceleration_Z_mean_change_of_abs_change,linear_acceleration_Z_abs_max,linear_acceleration_Z_abs_min,linear_acceleration_Z_abs_avg,rotated_acceleration_X_mean,rotated_acceleration_X_median,rotated_acceleration_X_max,rotated_acceleration_X_min,rotated_acceleration_X_std,rotated_acceleration_X_range,rotated_acceleration_X_maxtoMin,rotated_acceleration_X_mean_abs_chg,rotated_acceleration_X_mean_change_of_abs_change,rotated_acceleration_X_abs_max,rotated_acceleration_X_abs_min,rotated_acceleration_X_abs_avg,rotated_acceleration_Y_mean,rotated_acceleration_Y_median,rotated_acceleration_Y_max,rotated_acceleration_Y_min,rotated_acceleration_Y_std,rotated_acceleration_Y_range,rotated_acceleration_Y_maxtoMin,rotated_acceleration_Y_mean_abs_chg,rotated_acceleration_Y_mean_change_of_abs_change,rotated_acceleration_Y_abs_max,rotated_acceleration_Y_abs_min,rotated_acceleration_Y_abs_avg,rotated_acceleration_Z_mean,rotated_acceleration_Z_median,rotated_acceleration_Z_max,rotated_acceleration_Z_min,rotated_acceleration_Z_std,rotated_acceleration_Z_range,rotated_acceleration_Z_maxtoMin,rotated_acceleration_Z_mean_abs_chg,rotated_acceleration_Z_mean_change_of_abs_change,rotated_acceleration_Z_abs_max,rotated_acceleration_Z_abs_min,rotated_acceleration_Z_abs_avg,total_acce_mean,total_acce_median,total_acce_max,total_acce_min,total_acce_std,total_acce_range,total_acce_maxtoMin,total_acce_mean_abs_chg,total_acce_mean_change_of_abs_change,total_acce_abs_max,total_acce_abs_min,total_acce_abs_avg,total_rotated_mean,total_rotated_median,total_rotated_max,total_rotated_min,total_rotated_std,total_rotated_range,total_rotated_maxtoMin,total_rotated_mean_abs_chg,total_rotated_mean_change_of_abs_change,total_rotated_abs_max,total_rotated_abs_min,total_rotated_abs_avg
0,0.000527,-0.002536,0.209879,-0.195518,0.080999,0.405396,-1.07345,0.067576,0.000313,0.209879,0.000658,0.105268,0.000534,-0.000728,0.078114,-0.092062,0.03802,0.170176,-0.848501,0.039607,-0.000194,0.092062,0.000488,0.046275,-0.001544,-0.002706,0.079124,-0.077438,0.032418,0.156562,-1.021769,0.019806,0.00027,0.079124,0.000638,0.039881,0.137738,0.09961,1.892403,-1.462907,0.644636,3.35531,-1.293591,0.337759,-0.001286,1.892403,0.011278,0.951841,2.893241,2.919398,4.679984,1.530144,0.612544,3.14984,3.058525,0.29356,0.003107,4.679984,1.530144,3.105064,-9.272861,-9.253101,-5.5439,-11.78881,0.924199,6.24491,0.470268,0.607199,0.003984,11.78881,5.5439,8.666355,0.696565,0.724738,2.341777,-0.927213,0.672642,3.26899,-2.525609,0.330683,-8.3e-05,2.341777,0.022455,1.182116,-12.04709,-11.978648,-10.536158,-13.919588,0.638918,3.383431,0.75693,0.311925,0.003931,13.919588,10.536158,12.227873,9.403173,9.376556,11.955161,5.745154,0.913195,6.210006,2.080912,0.599245,0.003801,11.955161,5.745154,8.850158,9.765265,9.684894,12.311647,6.959611,0.807782,5.352036,1.769014,0.538158,0.003676,12.311647,6.959611,9.635629,15.340652,15.345223,17.463499,14.135123,0.621292,3.328376,1.235468,0.279557,8e-05,17.463499,14.135123,15.799311
1,-0.001429,0.001512,0.139683,-0.218541,0.047952,0.358224,-0.639161,0.031169,-0.000402,0.218541,4.4e-05,0.109292,-0.000342,5.6e-05,0.051398,-0.045701,0.021691,0.097099,-1.124649,0.022339,0.000232,0.051398,6.1e-05,0.025729,0.001503,0.000834,0.058009,-0.051589,0.027321,0.109599,-1.124443,0.009928,0.000161,0.058009,0.000792,0.029401,0.041258,-0.011506,1.472815,-1.106017,0.584671,2.578831,-1.331638,0.146329,-0.00242,1.472815,0.000602,0.736708,2.951856,3.386089,5.64867,-0.429825,1.795417,6.078496,-13.141779,0.239658,-0.001963,5.64867,0.000899,2.824784,-9.388576,-9.369192,-7.494543,-11.249769,0.531594,3.755226,0.666195,0.279108,0.004986,11.249769,7.494543,9.372156,0.972746,0.822856,2.957158,-0.154395,0.707127,3.111553,-19.153235,0.137695,0.002237,2.957158,0.001307,1.479233,-13.270252,-12.949131,-10.879253,-16.034048,1.488349,5.154795,0.678509,0.310772,-0.002129,16.034048,10.879253,13.456651,-9.157161,-9.3793,-7.11577,-10.753614,1.028349,3.637844,0.66171,0.2046,0.007317,10.753614,7.11577,8.934692,10.019777,10.026302,11.700192,7.778298,0.533571,3.921893,1.50421,0.228699,0.006541,11.700192,7.778298,9.739245,16.245338,16.018893,18.114826,14.361959,0.858725,3.752866,1.261306,0.303182,0.002147,18.114826,14.361959,16.238392
3,-0.000106,0.000209,0.048927,-0.077246,0.021186,0.126173,-0.633402,0.016593,-0.000248,0.077246,8.4e-05,0.038665,-0.000455,0.000295,0.018543,-0.022694,0.008853,0.041237,-0.817082,0.007044,-6e-06,0.022694,1e-05,0.011352,0.002761,0.002596,0.081411,-0.068555,0.026704,0.149966,-1.187522,0.006611,8.6e-05,0.081411,3.9e-05,0.040725,0.175677,0.153055,0.62251,-0.369008,0.204049,0.991518,-1.686983,0.065853,-0.000405,0.62251,0.003487,0.312999,2.968651,2.987517,3.357884,2.517275,0.198081,0.840609,1.333936,0.065201,0.000272,3.357884,2.517275,2.937579,-9.341113,-9.391062,-8.5885,-9.73327,0.229445,1.14477,0.882386,0.137535,-0.001084,9.73327,8.5885,9.160885,-0.11683,-0.114188,0.396096,-0.48812,0.192276,0.884216,-0.811473,0.063063,0.000262,0.48812,0.000357,0.244238,-9.485447,-9.466972,-9.037052,-9.880726,0.206839,0.843674,0.914614,0.087215,0.000577,9.880726,9.037052,9.458889,9.795344,9.814986,10.170945,9.169525,0.213204,1.001419,1.109212,0.121294,-0.00053,10.170945,9.169525,9.670235,9.807544,9.825914,10.179361,9.204143,0.212234,0.975218,1.105954,0.122297,-0.000426,10.179361,9.204143,9.691752,13.639133,13.658738,13.9987,13.203263,0.186204,0.795438,1.060246,0.060282,0.001132,13.9987,13.203263,13.600982
6,0.000617,-0.004446,0.144813,-0.07583,0.040604,0.220642,-1.909703,0.029915,-0.00126,0.144813,0.000291,0.072552,0.000787,0.000556,0.17437,-0.155939,0.045643,0.330309,-1.118195,0.038355,-0.000166,0.17437,0.000132,0.087251,-0.002907,0.001021,0.204577,-0.293419,0.084786,0.497996,-0.697217,0.022658,-0.000636,0.293419,0.000693,0.147056,0.186091,0.167608,1.694091,-1.122858,0.652448,2.81695,-1.508731,0.244058,-0.003527,1.694091,0.006734,0.850413,3.108106,3.099533,6.548771,-0.450903,2.03011,6.999673,-14.52369,0.259023,-0.000659,6.548771,0.003082,3.275926,-9.307932,-9.285323,-7.683653,-11.257965,0.64321,3.574312,0.682508,0.310979,0.005104,11.257965,7.683653,9.470809,2.364421,2.368827,3.351227,1.612933,0.363772,1.738294,2.077723,0.124344,0.000351,3.351227,1.612933,2.48208,-17.254588,-17.013065,-14.61731,-19.887521,1.006635,5.270211,0.734999,0.334413,0.002644,19.887521,14.61731,17.252415,-5.893706,-5.728794,-2.570708,-9.114565,1.858758,6.543857,0.282044,0.233771,0.000899,9.114565,2.570708,5.842637,10.036029,9.908676,11.860798,8.649857,0.732712,3.210941,1.371213,0.281036,0.004255,11.860798,8.649857,10.255328,18.497383,18.480507,20.558593,17.071075,0.679838,3.487518,1.204294,0.314209,0.004248,20.558593,17.071075,18.814834
7,0.001221,0.019509,0.685003,-0.664819,0.237565,1.349822,-1.03036,0.183251,-0.002203,0.685003,0.00617,0.345587,-0.000544,-0.002047,0.194662,-0.177762,0.057858,0.372424,-1.095071,0.051763,-0.000279,0.194662,0.000155,0.097408,0.000874,0.003797,0.093126,-0.118083,0.049403,0.211209,-0.788642,0.036377,-0.000121,0.118083,0.000358,0.059221,-0.065923,-0.203137,5.366221,-6.819955,2.954766,12.186176,-0.786841,0.665497,-0.002725,6.819955,0.138788,3.479371,3.053736,3.232261,5.584511,0.014867,1.377536,5.569644,375.632836,0.737491,0.002895,5.584511,0.014867,2.799689,-9.478258,-9.290565,-0.907846,-15.4641,2.315135,14.556254,0.058707,1.508968,0.002132,15.4641,0.907846,8.185973,2.843221,2.814379,4.889592,0.89445,0.830226,3.995143,5.466594,0.43466,0.001106,4.889592,0.89445,2.892021,-19.269956,-19.250469,-13.154559,-25.130177,2.320453,11.975618,0.523457,1.379567,0.002572,25.130177,13.154559,19.142368,-1.398435,-1.633542,1.875953,-4.748272,1.543475,6.624226,-0.395081,0.91617,0.003039,4.748272,0.041548,2.39491,10.559835,10.359956,15.636746,6.735019,1.883564,8.901727,2.321708,1.220756,0.000743,15.636746,6.735019,11.185883,19.600812,19.64869,25.649826,14.06206,2.36913,11.587766,1.824045,1.375999,0.001962,25.649826,14.06206,19.855943


In [133]:
preds_test = []

for model in clf_3:
    preds_test.append(model.predict(X_test.loc[test_gr_idx, :]))

In [135]:
preds_test = np.argmax(np.mean(preds_test, axis=0), axis=1)

In [136]:
test_surface = []
for i in preds_test:
    test_surface.append(id_to_target[i])

In [137]:
test_surface = np.asarray(test_surface)

In [138]:
preds_test = pd.DataFrame(test_surface, index=test_gr_idx, columns=['surface'])

In [139]:
preds_test.head()

Unnamed: 0,surface
0,hard_tiles_large_space
1,carpet
3,soft_tiles
6,soft_pvc
7,hard_tiles_large_space


In [148]:
preds_test = pd.concat([pd.DataFrame(test_pr[test_pr['prob']>0.9]['surface']), preds_test], axis=0).sort_index()

In [149]:
submission = pd.read_csv('../input/sample_submission.csv')

In [150]:
submission['surface'] = preds_test['surface']
submission.to_csv('../output/submission_7.csv', index=False)
submission.head()

Unnamed: 0,series_id,surface
0,0,hard_tiles_large_space
1,1,carpet
2,2,tiled
3,3,soft_tiles
4,4,soft_tiles


In [None]:
cols = ['linear_acceleration_X', 'linear_acceleration_Y', 'linear_acceleration_Z', 'angular_velocity_X', 'angular_velocity_Y', 'angular_velocity_Z']
df_peak = pd.DataFrame()

for i in tqdm(range(0, len(X_train), 128)):
    X_tr = X_train.iloc[i:i+128, :]
    df_cols = pd.DataFrame()
    for col in cols:
        peaks = signal.find_peaks(X_tr[col], rel_height=1.0)[0]
        peak_num = len(peaks)
        peak_width = signal.peak_widths(X_tr[col], peaks, rel_height=1.0)[0]
        peak_pro = signal.peak_prominences(X_tr[col], peaks)[0]
        
        if peak_num != 0:
            peak_df = pd.DataFrame({
                             col+'_peak_num':[peak_num], 
                             col+'_peak_width_max':[np.max(peak_width)],
                             col+'_peak_width_min':[np.min(peak_width)],
                             col+'_peak_width_mean':[np.mean(peak_width)],
                             col+'_peak_pro_max':[np.max(peak_pro)],
                             col+'_peak_pro_min':[np.min(peak_pro)],
                             col+'_peak_pro_mean':[np.mean(peak_pro)]
                            }, index=[i//128])
        else:
            peak_df = pd.DataFrame({
                             col+'_peak_num':[peak_num], 
                             col+'_peak_width_max':[0],
                             col+'_peak_width_min':[0],
                             col+'_peak_width_mean':[0],
                             col+'_peak_pro_max':[0],
                             col+'_peak_pro_min':[0],
                             col+'_peak_pro_mean':[0]
                            }, index=[i//128])
            
        df_cols = pd.concat([df_cols, peak_df], axis=1)
    df_peak = pd.concat([df_peak, df_cols], axis=0)

In [None]:
cols = ['linear_acceleration_X', 'linear_acceleration_Y', 'linear_acceleration_Z', 'angular_velocity_X', 'angular_velocity_Y', 'angular_velocity_Z']
df_peak_te = pd.DataFrame()

for i in tqdm(range(0, len(X_test), 128)):
    X_tr = X_test.iloc[i:i+128, :]
    df_cols = pd.DataFrame()
    for col in cols:
        peaks = signal.find_peaks(X_tr[col], rel_height=1.0)[0]
        peak_num = len(peaks)
        peak_width = signal.peak_widths(X_tr[col], peaks, rel_height=1.0)[0]
        peak_pro = signal.peak_prominences(X_tr[col], peaks)[0]
        
        if peak_num != 0:
            peak_df = pd.DataFrame({
                             col+'_peak_num':[peak_num], 
                             col+'_peak_width_max':[np.max(peak_width)],
                             col+'_peak_width_min':[np.min(peak_width)],
                             col+'_peak_width_mean':[np.mean(peak_width)],
                             col+'_peak_pro_max':[np.max(peak_pro)],
                             col+'_peak_pro_min':[np.min(peak_pro)],
                             col+'_peak_pro_mean':[np.mean(peak_pro)]
                            }, index=[i//128])
        else:
            peak_df = pd.DataFrame({
                             col+'_peak_num':[peak_num], 
                             col+'_peak_width_max':[0],
                             col+'_peak_width_min':[0],
                             col+'_peak_width_mean':[0],
                             col+'_peak_pro_max':[0],
                             col+'_peak_pro_min':[0],
                             col+'_peak_pro_mean':[0]
                            }, index=[i//128])
            
        df_cols = pd.concat([df_cols, peak_df], axis=1)
    df_peak_te = pd.concat([df_peak_te, df_cols], axis=0)

In [None]:
df_peak.head()

In [None]:
def feat_eng(df):
    data = pd.DataFrame()
    #df['linear_acceleration_Z'] += 9.80665
    #df['eulerX-Z'] = df['euler_X']-df['euler_Z']
    df['total_acce'] = (df['linear_acceleration_X']**2 + df['linear_acceleration_Y']**2 + df['linear_acceleration_Z']**2)**0.5
#df['total_rotated'] = (df['rotated_acceleration_X']**2 + df['rotated_acceleration_Y']**2 + df['rotated_acceleration_Z']**2)**0.5
    #df['total_ang_cos'] = (df['angular_velocity_X_cos']**2 + df['angular_velocity_Y_cos']**2 + df['angular_velocity_Z_cos']**2)**0.5
    
    tmp_df = pd.concat([df['linear_acceleration_Z'], df['angular_velocity_X']], axis=1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    tmp_df = scaler.fit_transform(tmp_df)
    
    #df['total_angu'] = (df['angular_velocity_X']**2 + df['angular_velocity_Y']**2 + df['angular_velocity_Z']**2)**0.5
    
    #df['total_eule'] = (df['euler_X']**2 + df['euler_Y']**2 + df['euler_Z']**2)**0.5
    #df['total_xyz'] = (df['orientation_X']**2 + df['orientation_Y']**2 + df['orientation_Z']**2)**0.5
    #df['total_acc/vel'] = df['total_acce']/df['total_angu']
    
    df_columns = df.columns[3:]
    for col in tqdm(df_columns):
        data[col + '_mean'] = df.groupby(['series_id'])[col].mean()
        data[col + '_median'] = df.groupby(['series_id'])[col].median()
        data[col + '_max'] = df.groupby(['series_id'])[col].max()
        data[col + '_min'] = df.groupby(['series_id'])[col].min()
        data[col + '_std'] = df.groupby(['series_id'])[col].std()
        data[col + '_range'] = data[col + '_max'] - data[col + '_min']
        data[col + '_maxtoMin'] = data[col + '_max'] / data[col + '_min']
        data[col + '_mean_abs_chg'] = df.groupby(['series_id'])[col].apply(lambda x: np.mean(np.abs(np.diff(x))))
        data[col + '_mean_change_of_abs_change'] = df.groupby('series_id')[col].apply(mean_change_of_abs_change)
        data[col + '_abs_max'] = df.groupby(['series_id'])[col].apply(lambda x: np.max(np.abs(x)))
        data[col + '_abs_min'] = df.groupby(['series_id'])[col].apply(lambda x: np.min(np.abs(x)))
        data[col + '_abs_avg'] = (data[col + '_abs_min'] + data[col + '_abs_max'])/2
        
        
    #data['corr_linZangX'] = np.correlate(np.abs(tmp_df[:, 0]), np.abs(tmp_df[:, 1]))
    return data

In [None]:
X_train = feat_eng(X_train)
X_test = feat_eng(X_test)

In [None]:
X_train.shape

In [None]:
df_peak.shape

In [None]:
df_peak_te.shape

In [None]:
X_test.shape

In [None]:
X_train = pd.concat([X_train, df_peak], axis=1)
X_test = pd.concat([X_test, df_peak_te], axis=1)

In [None]:
data_col = X_train.columns

In [None]:
for data in [X_train, X_test]:
    data.fillna(0,inplace=True)
    data.replace(-np.inf,0,inplace=True)
    data.replace(np.inf,0,inplace=True)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
N_SPLITS = 5

In [None]:
import tensorflow as tf
from sklearn.metrics import roc_auc_score

def auroc(y_true, y_pred):
    return tf.py_function(roc_auc_score, (y_true, y_pred), tf.double)

In [None]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score

In [None]:
y = np.concatenate((np.zeros(len(X_train)), np.ones(len(X_test))), axis=0)
X = np.concatenate((X_train, X_test), axis=0)

In [None]:
splits = list(StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=2019).split(X, y))

clf_2 = []
val_roc = []
val_pred = pd.DataFrame()

lgb_params = {
               'feature_fraction': 0.8,
               'metric': 'binary_logloss',
               'nthread':8, 
               'learning_rate': 0.1, 
               'objective': 'binary',
               #'num_class': class_num,
               'num_leaves': 2**4,
               'verbose':0, 
               'seed':123
              }


for train_idx, val_idx in splits:
    X_tr, y_tr = X[train_idx, :], y[train_idx]
    X_val, y_val = X[val_idx, :], y[val_idx]
 
    model_lgb = lgb.train(lgb_params, lgb.Dataset(X_tr, label=y_tr), 500,\
                           valid_sets=lgb.Dataset(X_val, label=y_val), early_stopping_rounds=30)
    pred_lgb = model_lgb.predict(X_val)
    
    val_roc.append(roc_auc_score(y_val, (pred_lgb>0.5).astype(int)))
    
    pred_lgb = pd.DataFrame(pred_lgb, index=val_idx)
    val_pred = pd.concat([val_pred, pred_lgb], axis=0)
    
    clf_2.append(model_lgb)

In [None]:
val_roc

In [None]:
feature_imp = np.zeros(X_train.shape[1])
for model in clf_2:
    feature_imp+=model.feature_importance(importance_type='gain')

pd.DataFrame(feature_imp/5, index=data_col).sort_values(by=0, ascending=False)

In [None]:
adv_val = []
for model in clf_2:
    adv_val.append(model.predict(X_train))

In [None]:
plt.plot(np.sort(np.mean(adv_val, axis=0)))

In [None]:
preds_test = []

for model in clf:
    preds_test.append(model.predict(X_test))

In [None]:
preds_test = np.argmax(np.mean(preds_test, axis=0), axis=1)

In [None]:
test_surface = []
for i in preds_test:
    test_surface.append(id_to_target[i])

In [None]:
test_surface = np.asarray(test_surface)

In [None]:
submission = pd.read_csv('../input/sample_submission.csv')

In [None]:
submission['surface'] = test_surface
submission.to_csv('../output/submission_6.csv', index=False)
submission.head()