In [19]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import catboost as cbt
from scipy.stats import skew
from scipy.stats import kurtosis
from scipy.stats import mode
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import StratifiedKFold
import warnings 
warnings.filterwarnings("ignore")

In [20]:
nrows = None

df_train = pd.read_csv('sensor_train.csv',sep=',',nrows=nrows)
df_test = pd.read_csv('sensor_test.csv',sep=',',nrows=nrows)


In [21]:
df_train['flag'] = 'train'
df_test['flag'] = 'test'
df_test['behavior_id'] = -1
df_train_test = pd.concat([df_train, df_test])

In [22]:
df_train_test = df_train_test.sort_values(['flag','fragment_id','time_point'])

In [23]:
df_train_test['acc_all'] = (df_train_test['acc_x'] ** 2 + df_train_test['acc_y'] ** 2 + df_train_test['acc_z'] ** 2) ** 0.5
df_train_test['acc_allg'] = (df_train_test['acc_xg'] ** 2 + df_train_test['acc_yg'] ** 2 + df_train_test['acc_zg'] ** 2) ** 0.5

In [24]:
df_train_test.columns

Index(['fragment_id', 'time_point', 'acc_x', 'acc_y', 'acc_z', 'acc_xg',
       'acc_yg', 'acc_zg', 'behavior_id', 'flag', 'acc_all', 'acc_allg'],
      dtype='object')

In [25]:
df_train_test

Unnamed: 0,fragment_id,time_point,acc_x,acc_y,acc_z,acc_xg,acc_yg,acc_zg,behavior_id,flag,acc_all,acc_allg
0,0,71,0.2,1.0,0.6,0.2,5.3,9.2,-1,test,1.183216,10.619322
1,0,150,0.0,1.0,-0.7,0.2,6.0,8.2,-1,test,1.220656,10.162677
2,0,244,-0.2,0.8,-2.4,-0.4,5.3,7.6,-1,test,2.537716,9.274158
3,0,326,2.3,-0.4,-1.6,2.5,4.5,3.4,-1,test,2.830194,6.169279
4,0,409,-0.3,-1.4,3.3,0.2,3.5,12.4,-1,test,3.597221,12.886039
...,...,...,...,...,...,...,...,...,...,...,...,...
425354,7291,4561,-0.1,-0.5,-1.7,-0.2,3.4,7.4,18,train,1.774824,8.146165
425355,7291,4647,0.7,-1.9,1.1,0.5,1.7,10.7,18,train,2.304344,10.845736
425356,7291,4735,1.3,-1.4,-0.5,1.4,2.1,8.0,18,train,1.974842,8.388683
425357,7291,4830,1.2,-1.6,-0.1,1.6,1.8,9.2,18,train,2.002498,9.509995


# DATA AGGREGATE

In [26]:
df_train_test_list=df_train_test[['flag','fragment_id','behavior_id']]

In [27]:
agg_func = lambda x: list(x)
map_agg_func = {    
    'time_point' : agg_func,
    
    'acc_all' : agg_func,
    'acc_allg' : agg_func,
    
    'acc_x' : agg_func,
    'acc_y' : agg_func,
    'acc_z' : agg_func,
    
    'acc_xg' : agg_func,
    'acc_yg' : agg_func,
    'acc_zg' : agg_func
}
group = df_train_test.groupby(['flag','fragment_id','behavior_id']).agg(map_agg_func).reset_index()
df_train_test_list = pd.merge(df_train_test_list, group, on=['flag','fragment_id','behavior_id'], how='left')

In [28]:
df_train_test_list

Unnamed: 0,flag,fragment_id,behavior_id,time_point,acc_all,acc_allg,acc_x,acc_y,acc_z,acc_xg,acc_yg,acc_zg
0,test,0,-1,"[71, 150, 244, 326, 409, 495, 579, 665, 755, 8...","[1.1832159566199232, 1.2206555615733703, 2.537...","[10.619322012256713, 10.16267681273, 9.2741576...","[0.2, 0.0, -0.2, 2.3, -0.3, 0.3, -0.8, 0.1, -0...","[1.0, 1.0, 0.8, -0.4, -1.4, -1.1, 0.0, 0.6, 0....","[0.6, -0.7, -2.4, -1.6, 3.3, 3.2, -0.7, -0.1, ...","[0.2, 0.2, -0.4, 2.5, 0.2, 0.5, -0.3, 0.6, 0.0...","[5.3, 6.0, 5.3, 4.5, 3.5, 3.1, 4.1, 4.6, 4.6, ...","[9.2, 8.2, 7.6, 3.4, 12.4, 9.8, 8.5, 8.7, 8.1,..."
1,test,0,-1,"[71, 150, 244, 326, 409, 495, 579, 665, 755, 8...","[1.1832159566199232, 1.2206555615733703, 2.537...","[10.619322012256713, 10.16267681273, 9.2741576...","[0.2, 0.0, -0.2, 2.3, -0.3, 0.3, -0.8, 0.1, -0...","[1.0, 1.0, 0.8, -0.4, -1.4, -1.1, 0.0, 0.6, 0....","[0.6, -0.7, -2.4, -1.6, 3.3, 3.2, -0.7, -0.1, ...","[0.2, 0.2, -0.4, 2.5, 0.2, 0.5, -0.3, 0.6, 0.0...","[5.3, 6.0, 5.3, 4.5, 3.5, 3.1, 4.1, 4.6, 4.6, ...","[9.2, 8.2, 7.6, 3.4, 12.4, 9.8, 8.5, 8.7, 8.1,..."
2,test,0,-1,"[71, 150, 244, 326, 409, 495, 579, 665, 755, 8...","[1.1832159566199232, 1.2206555615733703, 2.537...","[10.619322012256713, 10.16267681273, 9.2741576...","[0.2, 0.0, -0.2, 2.3, -0.3, 0.3, -0.8, 0.1, -0...","[1.0, 1.0, 0.8, -0.4, -1.4, -1.1, 0.0, 0.6, 0....","[0.6, -0.7, -2.4, -1.6, 3.3, 3.2, -0.7, -0.1, ...","[0.2, 0.2, -0.4, 2.5, 0.2, 0.5, -0.3, 0.6, 0.0...","[5.3, 6.0, 5.3, 4.5, 3.5, 3.1, 4.1, 4.6, 4.6, ...","[9.2, 8.2, 7.6, 3.4, 12.4, 9.8, 8.5, 8.7, 8.1,..."
3,test,0,-1,"[71, 150, 244, 326, 409, 495, 579, 665, 755, 8...","[1.1832159566199232, 1.2206555615733703, 2.537...","[10.619322012256713, 10.16267681273, 9.2741576...","[0.2, 0.0, -0.2, 2.3, -0.3, 0.3, -0.8, 0.1, -0...","[1.0, 1.0, 0.8, -0.4, -1.4, -1.1, 0.0, 0.6, 0....","[0.6, -0.7, -2.4, -1.6, 3.3, 3.2, -0.7, -0.1, ...","[0.2, 0.2, -0.4, 2.5, 0.2, 0.5, -0.3, 0.6, 0.0...","[5.3, 6.0, 5.3, 4.5, 3.5, 3.1, 4.1, 4.6, 4.6, ...","[9.2, 8.2, 7.6, 3.4, 12.4, 9.8, 8.5, 8.7, 8.1,..."
4,test,0,-1,"[71, 150, 244, 326, 409, 495, 579, 665, 755, 8...","[1.1832159566199232, 1.2206555615733703, 2.537...","[10.619322012256713, 10.16267681273, 9.2741576...","[0.2, 0.0, -0.2, 2.3, -0.3, 0.3, -0.8, 0.1, -0...","[1.0, 1.0, 0.8, -0.4, -1.4, -1.1, 0.0, 0.6, 0....","[0.6, -0.7, -2.4, -1.6, 3.3, 3.2, -0.7, -0.1, ...","[0.2, 0.2, -0.4, 2.5, 0.2, 0.5, -0.3, 0.6, 0.0...","[5.3, 6.0, 5.3, 4.5, 3.5, 3.1, 4.1, 4.6, 4.6, ...","[9.2, 8.2, 7.6, 3.4, 12.4, 9.8, 8.5, 8.7, 8.1,..."
...,...,...,...,...,...,...,...,...,...,...,...,...
855536,train,7291,18,"[23, 108, 191, 280, 365, 450, 534, 619, 703, 7...","[2.5768197453450252, 1.5297058540778357, 2.186...","[8.869047299456689, 9.017760254076396, 10.8921...","[1.4, 0.1, 2.1, 1.4, 0.1, 0.8, -1.3, 0.6, 2.1,...","[1.8, 1.3, 0.6, 0.1, -1.2, 0.7, -1.4, -3.3, -3...","[-1.2, 0.8, 0.1, 0.7, 0.7, 0.1, 2.8, 8.0, 10.7...","[5.1, 5.8, 7.8, 6.9, 6.4, 6.4, 5.2, 8.8, 8.8, ...","[-7.2, -6.8, -7.6, -8.0, -7.7, -7.2, -8.9, -10...","[-0.9, 1.2, 0.2, 1.3, 0.7, 0.5, 2.6, 10.0, 9.2..."
855537,train,7291,18,"[23, 108, 191, 280, 365, 450, 534, 619, 703, 7...","[2.5768197453450252, 1.5297058540778357, 2.186...","[8.869047299456689, 9.017760254076396, 10.8921...","[1.4, 0.1, 2.1, 1.4, 0.1, 0.8, -1.3, 0.6, 2.1,...","[1.8, 1.3, 0.6, 0.1, -1.2, 0.7, -1.4, -3.3, -3...","[-1.2, 0.8, 0.1, 0.7, 0.7, 0.1, 2.8, 8.0, 10.7...","[5.1, 5.8, 7.8, 6.9, 6.4, 6.4, 5.2, 8.8, 8.8, ...","[-7.2, -6.8, -7.6, -8.0, -7.7, -7.2, -8.9, -10...","[-0.9, 1.2, 0.2, 1.3, 0.7, 0.5, 2.6, 10.0, 9.2..."
855538,train,7291,18,"[23, 108, 191, 280, 365, 450, 534, 619, 703, 7...","[2.5768197453450252, 1.5297058540778357, 2.186...","[8.869047299456689, 9.017760254076396, 10.8921...","[1.4, 0.1, 2.1, 1.4, 0.1, 0.8, -1.3, 0.6, 2.1,...","[1.8, 1.3, 0.6, 0.1, -1.2, 0.7, -1.4, -3.3, -3...","[-1.2, 0.8, 0.1, 0.7, 0.7, 0.1, 2.8, 8.0, 10.7...","[5.1, 5.8, 7.8, 6.9, 6.4, 6.4, 5.2, 8.8, 8.8, ...","[-7.2, -6.8, -7.6, -8.0, -7.7, -7.2, -8.9, -10...","[-0.9, 1.2, 0.2, 1.3, 0.7, 0.5, 2.6, 10.0, 9.2..."
855539,train,7291,18,"[23, 108, 191, 280, 365, 450, 534, 619, 703, 7...","[2.5768197453450252, 1.5297058540778357, 2.186...","[8.869047299456689, 9.017760254076396, 10.8921...","[1.4, 0.1, 2.1, 1.4, 0.1, 0.8, -1.3, 0.6, 2.1,...","[1.8, 1.3, 0.6, 0.1, -1.2, 0.7, -1.4, -3.3, -3...","[-1.2, 0.8, 0.1, 0.7, 0.7, 0.1, 2.8, 8.0, 10.7...","[5.1, 5.8, 7.8, 6.9, 6.4, 6.4, 5.2, 8.8, 8.8, ...","[-7.2, -6.8, -7.6, -8.0, -7.7, -7.2, -8.9, -10...","[-0.9, 1.2, 0.2, 1.3, 0.7, 0.5, 2.6, 10.0, 9.2..."


# FEATURES ENGINEERRING

In [29]:
map_features_fun = {
    # 时域
    'time_sum' : lambda x:np.sum(x),
    'time_mean' : lambda x: np.mean(x),
    'time_std' : lambda x: np.std(x),
    'time_var' : lambda x: np.var(x),
    'time_max' : lambda x: np.max(x),
    'time_min' : lambda x: np.min(x),
    'time_median' : lambda x: np.median(x),
    'time_energy' : lambda x: np.sum(np.power(x,2)),
    'time_mad' : lambda x: np.mean(np.absolute(x - np.mean(x))),
    'time_percent_9' : lambda x: np.percentile(x, 0.9),
    'time_percent_75' : lambda x: np.percentile(x, 0.75),
    'time_percent_25' : lambda x: np.percentile(x, 0.25),
    'time_percent_1' : lambda x: np.percentile(x, 0.1),
    'time_percent_75_25' : lambda x: np.percentile(x,75)-np.percentile(x,25),
    'time_range' : lambda x:np.max(x)-np.min(x),
    'time_zcr': lambda x: (np.diff(np.sign(x))!= 0).sum(),
    'time_mcr' : lambda x: (np.diff(np.sign(x-np.mean(x)))!= 0).sum(),
    'time_minind' : lambda x: np.argmin(x),
    'time_maxind' : lambda x: np.argmax(x),
    'time_skew' : lambda x: skew(x),
    'time_kurtosis' : lambda x: kurtosis(x),
    'time_zero_big' : lambda x: np.sum(np.sign(x)>0),
    'time_zero_small' : lambda x: np.sum(np.sign(x)<0),
    'time_len' : lambda x: np.size(x),
    
    # 频域
    'fft_dc' : lambda x: np.abs(np.fft.fft(x))[0],
    'fft_mean' : lambda x: np.mean(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_var' : lambda x: np.var(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_std' : lambda x: np.std(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_sum' : lambda x: np.sum(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_entropy' : lambda x: -1.0 * np.sum(np.log2(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]/np.sum(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]))),
    'fft_energy' : lambda x: np.sum(np.power(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1],2)),
    'fft_skew' : lambda x: skew(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_kurtosis' : lambda x: kurtosis(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_max' : lambda x: np.max(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_min' : lambda x: np.min(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_maxind' : lambda x: np.argmax(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_minind' : lambda x: np.argmin(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1])
}

In [30]:
df_train_test_features = df_train_test[['flag','fragment_id','behavior_id']]

In [31]:
df_train_test_features

Unnamed: 0,flag,fragment_id,behavior_id
0,test,0,-1
1,test,0,-1
2,test,0,-1
3,test,0,-1
4,test,0,-1
...,...,...,...
425354,train,7291,18
425355,train,7291,18
425356,train,7291,18
425357,train,7291,18


In [32]:

for col in ['acc_all','acc_allg','acc_x','acc_y','acc_z','acc_xg','acc_yg','acc_zg']:
    for f_name, f_fun in tqdm(map_features_fun.items()):
        df_train_test_features[col + '_' + f_name] = df_train_test_list[col].map(f_fun)

100%|██████████| 37/37 [21:37<00:00, 35.07s/it]
100%|██████████| 37/37 [21:05<00:00, 34.20s/it]
100%|██████████| 37/37 [21:06<00:00, 34.22s/it]
100%|██████████| 37/37 [21:05<00:00, 34.19s/it]
100%|██████████| 37/37 [21:00<00:00, 34.07s/it]
100%|██████████| 37/37 [21:06<00:00, 34.22s/it]
100%|██████████| 37/37 [21:00<00:00, 34.07s/it]
100%|██████████| 37/37 [21:06<00:00, 34.22s/it]


In [33]:
df_train_test_features

Unnamed: 0,flag,fragment_id,behavior_id,acc_all_time_sum,acc_all_time_mean,acc_all_time_std,acc_all_time_var,acc_all_time_max,acc_all_time_min,acc_all_time_median,...,acc_zg_fft_std,acc_zg_fft_sum,acc_zg_fft_entropy,acc_zg_fft_energy,acc_zg_fft_skew,acc_zg_fft_kurtosis,acc_zg_fft_max,acc_zg_fft_min,acc_zg_fft_maxind,acc_zg_fft_minind
0,test,0,-1,109.889781,1.894651,1.014950,1.030124,4.505552,0.424264,1.746425,...,7.333558,413.917412,146.342468,7467.50,0.748835,-0.403440,33.239467,5.058632,23,10
1,test,0,-1,109.889781,1.894651,1.014950,1.030124,4.505552,0.424264,1.746425,...,7.333558,413.917412,146.342468,7467.50,0.748835,-0.403440,33.239467,5.058632,23,10
2,test,0,-1,109.889781,1.894651,1.014950,1.030124,4.505552,0.424264,1.746425,...,7.333558,413.917412,146.342468,7467.50,0.748835,-0.403440,33.239467,5.058632,23,10
3,test,0,-1,109.889781,1.894651,1.014950,1.030124,4.505552,0.424264,1.746425,...,7.333558,413.917412,146.342468,7467.50,0.748835,-0.403440,33.239467,5.058632,23,10
4,test,0,-1,109.889781,1.894651,1.014950,1.030124,4.505552,0.424264,1.746425,...,7.333558,413.917412,146.342468,7467.50,0.748835,-0.403440,33.239467,5.058632,23,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425354,train,7291,18,52.958673,0.913081,0.471689,0.222491,2.387467,0.100000,0.781025,...,3.729725,148.504201,152.552872,1163.88,1.243270,1.143053,15.782365,0.794277,9,5
425355,train,7291,18,52.958673,0.913081,0.471689,0.222491,2.387467,0.100000,0.781025,...,3.729725,148.504201,152.552872,1163.88,1.243270,1.143053,15.782365,0.794277,9,5
425356,train,7291,18,52.958673,0.913081,0.471689,0.222491,2.387467,0.100000,0.781025,...,3.729725,148.504201,152.552872,1163.88,1.243270,1.143053,15.782365,0.794277,9,5
425357,train,7291,18,52.958673,0.913081,0.471689,0.222491,2.387467,0.100000,0.781025,...,3.729725,148.504201,152.552872,1163.88,1.243270,1.143053,15.782365,0.794277,9,5


In [34]:
#Downcast in order to save memory
def downcast(df):
    cols = df.dtypes.index.tolist()
    types = df.dtypes.values.tolist()
    for i,t in enumerate(types):
        if 'int' in str(t):
            if df[cols[i]].min() > np.iinfo(np.int8).min and df[cols[i]].max() < np.iinfo(np.int8).max:
                df[cols[i]] = df[cols[i]].astype(np.int8)
            elif df[cols[i]].min() > np.iinfo(np.int16).min and df[cols[i]].max() < np.iinfo(np.int16).max:
                df[cols[i]] = df[cols[i]].astype(np.int16)
            elif df[cols[i]].min() > np.iinfo(np.int32).min and df[cols[i]].max() < np.iinfo(np.int32).max:
                df[cols[i]] = df[cols[i]].astype(np.int32)
            else:
                df[cols[i]] = df[cols[i]].astype(np.int64)
        elif 'float' in str(t):
            if df[cols[i]].min() > np.finfo(np.float16).min and df[cols[i]].max() < np.finfo(np.float16).max:
                df[cols[i]] = df[cols[i]].astype(np.float16)
            elif df[cols[i]].min() > np.finfo(np.float32).min and df[cols[i]].max() < np.finfo(np.float32).max:
                df[cols[i]] = df[cols[i]].astype(np.float32)
            else:
                df[cols[i]] = df[cols[i]].astype(np.float64)
        elif t == np.object:
            if cols[i] == 'date':
                df[cols[i]] = pd.to_datetime(df[cols[i]], format='%Y-%m-%d')
            else:
                df[cols[i]] = df[cols[i]].astype('category')
    return df  

In [35]:
from sklearn.preprocessing import MinMaxScaler
minMax=MinMaxScaler()
df_train_test_features=downcast(df_train_test_features)

In [36]:
df_train_test_features

Unnamed: 0,flag,fragment_id,behavior_id,acc_all_time_sum,acc_all_time_mean,acc_all_time_std,acc_all_time_var,acc_all_time_max,acc_all_time_min,acc_all_time_median,...,acc_zg_fft_std,acc_zg_fft_sum,acc_zg_fft_entropy,acc_zg_fft_energy,acc_zg_fft_skew,acc_zg_fft_kurtosis,acc_zg_fft_max,acc_zg_fft_min,acc_zg_fft_maxind,acc_zg_fft_minind
0,test,0,-1,109.87500,1.894531,1.014648,1.030273,4.503906,0.424316,1.746094,...,7.332031,414.0,146.375,7467.500000,0.749023,-0.403320,33.25000,5.058594,23,10
1,test,0,-1,109.87500,1.894531,1.014648,1.030273,4.503906,0.424316,1.746094,...,7.332031,414.0,146.375,7467.500000,0.749023,-0.403320,33.25000,5.058594,23,10
2,test,0,-1,109.87500,1.894531,1.014648,1.030273,4.503906,0.424316,1.746094,...,7.332031,414.0,146.375,7467.500000,0.749023,-0.403320,33.25000,5.058594,23,10
3,test,0,-1,109.87500,1.894531,1.014648,1.030273,4.503906,0.424316,1.746094,...,7.332031,414.0,146.375,7467.500000,0.749023,-0.403320,33.25000,5.058594,23,10
4,test,0,-1,109.87500,1.894531,1.014648,1.030273,4.503906,0.424316,1.746094,...,7.332031,414.0,146.375,7467.500000,0.749023,-0.403320,33.25000,5.058594,23,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425354,train,7291,18,52.96875,0.913086,0.471680,0.222534,2.386719,0.099976,0.781250,...,3.730469,148.5,152.500,1163.880005,1.243164,1.142578,15.78125,0.794434,9,5
425355,train,7291,18,52.96875,0.913086,0.471680,0.222534,2.386719,0.099976,0.781250,...,3.730469,148.5,152.500,1163.880005,1.243164,1.142578,15.78125,0.794434,9,5
425356,train,7291,18,52.96875,0.913086,0.471680,0.222534,2.386719,0.099976,0.781250,...,3.730469,148.5,152.500,1163.880005,1.243164,1.142578,15.78125,0.794434,9,5
425357,train,7291,18,52.96875,0.913086,0.471680,0.222534,2.386719,0.099976,0.781250,...,3.730469,148.5,152.500,1163.880005,1.243164,1.142578,15.78125,0.794434,9,5


In [37]:
df_train_test_features.replace([np.inf, -np.inf], np.nan,inplace=True)

In [38]:
df_train_test_features.iloc[:,3:]=minMax.fit_transform(df_train_test_features.iloc[:,3:])#取所有行，除了前3列

In [40]:
df_train_test_features.head().append(df_train_test_features.tail())

Unnamed: 0,flag,fragment_id,behavior_id,acc_all_time_sum,acc_all_time_mean,acc_all_time_std,acc_all_time_var,acc_all_time_max,acc_all_time_min,acc_all_time_median,...,acc_zg_fft_std,acc_zg_fft_sum,acc_zg_fft_entropy,acc_zg_fft_energy,acc_zg_fft_skew,acc_zg_fft_kurtosis,acc_zg_fft_max,acc_zg_fft_min,acc_zg_fft_maxind,acc_zg_fft_minind
0,test,0,-1,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.2935,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
1,test,0,-1,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.2935,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
2,test,0,-1,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.2935,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
3,test,0,-1,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.2935,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
4,test,0,-1,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.2935,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
425354,train,7291,18,0.155334,0.14729,0.05839,0.00341,0.03869,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414
425355,train,7291,18,0.155334,0.14729,0.05839,0.00341,0.03869,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414
425356,train,7291,18,0.155334,0.14729,0.05839,0.00341,0.03869,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414
425357,train,7291,18,0.155334,0.14729,0.05839,0.00341,0.03869,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414
425358,train,7291,18,0.155334,0.14729,0.05839,0.00341,0.03869,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414


In [41]:
df_train_test_features[df_train_test_features.flag=='train']

Unnamed: 0,flag,fragment_id,behavior_id,acc_all_time_sum,acc_all_time_mean,acc_all_time_std,acc_all_time_var,acc_all_time_max,acc_all_time_min,acc_all_time_median,...,acc_zg_fft_std,acc_zg_fft_sum,acc_zg_fft_entropy,acc_zg_fft_energy,acc_zg_fft_skew,acc_zg_fft_kurtosis,acc_zg_fft_max,acc_zg_fft_min,acc_zg_fft_maxind,acc_zg_fft_minind
0,train,0,0,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.29350,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
1,train,0,0,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.29350,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
2,train,0,0,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.29350,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
3,train,0,0,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.29350,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
4,train,0,0,0.322214,0.305608,0.125604,0.01579,0.073012,0.156973,0.29350,...,0.134875,0.257757,0.316794,0.064389,0.323116,0.051268,0.120709,0.185318,0.793103,0.344828
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425354,train,7291,18,0.155334,0.147290,0.058390,0.00341,0.038690,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414
425355,train,7291,18,0.155334,0.147290,0.058390,0.00341,0.038690,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414
425356,train,7291,18,0.155334,0.147290,0.058390,0.00341,0.038690,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414
425357,train,7291,18,0.155334,0.147290,0.058390,0.00341,0.038690,0.036985,0.13132,...,0.068065,0.091035,0.379135,0.010032,0.407873,0.119295,0.056908,0.029103,0.310345,0.172414


# TRAIN AND TEST

In [39]:
# cols = [c for c in df_train_test_features.columns if c not in ['flag','fragment_id','behavior_id']]

In [30]:
from keras.utils import to_categorical

Using TensorFlow backend.


In [20]:
cols = ['acc_yg_time_max','acc_xg_time_energy','acc_yg_time_energy','acc_y_time_percent_75_25','acc_yg_time_range','acc_xg_fft_dc',\
 'acc_allg_time_mean','acc_allg_time_sum','acc_y_fft_max','acc_yg_time_percent_9','acc_xg_time_max','acc_yg_time_median','acc_zg_time_mean',\
 'acc_yg_fft_dc','acc_yg_time_sum','acc_allg_time_median','acc_zg_time_sum','acc_xg_time_percent_9','acc_yg_time_percent_75','acc_yg_time_mean',\
 'acc_allg_time_energy','acc_yg_time_std','acc_all_time_median','acc_xg_time_percent_75_25','acc_xg_time_mean','acc_zg_time_energy',\
 'acc_allg_time_percent_75_25','acc_x_time_percent_75_25','acc_all_time_sum','acc_y_fft_var','acc_x_time_zcr','acc_y_fft_maxind',\
 'acc_xg_time_sum','acc_zg_time_median','acc_y_time_sum','acc_y_time_zcr','acc_zg_time_percent_9','acc_xg_time_zero_big',\
 'acc_xg_time_median','acc_yg_time_zero_big','acc_zg_time_mcr','acc_yg_time_min','acc_y_fft_skew','acc_x_fft_max','acc_xg_time_zero_small',\
 'acc_x_time_sum','acc_yg_time_percent_1','acc_zg_time_percent_1','acc_x_time_mad','acc_allg_fft_maxind','acc_yg_time_zcr','acc_all_time_percent_9','acc_zg_fft_dc']

In [21]:
len(cols)

53

In [56]:
X = df_train_test_features[df_train_test_features['flag']=='train'][cols].values
y = df_train_test_features[df_train_test_features['flag']=='train']['behavior_id'].values
X_test = df_train_test_features[df_train_test_features['flag']=='test'][cols].values

In [53]:
y.shape

(7292, 19)

In [54]:
y

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

# CNN

In [60]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.callbacks import EarlyStopping

In [59]:
def get_base_model(X_train,Y_train):
    n_timesteps, n_features, n_outputs=X_train.shape[0],X_train.shape[1],Y_train.shape[0]
    
    n_filters=20
    k_size=10
    
    model=Sequential()
    model.add(Conv1D(filters=n_filters,kernel_size=k_size, activation='relu',input_shape=(n_timesteps,n_features)))
    model.add(Conv1D(filters=n_filters,kernel_size=k_size,activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(3))
    model.add(Flatten())
    model.add(Dense(n_outputs,activation='softmax'))
    
    print(model.summary())
    
    return model

# KFolds CNN

In [38]:
seed = 2021
folds = 5
kfold = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)

In [57]:
result=np.zeros((7500, 19))

for fold, (xx, yy) in enumerate(kfold.split(x, y)):
    early_stopping = EarlyStopping(monitor='val_acc',
                                   verbose=0,
                                   mode='max',
                                   patience=10)

    plateau = ReduceLROnPlateau(monitor="val_acc",
                                verbose=0,
                                mode='max',
                                factor=0.1,
                                patience=6)

    BATCH_SIZE=64
    EPOCHS=50
    
    model=get_base_model(x,y)
    
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.fit(x[xx],y[yy],
              batch_size=BATCH_SIZE,
              epochs=EPOCHS,
              validation_split=0.2,
              callbacks=[plateau, early_stopping],
              verbose=1)
    
    result += model.predict(X_test, verbose=0, batch_size=1024) / 5

5it [00:00, 343.90it/s]

!!!!!!!!!!!!!!!!!!!!!!!!
(5833, 53)
------------------------
(5833, 19)
!!!!!!!!!!!!!!!!!!!!!!!!
(5833, 53)
------------------------
(5833, 19)
!!!!!!!!!!!!!!!!!!!!!!!!
(5834, 53)
------------------------
(5834, 19)
!!!!!!!!!!!!!!!!!!!!!!!!
(5834, 53)
------------------------
(5834, 19)
!!!!!!!!!!!!!!!!!!!!!!!!
(5834, 53)
------------------------
(5834, 19)





# END CNN

In [None]:
sub=pd.read_csv('submit.csv')
sub.behavior_id = np.argmax(result, axis=1)
sub.to_csv('submit_.csv', index=False)