In [39]:
import catboost
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.callbacks import ReduceLROnPlateau
from scipy.signal import resample
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm

In [40]:
import time
import sys
import gc
import pickle

In [66]:
# Read the raw sensor recordings; the train file is read first, then the test file.
train, test = (pd.read_csv(csv_path)
               for csv_path in ('sensor_train.csv', 'sensor_test.csv'))

In [67]:
# Show the first rows of the training frame to check the columns that were loaded.
train.head()

Unnamed: 0,fragment_id,time_point,acc_x,acc_y,acc_z,acc_xg,acc_yg,acc_zg,behavior_id
0,0,27,0.3,-0.3,0.1,0.6,4.5,8.8,0
1,0,108,0.1,-0.0,-0.4,0.4,4.7,8.4,0
2,0,198,0.1,0.0,0.3,0.9,4.6,9.0,0
3,0,297,0.1,-0.1,-0.5,0.8,4.7,7.2,0
4,0,388,0.1,0.2,0.6,0.9,4.7,8.9,0


In [68]:
# Number of time steps every fragment is resampled to.
SEQ_LEN = 60
# Feature columns fed to the model, in the order they appear on the last axis.
FEATURE_COLS = ['acc_x', 'acc_y', 'acc_z', 'acc_xg', 'acc_yg', 'acc_zg', 'mod', 'modg']


def add_magnitudes(df):
    """Add the Euclidean norm of each accelerometer triple as columns 'mod' and 'modg'.

    The frame is modified in place (re-running simply overwrites the two
    columns) and is also returned for convenience.
    """
    df['mod'] = (df.acc_x ** 2 + df.acc_y ** 2 + df.acc_z ** 2) ** .5
    df['modg'] = (df.acc_xg ** 2 + df.acc_yg ** 2 + df.acc_zg ** 2) ** .5
    return df


def fragments_to_array(df, seq_len=SEQ_LEN):
    """Turn a long-format sensor frame into an array of shape (n_fragments, seq_len, n_features).

    Rows are grouped by ``fragment_id`` (ascending), so row ``k`` of the
    result belongs to the k-th smallest fragment id; for ids 0..N-1 that is
    the id itself.  Only the first ``seq_len`` rows of each fragment are
    used, and they are resampled to exactly ``seq_len`` steps with
    ``scipy.signal.resample``.

    The fragment count is taken from the data instead of being hard-coded,
    and a single groupby pass replaces one boolean mask over the whole frame
    per fragment.
    """
    groups = df.groupby('fragment_id', sort=True)
    out = np.zeros((groups.ngroups, seq_len, len(FEATURE_COLS)))
    for row, (_, frag) in enumerate(tqdm(groups, total=groups.ngroups)):
        frag = frag[:seq_len]
        # With a time vector, resample returns (values, new_times); keep the values.
        out[row] = resample(frag[FEATURE_COLS].values, seq_len,
                            np.array(frag.time_point))[0]
    return out


train = add_magnitudes(train)
test = add_magnitudes(test)

x = fragments_to_array(train)
t = fragments_to_array(test)


100%|██████████| 7292/7292 [00:12<00:00, 588.73it/s]
100%|██████████| 7500/7500 [00:13<00:00, 554.28it/s]


In [64]:
# Sanity check of the training tensor: (fragments, time steps, features).
x.shape

(7292, 60, 8)

In [48]:
# One label per fragment: the smallest behavior_id found among the rows of
# each fragment_id group (the result is indexed by fragment_id).
y = train.groupby('fragment_id')['behavior_id'].agg('min')
y.shape

(7292,)

In [50]:
from keras.utils import to_categorical

Using TensorFlow backend.


In [51]:
# One-hot encode the per-fragment labels into 19 columns (one per class id 0..18).
y_ = to_categorical(y, num_classes=19)

In [52]:
# Display the one-hot label matrix (numpy abbreviates the middle rows and columns).
y_

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [53]:
# Sanity check: one row per fragment, one column per class.
y_.shape

(7292, 19)

In [60]:
# Feature vector of the last resampled time step (index 59) of the first fragment.
# x is 3-D (fragment, time step, feature), so only three indices are valid;
# the previous four-index form raises IndexError on a fresh run.
# NOTE(review): the recorded output below has shape (8, 1), presumably from an
# earlier 4-D version of x - re-run the cell to refresh it.
x[0, 59, :]

array([[0.31415336],
       [0.09234453],
       [0.14216895],
       [0.80048101],
       [4.68897396],
       [8.68909755],
       [0.34551267],
       [9.90547915]])

# CNN

In [69]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.callbacks import EarlyStopping

In [None]:
def get_base_model(X_train, Y_train, n_filters=20, k_size=10):
    """Build the (uncompiled) 1-D CNN classifier and print its summary.

    Architecture: Conv1D -> Conv1D -> Dropout(0.5) -> MaxPooling1D(3) ->
    Flatten -> Dense(softmax).

    Parameters
    ----------
    X_train : array-like with ``.shape``; only ``shape[1]`` (time steps) and
        ``shape[2]`` (features) are read, to define the input shape.
    Y_train : array-like with ``.shape``; only ``shape[1]`` (number of output
        units) is read.
    n_filters : number of filters in each convolution (default 20).
    k_size : kernel size of each convolution (default 10).

    Returns
    -------
    The assembled ``Sequential`` model; the caller is responsible for
    calling ``compile`` on it.
    """
    n_timesteps, n_features = X_train.shape[1], X_train.shape[2]
    n_outputs = Y_train.shape[1]

    model = Sequential()
    model.add(Conv1D(filters=n_filters, kernel_size=k_size, activation='relu',
                     input_shape=(n_timesteps, n_features)))
    model.add(Conv1D(filters=n_filters, kernel_size=k_size, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(3))
    model.add(Flatten())
    model.add(Dense(n_outputs, activation='softmax'))

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    model.summary()

    return model

In [None]:
# 5-fold stratified split; the fixed random_state makes the shuffled folds
# identical on every Restart & Run All.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [None]:
# Hyper-parameters shared by every fold.
BATCH_SIZE = 64
EPOCHS = 50

n_splits = kfold.get_n_splits()
# Accumulator for the fold-averaged class probabilities of the test fragments.
result = np.zeros((t.shape[0], y_.shape[1]))

for fold, (train_idx, val_idx) in enumerate(kfold.split(x, y)):
    print('fold %d/%d' % (fold + 1, n_splits))

    early_stopping = EarlyStopping(monitor='val_acc',
                                   verbose=0,
                                   mode='max',
                                   patience=10)

    plateau = ReduceLROnPlateau(monitor='val_acc',
                                verbose=0,
                                mode='max',
                                factor=0.1,
                                patience=6)

    model = get_base_model(x[train_idx], y_[train_idx])

    # Compiling with the short name 'acc' keeps the logged key 'val_acc' in
    # step with the callbacks' monitor above.
    # NOTE(review): with metrics=['accuracy'] some Keras releases log
    # 'val_accuracy' instead - confirm against the installed version.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

    # Train on this fold's training rows and validate on its held-out rows.
    # Previously the split indices were ignored: every fold fitted the full
    # data with validation_split=0.2, so all five models saw the same setup.
    model.fit(x[train_idx], y_[train_idx],
              batch_size=BATCH_SIZE,
              epochs=EPOCHS,
              validation_data=(x[val_idx], y_[val_idx]),
              callbacks=[plateau, early_stopping],
              verbose=1)

    # Average the predicted probabilities over the folds.
    result += model.predict(t, verbose=0, batch_size=1024) / n_splits

In [None]:
# Fill the submission template with the most probable class per test fragment.
# NOTE(review): assumes the rows of submit.csv are in the same fragment order
# as the rows of `result` - confirm against the template.
sub = pd.read_csv('submit.csv')
# Item assignment always writes a real column; attribute assignment
# (sub.behavior_id = ...) only does so when the column already exists.
sub['behavior_id'] = np.argmax(result, axis=1)
sub.to_csv('submit_.csv', index=False)