# 导入包

In [1]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from scipy.stats import skew
from scipy.stats import kurtosis
from scipy.stats import mode
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import StratifiedKFold
import warnings 
warnings.filterwarnings("ignore")

# 导入数据

In [2]:
nrows = None

df_train = pd.read_csv('data/sensor_train.csv',sep=',',nrows=nrows)
df_test = pd.read_csv('data/sensor_test.csv',sep=',',nrows=nrows)
df_submit = pd.read_csv('data/提交结果示例.csv',sep=',',nrows=nrows)

# 合并数据

In [3]:
df_train['flag'] = 'train'
df_test['flag'] = 'test'
df_test['behavior_id'] = -1
df_train_test = pd.concat([df_train, df_test])

In [4]:
df_train_test = df_train_test.sort_values(['flag','fragment_id','time_point'])

In [5]:
df_train_test['acc_all'] = (df_train_test['acc_x'] ** 2 + df_train_test['acc_y'] ** 2 + df_train_test['acc_z'] ** 2) ** 0.5
df_train_test['acc_allg'] = (df_train_test['acc_xg'] ** 2 + df_train_test['acc_yg'] ** 2 + df_train_test['acc_zg'] ** 2) ** 0.5

# 查看字段和数据

In [6]:
df_train_test.columns

Index(['fragment_id', 'time_point', 'acc_x', 'acc_y', 'acc_z', 'acc_xg',
       'acc_yg', 'acc_zg', 'behavior_id', 'flag', 'acc_all', 'acc_allg'],
      dtype='object')

In [7]:
df_train_test.head()

Unnamed: 0,fragment_id,time_point,acc_x,acc_y,acc_z,acc_xg,acc_yg,acc_zg,behavior_id,flag,acc_all,acc_allg
0,0,71,0.2,1.0,0.6,0.2,5.3,9.2,-1,test,1.183216,10.619322
1,0,150,0.0,1.0,-0.7,0.2,6.0,8.2,-1,test,1.220656,10.162677
2,0,244,-0.2,0.8,-2.4,-0.4,5.3,7.6,-1,test,2.537716,9.274158
3,0,326,2.3,-0.4,-1.6,2.5,4.5,3.4,-1,test,2.830194,6.169279
4,0,409,-0.3,-1.4,3.3,0.2,3.5,12.4,-1,test,3.597221,12.886039


# 数据聚合

In [8]:
agg_func = lambda x: list(x)
map_agg_func = {    
    'time_point' : agg_func,
    
    'acc_all' : agg_func,
    'acc_allg' : agg_func,
    
    'acc_x' : agg_func,
    'acc_y' : agg_func,
    'acc_z' : agg_func,
    
    'acc_xg' : agg_func,
    'acc_yg' : agg_func,
    'acc_zg' : agg_func
}
df_train_test_list = df_train_test.groupby(['flag','fragment_id','behavior_id']).agg(map_agg_func).reset_index()

In [9]:
df_train_test_list.head()

Unnamed: 0,flag,fragment_id,behavior_id,time_point,acc_all,acc_allg,acc_x,acc_y,acc_z,acc_xg,acc_yg,acc_zg
0,test,0,-1,"[71, 150, 244, 326, 409, 495, 579, 665, 755, 8...","[1.1832159566199232, 1.2206555615733703, 2.537...","[10.619322012256713, 10.16267681273, 9.2741576...","[0.2, 0.0, -0.2, 2.3, -0.3, 0.3, -0.8, 0.1, -0...","[1.0, 1.0, 0.8, -0.4, -1.4, -1.1, 0.0, 0.6, 0....","[0.6, -0.7, -2.4, -1.6, 3.3, 3.2, -0.7, -0.1, ...","[0.2, 0.2, -0.4, 2.5, 0.2, 0.5, -0.3, 0.6, 0.0...","[5.3, 6.0, 5.3, 4.5, 3.5, 3.1, 4.1, 4.6, 4.6, ...","[9.2, 8.2, 7.6, 3.4, 12.4, 9.8, 8.5, 8.7, 8.1,..."
1,test,1,-1,"[151, 232, 318, 406, 493, 581, 661, 749, 833, ...","[0.1, 0.1, 0.0, 0.223606797749979, 0.316227766...","[9.629122493768579, 9.525229656024049, 9.56504...","[0.1, 0.0, 0.0, 0.2, 0.1, 0.0, 0.2, 0.1, 0.0, ...","[0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, -0.1, -0....","[0.0, 0.1, 0.0, 0.0, -0.3, 0.0, -0.1, 0.0, -0....","[-1.0, -0.9, -1.2, -0.8, -0.8, -1.0, -1.0, -0....","[4.6, 4.4, 4.6, 4.4, 4.4, 4.6, 4.5, 4.4, 4.6, ...","[8.4, 8.4, 8.3, 8.2, 8.2, 8.4, 8.3, 8.3, 8.6, ..."
2,test,2,-1,"[46, 135, 233, 315, 397, 483, 574, 659, 751, 8...","[0.1, 0.31622776601683794, 0.5, 0.583095189484...","[10.00299955013495, 9.7205966895042, 9.4403389...","[0.0, 0.3, 0.3, 0.3, 0.4, -0.1, 0.1, 0.4, -0.5...","[0.0, 0.0, 0.0, 0.3, 0.0, -0.1, 0.0, -0.2, 0.0...","[0.1, -0.1, -0.4, 0.4, 0.5, 0.0, 0.0, -0.2, -0...","[0.9, 1.2, 1.2, 1.3, 1.2, 0.9, 1.2, 1.4, 0.3, ...","[3.3, 3.2, 3.2, 3.2, 2.9, 2.8, 2.9, 2.9, 2.6, ...","[9.4, 9.1, 8.8, 9.6, 9.7, 9.4, 9.2, 9.1, 9.0, ..."
3,test,3,-1,"[91, 172, 264, 345, 436, 516, 618, 701, 782, 8...","[0.28284271247461906, 0.2, 0.0, 0.282842712474...","[9.642095207992918, 9.39095309327014, 9.404254...","[0.0, 0.0, 0.0, -0.2, 0.0, 0.0, 0.0, 0.0, 0.0,...","[-0.2, -0.2, 0.0, 0.0, 0.0, -0.1, 0.0, -0.1, 0...","[0.2, 0.0, 0.0, -0.2, -0.2, -0.3, 0.2, 0.1, -0...","[0.2, 0.3, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.2, ...","[5.8, 5.9, 5.8, 5.9, 5.9, 5.8, 5.9, 5.9, 5.7, ...","[7.7, 7.3, 7.4, 7.5, 7.3, 7.1, 7.5, 7.6, 7.3, ..."
4,test,4,-1,"[38, 112, 205, 282, 364, 451, 530, 617, 700, 7...","[0.7348469228349533, 0.31622776601683794, 1.03...","[10.431203190428226, 9.633794683301073, 8.9386...","[0.2, 0.1, -0.1, -0.4, 2.3, 0.7, -0.9, 0.4, -0...","[-0.1, 0.0, 0.5, 1.0, -0.3, -0.5, -0.5, -1.3, ...","[0.7, -0.3, -0.9, 0.3, 1.7, 0.3, -1.4, 1.4, -1...","[3.7, 3.4, 3.0, 2.3, 5.6, 3.3, 2.3, 3.2, 1.9, ...","[4.6, 5.0, 5.1, 6.1, 5.0, 4.9, 5.4, 4.7, 5.4, ...","[8.6, 7.5, 6.7, 8.3, 9.1, 8.4, 5.9, 8.8, 6.4, ..."


# 抽取特征

In [10]:
map_features_fun = {
    # 时域
    'time_sum' : lambda x:np.sum(x),
    'time_mean' : lambda x: np.mean(x),
    'time_std' : lambda x: np.std(x),
    'time_var' : lambda x: np.var(x),
    'time_max' : lambda x: np.max(x),
    'time_min' : lambda x: np.min(x),
    'time_median' : lambda x: np.median(x),
    'time_energy' : lambda x: np.sum(np.power(x,2)),
    'time_mad' : lambda x: np.mean(np.absolute(x - np.mean(x))),
    'time_percent_9' : lambda x: np.percentile(x, 0.9),
    'time_percent_75' : lambda x: np.percentile(x, 0.75),
    'time_percent_25' : lambda x: np.percentile(x, 0.25),
    'time_percent_1' : lambda x: np.percentile(x, 0.1),
    'time_percent_75_25' : lambda x: np.percentile(x,75)-np.percentile(x,25),
    'time_range' : lambda x:np.max(x)-np.min(x),
    'time_zcr': lambda x: (np.diff(np.sign(x))!= 0).sum(),
    'time_mcr' : lambda x: (np.diff(np.sign(x-np.mean(x)))!= 0).sum(),
    'time_minind' : lambda x: np.argmin(x),
    'time_maxind' : lambda x: np.argmax(x),
    'time_skew' : lambda x: skew(x),
    'time_kurtosis' : lambda x: kurtosis(x),
    'time_zero_big' : lambda x: np.sum(np.sign(x)>0),
    'time_zero_small' : lambda x: np.sum(np.sign(x)<0),
    'time_len' : lambda x: np.size(x),
    
    # 频域
    'fft_dc' : lambda x: np.abs(np.fft.fft(x))[0],
    'fft_mean' : lambda x: np.mean(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_var' : lambda x: np.var(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_std' : lambda x: np.std(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_sum' : lambda x: np.sum(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_entropy' : lambda x: -1.0 * np.sum(np.log2(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]/np.sum(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]))),
    'fft_energy' : lambda x: np.sum(np.power(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1],2)),
    'fft_skew' : lambda x: skew(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_kurtosis' : lambda x: kurtosis(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_max' : lambda x: np.max(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_min' : lambda x: np.min(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_maxind' : lambda x: np.argmax(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1]),
    'fft_minind' : lambda x: np.argmin(np.abs(np.fft.fft(x))[1:int(len(x) / 2)+1])
}

In [11]:
df_train_test_features = df_train_test_list[['flag','fragment_id','behavior_id']]
for col in ['acc_all','acc_allg','acc_x','acc_y','acc_z','acc_xg','acc_yg','acc_zg']:
    for f_name, f_fun in tqdm(map_features_fun.items()):
        df_train_test_features[col + '_' + f_name] = df_train_test_list[col].map(f_fun)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:22<00:00,  1.63it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:22<00:00,  1.63it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:22<00:00,  1.64it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:22<00:00,  1.66it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:22<00:00,  1.68it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:22<00:00,  1.68it/s]
100%|███████████████████████████████████

In [12]:
df_train_test_features.head()

Unnamed: 0,flag,fragment_id,behavior_id,acc_all_time_sum,acc_all_time_mean,acc_all_time_std,acc_all_time_var,acc_all_time_max,acc_all_time_min,acc_all_time_median,...,acc_zg_fft_std,acc_zg_fft_sum,acc_zg_fft_entropy,acc_zg_fft_energy,acc_zg_fft_skew,acc_zg_fft_kurtosis,acc_zg_fft_max,acc_zg_fft_min,acc_zg_fft_maxind,acc_zg_fft_minind
0,test,0,-1,109.889781,1.894651,1.01495,1.030124,4.505552,0.424264,1.746425,...,7.333558,413.917412,146.342468,7467.5,0.748835,-0.40344,33.239467,5.058632,23,10
1,test,1,-1,9.328892,0.169616,0.102484,0.010503,0.412311,0.0,0.2,...,0.473385,26.632236,134.339388,32.32,0.458802,-0.111296,2.047783,0.141423,11,22
2,test,2,-1,21.264674,0.379726,0.368036,0.135451,2.083267,0.0,0.263896,...,1.472473,94.856892,139.783971,382.06,-0.075707,-0.781631,6.476364,0.707192,15,18
3,test,3,-1,10.755163,0.188687,0.135766,0.018432,0.519615,0.0,0.2,...,0.519421,33.706355,138.592552,48.13,0.902622,1.222909,2.783088,0.263222,17,3
4,test,4,-1,76.585242,1.276421,0.615156,0.378417,2.875761,0.223607,1.185515,...,3.033211,198.376073,152.614007,1587.78,0.86819,1.960154,16.666859,0.852075,17,21


# 模型参数

In [13]:
params = {
          'application': 'multiclass',
          'num_class': 19,
          'boosting': 'gbdt',
          #'metric': ['multi_logloss','multi_error'],
          'num_leaves': 63,
          'learning_rate': 0.1,
          'bagging_fraction': 0.8,
          'feature_fraction': 0.7,
          'min_split_gain': 0.01,
          'min_child_samples': 120,
          'min_child_weight': 0.01,
          'lambda_l2': 0.05,
          'verbosity': -1,
          'data_random_seed': 2020
         }  

# 获取训练和测试数据

In [14]:
cols = [c for c in df_train_test_features.columns if c not in ['flag','fragment_id','behavior_id']]

In [15]:
# cols = ['acc_yg_time_max','acc_xg_time_energy','acc_yg_time_energy','acc_y_time_percent_75_25','acc_yg_time_range','acc_xg_fft_dc',\
#  'acc_allg_time_mean','acc_allg_time_sum','acc_y_fft_max','acc_yg_time_percent_9','acc_xg_time_max','acc_yg_time_median','acc_zg_time_mean',\
#  'acc_yg_fft_dc','acc_yg_time_sum','acc_allg_time_median','acc_zg_time_sum','acc_xg_time_percent_9','acc_yg_time_percent_75','acc_yg_time_mean',\
#  'acc_allg_time_energy','acc_yg_time_std','acc_all_time_median','acc_xg_time_percent_75_25','acc_xg_time_mean','acc_zg_time_energy',\
#  'acc_allg_time_percent_75_25','acc_x_time_percent_75_25','acc_all_time_sum','acc_y_fft_var','acc_x_time_zcr','acc_y_fft_maxind',\
#  'acc_xg_time_sum','acc_zg_time_median','acc_y_time_sum','acc_y_time_zcr','acc_zg_time_percent_9','acc_xg_time_zero_big',\
#  'acc_xg_time_median','acc_yg_time_zero_big','acc_zg_time_mcr','acc_yg_time_min','acc_y_fft_skew','acc_x_fft_max','acc_xg_time_zero_small',\
#  'acc_x_time_sum','acc_yg_time_percent_1','acc_zg_time_percent_1','acc_x_time_mad','acc_allg_fft_maxind','acc_yg_time_zcr','acc_all_time_percent_9','acc_zg_fft_dc']

In [16]:
X = df_train_test_features[df_train_test_features['flag']=='train'][cols].values
y = df_train_test_features[df_train_test_features['flag']=='train']['behavior_id'].values
X_test = df_train_test_features[df_train_test_features['flag']=='test'][cols].values

In [17]:
seed = 2020
folds = 5
kfold = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)

In [18]:
df_train_stacking = pd.DataFrame(np.zeros((X.shape[0],19)))
df_test_stacking = pd.DataFrame(np.zeros((X_test.shape[0],19)))

# 训练模型

In [19]:
for train_index, val_index in tqdm(kfold.split(X, y)):

    print('--------------- begin ---------------')
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]
    
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val)
    
    watchlist = [lgb_train, lgb_val]
    model = lgb.train(params,
                      train_set = lgb_train, 
                      num_boost_round = 5000,
                      valid_sets = watchlist,
                      verbose_eval = 30,
                      early_stopping_rounds = 80)
    
    
    X_val_predict = model.predict(X_val)
    X_test_predict = model.predict(X_test)
    
    df_train_stacking.loc[val_index,:] = X_val_predict
    df_test_stacking[:] += X_test_predict / folds

0it [00:00, ?it/s]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.76677	valid_1's multi_logloss: 1.16278
[60]	training's multi_logloss: 0.329071	valid_1's multi_logloss: 0.919977
[90]	training's multi_logloss: 0.144444	valid_1's multi_logloss: 0.843266
[120]	training's multi_logloss: 0.0612204	valid_1's multi_logloss: 0.815793
[150]	training's multi_logloss: 0.0260105	valid_1's multi_logloss: 0.814918
[180]	training's multi_logloss: 0.0115914	valid_1's multi_logloss: 0.824688
[210]	training's multi_logloss: 0.00624876	valid_1's multi_logloss: 0.832485
Early stopping, best iteration is:
[137]	training's multi_logloss: 0.0375044	valid_1's multi_logloss: 0.811415


1it [00:19, 19.62s/it]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.763686	valid_1's multi_logloss: 1.18034
[60]	training's multi_logloss: 0.326748	valid_1's multi_logloss: 0.941417
[90]	training's multi_logloss: 0.141441	valid_1's multi_logloss: 0.866489
[120]	training's multi_logloss: 0.0599412	valid_1's multi_logloss: 0.840212
[150]	training's multi_logloss: 0.0252164	valid_1's multi_logloss: 0.843292
[180]	training's multi_logloss: 0.0112019	valid_1's multi_logloss: 0.854115
[210]	training's multi_logloss: 0.00612213	valid_1's multi_logloss: 0.86391
Early stopping, best iteration is:
[133]	training's multi_logloss: 0.0410749	valid_1's multi_logloss: 0.837221


2it [00:38, 19.35s/it]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.769102	valid_1's multi_logloss: 1.14692
[60]	training's multi_logloss: 0.331577	valid_1's multi_logloss: 0.912118
[90]	training's multi_logloss: 0.144118	valid_1's multi_logloss: 0.834738
[120]	training's multi_logloss: 0.0603231	valid_1's multi_logloss: 0.802729
[150]	training's multi_logloss: 0.0256475	valid_1's multi_logloss: 0.79923
[180]	training's multi_logloss: 0.0114695	valid_1's multi_logloss: 0.807545
[210]	training's multi_logloss: 0.00628646	valid_1's multi_logloss: 0.817362
Early stopping, best iteration is:
[143]	training's multi_logloss: 0.031273	valid_1's multi_logloss: 0.798945


3it [00:58, 19.56s/it]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.763589	valid_1's multi_logloss: 1.12729
[60]	training's multi_logloss: 0.328908	valid_1's multi_logloss: 0.885574
[90]	training's multi_logloss: 0.142988	valid_1's multi_logloss: 0.805591
[120]	training's multi_logloss: 0.060292	valid_1's multi_logloss: 0.775278
[150]	training's multi_logloss: 0.0256646	valid_1's multi_logloss: 0.770893
[180]	training's multi_logloss: 0.0115178	valid_1's multi_logloss: 0.774828
[210]	training's multi_logloss: 0.00630935	valid_1's multi_logloss: 0.785052
Early stopping, best iteration is:
[138]	training's multi_logloss: 0.0360024	valid_1's multi_logloss: 0.770265


4it [01:17, 19.37s/it]

--------------- begin ---------------
Training until validation scores don't improve for 80 rounds
[30]	training's multi_logloss: 0.762702	valid_1's multi_logloss: 1.13731
[60]	training's multi_logloss: 0.325538	valid_1's multi_logloss: 0.902051
[90]	training's multi_logloss: 0.140084	valid_1's multi_logloss: 0.824046
[120]	training's multi_logloss: 0.0585444	valid_1's multi_logloss: 0.796148
[150]	training's multi_logloss: 0.0246705	valid_1's multi_logloss: 0.792106
[180]	training's multi_logloss: 0.0111031	valid_1's multi_logloss: 0.802726
[210]	training's multi_logloss: 0.00613656	valid_1's multi_logloss: 0.816261
Early stopping, best iteration is:
[149]	training's multi_logloss: 0.0253497	valid_1's multi_logloss: 0.791796


5it [01:35, 19.12s/it]


# 查看结果

In [20]:
df_test_stacking.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,0.000531,0.002114,0.000187,0.000353,0.000669,0.000414,0.000591,0.000341,9.4e-05,0.000425,0.000478,0.001286,0.603442,0.353553,0.001931,0.009775,0.00022,0.000613,0.022982
1,0.032935,0.044421,0.000147,0.789258,0.003088,0.001423,0.117586,0.000372,0.000123,0.003642,0.000403,0.003372,0.000334,0.000117,0.000165,0.000434,9.7e-05,0.000244,0.001839
2,0.002602,0.378148,0.001776,0.166792,0.001681,0.011488,0.260321,0.002623,0.001237,0.033703,0.013253,0.08051,0.001413,0.005118,0.000332,0.035878,0.000699,0.000597,0.001829
3,0.07436,0.478991,0.000239,0.06641,0.00047,0.002221,0.347786,0.000649,0.000257,0.017134,0.000248,0.005302,0.000464,0.000542,0.000116,0.00119,0.000199,0.002564,0.000857
4,0.000977,0.00082,0.000233,0.00069,0.001141,0.000728,0.00034,0.00079,0.000611,0.000216,0.00034,0.000541,0.006021,0.284782,0.000248,0.007963,0.002175,0.005824,0.685562


# 模型得分和保存

In [21]:
def acc_combo(y, y_pred):
    # 数值ID与行为编码的对应关系
    mapping = {0: 'A_0', 1: 'A_1', 2: 'A_2', 3: 'A_3', 
        4: 'D_4', 5: 'A_5', 6: 'B_1',7: 'B_5', 
        8: 'B_2', 9: 'B_3', 10: 'B_0', 11: 'A_6', 
        12: 'C_1', 13: 'C_3', 14: 'C_0', 15: 'B_6', 
        16: 'C_2', 17: 'C_5', 18: 'C_6'}
    # 将行为ID转为编码
    code_y, code_y_pred = mapping[y], mapping[y_pred]
    if code_y == code_y_pred: #编码完全相同得分1.0
        return 1.0
    elif code_y.split("_")[0] == code_y_pred.split("_")[0]: #编码仅字母部分相同得分1.0/7
        return 1.0/7
    elif code_y.split("_")[1] == code_y_pred.split("_")[1]: #编码仅数字部分相同得分1.0/3
        return 1.0/3
    else:
        return 0.0

In [22]:
labels = np.argmax(df_test_stacking.values, axis=1)
pred_y = np.argmax(df_train_stacking.values, axis=1)


acc_scores = round(accuracy_score(y, pred_y), 5)
acc_combo_scores = round(sum(acc_combo(y_true, y_pred) for y_true, y_pred in zip(y, pred_y)) / len(list(y)),5)

print('--------')
print(' acc : ', acc_scores, 'acc_combo : ', acc_combo_scores)

df_out = df_train_test_features[df_train_test_features['flag']=='test'][['fragment_id']]
df_out['behavior_id'] = labels
df_out.to_csv('./submit_lgb_%.5f_%.5f.csv' % (acc_scores, acc_combo_scores), index=False)

--------
 acc :  0.73409 acc_combo :  0.77408
