In [1]:
import json, time, os, gc, math
import datetime
from tqdm import tqdm 
import numpy as np 
import pandas as pd 
import tensorflow as tf 
import keras
import keras.backend as K  
from keras.layers import *
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import to_categorical, multi_gpu_model
from keras.callbacks import *
from keras.preprocessing.sequence import pad_sequences
# from gensim.models import word2vec 
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, KFold, cross_val_score, StratifiedKFold
# from utils.opt import AdamW, RAdam
import logging

# Expose GPUs 0 and 1 to this process; `gpus` keeps the device ids as a list
# of strings so later cells can count them.
gpus = "0,1".split(",")
os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(gpus)

Using TensorFlow backend.


In [3]:
# !pip install keras==2.2.4
# gpus

In [2]:
!free -m
!nvidia-smi

              total        used        free      shared  buff/cache   available
Mem:         112298        2650       79825          18       29821      109471
Swap:             0           0           0
Sun Jul 19 02:37:16 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P40           On   | 00000000:00:08.0 Off |                    0 |
| N/A   34C    P8     9W / 250W |      0MiB / 22919MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P40           On   | 00000000:00:09.0 Off |                    0 |
| N/A   35C    P8    11W / 250W |      0MiB 

In [3]:
# Run configuration shared by the cells below.
maxlen = 150  # passed to get_model() as `input_len` (length of every id sequence)
w2v_size = 300  # passed to get_model() as `emb_size`
batch_size = 256  # NOTE(review): not referenced by the visible fit()/predict() calls, which hard-code their own batch sizes
random_seed = 2020  # NOTE(review): not referenced by the visible code; the KFold split uses its own literal random_state
drop_p = 0.20  # rate of the Dropout applied after the BiLSTM in get_model()
sp_drop = 0.20  # rate of the SpatialDropout1D applied to the embeddings in get_model()
l2_rate = 1e-5  # factor handed to regularizers.l2() to build `kern_reg`
model_name = 'bilstm_attr_v1'  # part of the log file name and the first entry written to the log

In [4]:
%%time

file_name = "data/log/"+datetime.date.today().strftime('%m%d')+"_{}.log".format(model_name)
def write_log(w):
    """Print a time-stamped message and append it to the run log.

    The entry has the form ``HH:MM:SS : <w>`` and is followed in the file by
    a line of 80 dashes. The destination is the module-level ``file_name``,
    opened in append mode on every call.

    Args:
        w: Object to log; rendered through ``str.format``.
    """
    stamp = datetime.datetime.now().strftime('%H:%M:%S')
    info = "{} : {}\n".format(stamp, w)
    print(info)
    separator = "-"*80+"\n"
    with open(file_name, 'a') as log_file:
        log_file.write(info + separator)
write_log(model_name)

# Embedding matrices, one per id field; get_model() installs them as the
# weights of non-trainable Embedding layers.
creative_id_emb_matrix = np.load("data/cid_weight_300d.npy")
ad_id_emb_matrix = np.load("data/adid_weight_300d.npy")
product_id_emb_matrix = np.load("data/pid_weight_300d.npy")
advertiser_id_emb_matrix = np.load("data/ader_weight_300d.npy")

# Training inputs, one array per id field; the fold loops index them by row.
train_cid = np.load("data/nn_data/train_cid.npy")
train_ader = np.load("data/nn_data/train_ader.npy")
train_adid = np.load("data/nn_data/train_adid.npy")
train_pid = np.load("data/nn_data/train_pid.npy")
train_pcat = np.load("data/nn_data/train_pcat.npy")
train_ind = np.load("data/nn_data/train_ind.npy")

# Training labels (age); one-hot encoded in a later cell before training.
train_age = np.load("data/nn_data/train_age.npy")
write_log("data loaded")

02:37:23 : bilstm_attr_v1

02:39:37 : data loaded

CPU times: user 52 ms, sys: 14 s, total: 14 s
Wall time: 2min 13s


In [5]:
%%time

# Keep every id sequence and the labels as int32.
# copy=False turns the cast into a no-op for arrays that already are int32,
# so re-running this cell does not allocate a second copy of each large array.
train_cid = train_cid.astype(np.int32, copy=False)
train_ader = train_ader.astype(np.int32, copy=False)
train_adid = train_adid.astype(np.int32, copy=False)
train_pid = train_pid.astype(np.int32, copy=False)
train_pcat = train_pcat.astype(np.int32, copy=False)
train_ind = train_ind.astype(np.int32, copy=False)

train_age = train_age.astype(np.int32, copy=False)

CPU times: user 1.96 s, sys: 4.68 s, total: 6.64 s
Wall time: 6.66 s


In [6]:
# train_cid = train_cid[:, :140]
# train_ader = train_ader[:, :140]
# train_adid = train_adid[:, :140]
# train_pid = train_pid[:, :140]
# train_pcat = train_pcat[:, :140]
# train_ind = train_ind[:, :140]

In [10]:
!free -m

              total        used        free      shared  buff/cache   available
Mem:         112298       22418       51019          18       38860       89703
Swap:             0           0           0


In [7]:
# Shuffled 5-fold split over row indices, materialised as a list so that both
# fold loops below iterate over exactly the same folds.
# NOTE(review): this is plain KFold (StratifiedKFold is imported but unused), so the
# labels passed as the second argument presumably do not influence the folds -- confirm.
# NOTE(review): random_state is a literal here rather than `random_seed` from the config cell.
splits = list(KFold(n_splits=5, shuffle=True, random_state=2020666).split(train_cid, train_age))

In [8]:
# Show the train / validation row indices of every fold.
for trn_idx, val_idx in splits:
    print(trn_idx, val_idx)

[      1       4       5 ... 2999996 2999997 2999999] [      0       2       3 ... 2999987 2999989 2999998]
[      0       1       2 ... 2999997 2999998 2999999] [      5       8      11 ... 2999977 2999979 2999981]
[      0       1       2 ... 2999994 2999995 2999998] [      6       7       9 ... 2999996 2999997 2999999]
[      0       1       2 ... 2999997 2999998 2999999] [     15      25      26 ... 2999993 2999994 2999995]
[      0       2       3 ... 2999997 2999998 2999999] [      1       4      10 ... 2999983 2999986 2999990]


In [8]:
kern_reg = regularizers.l2(l2_rate)
class Attention(Layer):
    """Multi-head scaled dot-product attention layer.

    The layer is called on a list ``[q, k, v]`` which may be extended with a
    value mask and then a query mask: ``[q, k, v, v_mask, q_mask]``. Each of
    q, k and v is projected to ``nb_head * size_per_head`` features, split
    into heads, and the values are combined with
    ``softmax(q . k / sqrt(size_per_head))`` weights.

    Args:
        nb_head: Number of attention heads.
        size_per_head: Feature size of each head.
        **kwargs: Forwarded to ``Layer.__init__``.
    """
    def __init__(self, nb_head, size_per_head, **kwargs):
        self.nb_head = nb_head
        self.size_per_head = size_per_head
        self.out_dim = nb_head * size_per_head
        super(Attention, self).__init__(**kwargs)
    def build(self, input_shape):
        """Create the q/k/v projection kernels.

        Args:
            input_shape: List of shapes; entries 0, 1 and 2 belong to q, k and
                v, and their last dimension fixes each kernel's input size.
        """
        super(Attention, self).build(input_shape)
        q_in_dim = input_shape[0][-1]
        k_in_dim = input_shape[1][-1]
        v_in_dim = input_shape[2][-1]
        self.q_kernel = self.add_weight(name='q_kernel',
                                        shape=(q_in_dim, self.out_dim),
                                        regularizer= kern_reg,
                                        initializer='glorot_normal')
        self.k_kernel = self.add_weight(name='k_kernel',
                                        shape=(k_in_dim, self.out_dim),
                                        regularizer= kern_reg,
                                        initializer='glorot_normal')
        # Registered as 'v_kernel' (previously mislabelled 'w_kernel') so the
        # weight name matches the attribute it is stored in.
        self.v_kernel = self.add_weight(name='v_kernel',
                                        shape=(v_in_dim, self.out_dim),
                                        regularizer= kern_reg,
                                        initializer='glorot_normal')
    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = super(Attention, self).get_config()
        config.update({'nb_head': self.nb_head,
                       'size_per_head': self.size_per_head})
        return config
    def mask(self, x, mask, mode='mul'):
        """Apply ``mask`` to ``x``.

        Trailing axes are appended to ``mask`` until it has the same rank as
        ``x``.

        Args:
            x: Tensor to be masked.
            mask: Mask tensor, or None to return ``x`` unchanged.
                Presumably 1 marks valid positions and 0 padded ones -- confirm
                against the caller.
            mode: ``'mul'`` multiplies ``x`` by the mask; any other value
                subtracts ``(1 - mask) * 1e10``, which is how positions are
                suppressed before the softmax.

        Returns:
            The masked tensor.
        """
        if mask is None:
            return x
        else:
            for _ in range(K.ndim(x) - K.ndim(mask)):
                mask = K.expand_dims(mask, K.ndim(mask))
            if mode == 'mul':
                return x * mask
            else:
                return x - (1 - mask) * 1e10
    def call(self, inputs):
        """Compute the attention output for ``[q, k, v(, v_mask(, q_mask))]``."""
        q, k, v = inputs[:3]
        v_mask, q_mask = None, None
        if len(inputs) > 3:
            v_mask = inputs[3]
            if len(inputs) > 4:
                q_mask = inputs[4]
        # Linear projections
        qw = K.dot(q, self.q_kernel)
        kw = K.dot(k, self.k_kernel)
        vw = K.dot(v, self.v_kernel)
        # Split the projected features into heads
        qw = K.reshape(qw, (-1, K.shape(qw)[1], self.nb_head, self.size_per_head))
        kw = K.reshape(kw, (-1, K.shape(kw)[1], self.nb_head, self.size_per_head))
        vw = K.reshape(vw, (-1, K.shape(vw)[1], self.nb_head, self.size_per_head))
        # Move the head axis in front of the sequence axis
        qw = K.permute_dimensions(qw, (0, 2, 1, 3))
        kw = K.permute_dimensions(kw, (0, 2, 1, 3))
        vw = K.permute_dimensions(vw, (0, 2, 1, 3))
        # Attention scores, scaled by sqrt(size_per_head)
        a = K.batch_dot(qw, kw, [3, 3]) / self.size_per_head**0.5
        # Swap axes 1 and 3 so the value mask lines up with axis 1, apply it
        # additively, then swap back before the softmax.
        a = K.permute_dimensions(a, (0, 3, 2, 1))
        a = self.mask(a, v_mask, 'add')
        a = K.permute_dimensions(a, (0, 3, 2, 1))
        a = K.softmax(a)
        # Weighted sum of the values, then merge the heads back together
        o = K.batch_dot(a, vw, [3, 2])
        o = K.permute_dimensions(o, (0, 2, 1, 3))
        o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
        o = self.mask(o, q_mask, 'mul')
        return o
    def compute_output_shape(self, input_shape):
        """Output keeps q's first two dimensions and has ``out_dim`` features."""
        return (input_shape[0][0], input_shape[0][1], self.out_dim)

In [10]:
kern_reg = regularizers.l2(l2_rate)
def get_model(input_len, emb_size, drop_p=0.2, sp_drop=0.2):
    """Build the BiLSTM age classifier over six aligned id sequences.

    Four inputs (cid, adid, aderid, prodid) go through frozen Embedding layers
    initialised from the module-level ``*_emb_matrix`` arrays; pcid and inid
    get trainable, L2-regularised embeddings. The concatenated embeddings feed
    a bidirectional LSTM, a per-step tanh Dense layer, max and mean pooling
    over the sequence axis, and a two-layer ReLU head ending in a 10-way
    softmax.

    Args:
        input_len: Length of every input sequence.
        emb_size: Accepted for interface compatibility; not used in the body.
        drop_p: Dropout rate applied to the BiLSTM output.
        sp_drop: SpatialDropout1D rate applied to every embedding.

    Returns:
        An uncompiled ``Model`` whose inputs are ordered
        ``[cid, aderid, adid, prodid, pcid, inid]``.
    """
    def sequence_input(name):
        # One integer-id sequence of length `input_len`.
        return Input(shape=(input_len, ), name=name)

    def frozen_embedding(matrix, ids):
        # Embedding initialised from a pre-computed matrix and kept fixed.
        layer = Embedding(matrix.shape[0], matrix.shape[1],
                          weights=[matrix], trainable=False)
        return layer(ids)

    cid = sequence_input('cid')
    adid = sequence_input('adid')
    aderid = sequence_input('aderid')
    prodid = sequence_input('prodid')
    pcid = sequence_input('pcid')
    inid = sequence_input('inid')

    emb0 = frozen_embedding(creative_id_emb_matrix, cid)
    emb1 = frozen_embedding(ad_id_emb_matrix, adid)
    emb2 = frozen_embedding(advertiser_id_emb_matrix, aderid)
    emb3 = frozen_embedding(product_id_emb_matrix, prodid)

    # Trainable embeddings for the two remaining fields.
    emb5 = Embedding(20, 50, embeddings_regularizer=kern_reg)(pcid)
    emb6 = Embedding(338, 100, embeddings_regularizer=kern_reg)(inid)

    # A single SpatialDropout1D layer is shared by all six embeddings.
    sdrop = SpatialDropout1D(sp_drop)
    dropped = [sdrop(emb) for emb in (emb0, emb1, emb2, emb3, emb5, emb6)]
    x = Concatenate()(dropped)

    x = Bidirectional(LSTM(256, return_sequences=True))(x)
    x = Dropout(drop_p)(x)
    x = TimeDistributed(Dense(256, activation="tanh"))(x)

    # Pool over the sequence axis in two ways and join the results.
    x_max = Lambda(lambda t: K.max(t, axis=1), output_shape=(256,))(x)
    x_avg = Lambda(lambda t: K.mean(t, axis=1), output_shape=(256,))(x)
    dnn_input = Concatenate()([x_max, x_avg])

    output = Dense(512, activation='relu')(dnn_input)
    output = Dropout(0.2)(output)
    output = Dense(256, activation='relu')(output)
    pred = Dense(10, activation="softmax")(output)

    return Model(inputs=[cid, aderid, adid, prodid, pcid, inid], outputs=pred)

# 加载测试集数据

In [11]:
%%time

# Test-set inputs, one array per id field, mirroring the train_* arrays.
test_cid = np.load("data/nn_data/test_cid.npy")
test_ader = np.load("data/nn_data/test_ader.npy")
test_adid = np.load("data/nn_data/test_adid.npy")
test_pid = np.load("data/nn_data/test_pid.npy")
test_pcat = np.load("data/nn_data/test_pcat.npy")
test_ind = np.load("data/nn_data/test_ind.npy")

# test_cid = test_cid[:, :140]
# test_ader = test_ader[:, :140]
# test_adid = test_adid[:, :140]
# test_pid = test_pid[:, :140]
# test_pcat = test_pcat[:, :140]
# test_ind = test_ind[:, :140]

# Shape check on three of the six arrays.
print(test_cid.shape, test_pid.shape, test_ind.shape)

(1000000, 150) (1000000, 150) (1000000, 150)
CPU times: user 24 ms, sys: 2.29 s, total: 2.32 s
Wall time: 22.9 s


In [12]:
# One-hot encode the labels exactly once. Re-running the cell on an array
# that is already encoded would append another axis, so the call is guarded
# on the label array still being 1-D.
if train_age.ndim == 1:
    train_age = keras.utils.to_categorical(train_age)
print(train_age.shape)

# Accumulators filled by the fold loop: fold-averaged test probabilities and
# out-of-fold probabilities for the training rows.
pred_test_age = np.zeros((test_cid.shape[0],10))
oof_train = np.zeros((train_cid.shape[0], 10))

(3000000, 10)


In [13]:
!free -m

              total        used        free      shared  buff/cache   available
Mem:         112298       26237       32855          18       53205       85884
Swap:             0           0           0


In [None]:
# Start from empty accumulators. The loop adds each fold's test prediction
# into `pred_test_age`, so re-running this cell (or running it in the same
# kernel as the inference cell) must not stack onto earlier results.
num_age_classes = train_age.shape[1]
pred_test_age = np.zeros((test_cid.shape[0], num_age_classes))
oof_train = np.zeros((train_cid.shape[0], num_age_classes))

# Arrays in the order of the model's inputs: cid, aderid, adid, prodid, pcid, inid.
train_inputs = [train_cid, train_ader, train_adid, train_pid, train_pcat, train_ind]
test_inputs = [test_cid, test_ader, test_adid, test_pid, test_pcat, test_ind]

for idx, (trn_idx, val_idx) in enumerate(splits):

    write_log("fold : {}".format(idx))
    K.clear_session()

    # The template model is created under the CPU device scope before it is
    # wrapped for multi-GPU execution.
    with tf.device('/cpu:0'):
        model = get_model(maxlen, w2v_size, drop_p, sp_drop)

    if len(gpus)>=2:
        model = multi_gpu_model(model, gpus=len(gpus))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(0.001), metrics=['accuracy'])

    # Keep only the best weights (by validation accuracy), halve the learning
    # rate after one stagnant epoch and stop after two.
    filepath = "save_model/age_lstm_fold_{}.h5".format(idx+1)
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max', save_weights_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=1, min_lr=0.00005, verbose=1)
    earlystopping = EarlyStopping(monitor='val_acc', min_delta=0.0001, patience=2, verbose=1, mode='max')
    callbacks = [checkpoint, reduce_lr, earlystopping]

    # Slice every input once per fold; the validation slices are reused for
    # the out-of-fold prediction instead of being indexed a second time.
    trn_x = [arr[trn_idx] for arr in train_inputs]
    val_x = [arr[val_idx] for arr in train_inputs]
    val_y = train_age[val_idx]

    # NOTE(review): the batch size is hard-coded here; `batch_size` from the
    # config cell is not used.
    hist = model.fit(trn_x, train_age[trn_idx],
                     validation_data=(val_x, val_y),
                     epochs=40, batch_size=512, callbacks=callbacks, verbose=1)
    write_log(str(hist.history))
    del trn_x

    # Predict with the best checkpoint rather than the last epoch's weights.
    model.load_weights(filepath)
    oof_train[val_idx] = model.predict(val_x, batch_size=1024, verbose=1)
    per_pred = model.predict(test_inputs, batch_size=2048, verbose=1)

    pred_test_age += per_pred / len(splits)

    # Release the fold's copies before the next fold allocates its own.
    del val_x, val_y
    gc.collect()
#     break

np.save("data/nn_data/oof_train_age_lstm.npy", oof_train)
np.save("data/nn_data/pred_test_age_lstm.npy", pred_test_age)

02:42:17 : fold : 0












Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 2400000 samples, validate on 600000 samples
Epoch 1/40

Epoch 00001: val_acc improved from -inf to 0.48673, saving model to save_model/age_lstm_fold_1.h5
Epoch 2/40

Epoch 00002: val_acc improved from 0.48673 to 0.49577, saving model to save_model/age_lstm_fold_1.h5
Epoch 3/40

Epoch 00003: val_acc improved from 0.49577 to 0.49950, saving model to save_model/age_lstm_fold_1.h5
Epoch 4/40

Epoch 00004: val_acc improved from 0.49950 to 0.50195, saving model to save_model/age_lstm_fold_1.h5
Epoch 5/40

Epoch 00005: val_acc improved from 0.50195 to 0.50257, saving model to save_model/age_lstm_fold_1.h5
Epoch 6/40

Epoch 00006: val_acc did not improve from 0.50257

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.000500000023748725

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00009: val_acc improved from 0.50836 to 0.50888, saving model to save_model/age_lstm_fold_1.h5
Epoch 10/40
 522240/2400000 [=====>........................] - ETA: 34:26 - loss: 1.1453 - acc: 0.5206

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00010: val_acc did not improve from 0.50888

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 11/40

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00011: val_acc improved from 0.50888 to 0.51106, saving model to save_model/age_lstm_fold_1.h5
Epoch 12/40

Epoch 00012: val_acc did not improve from 0.51106

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 13/40

# 加载模型并测试

In [None]:
# Rebuild the out-of-fold and test predictions from the saved fold checkpoints.
# The accumulators are reset first: `pred_test_age` is summed over folds, so
# running this cell after the training cell (or twice) would otherwise add a
# second full set of predictions on top of the first.
num_age_classes = train_age.shape[1]
pred_test_age = np.zeros((test_cid.shape[0], num_age_classes))
oof_train = np.zeros((train_cid.shape[0], num_age_classes))

for idx, (trn_idx, val_idx) in enumerate(splits):

    write_log("fold : {}".format(idx))
    K.clear_session()

    with tf.device('/cpu:0'):
        model = get_model(maxlen, w2v_size, drop_p, sp_drop)

    if len(gpus)>=2:
        model = multi_gpu_model(model, gpus=len(gpus))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(0.001), metrics=['accuracy'])

    # Load the checkpoint that the training cell writes for this fold, so the
    # weights always belong to the same `splits` whose validation rows are
    # predicted below.
    model.load_weights("save_model/age_lstm_fold_{}.h5".format(idx+1))

    val_x = [train_cid[val_idx], train_ader[val_idx], train_adid[val_idx],
             train_pid[val_idx], train_pcat[val_idx], train_ind[val_idx]]
    oof_train[val_idx] = model.predict(val_x, batch_size=1024, verbose=1)
    per_pred = model.predict([test_cid, test_ader, test_adid, test_pid, test_pcat, test_ind],
                             batch_size=1024, verbose=1)

    pred_test_age += per_pred / len(splits)
    del val_x

# These two files are the ones the post-processing cells read back.
np.save("data/nn_data/oof_train.npy", oof_train)
np.save("data/nn_data/pred_test_age.npy", pred_test_age)

01:36:34 : fold : 0












Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


01:42:27 : fold : 1

01:48:19 : fold : 2

01:54:11 : fold : 3

02:00:03 : fold : 4

02:05:55 : fold : 5

02:11:45 : fold : 6

02:17:35 : fold : 7

02:23:28 : fold : 8


# 后处理

In [None]:
# ```PYTHON
####opt
# Number of age classes, i.e. columns of the prediction matrices.
class_num=10
# NOTE(review): `weights` is not referenced anywhere else in the visible code.
weights = [1.0]*class_num

def search_weight(valid_y, raw_prob, init_weight=None, step=0.001):
    """Greedy coordinate search for per-class multipliers that maximise accuracy.

    For each class in turn, every candidate multiplier ``n * step`` with
    ``n`` in ``range(0, 2000, 10)`` is tried while the other classes keep
    their current multiplier; a candidate is kept as soon as it strictly
    improves the accuracy of ``argmax(raw_prob * weight)``. Full passes over
    all classes are repeated until one pass brings no improvement. Progress
    (round numbers and each improved accuracy) is printed.

    Args:
        valid_y: 1-D array of true class indices, one per row of ``raw_prob``.
        raw_prob: 2-D array of class scores, shape (n_samples, n_classes).
        init_weight: Starting multipliers, one per class. Defaults to all
            ones. It is copied, never modified. Note that the starting
            accuracy is measured on the unweighted ``raw_prob``.
        step: Spacing unit of the candidate multipliers.

    Returns:
        The best multipliers found, as the same container type as
        ``init_weight`` (a list by default).

    Raises:
        ValueError: If the inputs are empty or their shapes do not match.
    """
    valid_y = np.asarray(valid_y)
    raw_prob = np.asarray(raw_prob)
    if valid_y.ndim != 1 or raw_prob.ndim != 2 or raw_prob.shape[0] != valid_y.shape[0]:
        raise ValueError("valid_y must be 1-D and raw_prob 2-D with one row per label")
    if valid_y.shape[0] == 0:
        raise ValueError("cannot search weights on empty input")

    # Size the search from the data instead of a module-level constant.
    n_classes = raw_prob.shape[1]
    if init_weight is None:
        init_weight = [1.0] * n_classes
    weight = init_weight.copy()

    def accuracy(prob):
        # Fraction of rows whose highest-scoring class equals the label.
        return float(np.mean(prob.argmax(axis=1) == valid_y))

    f_best = accuracy(raw_prob)
    round_num = 1
    while True:
        print("round: ", round_num)
        round_num += 1
        # Remember the score at the start of the pass; comparing against it
        # (rather than against a 0 sentinel) also runs the search when the
        # starting accuracy is exactly 0.
        round_start = f_best
        for c in range(n_classes):
            for n_w in range(0, 2000, 10):
                new_weight = weight.copy()
                new_weight[c] = n_w * step

                f = accuracy(raw_prob * np.array(new_weight))
                if f > f_best:
                    weight = new_weight
                    f_best = f
                    print(f)
        if f_best == round_start:
            break
    return weight

# Tune the class multipliers on the out-of-fold predictions. `train_age` is
# one-hot at this point, hence the argmax back to class indices.
weight = search_weight(train_age.argmax(-1), oof_train)
print(weight)

round:  1
0.5123206666666666
0.512323
0.5123246666666667
0.5123336666666667
0.5123363333333333
0.512339
0.512346
0.5123476666666666
0.512348
0.5123603333333333
0.5123643333333333
0.5123766666666667
0.512384
0.5123863333333334
0.5123893333333334
0.512392
0.512398
round:  2
0.5124043333333334
round:  3


In [6]:
# Reload the labels and the saved predictions from disk.
# Note: this rebinds `train_age` to the label array stored in the file (the
# cell output below shows a 1-D int32 array), replacing the one-hot version
# used during training.
train_age = np.load("data/nn_data/train_age.npy")
oof_train = np.load("data/nn_data/oof_train.npy")
pred_test_age = np.load("data/nn_data/pred_test_age.npy")

In [7]:
train_age

array([3, 9, 6, ..., 3, 2, 7], dtype=int32)

In [9]:
from scipy import optimize 

In [10]:
def fun(weight):
    """Objective for the optimiser: negated accuracy of the re-weighted OOF predictions.

    Reads the module-level ``oof_train`` and ``train_age``.
    """
    predicted = (oof_train * weight).argmax(axis=1)
    return -accuracy_score(train_age, predicted)

# Powell search over the ten class multipliers, starting from an all-zero vector.
res = optimize.fmin_powell(fun, np.asarray([0] * 10), disp=True)

Optimization terminated successfully.
         Current function value: -0.512440
         Iterations: 2
         Function evaluations: 335


In [11]:
res

array([2.57191034, 2.76221778, 2.69419709, 2.69762371, 2.69795621,
       2.59238292, 2.61827807, 2.55065754, 2.55769297, 2.35621343])

In [13]:
# Out-of-fold accuracy without any re-weighting.
print(accuracy_score(train_age, oof_train.argmax(-1)))
# Hand-entered multipliers, evaluated for comparison.
weight1 = np.array([0.96, 1.03, 1.0, 1.0, 1.0, 0.99, 1.0, 0.99, 1.0, 0.88])

print(accuracy_score(train_age, (weight1 * oof_train).argmax(-1)))

# Accuracy with the multipliers returned by the Powell search (`res`).
print(accuracy_score(train_age, (oof_train * res).argmax(-1)))

0.512318
0.5124043333333334
0.51244


In [16]:
# weight = np.array([0.96, 1.03, 1.0, 1.0, 1.0, 0.99, 1.0, 0.99, 1.0, 0.88])
# Hand-entered class multipliers applied to the averaged test predictions.
# NOTE(review): this rebinds `pred_test_age` to a rescaled copy of itself, so every
# re-run of the cell multiplies the weights in again; the submission cell further
# down also multiplies `pred_test_age` by `res`. Confirm which weighting is intended.
weight = np.array([0.82, 0.97, 1.0, 1.0, 1.0, 1.0, 1.0, 0.88, 0.97, 1.01])
pred_test_age = pred_test_age * weight

3

In [14]:
# The model built by get_model() takes six inputs (cid, aderid, adid, prodid,
# pcid, inid); passing only three of them cannot work, so feed all six in
# that order. `model` is whatever the last executed fold loop left behind.
test_pred = model.predict([test_cid, test_ader, test_adid, test_pid, test_pcat, test_ind],
                          batch_size=512, verbose=1)



In [14]:
# Final age per test row from the averaged predictions scaled by the Powell weights `res`.
# presumably the +1 maps the 0-based class index back to a 1-based age label -- TODO confirm
age = np.argmax(res * pred_test_age, axis=1) + 1
# The CSV read here supplies the frame whose 'predicted_age' column is overwritten.
test_submit = pd.read_csv("data/submit/post_process_2.csv")
test_submit['predicted_age'] = age
# NOTE(review): adds 5 to every value of the existing gender column; the reason is
# not visible in this notebook -- confirm this is intended before submitting.
test_submit['predicted_gender'] += 5
#np.save(filename+".npy", pred_test_age)
test_submit.to_csv("data/submit/B_bilstm_10fold_post2.csv", header=True, index=None)

# 融合

In [2]:
import pandas as pd
import numpy as np

# Inputs for the blend: two sets of age predictions and one set of gender predictions.
# `test_age_pred_cong` is the file written by the fold-inference cell of this notebook.
test_age_pred_zhan = np.load("data/submit/age.npy")
test_gender_pred = np.load("data/submit/gender.npy")
test_age_pred_cong = np.load("data/nn_data/pred_test_age.npy")

In [3]:
# Threshold the gender scores at 0.5 into labels 1 / 2 and flatten to 1-D.
is_above_half = test_gender_pred > 0.5
test_gender_pred = (is_above_half + 1).reshape(-1)

In [4]:
test_gender_pred[:10]

array([1, 2, 2, 1, 1, 1, 1, 1, 1, 2])

In [5]:
# Equal-weight average of the two age prediction matrices.
test_age_pred = test_age_pred_zhan * 0.5 + test_age_pred_cong * 0.5 

In [6]:
# Age from the blended predictions.
# presumably the +1 maps the 0-based class index back to a 1-based age label -- TODO confirm
age = np.argmax(test_age_pred, axis=1) + 1
# The CSV read here supplies the frame; both prediction columns are overwritten below.
test_submit = pd.read_csv("data/submit/post_process_2.csv")
test_submit['predicted_age'] = age
test_submit['predicted_gender'] = test_gender_pred
#np.save(filename+".npy", pred_test_age)
test_submit.to_csv("data/submit/B_bilstm_10fold_ronghe.csv", header=True, index=None)