In [None]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
import time
from tqdm import tqdm

In [None]:
def softmax(x):
    """Return the softmax of ``x`` computed along its last axis.

    Args:
        x: array-like of scores; for the usual 2-D case each row is one
            example and each column one class.

    Returns:
        numpy array with the same shape as ``x`` whose entries along the last
        axis are non-negative and sum to 1. Empty input yields an empty array.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # Nothing to normalise; avoid reducing over a zero-length axis.
        return np.array(scores, dtype=float)
    # Subtracting the per-row maximum does not change the mathematical result
    # but keeps np.exp from overflowing to inf (which would turn into nan after
    # the division) when the scores are large.
    shifted = scores - np.max(scores, axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)
def compute_qauc(df,pred):
    """Average the ROC AUC over queries (one AUC per distinct ``query_id``).

    Args:
        df: DataFrame with ``query_id`` and ``label`` columns. A ``pred``
            column is added to (or overwritten on) this frame in place.
        pred: scores to store in ``df['pred']``; must be assignable as a
            column of ``df``.

    Returns:
        Mean of the per-query AUCs. A query whose labels do not contain both
        0 and 1 contributes 0.5. Returns ``nan`` when ``df`` has no rows.
    """
    # Side effect kept on purpose: the scores are written onto the caller's frame.
    df['pred'] = pred

    aucs = []
    # One grouped pass instead of re-filtering the whole frame for every query
    # id. sort=False keeps groups in first-appearance order.
    # Note: groupby skips rows whose query_id is NaN.
    for _, group in df.groupby('query_id', sort=False):
        label = group['label'].values
        if (label == 0).any() and (label == 1).any():
            aucs.append(roc_auc_score(label, group['pred'].values))
        else:
            # AUC is undefined with a single class; count the query as chance level.
            aucs.append(0.5)

    if not aucs:
        # No queries at all: report "undefined" without numpy's empty-mean warning.
        return float('nan')
    return np.mean(aucs)

In [None]:
# Hyper-parameters read by the model cell below.
EPOCHS = 10        # exclusive upper bound of the epoch counter in the training loop
batch_size = 128   # number of training rows fed per optimisation step
emb_size = 128     # width of each row of the token embedding matrix

In [None]:
class Model(object):
    """Two-class query/title classifier assembled as a TensorFlow 1.x static graph.

    Constructing an instance adds placeholders, layers, loss, optimizer and
    accuracy ops to the current default graph; ``fit`` then trains them inside
    its own session.

    NOTE: this name shadows the ``Model`` imported from
    ``tensorflow.keras.models`` at the top of the notebook.
    """
    def __init__(self):
        """Build the graph.

        Reads the module-level names ``vocab`` (only ``len(vocab)`` is used, as
        the number of embedding rows) and ``emb_size``.
        """
        # Inputs: token ids for query (20 per row) and title (30 per row),
        # 15 float features per row, and one integer class label per row.
        self.query=tf.placeholder(tf.int32,[None,20],name='query')
        self.title=tf.placeholder(tf.int32,[None,30],name='title')
        self.feature=tf.placeholder(tf.float32,[None,15],name='feature')
        self.y=tf.placeholder(tf.int32,[None,],name='y')

        # NOTE(review): Keras auto-names layers in creation order, so reordering
        # the layer constructions below would likely change variable names and
        # break restoring existing checkpoints - keep the order as is.
        with tf.variable_scope('EMB'):
            # A single embedding matrix is looked up for both query and title.
            emb=tf.get_variable('emb',shape=[len(vocab),emb_size],initializer=tf.contrib.layers.xavier_initializer())
            query_emb=tf.nn.embedding_lookup(emb,self.query)
            title_emb=tf.nn.embedding_lookup(emb,self.title)
        with tf.variable_scope('GRU'):
            # The same layer instance encodes both sequences.
            bi_gru=Bidirectional(GRU(256,return_sequences=True))
            query_gru=bi_gru(query_emb)
            title_gru=bi_gru(title_emb)
        with tf.variable_scope('Dense'):
            # Two-layer MLP over the float features.
            feature_dense1=Dense(256,activation='relu')(self.feature)
            feature=Dense(256,activation='relu')(feature_dense1)

            # Per position: embedding concatenated with the Bi-GRU output.
            query_concat=tf.concat([query_emb,query_gru],axis=-1)
            title_concat=tf.concat([title_emb,title_gru],axis=-1)

            query_dense=Dense(256,activation='relu')(query_concat)
            title_dense=Dense(256,activation='relu')(title_concat)

            # Collapse the sequence axis so each example becomes one flat vector.
            query_flatten=Flatten()(query_dense)
            title_flatten=Flatten()(title_dense)
        with tf.variable_scope('Concat'):
            final_concat=tf.concat([query_flatten,title_flatten,feature],axis=-1)
            # NOTE(review): no `training` argument is passed here, so whether
            # batch statistics are used (and moving averages updated) during
            # fit() depends on the Keras learning phase - confirm this is intended.
            final_batch=BatchNormalization()(final_concat)
            final_dense=Dense(256,activation='relu')(final_batch)
            self.logits=Dense(2)(final_dense)
        self.loss=tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels=self.y,logits=self.logits))
        self.train_step=tf.train.AdamOptimizer(0.0001).minimize(self.loss)
        self.prediction = tf.to_int32(tf.argmax(self.logits, axis=-1))
        self.correct_prediction=tf.cast(tf.equal(self.prediction, self.y), tf.float32)
        self.acc = tf.reduce_mean(self.correct_prediction)

    def fit(self,query_train,title_train,feature_train,y_train,query_valid,title_valid,feature_valid,y_valid):
        """Resume training from the epoch-0 checkpoint and keep checkpointing.

        Variables are NOT initialised here; they are restored from
        ``model//./model_epoch0.ckpt``, which must already exist. Epochs are
        numbered ``1 .. EPOCHS-1``.

        Args:
            query_train, title_train, feature_train, y_train: training inputs,
                sliced along the first axis into consecutive batches of
                ``batch_size`` rows. A trailing partial batch is skipped.
            query_valid, title_valid, feature_valid, y_valid: validation inputs,
                fed whole every 200 training steps.

        Also reads the module-level names ``EPOCHS``, ``batch_size``,
        ``softmax``, ``compute_qauc`` and ``df_valid_final_1w_ql`` (the frame
        passed to ``compute_qauc`` together with the validation scores).
        """
        with tf.Session() as sess:
            #sess.run(tf.global_variables_initializer())
            saver=tf.train.Saver()
            saver.restore(sess,'model//./model_epoch0.ckpt')

            # The validation feed never changes, so build it once.
            valid_dict={
                self.query:query_valid,
                self.title:title_valid,
                self.feature:feature_valid,
                self.y:y_valid
            }
            steps_per_epoch=len(query_train)//batch_size

            for epoch in range(1,EPOCHS):
                start_time=time.time()
                for step in range(steps_per_epoch):
                    start=step*batch_size
                    end=start+batch_size
                    feed_dict={
                        self.query:query_train[start:end],
                        self.title:title_train[start:end],
                        # Fix: slice the feature_train argument. The code used to
                        # read an unrelated name `feature` that is not defined
                        # in this method.
                        self.feature:feature_train[start:end],
                        self.y:y_train[start:end]
                    }

                    _,loss,acc=sess.run([self.train_step,self.loss,self.acc],feed_dict=feed_dict)
                    if step%200==0:
                        valid_loss,valid_acc,logits=sess.run([self.loss,self.acc,self.logits],feed_dict=valid_dict)
                        probs=softmax(logits)
                        # Column 1 is used as the score of label 1.
                        qauc=compute_qauc(df_valid_final_1w_ql,probs[:,1])
                        valid_auc=roc_auc_score(y_valid,probs[:,1])
                        print('auc:',valid_auc)
                        print('quc:',qauc)
                        print('epoch {0} step {1} train_loss {2} train_acc {3} valid_loss:{4} valid_acc {5}'.format(epoch,step,loss,acc,valid_loss,valid_acc))

                    # Intermediate checkpoint every 5000 steps.
                    if (step+1)%5000==0:
                        saver.save(sess,'model//model_epoch{0}_step{1}.ckpt'.format(epoch,step+1))
                # Message reports the wall-clock time taken by one epoch.
                print("一个epoch用时{0}".format(time.time()-start_time))
                saver.save(sess,'model//model_epoch{0}.ckpt'.format(epoch))

# Start from an empty default graph, build the network, then train it.
tf.reset_default_graph()
model = Model()
model.fit(
    query_train=query_train,
    title_train=title_train,
    feature_train=feature_train,
    y_train=y_train,
    query_valid=query_valid,
    title_valid=title_valid,
    feature_valid=feature_valid,
    y_valid=y_valid,
)

In [None]:
# Score the test set: rebuild the graph in an empty default graph, restore
# weights from a checkpoint, and collect per-row class probabilities in `res`.
tf.reset_default_graph()
model=Model()
test_batch_size=10000  # rows scored per sess.run call
with tf.Session() as sess:
    saver=tf.train.Saver()
    # NOTE(review): this restores the epoch-0 checkpoint, while the training
    # loop writes checkpoints for epochs >= 1 - confirm this is the intended one.
    saver.restore(sess,'model//./model_epoch0.ckpt')
    res=[]
    for start in tqdm(range(0,len(test_title_data),test_batch_size)):
        end=start+test_batch_size
        test_feed_dict={
            model.query:test_query_data[start:end],
            model.title:test_title_data[start:end],
            model.feature:feature_test[start:end]
        }
        # Fetch the tensor itself rather than a one-element list, so the result
        # is always 2-D (rows x 2). Wrapping it in a list and squeezing would
        # collapse a final batch of exactly one row to shape (2,), which breaks
        # the per-row softmax.
        batch_logits=sess.run(model.logits,feed_dict=test_feed_dict)
        res.extend(softmax(batch_logits))