In [1]:
import pandas as pd
import numpy as np
import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Dropout
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.optimizers import Adam
from keras import backend as K
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

from bert_serving.client import BertClient
# Shared bert-as-service client; get_features() uses it to encode text. The
# matching server is started separately (see the bert-serving-start commands
# further down in this cell).
# NOTE(review): check_length=False presumably turns off the client-side check
# of sentence length against the server's max_seq_len — confirm in the
# bert-serving-client docs.
bc = BertClient(check_length=False)

# set GPU memory
# Ask TensorFlow to grow its GPU allocation on demand rather than reserving
# the whole device up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# Register the session with Keras. Without this call Keras creates its own
# default session and the allow_growth option above is never applied.
set_session(sess)


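# For cased tokenization, restart the server with:
# bert-serving-start -model_dir /share/ShareFolder/cased_L-24_H-1024_A-16/ -cased_tokenization -max_batch_size=32 -gpu_memory_fraction=0.9 -num_worker=1
# For uncased tokenization, restart the server with:
# bert-serving-start -model_dir /share/ShareFolder/cased_L-24_H-1024_A-16/ -max_batch_size=32 -gpu_memory_fraction=0.9 -num_worker=1
# NOTE(review): both commands point at the cased_L-24_H-1024_A-16 model directory;
# confirm whether the uncased run should use an uncased checkpoint instead.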

# CSV file holding the examples to score; it is read into test_df further down.
test_file_path = './dev.csv'

def get_keras_model(weights_path="best_weights_sentence.hdf5", input_dim=3072):
    """Build the binary classifier and load its trained weights.

    The network is a small feed-forward stack:
    Dense(200, relu) -> Dropout(0.3) -> Dense(50, relu) -> Dropout(0.3)
    -> Dense(1, sigmoid). It is compiled with binary cross-entropy loss, the
    'adam' optimizer and an accuracy metric.

    Args:
        weights_path: Path of the HDF5 weights file to load. Defaults to the
            file this notebook has always used.
        input_dim: Length of each input feature vector. Defaults to 3072,
            the width of the matrix produced by get_features() in this
            notebook.

    Returns:
        The compiled Sequential model with the weights loaded.
    """
    model = Sequential()
    model.add(Dense(units=200, activation='relu', input_dim=input_dim))
    model.add(Dropout(0.3))
    # Only the first layer of a Sequential model needs an input size; later
    # layers take their input width from the previous layer's output, so the
    # stray input_dim that used to be passed here has been dropped.
    model.add(Dense(units=50, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(loss=keras.losses.binary_crossentropy,
                  optimizer='adam', metrics=['accuracy'])

    model.load_weights(weights_path)
    return model

def get_features(claim_list, evidence_list, pair_list):
    """Encode claims, evidence and claim/evidence pairs and join them per row.

    Each list is sent to the module-level BERT client ``bc`` in its own
    ``encode`` call, and the three resulting matrices are concatenated along
    axis 1, so row i holds claim i's encoding, then evidence i's, then pair
    i's.

    Args:
        claim_list: Claim texts.
        evidence_list: Evidence texts, aligned index-by-index with claim_list.
        pair_list: Combined claim/evidence texts, aligned the same way.

    Returns:
        A 2-D numpy array with one row per input example.

    Raises:
        ValueError: If the three lists do not have the same length.
    """
    # Fail fast with a clear message before doing three encode round-trips;
    # otherwise the mismatch only surfaces inside np.concatenate.
    sizes = (len(claim_list), len(evidence_list), len(pair_list))
    if len(set(sizes)) != 1:
        raise ValueError(
            "claim_list, evidence_list and pair_list must have the same "
            "length, got %d, %d and %d" % sizes
        )

    claim_features = bc.encode(claim_list)
    evidence_features = bc.encode(evidence_list)
    pair_features = bc.encode(pair_list)
    return np.concatenate([claim_features, evidence_features, pair_features], axis=1)
    
# Load the examples to score. The CSV must provide the 'claim', 'evidence'
# and 'claim_evi_pair' columns read below.
test_df = pd.read_csv(test_file_path)

# Number of leading rows of the CSV to score; defined once so the three
# column slices below cannot drift apart.
num_rows = 5

# Slice the underlying arrays first so only the needed rows become lists.
claim_list = list(test_df['claim'].values[:num_rows])
evidence_list = list(test_df['evidence'].values[:num_rows])
pair_list = list(test_df['claim_evi_pair'].values[:num_rows])

feature_matrix = get_features(claim_list, evidence_list, pair_list)

keras_model = get_keras_model()
# The model ends in a single sigmoid unit, so `labels` holds one score per
# input row (shape (num_rows, 1)), not hard class labels.
labels = keras_model.predict(feature_matrix, batch_size=32, verbose=1)

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [3]:
# Show the (rows, columns) shape of the concatenated feature matrix.
feature_matrix.shape

(5, 3072)

In [8]:
# Inspect the raw model outputs: one sigmoid score per input row.
labels

array([[2.0837456e-01],
       [2.1317372e-01],
       [2.5552452e-02],
       [1.1413929e-01],
       [3.2497333e-05]], dtype=float32)

In [5]:
# `labels` has shape (n, 1). np.argsort sorts along the last axis by default,
# which here means sorting each one-element row and getting all zeros back.
# Flatten first so the result ranks the rows by score, lowest first.
query_doc_sort_index = np.argsort(np.ravel(labels))

In [22]:
# Pull the single sigmoid score out of each one-element prediction row,
# giving a flat list with one score per input example.
probabilities = [row[0] for row in labels]

In [23]:
# Show the flattened scores as a plain Python list.
probabilities

[0.20837456, 0.21317372, 0.025552452, 0.11413929, 3.2497333e-05]

In [29]:
# Rank the examples by score: the stored index array runs from the lowest
# score to the highest.
query_doc_sort_index = np.asarray(probabilities).argsort()
# Display a reversed view (highest score first); the stored array itself is
# left in ascending order.
query_doc_sort_index[::-1]

array([1, 0, 3, 2, 4])

In [30]:
# The stored index array is still in ascending-score order; only the copy
# displayed by the previous cell was reversed.
query_doc_sort_index

array([4, 2, 3, 0, 1])