In [1]:
from utility import *

In [2]:
from gensim.models import Word2Vec

In [3]:
# Tensor Lambda Function
def get_last_elements(tensor):
    """Return the final timestep of every sequence as a column vector.

    Intended to be wrapped in a ``Lambda`` layer placed after a recurrent
    layer that returns full sequences.

    Args:
        tensor: Rank-3 tensor indexed as ``[batch, timestep, feature]``.

    Returns:
        A tensor of shape ``(batch, features, 1)`` holding, for each batch
        entry, the feature vector of its last timestep with a trailing
        axis of size 1 appended.
    """
    # A single slice replaces the per-sample Python loop: it does not need a
    # statically known batch size and does not hard-code the feature width.
    # `-1` selects the last timestep (dropping that axis); `None` appends the
    # trailing singleton axis.
    return tensor[:, -1, :, None]

In [4]:
# Build and compile the network:
#   Input -> LSTM(30, full sequence) -> last timestep -> Conv1D -> MaxPooling1D
#         -> Flatten -> Dense(8, sigmoid) -> scale by 8.
# The layer classes are not imported explicitly in this notebook; presumably
# they are the Keras ones re-exported by `from utility import *` -- confirm.

# The batch dimension is fixed to a single sample.
batch_size = 1 
# Dimensionality of each word vector fed to the network. Changing it requires
# retraining the Word2Vec model with the same vector size.
word_vec_size = 100

# The middle (timestep) dimension is left as None.
inp = Input(batch_shape=(batch_size, None, word_vec_size))
encoded1 = LSTM(30, return_sequences=True, activation='tanh')(inp)
# Keep only the final timestep of the LSTM output (see get_last_elements).
encoded = Lambda(lambda x: get_last_elements(x))(encoded1)
# NOTE(review): input_shape=(1, 30) does not obviously match the tensor coming
# out of the Lambda above (which has a trailing axis of size 1) -- confirm
# whether this argument is intentional or has any effect here.
convolved = Conv1D(32, 2, input_shape=(1, 30), activation='relu')(encoded)
pooled = MaxPooling1D(3, strides=3)(convolved)
flattened = Flatten()(pooled)
# One sigmoid output per label; label_map below lists 8 labels.
output_probabilities = Dense(8, activation='sigmoid')(flattened)
# Scale the sigmoid outputs by 8; infer() thresholds these scaled scores at 4.
output_vector = Lambda(lambda x: x*8)(output_probabilities)
model = Model(inp, output_vector)
model.compile(loss='mean_squared_error', optimizer='sgd')

In [26]:
# Load weights from glassdoor_problem/model.h5 (resolved against the current
# working directory) into the model built above.
# NOTE(review): this cell's execution count (26) is out of sequence with its
# neighbours -- verify the notebook still works with Restart & Run All.
model_path = os.path.join(os.getcwd(), 'glassdoor_problem/model.h5')
model.load_weights(model_path)

In [6]:
# Load the Word2Vec model stored at glassdoor_problem/wordvecmodel (resolved
# against the current working directory); get_matrix_for_prediction passes it
# to get_word2vec_input_matrix.
wordvec_model = Word2Vec.load(os.path.join(os.getcwd(), 'glassdoor_problem/wordvecmodel'))

In [7]:
# Read the pickled label-name -> output-index mapping from
# glassdoor_problem/label_map.pkl under the current working directory.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
label_map_path = os.path.join(os.getcwd(), 'glassdoor_problem/label_map.pkl')
with open(label_map_path, 'rb') as label_map_file:
    label_map = pickle.load(label_map_file)

# Inverse lookup (output index -> label name), used by infer() to turn
# predicted indices back into label names.
reverse_label_map = {label_map[label]: label for label in label_map}

In [32]:
# functions to be used for making inference
def get_matrix_for_prediction(text):
    """Convert one piece of text into the model's Word2Vec input.

    Args:
        text: String to tokenise. It is split on single space characters,
            so consecutive spaces yield empty-string tokens.

    Returns:
        Whatever ``get_word2vec_input_matrix`` returns for a one-element
        list containing the token list, using the module-level
        ``wordvec_model``.
    """
    tokens = text.split(" ")
    # The helper is handed a list of token lists; here it holds one entry.
    return get_word2vec_input_matrix([tokens], wordvec_model)

def infer(model, text, threshold=4):
    """Return the label names the model predicts for ``text``.

    Args:
        model: Object exposing ``predict(matrix)``; the first row of its
            result is read as one score per label index.
        text: Raw input text. It is passed through ``clean_text`` and the
            first element of that result is used.
        threshold: Minimum score (inclusive) for a label to be reported.
            Defaults to 4, the cut-off previously hard-coded here, which is
            half of the 8x scaling applied to the model's sigmoid outputs.

    Returns:
        List of label names, looked up in ``reverse_label_map``, whose score
        is greater than or equal to ``threshold``, in label-index order.
    """
    cleaned_text = clean_text(text)
    # clean_text's result is indexed with [0] -- presumably it returns a
    # sequence of cleaned strings; confirm against the utility module.
    matrix = get_matrix_for_prediction(cleaned_text[0])
    # The prediction holds one row per input sample; only the first is used.
    scores = model.predict(matrix)[0]
    return [
        reverse_label_map[index]
        for index, score in enumerate(scores)
        if score >= threshold
    ]

In [19]:
# Display the label-name -> output-index mapping loaded from label_map.pkl.
label_map

{'salary_benefits': 0,
 'wlb_working_conditions': 1,
 'tech_product': 2,
 'culture_team': 3,
 'Job Security/Advancement': 4,
 'haras_discrim_sexism': 5,
 'management': 6,
 'business_vision_competitors': 7}

In [35]:
# Example call: run the loaded model on a short phrase and show the labels
# whose scaled score reaches the threshold used by infer().
infer(model, "great work life balance")

['wlb_working_conditions']