In [1]:
from utility import *

In [2]:
from gensim.models import Word2Vec

In [3]:
# Tensor Lambda Function
def get_last_elements(tensor) : 
    last_words = []
    for i in range(tensor.shape[0]) : 
        last_word_representation = tensor[i][-1]
        expanded = expand_dims(last_word_representation, axis=0)
        expanded = tensorflow.reshape(expanded, (30, 1))
        last_words.append(expanded)
    return tensorflow.convert_to_tensor(last_words)

In [4]:
batch_size = 1 
# for word_vec_size to change word2vec has to be trained for it
word_vec_size = 100

inp = Input(batch_shape=(batch_size, None, word_vec_size))
encoded1 = LSTM(30, return_sequences=True, activation='tanh')(inp)
encoded = Lambda(lambda x: get_last_elements(x))(encoded1)
convolved = Conv1D(32, 2, input_shape=(1, 30), activation='relu')(encoded)
pooled = MaxPooling1D(3, strides=3)(convolved)
flattened = Flatten()(pooled)
output_probabilities = Dense(8, activation='sigmoid')(flattened)
output_vector = Lambda(lambda x: x*8)(output_probabilities)
model = Model(inp, output_vector)
model.compile(loss='mean_squared_error', optimizer='sgd')

In [26]:
model_path = os.path.join(os.getcwd(), 'glassdoor_problem/model.h5')
model.load_weights(model_path)

In [6]:
wordvec_model = Word2Vec.load(os.path.join(os.getcwd(), 'glassdoor_problem/wordvecmodel'))

In [7]:
with open(os.path.join(os.getcwd(), 'glassdoor_problem/label_map.pkl'), 'rb') as f: 
    label_map = pickle.load(f)

reverse_label_map = {}
for label in label_map : 
    reverse_label_map[label_map[label]] = label

In [21]:
# functions to be used for making inference
def get_matrix_for_prediction(text) : 
    words = text.split(" ")
    words_array = [words]
    inp = get_word2vec_input_matrix(words_array, wordvec_model)
    return inp

def infer(model, text) : 
#     cleaned_text = clean_text(text)
    cleaned_text = [text]
    m = get_matrix_for_prediction(cleaned_text[0])
    prediction = model.predict(m)
    all_prediction = prediction[0]
    labels_predicted_index = [i for i in range(len(all_prediction)) if all_prediction[i]>=4]
    labels = [reverse_label_map[index] for index in labels_predicted_index]
    return labels

In [19]:
label_map

{'salary_benefits': 0,
 'wlb_working_conditions': 1,
 'tech_product': 2,
 'culture_team': 3,
 'Job Security/Advancement': 4,
 'haras_discrim_sexism': 5,
 'management': 6,
 'business_vision_competitors': 7}

In [28]:
with open('/Users/purushottamsinha/Desktop/glassdoor_problem/test_data.pkl', 'rb') as f: 
    test_data = pickle.load(f)

In [29]:
unilabel = [sen for sen in test_data if sum(sen[1])==8]

In [31]:
for sen in unilabel[0:50] : 
    prediction = infer(model, sen[0])
    actual_labels = [reverse_label_map[i] for i in range(len(sen[1])) if sen[1][i]>1]
    print(prediction)
    print(actual_labels)
    print("\n")

[[9.8086423e-07 2.2034757e-07 4.0903188e-08 7.4818487e-08 3.0470321e-05
  4.8173522e-03 6.8821711e-04 7.9810734e+00]]
['business_vision_competitors']
['business_vision_competitors']


[[9.6481040e-07 2.2757991e-05 9.8474584e-06 4.6060830e-02 2.9483456e-07
  2.7530132e-07 7.9999981e+00 3.4726869e-05]]
['management']
['management']


[[6.6178018e-06 2.6610176e-05 3.3394852e-05 2.9476743e-05 7.9996233e+00
  7.9174810e-05 1.4169108e-04 5.1230524e-02]]
['Job Security/Advancement']
['Job Security/Advancement']


[[2.5343683e-01 2.2295504e-08 7.9052383e-04 3.9381388e-04 7.7749972e+00
  1.4121895e-05 2.8316088e-05 3.5229421e-03]]
['Job Security/Advancement']
['Job Security/Advancement']


[[1.2311878e-04 2.5718839e-05 3.1269673e-04 2.0342845e-02 1.0188985e-05
  1.5215450e-05 7.9999990e+00 1.9427671e-05]]
['management']
['management']


[[3.2638827e-01 7.2412186e+00 5.7066223e-11 2.2484292e-09 7.2400463e-03
  7.9880778e-07 3.5032021e-12 1.1735444e-01]]
['wlb_working_conditions']
['wlb_working_c

KeyError: "word 'diver' not in vocabulary"