In [11]:
import h5py
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers.recurrent import LSTM
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
# The saved weights can only be loaded into an already-built network, so the
# architecture is declared and compiled first.

# Hyper-parameters of the network.
vocabsize = 9400          # output layer has vocabsize+1 units (presumably one extra index for padding/unknown -- TODO confirm)
vector_embedding = 128    # output_dim of both LSTM layers
maxlen = 4                # not used anywhere in this cell
fixlen = 3                # input_length handed to both LSTM layers

# Layers in the order they are stacked; the weight-loading code below addresses
# them by position, so this order must not change.
layer_stack = [
    LSTM(input_dim=vocabsize+1, output_dim=vector_embedding, input_length=fixlen, return_sequences=True),
    Activation('sigmoid'),
    Dropout(0.2),
    LSTM(input_dim=vector_embedding, output_dim=vector_embedding, input_length=fixlen, return_sequences=False),
    Dropout(0.2),
    Dense(vocabsize+1),
    Activation('softmax'),
]

model = Sequential()
for layer in layer_stack:
    model.add(layer)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

#Loading the weights
# Only the parameters of the first layer (layer_0) are restored; its first
# weight array is then used as the embedding matrix for the rest of the cell.
directory ='/home/sinah/'
weights_path= directory + 'model_weight.hdf5'
# Open read-only: without an explicit mode, older h5py releases open in append
# mode and would silently create an empty file if the path were wrong. The
# context manager closes the file even when loading fails.
with h5py.File(weights_path, 'r') as f:
    if f.attrs['nb_layers'] < 1:
        # Fail here instead of printing success and leaving `embedding` undefined.
        raise ValueError('No layers stored in ' + weights_path)
    g = f['layer_0']
    weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
    # set_weights must run while the file is still open: `weights` holds
    # h5py datasets that are read at this point.
    model.layers[0].set_weights(weights)
x = model.layers[0].get_weights()
# presumably one row per vocabulary index -- TODO confirm against the LSTM weight layout
embedding = np.array(x[0])
print('Embedding Weights loaded.')

#Sigmoid transformation of the weights, then cap the first top_words_count rows:
# entries whose sigmoid is below 0.5 keep their sigmoid value, all others become 1.
# NOTE(review): the earlier comment described this step as converting "to binary";
# if a true 0/1 matrix is intended, the below-threshold branch should yield 0
# instead of the sigmoid value -- confirm with the author before changing.
top_words_count=50
embedding = 1/(1+np.exp(embedding*-1))
# The per-element loop indexed rows 0..top_words_count-1 directly; make the same
# requirement explicit so a too-small matrix fails here rather than downstream.
assert embedding.shape[0] >= top_words_count, 'embedding has fewer rows than top_words_count'
top_rows = embedding[:top_words_count]
# float64 matches the dtype of the np.zeros buffer the element-wise version filled.
embedding_capped_with_max_words = np.where(top_rows < .5, top_rows, 1.0).astype(np.float64)

#Get the first top_words_count lines of vocab.txt (one word per line) as plot labels
words=[]
# `with` closes the vocabulary file as soon as enough lines have been read.
with open(directory + 'vocab.txt','r') as text_open:
    for item in text_open:
        if len(words) >= top_words_count:
            break
        # Only the trailing newline is removed; other whitespace is kept.
        words.append(item.strip('\n'))
# Number of labels actually read (smaller than top_words_count if the file is short).
counter = len(words)

#Get TSNE representation of the words
# random_state pins the otherwise stochastic t-SNE layout, so re-running the
# cell reproduces the same figure.
# NOTE(review): n_iter=200 is kept as originally run; newer scikit-learn releases
# reportedly require at least 250 iterations and rename this argument -- confirm
# against the installed version before upgrading.
model_tsne = TSNE(n_components=2, verbose=2, n_iter=200, random_state=0)
vis_data = model_tsne.fit_transform(embedding_capped_with_max_words)
print('vis_data  vector has shape', vis_data.shape)
# plot the result: one point per row of the capped matrix, labelled with the
# word read from the same line position in the vocabulary file
vis_x = vis_data[:, 0]
vis_y = vis_data[:, 1]
fig,ax = plt.subplots()
ax.scatter(vis_x, vis_y)
for i, txt in enumerate(words):
    ax.annotate(txt, (vis_x[i],vis_y[i]))
# Labels so the figure can be read on its own.
ax.set_title('t-SNE projection of the first {} rows of the capped weight matrix'.format(len(vis_data)))
ax.set_xlabel('t-SNE dimension 1')
ax.set_ylabel('t-SNE dimension 2')
plt.show()


Embedding Weights loaded.
[[  2.45648175e-02   7.99257401e-03  -2.18111295e-02 ...,   1.63101871e-02
   -1.44028785e-02  -1.49583386e-03]
 [ -1.31063275e-02  -2.12847479e-02   1.12108001e-03 ...,  -2.07275455e-03
    2.81009451e-03   2.07442325e-02]
 [ -3.03017162e-03   1.08668329e-02   7.30061950e-03 ...,   1.21660037e-02
   -1.80090684e-02  -2.37702504e-02]
 ..., 
 [ -1.23752560e-02  -3.11842444e-03  -5.39299054e-03 ...,  -4.30203881e-03
   -6.89352091e-05   1.83842462e-02]
 [ -2.26896722e-02  -1.37367304e-02  -1.84576660e-02 ...,   2.15333886e-02
   -5.92074776e-03  -8.16717930e-03]
 [  6.56852545e-03  -7.13132648e-03   3.10798245e-03 ...,   5.34924166e-03
    4.51841438e-03   1.36570046e-02]]
[t-SNE] Computing pairwise distances...
[t-SNE] Computing 49 nearest neighbors...
[t-SNE] Computed conditional probabilities for sample 50 / 50
[t-SNE] Mean sigma: 0.068787
[t-SNE] Iteration 25: error = 1.4110159, gradient norm = 0.0022629
[t-SNE] Iteration 50: error = 1.3349496, gradient norm