In [None]:
import os
import sys

import matplotlib.pylab as plt
import numpy as np
import tensorflow as tf

In [None]:
from attalos.imgtxt_algorithms.correlation.correlation import *

In [None]:
# Data locations come from the environment. The three required variables raise
# KeyError when unset; IMG_DIR is optional and falls back to None.
wordvecs_dir, dataset_dir, octave_eval_dir = (
    os.environ[var_name]
    for var_name in ("WORDVECS_DIR", "DATASET_DIR", "OCTAVE_EVAL_DIR")
)
img_dir = os.environ.get("IMG_DIR")

In [None]:
# W2V Word Vectors
# Load the binary word2vec vector file stored under WORDVECS_DIR.
import word2vec
w2v_vector_file = os.path.join(wordvecs_dir, "text9Bvin.bin")
w2v_model = word2vec.load(w2v_vector_file)

In [None]:
# Sanity check: nearest neighbours of "airplane".
# NOTE(review): when the notebook is run top to bottom, w2v_model is still the
# object returned by word2vec.load(); the `.glove` attribute presumably only
# exists on the W2VWrapper created in the GloVe cell -- confirm the intended
# execution order.
w2v_model.glove.most_similar("airplane")

In [None]:
# GloVe Word Vectors
# NOTE: this rebinds w2v_model, replacing the word2vec model with a W2VWrapper
# around the Stanford-format GloVe vectors (glove.6B.200d.txt).
from glove import Glove
from attalos.imgtxt_algorithms.correlation.w2vwrapper import W2VWrapper
glove_model = Glove.load_stanford(os.path.join(wordvecs_dir, "glove.6B.200d.txt"))
w2v_model = W2VWrapper(glove_model)

In [None]:
# Number of entries in the active word-vector model's vocabulary.
len(w2v_model.vocab)

In [None]:
# Put the directory named by OCTAVE_EVAL_DIR on Octave's path so its scripts
# (octave.evaluate is called later) are reachable through oct2py.
from oct2py import octave
octave.addpath(octave_eval_dir)

In [None]:
%matplotlib inline

---

# Load data

In [None]:
# Load the iaprtc-12 archive.
# NOTE: the espgame cell that comes next rebinds imdata; run only the loader
# for the dataset you actually want to work with.
imdata = np.load(os.path.join(dataset_dir, "iaprtc-12/iaprtc12-inria.npz"))

In [None]:
# Load the espgame archive.
# NOTE: this rebinds imdata and therefore overrides the iaprtc-12 archive if
# that cell was executed before this one.
imdata = np.load(os.path.join(dataset_dir, "espgame/espgame-inria.npz"))

In [None]:
# Pull the train/test feature (x*) and label (y*) matrices out of the archive.
xTr, xTe, yTr, yTe = (imdata[key] for key in ('xTr', 'xTe', 'yTr', 'yTe'))
# D holds the label words (one per label column); the two lists hold the image
# names that are later used to build image file paths.
D, train_ims, test_ims = (imdata[key] for key in ('D', 'trainlist', 'testlist'))

In [None]:
# Print the row index of every test sample whose label column 1 equals 1.
for idx in np.flatnonzero(yTe[:, 1] == 1):
    print(idx)

In [None]:
"""
Images of planes: 
1147
1893
2323
2909
2930
3954
4229
4346
4363
4475
4581
4608
4663
4666
4696
4789
4960
5018
5121
5131
5156
5259
5424
5569
5638
5688
5696
5807
5825
5905
5967
6043
6060
6165
6202
6246
6286
6388
6582
6604
6639
6640
6705
6765
6791
7176
7240
7340
7343
7512
7870
7898
8043
8172
8173
8208
8244
8368
8387
8587
8894
8990
8992
9251
9392
9670
9732
9761
9859
9910
10532
10738
10796
10906
10932
11030
11149
11245
11332
11411
11604
11633
11696
11884
12060
12212
12213
12251
12813
13603
13604
13605
"""

In [None]:
# Shapes of the raw feature and label matrices, in train/test order.
for matrix in (xTr, yTr, xTe, yTe):
    print(matrix.shape)

---

# Filter out labels not in W2V vocabulary

In [None]:
# Find the labels in D that are missing from the word-vector vocabulary.
# Judging by how it is used in the following cells, to_remove maps each such
# label (key) to its column index in the label matrices (value) -- TODO confirm
# against get_invalid_labels.
to_remove = get_invalid_labels(D, w2v_model.vocab)

In [None]:
# Keep only the labels that are not flagged in to_remove. Built eagerly as a
# list because later cells index D by position, and membership is tested
# against the dict itself instead of rebuilding its key list for every label.
D = [word for word in D if word not in to_remove]

In [None]:
# Drop the label columns listed in to_remove from the training labels.
# NOTE: not idempotent -- re-running this cell deletes the same column
# positions again from the already-shrunk matrix.
yTr = np.delete(yTr, to_remove.values(), axis=1)

In [None]:
# Drop the label columns listed in to_remove from the test labels.
# NOTE: not idempotent -- re-running this cell deletes the same column
# positions again from the already-shrunk matrix.
yTe = np.delete(yTe, to_remove.values(), axis=1)

In [None]:
# Label-matrix shapes after the column filtering.
for matrix in (yTr, yTe):
    print(matrix.shape)

In [None]:
# Keep handles on the filtered multi-hot label matrices. These are references,
# not copies; the later cells rebind yTr to new arrays rather than mutating it.
orig_yTr = yTr
orig_yTe = yTe

---

# Construct correlation matrix

In [None]:
# Build the float16 word-vector matrix for the label words in D.
# Presumably one column per label (the next cell forms W_L.T . W_L) -- TODO
# confirm against construct_W.
W_L = construct_W(w2v_model, D, dtype=np.float16)

In [None]:
# Gram matrix of W_L: pairwise dot products between its columns.
correlation_arr = np.dot(W_L.T, W_L)

In [None]:
# Inspect the dimensions of the correlation matrix.
correlation_arr.shape

In [None]:
# Stem plot of row 1 of the raw correlation matrix.
plt.stem(correlation_arr[1])

In [None]:
# Divide the whole matrix by one scalar: with ord=1 on a 2-D array,
# np.linalg.norm returns the matrix 1-norm (largest absolute column sum).
correlation_arr_normed = correlation_arr / np.linalg.norm(correlation_arr, 1)

In [None]:
# Stem plot of row 1 of the normalised correlation matrix.
plt.stem(correlation_arr_normed[1])

In [None]:
# For every row of the normalised correlation matrix keep only its n largest
# and n smallest entries (everything else becomes 0), printing the index and
# label of each kept entry followed by a separator line per row.
n = 2
sparse_rows = []
for row in correlation_arr_normed:
    descending = row.argsort()[::-1]
    kept = np.concatenate([descending[:n], descending[-n:]])
    sparse_row = np.zeros(row.shape)
    for idx in kept:
        print("%s (%s)" % (idx, D[idx]))
    sparse_row[kept] = row[kept]
    print("-------")
    sparse_rows.append(sparse_row)
correlation_arr_normed_nonlinear = np.asarray(sparse_rows)

In [None]:
# Stem plot of row 1 after the top/bottom-n sparsification.
plt.stem(correlation_arr_normed_nonlinear[1])

In [None]:
#correlation_arr_normed_nonlinear = nonlinearity(correlation_arr_normed) #np.power(correlation_arr_normed, 3)

In [None]:
#plt.stem(scale(correlation_arr_normed_nonlinear)[0])

In [None]:
# Rescale the sparsified correlation matrix with the imported scale2 helper
# (its exact scaling rule is defined outside this notebook).
final = scale2(correlation_arr_normed_nonlinear)

In [None]:
# Stem plot of row 1 of the final, rescaled matrix.
plt.stem(final[1])

---

# Transform yTr into correlation space

In [None]:
# Project the multi-hot training labels through the matrix `final`.
# Rebinds yTr; the untouched labels remain available as orig_yTr.
yTr = np.dot(orig_yTr, final)

In [None]:
# Shapes after moving the training labels into correlation space.
for matrix in (xTr, yTr, xTe, yTe):
    print(matrix.shape)

In [None]:
# Row index of the training sample inspected in the following cells.
img_idx = 5905

In [None]:
# Display the selected training image.
# NOTE(review): the path is hard-wired to the iaprtc-12 image folder, so this
# presumably only matches train_ims when the iaprtc-12 archive is the one
# loaded into imdata -- confirm.
plt.imshow(plt.imread(os.path.join(dataset_dir, "iaprtc-12", "images", "images", "%s.jpg" % train_ims[img_idx])))

In [None]:
# Original multi-hot labels of the selected training sample.
plt.stem(orig_yTr[img_idx])

In [None]:
# Labels of the same sample after the correlation-space transform.
plt.stem(yTr[img_idx])

In [None]:
# Copy of the transformed labels in which every position that is not a
# positive entry of the original multi-hot labels is forced to 0.
# A single boolean mask replaces the per-element Python loop, which
# recomputed np.where for every cell of the matrix.
filtered_yTr = np.copy(yTr)
filtered_yTr[~(orig_yTr > 0)] = 0

In [None]:
# Filtered labels of the selected training sample.
plt.stem(filtered_yTr[img_idx])

In [None]:
# Label words with a positive entry for the selected sample: first in the
# original labels, then in the filtered labels.
for label_matrix in (orig_yTr, filtered_yTr):
    print([D[i] for i in np.where(label_matrix[img_idx] > 0)[0]])

In [None]:
# Scale the ORIGINAL multi-hot labels with scale2. The trailing comments are
# the alternatives that were tried; as written, the correlation-space yTr and
# filtered_yTr are not used here.
yTr_scaled = scale2(orig_yTr) #scale2(filtered_yTr) #scale2(yTr)

In [None]:
# Indices of the rows of yTr_scaled that contain at least one NaN.
nan_idxs = [idx for idx, row in enumerate(yTr_scaled) if np.isnan(row).any()]
print(nan_idxs)

In [None]:
# Blank out every NaN-containing row completely: all of its columns become 0,
# not only the NaN entries.
for nan_idx in nan_idxs:
    yTr_scaled[nan_idx][:] = 0

In [None]:
# Scaled labels of the selected training sample.
plt.stem(yTr_scaled[img_idx])

In [None]:
# From here on the regressors are trained against the scaled labels.
yTr = yTr_scaled

---

# Do linear regression

In [None]:
# Least-squares baseline with scikit-learn: fit on the training features and
# targets, then predict label scores for the test features.
from sklearn import linear_model
n_jobs = -1  # -1 = all CPUs, default = 1
linreg_model = linear_model.LinearRegression(n_jobs=n_jobs).fit(xTr, yTr)
yHat = linreg_model.predict(xTe)

In [None]:
# Spark MLLib's Linear Regression implementation
"""
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel
from itertools import izip

def spark_linear_regression(xTr, yTr, xTe):
    for x, y in izip(xTr, yTr):
"""

In [None]:
# sklearn Polynomial Regression
"""
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
poly_degrees = 2
poly = PolynomialFeatures(degree=poly_degrees)
xTr_poly = poly.fit_transform(xTr)
xTe_poly = poly.fit_transform(xTe)
n_jobs = -1 # -1 = all CPUs, default = 1
linreg_model = linear_model.LinearRegression(n_jobs=n_jobs)
linreg_model.fit(xTr, yTr)
yHat = linreg_model.predict(xTe)
"""

---
## tflearn

In [None]:
# Tensorflow Linear Regression (toy one-dimensional example)
# NOTE: inside IPython a __future__ import stays active for the cells executed
# afterwards, so print is a function for the rest of the kernel session.
from __future__ import absolute_import, division, print_function
import tflearn

# Regression data
X = [3.3,4.4,5.5,6.71,6.93,4.168,9.779,6.182,7.59,2.167,7.042,10.791,5.313,7.997,5.654,9.27,3.1]
Y = [1.7,2.76,2.09,3.19,1.694,1.573,3.366,2.596,2.53,1.221,2.827,3.465,1.65,2.904,2.42,2.94,1.3]

# Linear Regression graph: a single unit (y = W.x + b) trained with SGD on
# mean squared error, reporting R2 as the metric.
input_ = tflearn.input_data(shape=[None])
linear = tflearn.single_unit(input_)
regression = tflearn.regression(linear, optimizer='sgd', loss='mean_square',
                                metric='R2', learning_rate=0.01)
m = tflearn.DNN(regression)
m.fit(X, Y, n_epoch=1000, show_metric=True, snapshot_epoch=False)

print("\nRegression result:")
print("Y = " + str(m.get_weights(linear.W)) +
      ".X + " + str(m.get_weights(linear.b)))

# The values handed to predict() are inputs (x), not targets, so the message
# says "x =".
print("\nTest prediction for x = 3.2 and x = 4.5:")
print(m.predict([3.2, 4.5]))

In [None]:
# Tensorflow Linear Regression on the real features/labels
# NOTE: inside IPython a __future__ import stays active for the cells executed
# afterwards, so print is a function for the rest of the kernel session.
from __future__ import absolute_import, division, print_function
import tensorflow as tf  # needed for the labels placeholder below
import tflearn

# Linear Regression graph
input_ = tflearn.input_data(shape=[None, xTr.shape[1]])
labels = tf.placeholder(tf.float32, [None, yTr.shape[1]], name='labels')
# A dense layer with identity activation gives one linear output per label
# column, so the prediction width matches the labels placeholder.
linear = tflearn.fully_connected(input_, yTr.shape[1], activation='linear')
regression = tflearn.regression(linear, placeholder=labels, optimizer='sgd', loss='mean_square',
                                metric='R2', learning_rate=0.01)
m = tflearn.DNN(regression)
m.fit(xTr, yTr, n_epoch=1000, show_metric=True, snapshot_epoch=False)

print("\nRegression result:")
print("Y = " + str(m.get_weights(linear.W)) +
      ".X + " + str(m.get_weights(linear.b)))

---
## tensorflow.contrib.layers

In [None]:
def create_model(hidden_units, do_batch_norm=True, optimizer_cls=tf.train.AdamOptimizer, learning_rate=0.01, softmax=False):
    """Build a stack of ReLU layers with a sum-of-squares loss.

    Requires `tf` (tensorflow) to be imported before this definition is
    executed, because the default optimizer is looked up at definition time.

    Args:
        hidden_units: sequence of layer widths. The first entry is both the
            width of the input placeholder and the size of the first ReLU
            layer; the last entry is the width of the truth placeholder and
            of the prediction.
        do_batch_norm: when true, a batch-norm layer follows every ReLU layer.
        optimizer_cls: optimizer class, instantiated with `learning_rate`.
        learning_rate: learning rate passed to the optimizer.
        softmax: when true, a softmax is applied to the output of the last
            layer. Defaults to False, i.e. no softmax.

    Returns:
        dict with the keys 'input', 'truth', 'layers', 'prediction', 'loss'
        and 'optimizer' (the minimize op).
    """
    model_info = {}

    # Placeholders for data
    model_info['input'] = tf.placeholder(shape=(None, hidden_units[0]), dtype=tf.float32)
    model_info['truth'] = tf.placeholder(shape=(None, hidden_units[-1]), dtype=tf.float32)

    layers = []
    layer = model_info['input']
    for hidden_size in hidden_units:
        layer = tf.contrib.layers.relu(layer, hidden_size)
        layers.append(layer)
        if do_batch_norm:
            layer = tf.contrib.layers.batch_norm(layer)
            layers.append(layer)
    if softmax:
        # Only the final output is normalised, not every intermediate layer.
        layer = tf.nn.softmax(layer)
        layers.append(layer)

    model_info['layers'] = layers
    model_info['prediction'] = layer

    loss = tf.reduce_sum(tf.square(model_info['prediction'] - model_info['truth']))
    model_info['loss'] = loss

    model_info['optimizer'] = optimizer_cls(learning_rate=learning_rate).minimize(loss)

    return model_info

In [None]:
def train_model(xTr, yTr, xTe, epochs=100, batch_size=128, learning_rate=0.01):
    """Train the create_model() network and score it on the test set.

    Reads the notebook-level names `yTe` (test labels) and `octave`
    (evaluation bridge) in addition to its arguments.

    Args:
        xTr: training features, one row per sample.
        yTr: training targets, one row per sample.
        xTe: test features, one row per sample.
        epochs: number of passes over the training data.
        batch_size: samples per optimisation step; a trailing partial batch
            is skipped.
        learning_rate: learning rate forwarded to create_model().

    Returns:
        [precision, recall, f1score, losses], where losses holds the batch
        loss recorded at every 100th batch of every epoch.

    Raises:
        ValueError: if batch_size exceeds the number of training samples, so
            that no optimisation step could be run.
    """
    num_items = xTr.shape[0]
    hidden_units = [xTr.shape[1], 1168, yTr.shape[1]]
    losses = []

    num_batches_per_epoch = num_items // batch_size
    if num_batches_per_epoch == 0:
        raise ValueError("batch_size (%s) is larger than the number of training samples (%s)"
                         % (batch_size, num_items))

    with tf.Graph().as_default():
        model_info = create_model(hidden_units, learning_rate=learning_rate)

        init = tf.initialize_all_variables()
        saver = tf.train.Saver()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init)
            print('Starting Training (%s images total)' % xTr.shape[0])
            print("Architecture: %s" % hidden_units)

            for epoch in range(epochs):
                for batch in range(num_batches_per_epoch):
                    start = batch * batch_size
                    feed_dict = {
                        model_info['input']: xTr[start:start + batch_size],
                        model_info['truth']: yTr[start:start + batch_size],
                    }
                    sess.run(model_info['optimizer'], feed_dict=feed_dict)

                    if batch % 100 == 0:
                        # Periodic checkpoint plus progress report
                        saver.save(sess, "/tmp/model.ckpt")
                        loss = sess.run(model_info['loss'], feed_dict=feed_dict)
                        print('(Epoch {}) Completed batch {} of {} (loss: {})'.format(epoch, batch, num_batches_per_epoch, loss))
                        losses.append(loss)

            # Predict once with the final weights, so the evaluation does not
            # score a stale prediction taken at the last checkpoint step.
            yHat_output = sess.run(model_info['prediction'], feed_dict={model_info['input']: xTe})

            [precision, recall, f1score] = octave.evaluate(yTe.T, yHat_output.T, 5)
            print("precision: " + str(precision))
            print("recall: " + str(recall))
            print("f1score: " + str(f1score))
    return [precision, recall, f1score, losses]

In [None]:
# Long training run of the layer-based model.
p, r, f1, losses = train_model(xTr, yTr, xTe, epochs=10000, batch_size=1000, learning_rate=0.05)
# Single-argument print(...) gives the same output whether or not
# print_function is active in the kernel (the tflearn cells enable it).
print("%s %s %s" % (p, r, f1))
print(losses)

In [None]:
def train_model():
    """Argument-free variant of the training routine with fixed settings.

    NOTE: shares its name with the parameterised train_model defined earlier
    in this notebook and replaces it once this cell has run.

    Trains for 100 epochs with batches of 128 on the notebook-level xTr/yTr,
    then scores predictions for xTe against yTe through octave.evaluate.

    Returns:
        [precision, recall, f1score]
    """
    epochs = 100
    batch_size = 128
    num_items = xTr.shape[0]
    # create_model() needs the layer widths; use the same architecture as the
    # parameterised train_model (input width, 1168 hidden units, label width).
    hidden_units = [xTr.shape[1], 1168, yTr.shape[1]]
    num_batches_per_epoch = num_items // batch_size
    with tf.Graph().as_default():
        model_info = create_model(hidden_units)
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            sess.run(init)
            print('Starting Training')
            for epoch in range(epochs):
                for batch in range(num_batches_per_epoch):
                    start = batch * batch_size
                    feed_dict = {
                        model_info['input']: xTr[start:start + batch_size],
                        model_info['truth']: yTr[start:start + batch_size],
                    }
                    sess.run(model_info['optimizer'], feed_dict=feed_dict)
                    if batch % 100 == 0:
                        # Periodic checkpoint plus progress report
                        saver.save(sess, "/tmp/model.ckpt")
                        loss = sess.run(model_info['loss'], feed_dict=feed_dict)
                        print('Completed batch {} of {} with {} images (loss: {})'.format(
                            batch, num_batches_per_epoch, xTr.shape[0], loss))
            # Predict with the final weights rather than reusing the output
            # computed at the last checkpoint step.
            yHat_output = sess.run(model_info['prediction'], feed_dict={model_info['input']: xTe})
            [precision, recall, f1score] = octave.evaluate(yTe.T, yHat_output.T, 5)
            print("precision: " + str(precision))
            print("recall: " + str(recall))
            print("f1score: " + str(f1score))
    return [precision, recall, f1score]

---
## tensorflow.contrib.learn

In [None]:
# Build regressor
# DNN regressor with two hidden layers of 200 and 150 units. tensorflow must
# already be imported as `tf` when this cell runs.
regressor = tf.contrib.learn.TensorFlowDNNRegressor(hidden_units=[200, 150])

In [None]:
# Train the DNN regressor on the training features and targets.
regressor.fit(xTr, yTr)

In [None]:
# Predict label scores for the test features.
result = regressor.predict(xTe)

In [None]:
# Score the DNN regressor's predictions with the Octave evaluation script
# (third argument 5) and report the three metrics.
[precision, recall, f1score] = octave.evaluate(yTe.T, result.T, 5)
for metric_name, metric_value in (("precision", precision),
                                  ("recall", recall),
                                  ("f1score", f1score)):
    print(metric_name + ": " + str(metric_value))

In [None]:
import tensorflow.contrib.learn.python.learn as learn

In [None]:
# Linear regressor from tensorflow.contrib.learn with its default settings.
reg = learn.LinearRegressor()

In [None]:
# Train the linear regressor on the training features and targets.
reg.fit(xTr, yTr)

In [None]:
# Predict from the test FEATURES. The regressor was fitted on xTr, so feeding
# it the label matrix yTe would be an input of the wrong kind.
predictions = reg.predict(xTe)

In [None]:
import tflearn
import tensorflow as tf
# Linear Regression graph
# One row of xTr.shape[1] features per sample; the batch dimension stays open.
input_ = tflearn.input_data(shape=[None, xTr.shape[1]])
# Dense layer with identity activation: one linear output per label column.
linear = tflearn.fully_connected(input_, yTr.shape[1], activation='linear')
# float32 with an open batch dimension, so any batch size can be fed.
y_placeholder = tf.placeholder(tf.float32, shape=[None, yTr.shape[1]])
# The regression op is attached to the linear layer's output, not to the raw input.
regression = tflearn.regression(linear, placeholder=y_placeholder, optimizer='sgd', loss='mean_square', metric='R2', learning_rate=0.01)
m = tflearn.DNN(regression)
m.fit(xTr, yTr, n_epoch=1000, batch_size=500, show_metric=True, snapshot_epoch=False)

---

# Regression via NN

In [None]:
import tflearn
import tensorflow as tf

In [None]:
def w2vloss( feat, truths ):
    """Mean binary cross-entropy between predictions and targets.

    Args:
        feat: tensor of predicted probabilities (expected within [0, 1]).
        truths: tensor of targets with the same shape as `feat`.

    Returns:
        Scalar tensor: -mean(truths*log(feat) + (1-truths)*log(1-feat)).
    """
    # Keep predictions strictly inside (0, 1): a saturated output of exactly
    # 0 or 1 would otherwise produce log(0) and turn the loss into NaN/inf.
    eps = 1e-7
    feat = tf.clip_by_value(feat, eps, 1.0 - eps)
    return -tf.reduce_mean( truths * tf.log( feat ) + (1-truths)*tf.log(1-feat) )

In [None]:
def imagemodel(input_size=2048, output_size=281, hidden_units=None, learning_rate=1.0):
    '''
    imagemodel( input_size, output_size, hidden_units, learning_rate )

    Build a fully connected network: ReLU hidden layers followed by a sigmoid
    output layer, trained with plain gradient descent on w2vloss.

    input_size:    width of the input feature vectors
    output_size:   number of outputs (one per label)
    hidden_units:  iterable of hidden layer sizes; None or empty means the
                   output layer is connected directly to the inputs
    learning_rate: step size of the gradient descent optimizer (default 1.0)

    Returns (inputs, prediction, labels, loss, optimizer), where inputs and
    labels are placeholders and optimizer is the minimize op.
    '''
    # None instead of a mutable [] default; both mean "no hidden layers".
    if hidden_units is None:
        hidden_units = ()

    labels = tf.placeholder(tf.float32, [None, output_size], name='labels')
    inputs = tf.placeholder(tf.float32, [None, input_size], name='inputs')

    # Iterate through the hidden units list and connect the graph
    layer_i = inputs
    for i, hidden in enumerate(hidden_units):
        layer_i = tflearn.fully_connected(layer_i, hidden, activation='relu', name='fc'+str(i))
    prediction = tflearn.fully_connected(layer_i, output_size, activation='sigmoid', name='output')

    # Loss function and optimizer to be used
    loss = w2vloss(prediction,labels)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Return actual variables
    return inputs, prediction, labels, loss, optimizer

In [None]:
# Open an interactive session with the allow_growth GPU option switched on.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.InteractiveSession(config=config)

In [None]:
# Build the network with a single 200-unit hidden layer and one output per
# label column of yTe, then initialise all variables in the session.
# NOTE: this rebinds the notebook-level names `labels` and `loss`.
hidden_units = [200, ]
inputs,preds,labels,loss,opt = imagemodel(output_size=yTe.shape[1], hidden_units = hidden_units)
init_op = tf.initialize_all_variables()
sess.run(init_op)

In [None]:
epochs = 500
bsize = 10000

epoch_list = []
loss_list = []

# Mini-batch training; the loss recorded for an epoch is that of its last batch.
print('Starting Training')
for epoch in range(epochs):
    for b in range(0,len(yTr),bsize):
        _, lossval = sess.run([opt,loss], feed_dict={inputs:xTr[b:b+bsize], labels:yTr[b:b+bsize]})
    epoch_list.append(epoch+1)
    loss_list.append(lossval)
    # Report the 1-based epoch number so the progress line matches epoch_list
    # (the plotted x axis) and ends at epochs/epochs.
    sys.stdout.write("\rEpoch {}/{}: loss={}".format(epoch+1, epochs, lossval))
plt.plot(epoch_list, loss_list)
plt.xlabel("Epoch")
plt.ylabel("Loss")
In [None]:
# Run the trained network on the test features to get label scores.
yHat_output = sess.run(preds, feed_dict={inputs:xTe})

In [None]:
# Score the network's predictions with the Octave evaluation script.
[precision, recall, f1score] = octave.evaluate(yTe.T, yHat_output.T, 5)
# Single-argument print(...) works both as a statement and when
# print_function is active in the kernel (the tflearn cells enable it).
print("precision: " + str(precision))
print("recall: " + str(recall))
print("f1score: " + str(f1score))

In [None]:
# Close the interactive session opened for the NN regression.
sess.close()

In [None]:
"""
net = tflearn.input_data(shape=[None, 2048])
net = tflearn.fully_connected(net, 200)
#net = tflearn.fully_connected(net, 200)
net = tflearn.fully_connected(net, 288, activation='softmax')
"""

In [None]:
"""
# Define model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
model.fit(xTr, yTr, n_epoch=10, batch_size=500, show_metric=True)
"""

In [None]:
#yHat = model.predict(xTe)

---

# Calculate performance

In [None]:
# Original Multihot scores
# Precision: 0.396726436529
# Recall: 0.211489266372
# F1 Score: 0.275900088116

# second step, filtered to original
[precision, recall, f1score] = octave.evaluate(yTe.T, yHat.T, 10)
# Single-argument print(...) works both as a statement and when
# print_function is active in the kernel (the tflearn cells enable it).
print(precision)
print(recall)
print(f1score)

In [None]:
# Original Multihot scores
# Precision: 0.396726436529
# Recall: 0.211489266372
# F1 Score: 0.275900088116

# nonlinear suppression, full chain
[precision, recall, f1score] = octave.evaluate(yTe.T, yHat.T, 10)
# Single-argument print(...) works both as a statement and when
# print_function is active in the kernel (the tflearn cells enable it).
print(precision)
print(recall)
print(f1score)

In [None]:
# Original Multihot scores
# Precision: 0.396726436529
# Recall: 0.211489266372
# F1 Score: 0.275900088116

# scaled multihot
[precision, recall, f1score] = octave.evaluate(yTe.T, yHat.T, 10)
# Single-argument print(...) works both as a statement and when
# print_function is active in the kernel (the tflearn cells enable it).
print(precision)
print(recall)
print(f1score)

---

# Example: Predict labels for an image

In [None]:
# Fixed example image (row index into the test set). For a random one use
# np.random.randint(0, yTe.shape[0]): rows are images, columns are labels.
i=671

# Run example
# NOTE: img_dir comes from the optional IMG_DIR variable and must be set here.
imname = os.path.join(img_dir, "images", "images", "%s.jpg" % test_ims[i])
#imname = os.path.join(img_dir, "ESP-ImageSet", "images", "%s" % test_ims[i])
print("Looking at the "+str(i)+"th image: "+imname)
im=plt.imread(imname)

# Prediction: the ten highest-scoring labels together with their scores
ypwords = ["%s (%s)" % (D[idx], yHat[i][idx]) for idx in yHat[i].argsort()[::-1][:10]]
# Truth: labels whose ground-truth entry is non-zero
ytwords = [D[idx] for idx in np.nonzero(yTe[i])[0]]
plt.imshow(im)

# Single-argument print(...) works both as a statement and when
# print_function is active in the kernel (the tflearn cells enable it).
print('Predicted: ')
for ypword in ypwords:
    print("\t%s" % ypword)
print('Truth:     '+ ', '.join(ytwords))

plt.figure()

In [None]:
# Fixed example image (row index into the test set). For a random one use
# np.random.randint(0, yTe.shape[0]): rows are images, columns are labels.
i=671

# Run example
# NOTE: img_dir comes from the optional IMG_DIR variable and must be set here.
imname = os.path.join(img_dir, "images", "images", "%s.jpg" % test_ims[i])
#imname = os.path.join(img_dir, "ESP-ImageSet", "images", "%s" % test_ims[i])
print("Looking at the "+str(i)+"th image: "+imname)
im=plt.imread(imname)

# Prediction: the ten highest-scoring labels together with their scores
ypwords = ["%s (%s)" % (D[idx], yHat[i][idx]) for idx in yHat[i].argsort()[::-1][:10]]
# Truth: labels whose ground-truth entry is non-zero
ytwords = [D[idx] for idx in np.nonzero(yTe[i])[0]]
plt.imshow(im)

# Single-argument print(...) works both as a statement and when
# print_function is active in the kernel (the tflearn cells enable it).
print('Predicted: ')
for ypword in ypwords:
    print("\t%s" % ypword)
print('Truth:     '+ ', '.join(ytwords))

plt.figure()

In [None]:
# Fixed example image (row index into the test set). For a random one use
# np.random.randint(0, yTe.shape[0]): rows are images, columns are labels.
i=671

# Run example
# NOTE: img_dir comes from the optional IMG_DIR variable and must be set here.
imname = os.path.join(img_dir, "images", "images", "%s.jpg" % test_ims[i])
#imname = os.path.join(img_dir, "ESP-ImageSet", "images", "%s" % test_ims[i])
print("Looking at the "+str(i)+"th image: "+imname)
im=plt.imread(imname)

# Prediction: the ten highest-scoring labels together with their scores
ypwords = ["%s (%s)" % (D[idx], yHat[i][idx]) for idx in yHat[i].argsort()[::-1][:10]]
# Truth: labels whose ground-truth entry is non-zero
ytwords = [D[idx] for idx in np.nonzero(yTe[i])[0]]
plt.imshow(im)

# Single-argument print(...) works both as a statement and when
# print_function is active in the kernel (the tflearn cells enable it).
print('Predicted: ')
for ypword in ypwords:
    print("\t%s" % ypword)
print('Truth:     '+ ', '.join(ytwords))

plt.figure()

In [None]:
# Predicted label scores for the example image selected above.
plt.stem(yHat[i])

---

# Expand vocabulary

In [None]:
def nonlinearity2(arr, percentile=99, power=1):
    """Suppress the small entries of each row, then raise everything to a power.

    Every row is thresholded independently: entries strictly below that row's
    `percentile`-th percentile are set to 0. Afterwards all entries are raised
    element-wise to `power`. The array is modified IN PLACE and also returned.

    Args:
        arr: 2-D numpy array; rows are processed independently.
        percentile: per-row percentile (0-100) used as the cutoff.
        power: exponent applied element-wise after thresholding.

    Returns:
        The same array object that was passed in, after modification.
    """
    # One cutoff per row, shaped (rows, 1) so it broadcasts across the columns.
    # Vectorised so that vocabulary-sized matrices do not go through a Python
    # loop over every single element.
    cutoffs = np.percentile(arr, percentile, axis=1)[:, np.newaxis]
    arr[arr < cutoffs] = 0
    # Assign through [...] so the caller's array is updated, not replaced.
    arr[...] = np.power(arr, power)
    return arr

In [None]:
# Load a cached matrix from w_v_16.npy. Per the trailing comment it was
# presumably produced by construct_W over the whole model vocabulary in
# float16 -- TODO confirm.
W_V = np.load(os.path.join(wordvecs_dir, "w_v_16.npy")) #construct_W(w2v_model, w2v_model.vocab, dtype=np.float16)
#np.save(os.path.join(wordvecs_dir, "w_v_16_kylez.npy"), W_V)

In [None]:
#expansion_arr = nonlinearity2(np.dot(W_V.T, W_L), power=2)
#expansion_arr = np.load(os.path.join(wordvecs_dir, "expansion_arr.npy"))
#np.save(os.path.join(wordvecs_dir, "expansion_arr.npy"), expansion_arr)

In [None]:
# Project label-space scores onto the whole vocabulary: W_V.T . W_L is passed
# through nonlinearity2 (default 99th-percentile cutoff, squared) and used as
# the mapping.
# The TEST predictions (yHat) are expanded, not the training labels: the
# example cell indexes `expanded` with test images and compares against yTe,
# and the cache file below is named expanded_yHat.
expanded = np.dot(yHat, nonlinearity2(np.dot(W_V.T, W_L), power=2).T)
#expanded = np.load(os.path.join(wordvecs_dir, "expanded_yHat.npy"))
#np.save(os.path.join(wordvecs_dir, "expanded_yHat.npy"), expanded)

In [None]:
# Inspect the dimensions of the expanded score matrix.
expanded.shape

In [None]:
# Fixed example image (row index into the test set). For a random one use
# np.random.randint(0, yTe.shape[0]): rows are images, columns are labels.
i = 102

# Run example
# test_ims is the list loaded from the archive; `test_ims_full` is not defined
# anywhere in this notebook.
imname = os.path.join(dataset_dir, "images", "images", "%s.jpg" % test_ims[i])
print("Looking at the "+str(i)+"th image: "+imname)
im=plt.imread(imname)

# Prediction: the fifty highest-scoring vocabulary words
ypwords = [w2v_model.vocab[idx] for idx in expanded[i].argsort()[::-1][:50]]
# Truth: labels whose ground-truth entry is non-zero
ytwords = [D[idx] for idx in np.nonzero(yTe[i])[0]]
plt.imshow(im)

# Single-argument print(...) works both as a statement and when
# print_function is active in the kernel (the tflearn cells enable it).
print('Predicted: '+ ', '.join(ypwords))
print('Truth:     '+ ', '.join(ytwords))

plt.figure()