In [1]:
import tensorflow as tf
import collections
import math
import os
import random
import zipfile

import numpy as np
from six.moves import urllib
from six.moves import xrange

In [2]:
##different batch
def generate_batch_skipgram(data, batch_size, num_skips, skip_window):
    '''
    Batch generator for the skip-gram model.

    Reads consecutive words from ``data`` starting at the module-level cursor
    ``data_index`` (which must be set before the first call) and advances that
    cursor, wrapping around at the end of ``data``.

    Parameters
    ----------
    data: list of word indices
    batch_size: number of (center, context) pairs in each mini-batch;
        must be a multiple of num_skips
    num_skips: number of context words sampled for each center word;
        must not exceed 2 * skip_window
    skip_window: number of words considered on each side of the center word

    Returns
    -------
    batch: int32 array of shape (batch_size,), the center word of each pair
    labels: int32 array of shape (batch_size, 1), the sampled context word
    '''
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.ndarray(shape=(batch_size), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = 2 * skip_window + 1  # [ skip_window target skip_window ]
    buffer = collections.deque(maxlen=span)  # sliding window over data
    for _ in range(span):
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    for i in range(batch_size // num_skips):
        target = skip_window  # start on the center so the first draw is forced
        targets_to_avoid = [skip_window]
        for j in range(num_skips):
            # Draw window positions until one is found that is neither the
            # center nor a context position already used for this center.
            while target in targets_to_avoid:
                target = random.randint(0, span - 1)
            targets_to_avoid.append(target)
            batch[i * num_skips + j] = buffer[skip_window]
            labels[i * num_skips + j, 0] = buffer[target]
        # Slide the window one word to the right.
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    # Rewind by one window so the next call starts with the words that are
    # currently buffered; otherwise they would never be used as center words.
    data_index = (data_index + len(data) - span) % len(data)
    return batch, labels


def generate_batch_cbow(data, batch_size, num_skips, skip_window):
    '''
    Batch generator for CBOW (Continuous Bag of Words).

    Each row of ``batch`` holds every word of the sliding window except the
    center word; the matching row of ``labels`` holds the center word.
    Reads from ``data`` at the module-level cursor ``data_index`` (which must
    be set before the first call) and advances it, wrapping around.

    Parameters
    ----------
    data: list of word indices
    batch_size: number of windows (rows) in each mini-batch
    num_skips: number of context words per row; because the whole window is
        used, this must equal 2 * skip_window
    skip_window: number of words considered on each side of the center word

    Returns
    -------
    batch: int32 array of shape (batch_size, num_skips), context words
    labels: int32 array of shape (batch_size, 1), center words

    Raises
    ------
    ValueError: if num_skips != 2 * skip_window (the context words would not
        fit a row of ``batch``)
    '''
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    span = 2 * skip_window + 1  # [ skip_window target skip_window ]
    if num_skips != span - 1:
        raise ValueError(
            "num_skips must equal 2 * skip_window for CBOW batches")
    batch = np.ndarray(shape=(batch_size, num_skips), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    buffer = collections.deque(maxlen=span)  # sliding window over data
    # collect the first window of words
    for _ in range(span):
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    # move the sliding window
    for i in range(batch_size):
        # all surrounding words, i.e. every window position but the center
        batch[i, :] = [w for pos, w in enumerate(buffer) if pos != skip_window]
        labels[i, 0] = buffer[skip_window]  # the word at the center
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    # Rewind by one window so the next call starts with the words that are
    # currently buffered; otherwise they would never be used as center words.
    data_index = (data_index + len(data) - span) % len(data)
    return batch, labels



In [3]:
# Read d:/deep/sis/annotation.txt and keep every whitespace-separated token
# that is purely alphanumeric. `words` is the token stream (with repeats, in
# reading order); `dic` is the set of distinct tokens.
words=[]
dic=set()
# The context manager closes the file even if reading raises.
with open("d:/deep/sis/annotation.txt") as f:
    text=f.readlines()
for line in text:
    for w in line.split():
        if w.isalnum():
            dic.add(w)
            words.append(w)

In [4]:
# Append the purely alphanumeric tokens of d:/deep/caption/annotation.txt to
# the existing `words` stream and `dic` set (both must already be defined).
# The context manager closes the file even if reading raises.
with open("d:/deep/caption/annotation.txt") as f:
    text=f.readlines()
for line in text:
    for w in line.split():
        if w.isalnum():
            dic.add(w)
            words.append(w)

In [5]:
# Append the tokens of d:/deep/annotations/captions.txt to the existing
# `words` stream and `dic` set. Unlike the two loaders that filter on
# isalnum(), this one strips the listed punctuation characters from each token
# and keeps whatever is left, if anything.
punct_chars={',','.','!','?','\"','(',')','\''}
# The context manager closes the file even if reading raises.
with open("d:/deep/annotations/captions.txt") as f:
    text=f.readlines()
for line in text:
    for raw in line.split():
        w="".join(ch for ch in raw if ch not in punct_chars)
        if len(w)>0:
            dic.add(w)
            words.append(w)

In [6]:
# Embedding dimension; the model-building cell reassigns this to 128 before
# the trained graph is constructed.
embedding_size=100
# One slot per distinct token. build_dataset() reserves index 0 for 'UNK' and
# keeps only the vocabulary_size - 1 most common words, so with this value the
# single rarest token is mapped to 'UNK'.
vocabulary_size=len(dic)

In [7]:
vocabulary_size

37119

In [8]:
# Uniform(-1, 1) embedding matrix created in TensorFlow's default graph.
# The model-building cell rebinds `embeddings` to a new variable inside its
# own tf.Graph, so this one is not the variable that gets trained.
embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))

In [9]:
import collections
import math
def build_dataset(words, vocabulary_size):
    """Index a token stream against its most frequent words.

    Parameters
    ----------
    words: sequence of tokens
    vocabulary_size: size of the vocabulary including the 'UNK' entry, so the
        vocabulary_size - 1 most common tokens get their own index

    Returns
    -------
    data: list with the index of every token in `words` (0 for unknown tokens)
    count: [['UNK', n_unknown], (word, frequency), ...] in index order
    dictionary: word -> index
    reverse_dictionary: index -> word
    """
    frequent = collections.Counter(words).most_common(vocabulary_size - 1)
    count = [['UNK', -1]] + list(frequent)

    # Indices follow the order of `count`, so 'UNK' is 0 and lower indices
    # belong to more frequent words.
    dictionary = {}
    for entry in count:
        dictionary[entry[0]] = len(dictionary)

    data = [dictionary.get(token, 0) for token in words]  # 0 == dictionary['UNK']
    count[0][1] = sum(1 for token in words if token not in dictionary)

    reverse_dictionary = {index: token for token, index in dictionary.items()}
    return data, count, dictionary, reverse_dictionary

In [10]:
# Step 2: Build the dictionary and replace rare words with UNK token.
# `data` is the corpus as word indices, `count` the (word, frequency) table,
# `dictionary` maps word -> index and `reverse_dictionary` index -> word.
data, count, dictionary, reverse_dictionary = build_dataset(words, vocabulary_size)
#del words  # Hint to reduce memory.
print('Most common words (+UNK)', count[:5])
print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])

# Global read cursor into `data`, read and advanced by the batch generators.
data_index = 0

Most common words (+UNK) [['UNK', 1], ('a', 275859), ('the', 210048), ('to', 87244), ('and', 80056)]
Sample data [50, 3383, 235, 7, 212, 9, 24, 3, 61, 4082] ['our', 'landmark', 'tree', 'in', 'town', 'was', 'about', 'to', 'be', 'destroyed']


In [11]:
count[:30]

[['UNK', 1],
 ('a', 275859),
 ('the', 210048),
 ('to', 87244),
 ('and', 80056),
 ('of', 75072),
 ('is', 73801),
 ('in', 66970),
 ('on', 49809),
 ('was', 44121),
 ('with', 37324),
 ('man', 35008),
 ('we', 33085),
 ('it', 27801),
 ('for', 27763),
 ('i', 27215),
 ('are', 25927),
 ('they', 20513),
 ('were', 20235),
 ('at', 20062),
 ('this', 19063),
 ('there', 18282),
 ('woman', 18210),
 ('some', 18144),
 ('about', 16757),
 ('an', 16744),
 ('people', 16536),
 ('had', 15488),
 ('that', 14623),
 ('my', 14203)]

In [12]:
def generate_batch(batch_size, num_skips, skip_window):
  """Produce one skip-gram mini-batch from the module-level `data`.

  The module-level cursor `data_index` marks where reading starts; it is
  advanced while the window slides (wrapping around at the end of `data`) and
  rewound by one window before returning.

  Args:
    batch_size: number of (center, context) pairs; a multiple of num_skips.
    num_skips: context words sampled per center word; at most 2 * skip_window.
    skip_window: number of words considered on each side of the center word.

  Returns:
    (batch, labels): int32 arrays of shape (batch_size,) and (batch_size, 1)
    holding center words and their sampled context words.
  """
  global data_index
  assert batch_size % num_skips == 0
  assert num_skips <= 2 * skip_window
  corpus_len = len(data)
  window_len = 2 * skip_window + 1  # [ skip_window target skip_window ]
  centers = np.ndarray(shape=(batch_size), dtype=np.int32)
  contexts = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
  window = collections.deque(maxlen=window_len)
  # Fill the first window.
  for _ in range(window_len):
    window.append(data[data_index])
    data_index = (data_index + 1) % corpus_len
  row = 0
  for _ in range(batch_size // num_skips):
    pick = skip_window  # starts on the center so the first draw is forced
    taken = [skip_window]
    for _ in range(num_skips):
      # Redraw until the position is neither the center nor already used.
      while pick in taken:
        pick = random.randint(0, window_len - 1)
      taken.append(pick)
      centers[row] = window[skip_window]
      contexts[row, 0] = window[pick]
      row += 1
    # Slide the window one word to the right.
    window.append(data[data_index])
    data_index = (data_index + 1) % corpus_len
  # Backtrack a little bit to avoid skipping words in the end of a batch
  data_index = (data_index - window_len) % corpus_len
  return centers, contexts

In [13]:
import collections
import math
import os
import random
import zipfile

import numpy as np
# Step 3: Function to generate a training batch for the skip-gram model.


# Sanity check: draw one small batch and print the first 8 (center -> context)
# pairs as "index word -> index word". This advances the global data_index.
batch, labels = generate_batch(batch_size=16, num_skips=2, skip_window=1)
for i in range(8):
  print(batch[i], reverse_dictionary[batch[i]],
        '->', labels[i, 0], reverse_dictionary[labels[i, 0]])

3383 landmark -> 50 our
3383 landmark -> 235 tree
235 tree -> 3383 landmark
235 tree -> 7 in
7 in -> 212 town
7 in -> 235 tree
212 town -> 9 was
212 town -> 7 in


In [14]:
# Step 4: Build and train a skip-gram model.

batch_size = 128
# NOTE: replaces the embedding_size=100 assigned in an earlier cell.
embedding_size = 128  # Dimension of the embedding vector.
skip_window = 1       # How many words to consider left and right.
num_skips = 2         # How many times to reuse an input to generate a label.

# We pick a random validation set to sample nearest neighbors. Here we limit the
# validation samples to the words that have a low numeric ID, which by
# construction are also the most frequent.
valid_size = 16     # Random set of words to evaluate similarity on.
valid_window = 100  # Only pick dev samples in the head of the distribution.
# No seed is set in this cell, so the validation words differ between runs.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
num_sampled = 64    # Number of negative examples to sample.

graph = tf.Graph()

with graph.as_default():

  # Input data.
  train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
  train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
  valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

  # Ops and variables pinned to the CPU because of missing GPU implementation
  with tf.device('/cpu:0'):
    # Look up embeddings for inputs.
    # This rebinds the notebook-level name `embeddings` to a variable that
    # lives in `graph`.
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    embed = tf.nn.embedding_lookup(embeddings, train_inputs)

    # Construct the variables for the NCE loss
    nce_weights = tf.Variable(
        tf.truncated_normal([vocabulary_size, embedding_size],
                            stddev=1.0 / math.sqrt(embedding_size)))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

  # Compute the average NCE loss for the batch.
  # tf.nce_loss automatically draws a new sample of the negative labels each
  # time we evaluate the loss.
  loss = tf.reduce_mean(
      tf.nn.nce_loss(weights=nce_weights,
                     biases=nce_biases,
                     labels=train_labels,
                     inputs=embed,
                     num_sampled=num_sampled,
                     num_classes=vocabulary_size))

  # Construct the SGD optimizer using a learning rate of 1.0.
  optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

  # Compute the cosine similarity between minibatch examples and all embeddings.
  # Rows are divided by their L2 norm, so the matmul below yields cosine
  # similarities of the validation words against every word.
  # NOTE(review): `keep_dims` is the older spelling of this argument; newer
  # TensorFlow releases spell it `keepdims` - confirm against the installed version.
  norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
  normalized_embeddings = embeddings / norm
  valid_embeddings = tf.nn.embedding_lookup(
      normalized_embeddings, valid_dataset)
  similarity = tf.matmul(
      valid_embeddings, normalized_embeddings, transpose_b=True)

  # Add variable initializer.
  init = tf.global_variables_initializer()


In [15]:

# Step 5: Begin training.
num_steps = 50001

with tf.Session(graph=graph) as session:
  # We must initialize all variables before we use them.
  init.run()
  print("Initialized")

  average_loss = 0
  for step in xrange(num_steps):
    batch_inputs, batch_labels = generate_batch(
        batch_size, num_skips, skip_window)
    feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}

    # We perform one update step by evaluating the optimizer op (including it
    # in the list of returned values for session.run()
    _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict)
    average_loss += loss_val

    if step % 2000 == 0:
      # At step 0 only one batch has been accumulated, so the value printed
      # then is that single batch's loss rather than an average.
      if step > 0:
        average_loss /= 2000
      # The average loss is an estimate of the loss over the last 2000 batches.
      print("Average loss at step ", step, ": ", average_loss)
      average_loss = 0

    # Note that this is expensive (~20% slowdown if computed every 500 steps)
    if step % 10000 == 0:
      sim = similarity.eval()
      for i in xrange(valid_size):
        valid_word = reverse_dictionary[valid_examples[i]]
        top_k = 8  # number of nearest neighbors
        # Sort by descending similarity and drop the first hit, which is
        # presumably the validation word itself.
        nearest = (-sim[i, :]).argsort()[1:top_k + 1]
        log_str = "Nearest to %s:" % valid_word
        for k in xrange(top_k):
          close_word = reverse_dictionary[nearest[k]]
          log_str = "%s %s," % (log_str, close_word)
        print(log_str)
  # Row-normalized embedding matrix as a NumPy array; later cells index it by
  # word id and use dot products of its rows as cosine similarities.
  final_embeddings = normalized_embeddings.eval()

Initialized
Average loss at step  0 :  251.646392822
Nearest to after: poaches, atheletes, physiologist, insoles, axed, attire, enat, inhabitant,
Nearest to so: clsneros, raven, anne, yermin, oc, teleconferene, pryramid, ramasay,
Nearest to UNK: composing, delete, holywoood, vin, eva, batches, timt, seaweed,
Nearest to by: approvingly, snowball, lighthouse, itsef, narration, turmoil, sprawling, ps4,
Nearest to as: graphic, jezebels, desing, jogger, awkwardness, crossbones, dunes, binder,
Nearest to food: dart, barbecues, evident, valves, sounds, infused, cycled, withered,
Nearest to in: helm, 6th, riled, functioning, bill, restarunt, emerald, prodding,
Nearest to car: cranked, vampires, offer, driveways, target, meat, nope, seaguls,
Nearest to group: contacts, stabbing, spirals, passers, tauk, jurassic, batmanton, encore,
Nearest to that: necked, antique, hols, waetr, leppard, powel, macho, skids,
Nearest to down: peels, um, shut, wallowing, stays, nosies, lei, packaging,
Nearest to th

In [111]:


# Step 6: Visualize the embeddings.


def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
  """Scatter-plot 2-D points, annotate each with its label, and save to disk.

  Args:
    low_dim_embs: array whose i-th row is the (x, y) position of labels[i];
      it must have at least as many rows as there are labels.
    labels: text to draw next to each point.
    filename: path the figure is written to.
  """
  assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
  plt.figure(figsize=(18, 18))  # in inches
  # Annotation placement shared by every point.
  text_style = dict(xytext=(5, 2),
                    textcoords='offset points',
                    ha='right',
                    va='bottom')
  for row, label in enumerate(labels):
    x, y = low_dim_embs[row, :]
    plt.scatter(x, y)
    plt.annotate(label, xy=(x, y), **text_style)

  plt.savefig(filename)

# Project the first `plot_only` embedding rows (the lowest word ids, i.e. the
# most frequent words) to 2-D with t-SNE and plot them with their words.
# The plotting libraries are imported here so a missing package only skips
# the visualization.
try:
  from sklearn.manifold import TSNE
  import matplotlib.pyplot as plt

  tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
  plot_only = 500
  low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
  # NOTE: this rebinds the notebook-level name `labels` (previously the label
  # array returned by generate_batch) to a list of words.
  labels = [reverse_dictionary[i] for i in xrange(plot_only)]
  plot_with_labels(low_dim_embs, labels)

except ImportError:
  print("Please install sklearn, matplotlib, and scipy to visualize embeddings.")

In [16]:
## load captions
# Layout implied by the parser: a line holding an integer image id, then that
# image's caption lines, then a separator line starting with '*'; repeated.
# `cap` maps image id -> all of its caption lines concatenated (newlines kept).
cap={}
num=0  # parser state: 0 = next line is an image id, 1 = inside a caption block
# The context manager closes the file even if parsing an id raises.
with open("d:/deep/annotations/captions_on_images.txt") as f:
    text=f.readlines()
for line in text:
    if num==0:
        im_id=int(line)
        cap[im_id]=""
        num=1
    elif line.startswith('*'):
        num=0
    else:
        cap[im_id]+=line

In [17]:
##labels
# The 80 class names, each a single word so it can be looked up directly in
# `dictionary`. A class's position in this list is its class index everywhere
# below (vote vectors, c_id, predictions).
# NOTE(review): several entries look like one word taken from a multi-word
# class name (e.g. 'hot', 'teddy', 'parking', 'stop', 'traffic') - confirm
# the intended mapping.
category=[
'airplane',
 'apple',
 'backpack',
 'banana',
 'bat',
 'glove',
 'bear',
 'bed',
 'bench',
 'bicycle',
 'bird',
 'boat',
 'book',
 'bottle',
 'bowl',
 'broccoli',
 'bus',
 'cake',
 'car',
 'carrot',
 'cat',
 'phone',
 'chair',
 'clock',
 'couch',
 'cow',
 'cup',
 'table',
 'dog',
 'donut',
 'elephant',
 'hydrant',
 'fork',
 'frisbee',
 'giraffe',
 'drier',
 'handbag',
 'horse',
 'hot',
 'keyboard',
 'kite',
 'knife',
 'laptop',
 'microwave',
 'motorcycle',
 'mouse',
 'orange',
 'oven',
 'parking',
 'person',
 'pizza',
 'plant',
 'refrigerator',
 'remote',
 'sandwich',
 'scissors',
 'sheep',
 'sink',
 'skateboard',
 'skis',
 'snowboard',
 'spoon',
 'ball',
 'stop',
 'suitcase',
 'surfboard',
 'teddy',
 'tennis',
 'tie',
 'toaster',
 'toilet',
 'toothbrush',
 'traffic',
 'train',
 'truck',
 'tv',
 'umbrella',
 'vase',
 'wine',
 'zebra']

In [18]:
len(category)

80

In [19]:
##stopwords
# One stop word per line. The context manager makes sure the file is closed.
with open("d:/deep/annotations/stopwords.txt") as f:
    text=f.readlines()
stw=set()
for line in text:
    # Strip only the line terminator, so a final line without a trailing
    # newline keeps its last character.
    stw.add(line.rstrip('\n'))

In [20]:
# Vocabulary index of every category word, in `category` order.
# Raises KeyError if a category word is missing from `dictionary`.
c_id=[dictionary[name] for name in category]

In [21]:
# Flat list of 80 zero counters, one per category.
vote=[0]*80

In [26]:
len(c_id)

80

In [239]:
# Spot check: dot product of the embeddings of word ids 0 and 1. The rows of
# final_embeddings are L2-normalized, so this is their cosine similarity.
vec=final_embeddings[0]
vec2=final_embeddings[1]
x=vec*vec2
sum(x)

0.19309834533760295

In [64]:
##compute score-hard assignment
# For every image, each caption word that is neither a stop word nor unknown
# casts one vote for the category whose embedding is most similar to it, but
# only if that similarity is positive. The image is assigned the category with
# the most votes (ties, and images with no votes, resolve to the lowest index).
num=0      # number of images processed
f_c={}     # image id -> predicted category index
punct_chars={',','.','!','?','\"','(',')','\''}
# Category embeddings stacked once, one row per category in `category` order.
# Cast to float64 so similarities are accumulated in double precision.
cat_vecs=final_embeddings[c_id].astype('float64')
n_cat=len(c_id)
for idx in cap.keys():
    num+=1
    vote=[0]*n_cat
    for word in cap[idx].split():
        w="".join(ch for ch in word if ch not in punct_chars)
        if w in stw:
            continue
        w_id=dictionary.get(w)
        if w_id is None:
            # Word not in the vocabulary: it cannot vote.
            continue
        # Rows of final_embeddings are unit length, so these dot products are
        # cosine similarities between the word and every category.
        sims=cat_vecs.dot(final_embeddings[w_id])
        best=int(sims.argmax())
        # The winner is recomputed for every word; a word with no positive
        # similarity casts no vote instead of reusing a previous word's winner.
        if sims[best]>0:
            vote[best]+=1
    f_c[idx]=vote.index(max(vote))

In [22]:
##compute score-soft assignment(version 1)
# For every image, each caption word that is neither a stop word nor unknown
# spreads one vote over all categories in proportion to its similarity to each
# category (similarities divided by their sum). The image is assigned the
# category with the largest accumulated score.
num=0      # number of images processed
f_c={}     # image id -> predicted category index
punct_chars={',','.','!','?','\"','(',')','\''}
# Category embeddings stacked once, one row per category in `category` order.
# Cast to float64 so similarities are accumulated in double precision.
cat_vecs=final_embeddings[c_id].astype('float64')
n_cat=len(c_id)
for idx in cap.keys():
    num+=1
    vote=np.zeros(n_cat)
    for word in cap[idx].split():
        w="".join(ch for ch in word if ch not in punct_chars)
        if w in stw:
            continue
        w_id=dictionary.get(w)
        if w_id is None:
            # Word not in the vocabulary: it cannot vote.
            continue
        # Rows of final_embeddings are unit length, so these dot products are
        # cosine similarities between the word and every category.
        v1=cat_vecs.dot(final_embeddings[w_id])
        nsum=v1.sum()
        if nsum==0:
            # Nothing to normalize by; skip rather than divide by zero.
            continue
        vote+=v1/nsum
    f_c[idx]=int(vote.argmax())

In [23]:
# Vocabulary index and name of every category, both in `category` order.
d_id=[dictionary[name] for name in category]
c_name=list(category)

In [256]:
print(c_id)
print(d_id)

[3075, 1938, 973, 591, 522, 10234, 7090, 326, 1292, 2269, 1808, 75]
[3075, 1938, 973, 591, 522, 10234, 7090, 326, 1292, 2269, 1808, 75]


In [73]:
cap[285]

"a big burly grizzly bear is show with grass in the background.\nthe large brown bear has a black nose.\ncloseup of a brown bear sitting in a grassy area.\na large bear that is sitting on grass. \na close up picture of a brown bear's face.\n"

In [60]:
# Debug probe for a single image (id 285): accumulate, per category, the sum
# of similarities between that category and every non-stop-word caption word.
lis=cap[285].split()
vote=[[0]*80]
vote=vote[0]
# NOTE: vote_cat is an alias of vote (same list object), not a copy.
vote_cat=vote
for word in lis:
    w="".join(l for l in word if l not in {',','.','!','?','\"','(',')','\''})
    if w in stw:
        continue
    # Unlike the full scoring loops, an out-of-vocabulary word raises KeyError here.
    w_id=dictionary[w]
    vec=final_embeddings[w_id]
    max_sim=0
    for i in range(0,80):
        cate=c_id[i]
        vec2=final_embeddings[cate]
        sim_vec=vec*vec2
        cos_sim=sum(sim_vec)
        #if (cos_sim>max_sim):
         #   max_cat=i
        #  max_sim=cos_sim
        vote_cat[i]+=cos_sim
    #print(w+" * "+category[max_cat]+" * "+str(max_sim))
    #vote_cat[max_cat]+=1
vote_cat

[6.0373464424830559,
 5.0287863813184686,
 5.623186024174089,
 4.9838541453602261,
 5.5092276819265749,
 6.2373459906463751,
 8.2610520824714087,
 6.616338929515436,
 5.7352950972691019,
 5.131979278677818,
 6.2401388656421659,
 5.9831512314649444,
 4.9001040906346134,
 5.9016973816953424,
 6.027419016411045,
 4.9448518066211591,
 5.3230896595264312,
 5.0439717061767624,
 6.9828496644919369,
 4.4637264494173792,
 5.8901301221719677,
 4.5940657976979278,
 6.7865499255924249,
 5.9955352906085295,
 5.2900227236058441,
 5.2572658933052168,
 5.654972468571259,
 6.3761998509755813,
 6.0067346303712839,
 5.9773801572385992,
 5.3403864684509017,
 5.228548451501922,
 4.7791184161555975,
 4.7638326675538423,
 4.5525080427954947,
 2.9849516307623318,
 4.2046088005199636,
 5.0875150386493715,
 5.1955107991968816,
 5.6292906314101288,
 5.6620063359252981,
 5.4748598928276238,
 5.1119638916158294,
 5.1168647271139207,
 6.0553473981862567,
 6.1166656566316533,
 6.0090655100845538,
 5.8996772763489389

In [55]:
# Debug probe for a single word: print its vocabulary id, then its similarity
# to every category, tracking the best positive match in max_cat / max_sim.
w="bears"
w_id=dictionary[w]
print(w_id)
vec=final_embeddings[w_id]
max_sim=0
for i in range(0,80):
    cate=c_id[i]
    vec2=final_embeddings[cate]
    sim_vec=vec*vec2
    # Dot product of two unit-length rows, i.e. cosine similarity.
    cos_sim=sum(sim_vec)
    print(category[i])
    print(cos_sim)
    if (cos_sim>max_sim):
        max_cat=i
        max_sim=cos_sim

2331
airplane
0.059897787718
apple
0.237704424556
backpack
0.198446211223
banana
0.284955965349
bat
0.125698089414
glove
0.221739510597
bear
0.269318317847
bed
0.194658221264
bench
0.0844481369854
bicycle
0.136313817468
bird
0.245391901259
boat
0.262978493005
book
0.289927862341
bottle
0.244562902182
bowl
0.197864840273
broccoli
0.12950824982
bus
0.262352346873
cake
0.195963409305
car
0.256202422288
carrot
0.271524696668
cat
0.214034944454
phone
0.0328253104161
chair
0.38340994514
clock
0.0820968878101
couch
0.0687060427708
cow
0.130919557486
cup
0.247158319208
table
0.269107949329
dog
0.193550081798
donut
0.244176500888
elephant
0.205908750127
hydrant
0.167794405199
fork
0.176555609176
frisbee
0.199541758708
giraffe
0.20735227881
drier
0.152020816653
handbag
0.173725977679
horse
0.194944637715
hot
0.29497229107
keyboard
0.0837427616561
kite
0.0458598446403
knife
0.255837239143
laptop
0.103961160092
microwave
0.242392650105
motorcycle
0.283776916667
mouse
0.24369676086
orange
0.1980202

In [46]:
dictionary['bears']

2331

In [47]:
dictionary['bear']

311

In [49]:
dictionary['chair']

686

In [57]:
# Spot check: similarity between word ids 686 and 2331, which the lookups
# displayed just before this cell show to be 'chair' and 'bears'.
vec=final_embeddings[686]
vec2=final_embeddings[2331]
x=vec*vec2
sum(x)

0.38340994513964688

In [24]:
##evaluation
fin1=open('D:/deep/annotations/annotated2.txt')
fin2=open('D:/deep/annotations/out_id.txt')
classes=fin1.readlines()
ids=fin2.readlines()
num=0;
for i in range(0,len(ids)):
    idx=int(ids[i])
    #predict=category[f_c[idx]]
    #label=classes[i][:-1]
    predict=f_c[idx]
    label=int(classes[i])
    if (predict==label):
        num+=1
fin1.close()
fin2.close()

In [25]:
type(text)

list

In [26]:
num

3516

In [28]:
##add clustering information
# Line i of cluster.txt is the cluster id of the image on line i of `ids`.
#fin3=open('D:/deep/annotations/cluster.txt')
# The context manager closes the file even if reading raises.
with open('D:/deep/cluster.txt') as fin3:
    cluster=fin3.readlines()
num=0
# vot[c][k]: number of images in cluster c whose caption prediction is class k.
# Each row is an independent list (85 clusters x 80 classes).
vot=[[0]*80 for _ in range(85)]
f_d={}     # image id -> class chosen by its cluster (filled in a later cell)
exm=set()  # image ids that fall in cluster 9, kept for inspection
div={}     # image id -> cluster id
for i in range(0,len(ids)):
    idxx=int(ids[i])
    idx=int(cluster[i])
    div[idxx]=idx
    if (idx==9):
        exm.add(idxx)
    vot[idx][f_c[idxx]]+=1


In [29]:
# Give every image the majority class of its cluster: the lowest class index
# holding the highest vote count in that cluster's row of `vot`.
for pos, id_line in enumerate(ids):
    cluster_votes=vot[int(cluster[pos])]
    f_d[int(id_line)]=cluster_votes.index(max(cluster_votes))

In [30]:
vot

[[0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  9,
  0,
  2,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0],
 [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  3,
  0,
  1,
  4,
  0,
  45,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  4,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  2,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [3,
  0,
  1,
  0,
  0,
  0,
  1,
  1,
  7,
  2,
  0,
  4,
  0,
  1,
  0,
  0,
  0,
  0,
  5,
  0,
  0,
  0,
  1,
  6,
  0,
  0,
  0,
  2,
  3,
  0,
  0,
  6,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  

# Compare cluster-based predictions (f_d) with the labels in annotated.txt and
# print every mismatch.
# NOTE(review): f_d holds integer class indices while `label` is the raw text
# of a line, so `predict==label` cannot be true as written - confirm which
# label file / prediction format this cell is meant to use.
# The context managers close both files even if reading raises.
with open('D:/deep/annotations/annotated.txt') as fin1, \
     open('D:/deep/annotations/out_id.txt') as fin2:
    classes=fin1.readlines()
    ids=fin2.readlines()
num=0  # number of matching predictions
us=0   # number of images labelled "animal"
for i in range(0,len(ids)):
    idx=int(ids[i])
    predict=f_d[idx]
    label=classes[i][:-1]
    if (label=="animal"):
        us+=1
    if (predict==label):
        num+=1
    else:
        # str() is required: predict is an int and cannot be concatenated
        # to a string directly.
        print("pr: "+str(predict))
        print(label)

In [31]:
## evaluation with image information
# Count how many cluster-majority predictions (f_d) match the annotated class.
# Line i of out_id.txt is an image id and line i of annotated2.txt is that
# image's integer class label.
# The context managers close both files even if reading raises.
with open('D:/deep/annotations/annotated2.txt') as fin1, \
     open('D:/deep/annotations/out_id.txt') as fin2:
    classes=fin1.readlines()
    ids=fin2.readlines()
num=0  # number of correct predictions

for i in range(0,len(ids)):
    idx=int(ids[i])
    predict=f_d[idx]
    label=int(classes[i])
    if (predict==label):
        num+=1

In [32]:
len(exm)

17

In [33]:
num

2826

In [55]:
# Ids of all images assigned to cluster 80.
inset={img_id for img_id, cluster_id in div.items() if cluster_id==80}

In [56]:
inset

{10077,
 28826,
 35899,
 143946,
 151808,
 164583,
 184474,
 218960,
 221477,
 256859,
 271457,
 273188,
 277208,
 283849,
 338826,
 341752,
 359203,
 360342,
 371155,
 400472,
 405660,
 406885,
 413287,
 419624,
 479099,
 486968,
 503255,
 505040,
 537355,
 553678,
 573622,
 575776}

In [57]:
# Persist the cluster-80 image ids.
# NOTE(review): `inset` is a Python set, which NumPy presumably stores as a
# pickled object array; reading it back would then need allow_pickle=True -
# confirm, or convert to a sorted integer array before saving.
np.save("inset.npy",inset)

In [43]:
##compute score-soft assignment(version 2)
# Same soft voting as version 1 (each usable caption word spreads one vote
# over the categories in proportion to its similarity to them), plus a fixed
# bonus of 0.5 for the class chosen by the image's cluster (f_d).
num=0      # number of images processed
f_e={}     # image id -> predicted category index
punct_chars={',','.','!','?','\"','(',')','\''}
# Category embeddings stacked once, one row per category in `category` order.
# Cast to float64 so similarities are accumulated in double precision.
cat_vecs=final_embeddings[c_id].astype('float64')
n_cat=len(c_id)
for idx in cap.keys():
    num+=1
    vote=np.zeros(n_cat)
    for word in cap[idx].split():
        w="".join(ch for ch in word if ch not in punct_chars)
        if w in stw:
            continue
        w_id=dictionary.get(w)
        if w_id is None:
            # Word not in the vocabulary: it cannot vote.
            continue
        # Rows of final_embeddings are unit length, so these dot products are
        # cosine similarities between the word and every category.
        v1=cat_vecs.dot(final_embeddings[w_id])
        nsum=v1.sum()
        if nsum==0:
            # Nothing to normalize by; skip rather than divide by zero.
            continue
        vote+=v1/nsum
    # Raises KeyError if this image has no cluster-based prediction in f_d.
    cate=f_d[idx]
    vote[cate]+=0.5
    f_e[idx]=int(vote.argmax())

In [44]:
# Count how many combined caption + cluster predictions (f_e) match the
# annotated class. Line i of out_id.txt is an image id and line i of
# annotated2.txt is that image's integer class label.
# The context managers close both files even if reading raises.
with open('D:/deep/annotations/annotated2.txt') as fin1, \
     open('D:/deep/annotations/out_id.txt') as fin2:
    classes=fin1.readlines()
    ids=fin2.readlines()
num=0  # number of correct predictions
for i in range(0,len(ids)):
    idx=int(ids[i])
    predict=f_e[idx]
    label=int(classes[i])
    if (predict==label):
        num+=1

In [45]:
num

2760