In [1]:
import time
import os
import sys
from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pickle
import json
from pprint import pprint
from sklearn import cross_validation
%matplotlib inline

  from ._conv import register_converters as _register_converters


# Load the image features

In [2]:
image_features = pickle.load(open('../VGG_Image_Features/salient_train_5_fc2', 'rb'))

In [3]:
len(image_features)

12878

# Load the image description metadata file

In [4]:
data_images = json.load(open('train.story-in-sequence.json'))

# Sort the images with stories

In [5]:
def get_story(annotations):
    story = dict()
    annotations_len = len(annotations)
    previous_story_id = annotations[0][0]['story_id']
    story[previous_story_id] = list()
    for i in range(annotations_len):
        for j in range(len(annotations[i])):
            if previous_story_id != annotations[i][j]['story_id']:
                previous_story_id = annotations[i][j]['story_id']
                story[previous_story_id] = list()
                
            image_des = {'photo_id':annotations[i][j]['photo_flickr_id'], 
                         'description' :annotations[i][j]['text']}
            story[previous_story_id].append(image_des)
            
    return story

In [6]:
_story = get_story(data_images['annotations'])

In [7]:
_story

{'30355': [{'description': 'our landmark tree in town was about to be destroyed and cleared for a new mall .',
   'photo_id': '2627795780'},
  {'description': 'so we decided to take the day to go out and enjoy its beauty .',
   'photo_id': '2626979987'},
  {'description': 'to see the final glimpse of the roots , extending out into the depths of the hill .',
   'photo_id': '2626982337'},
  {'description': 'and its magnificent trunk , larger than life itself .',
   'photo_id': '2626983575'},
  {'description': 'one last picture of its beauty so we could capture it forever .',
   'photo_id': '2626985925'}],
 '30356': [{'description': 'we found this tree when we were walking in a nearby town .',
   'photo_id': '2701863545'},
  {'description': 'it turns out it is a popular attraction here .',
   'photo_id': '2626977325'},
  {'description': 'the tree is very unusual , with its roots exposed .',
   'photo_id': '2627795780'},
  {'description': 'the trunk was really wide , as much as 12 feet !',

# Get the image features of the stories

In [8]:
def story_image_features(story_data):
    image_features_len = len(image_features)
    story_features = dict()
    for key, value in story_data.items():
        exist = True
        for i in range(len(value)):
            image_exist = False
            for j in range(image_features_len):
                if value[i]['photo_id']== image_features[j]['id']:
                    value[i]['features'] = image_features[j]['features']
                    image_exist = True
                    break
            if image_exist == False:
                exist = False
                break
        if exist == True:
            story_features[key] = value
            
    return story_features

In [9]:
story_features = story_image_features(_story)

In [10]:
story_features

{'34465': [{'description': 'we went to organization to do some shopping .',
   'features': array([[0.       , 0.       , 0.       , ..., 0.       , 0.       ,
           1.7827089]], dtype=float32),
   'photo_id': '4289156320'},
  {'description': 'somehow we ended up in the video game isle and bought a copy of location .',
   'features': array([[0.       , 0.       , 0.       , ..., 0.       , 0.       ,
           0.2968071]], dtype=float32),
   'photo_id': '4289150146'},
  {'description': 'we then bought a few tomatoes .',
   'features': array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
   'photo_id': '4289151350'},
  {'description': 'i got a few limes so i could make margaritas later tonight .',
   'features': array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
   'photo_id': '4288411333'},
  {'description': 'i also picked up some flowers for mothersday .',
   'features': array([[0.      , 7.386519, 0.      , ..., 2.937281, 0.      , 0.      ]],
         dtype=float32),
   '

In [11]:
def preProBuildWordVocab(sentence_iterator, word_count_threshold=3):
    # borrowed this function from NeuralTalk
    print ('preprocessing word counts and creating vocab based on word count threshold %d' % (word_count_threshold, ))

    word_counts = {}
    nsents = 0

    for sent in sentence_iterator:
        nsents += 1
        tmp_sent = sent.lower().split(' ')
        if '' in tmp_sent:
            tmp_sent.remove('')

        for w in tmp_sent:
            if w !='':
                word_counts[w] = word_counts.get(w, 0) + 1

    vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
    print ('filtered words from %d to %d' % (len(word_counts), len(vocab)))

    ixtoword = {}
    ixtoword[0] = '<bos>'
    ixtoword[1] = '<eos>'
    ixtoword[2] = '<pad>'
    ixtoword[3] = '<unk>'

    wordtoix = {}
    wordtoix['<bos>'] = 0
    wordtoix['<eos>'] = 1
    wordtoix['<pad>'] = 2
    wordtoix['<unk>'] = 3

    for idx, w in enumerate(vocab):
        wordtoix[w] = idx + 4
        ixtoword[idx+4] = w

    word_counts['<eos>'] = nsents
    word_counts['<bos>'] = nsents
    word_counts['<pad>'] = nsents
    word_counts['<unk>'] = nsents


    return wordtoix, ixtoword

In [12]:
def get_all_sentences(data):
    all_sentences = []
    for key, value in data.items():
        length = len(value)
        for i in range(length):
            sent = value[i]['description']
            sent = sent.lower()
            sent = sent.strip()
            sent = sent.replace(',', ' ,')
            sent = sent.replace('.', '')
            sent = sent.replace('"', ' " ')
            all_sentences.append(sent)
            
    return all_sentences


In [13]:
sentences = get_all_sentences(story_features)

In [14]:
sentences

['we went to organization to do some shopping ',
 'somehow we ended up in the video game isle and bought a copy of location ',
 'we then bought a few tomatoes ',
 'i got a few limes so i could make margaritas later tonight ',
 'i also picked up some flowers for mothersday ',
 'when we entered the supermarket  , we picked up a cart/trolley ',
 'we ventured over and picked up a boquet of flowers ',
 'we next picked some peppers for dinner tonight ',
 'we also picked up some soda to drink with dinner ',
 'finally some dish detergent to wash the dishes after dinner ',
 'we picked up a shopping cart at the market ',
 'we bought flowers  ,',
 'peppers and other vegetables  ,',
 'and fruit juice for dinner ',
 'we also bought some dish soap ',
 'the market was almost empty ',
 "they had things that are n't even food items too ",
 'the tomatoes are fresh off the vine ',
 'the limes are in season ',
 'the flowers are looking old though ',
 'grocery shopping can sometimes feel like an overwhelmi

In [15]:
word2idx, idx2word = preProBuildWordVocab(sentences, 2)

preprocessing word counts and creating vocab based on word count threshold 2
filtered words from 8358 to 4592


In [16]:
word2idx

{'<bos>': 0,
 '<eos>': 1,
 '<pad>': 2,
 '<unk>': 3,
 'we': 4,
 'went': 5,
 'to': 6,
 'organization': 7,
 'do': 8,
 'some': 9,
 'shopping': 10,
 'somehow': 11,
 'ended': 12,
 'up': 13,
 'in': 14,
 'the': 15,
 'video': 16,
 'game': 17,
 'isle': 18,
 'and': 19,
 'bought': 20,
 'a': 21,
 'copy': 22,
 'of': 23,
 'location': 24,
 'then': 25,
 'few': 26,
 'tomatoes': 27,
 'i': 28,
 'got': 29,
 'limes': 30,
 'so': 31,
 'could': 32,
 'make': 33,
 'margaritas': 34,
 'later': 35,
 'tonight': 36,
 'also': 37,
 'picked': 38,
 'flowers': 39,
 'for': 40,
 'when': 41,
 'entered': 42,
 'supermarket': 43,
 ',': 44,
 'ventured': 45,
 'over': 46,
 'next': 47,
 'peppers': 48,
 'dinner': 49,
 'soda': 50,
 'drink': 51,
 'with': 52,
 'finally': 53,
 'dish': 54,
 'wash': 55,
 'dishes': 56,
 'after': 57,
 'cart': 58,
 'at': 59,
 'market': 60,
 'other': 61,
 'vegetables': 62,
 'fruit': 63,
 'juice': 64,
 'soap': 65,
 'was': 66,
 'almost': 67,
 'empty': 68,
 'they': 69,
 'had': 70,
 'things': 71,
 'that': 72,
 'a

In [17]:
vocab_size = len(word2idx)

In [18]:
vocab_size

4596

# Global Variable

In [19]:
feature_dimension = 4096
#state_size = 512
n_words = len(word2idx)
wordRNN_lstm_dim = 512
word_embed_dim = 512
#batch_size = 4
learning_rate = 0.001
n_epochs = 3
project_dim = 1024
sentRNN_lstm_dim = 1024
sentRNN_FC_dim = 1024
N_max_word = 30
n_image = 2

# Extract training features and label

In [20]:
def get_caption_matrix(img_sent):
    img_captions_matrix = np.ones([ N_max_word+2], dtype=np.int32) * 1
    img_captions_matrix[0] = 0
    for idx, word in enumerate(img_sent.lower().split(' ')):
        # because the biggest number of words in a sentence is N_max, here is 50
        if idx == N_max_word + 1:
            break
            
        if word in word2idx:
            img_captions_matrix[ idx+1] = word2idx[word]
        else:
            img_captions_matrix[ idx+1] = word2idx['<unk>']
    
    return img_captions_matrix

In [21]:
def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

# Variable

In [22]:
regionPooling_W = tf.Variable(tf.random_uniform([feature_dimension, project_dim], -0.1, 0.1))
regionPooling_b = tf.Variable(tf.zeros([project_dim]))

img_embedding = tf.Variable(tf.random_uniform([feature_dimension, wordRNN_lstm_dim], -0.1, 0.1))
img_embedding_bias = tf.Variable(tf.zeros([wordRNN_lstm_dim])) 

fc1_W = tf.Variable(tf.random_uniform([sentRNN_lstm_dim, sentRNN_FC_dim], -0.1, 0.1))
fc1_b = tf.Variable(tf.zeros(sentRNN_FC_dim))
fc2_W = tf.Variable(tf.random_uniform([sentRNN_FC_dim, 1024], -0.1, 0.1))
fc2_b = tf.Variable(tf.zeros(1024))

embed_word_W = tf.Variable(tf.random_uniform([wordRNN_lstm_dim, n_words], -0.1,0.1))
embed_word_b = tf.Variable(tf.zeros([n_words]))

W_embeddings = tf.Variable(tf.random_uniform([n_words, word_embed_dim], -0.1, 0.1))

# Placeholder

In [23]:

    #tmp_feats = tf.reshape(feats, [-1, self.feats_dim])


# Topic RNN

In [24]:
sent_LSTM = tf.nn.rnn_cell.BasicLSTMCell(sentRNN_lstm_dim, state_is_tuple=True)


# Word RNN

In [25]:
word_LSTM_cell = tf.nn.rnn_cell.BasicLSTMCell(wordRNN_lstm_dim, state_is_tuple=True)
word_LSTM_cell = tf.nn.rnn_cell.MultiRNNCell([word_LSTM_cell] * 2, state_is_tuple=True)

In [26]:
def build_model():
    loss_word = 0.0
    feats = tf.placeholder(tf.float32, [n_image, feature_dimension])
    caption = tf.placeholder(tf.int32, [n_image, N_max_word + 2])
    
    sent_state = sent_LSTM.zero_state(batch_size=1, dtype=tf.float32)
    for i in range(n_image):
        # topic RNN
        project_vec = tf.nn.tanh(tf.matmul(feats[i:i+1,:], regionPooling_W) + regionPooling_b)
    
        sent_output, sent_state = sent_LSTM(project_vec, sent_state)
        hidden1 = tf.nn.relu( tf.matmul(sent_output, fc1_W) + fc1_b)
        sent_topic_vec = tf.nn.relu( tf.matmul(hidden1, fc2_W) + fc2_b)


        state = tf.nn.rnn_cell.LSTMStateTuple(sent_topic_vec[:, 0:512], sent_topic_vec[:, 512:])
        word_state = (state, state)

        image_embedding = tf.matmul(feats[i:i+1,:], img_embedding) + img_embedding_bias



        for j in range(0, N_max_word+1):
            if j > 0:
                tf.get_variable_scope().reuse_variables()                
                current_embed = tf.nn.embedding_lookup(W_embeddings, caption[i:i+1, j-1])
            else:
                current_embed = image_embedding

            word_output, word_state = word_LSTM_cell(current_embed, word_state)
            if j>0:
                indices = caption[i:i+1,j]
                onehot_labels = tf.one_hot(indices, depth = n_words)

                logit_words = tf.nn.xw_plus_b(word_output[:], embed_word_W, embed_word_b)
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits = logit_words, labels = onehot_labels)

                prediction = tf.nn.softmax(logit_words)
                correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(onehot_labels, 1))
                accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

                loss_wordRNN = tf.reduce_sum(cross_entropy)
                loss_word += loss_wordRNN


    return feats, caption, loss_word, accuracy


In [27]:
model_path = './models_batch/'

# Train the model

In [36]:
def train_model():
    tf_feats, tf_caption, tf_loss, tf_acc = build_model()
    sess = tf.InteractiveSession()
    
    saver = tf.train.Saver(max_to_keep=100, write_version=1)
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(tf_loss)
    tf.global_variables_initializer().run()
    
    for epoch in range(0, n_epochs):
            loss = 0;
            acc = 0;
            total_pass = 0
            start_time = time.time()
            for key, value in story_features.items():
                features = []
                caption_matrix = []
                for i in range(n_image):
                    description = value[i]['description']
                    _features = value[i]['features']
                    _caption_matrix = get_caption_matrix(description)
                    features.append(_features)
                    caption_matrix.append(_caption_matrix)
                    
                features = np.array(features)
                caption_matrix = np.array(caption_matrix)
                features = np.asarray(np.reshape(features, (n_image, -1)))
                caption_matrix = np.asarray(np.reshape(caption_matrix, (n_image, -1)))
                    

                _, loss_word, acc_word = sess.run(
                    [train_op, tf_loss, tf_acc],
                    feed_dict={
                        tf_feats: features,
                        tf_caption: caption_matrix
                    })
                
                loss = loss + loss_word
                acc = acc+acc_word
                total_pass = total_pass + 1
                # running information
            
            loss = loss/total_pass
            acc = acc/total_pass
            print ('Epoch: ', epoch, ' loss: ', loss, " accuracy: ", acc, ' Time cost: ', str((time.time() - start_time)))
            if np.mod(epoch, 2) == 0:
                print ("Epoch ", epoch, " is done. Saving the model ...")
                saver.save(sess, os.path.join(model_path, 'model_partials'), global_step=epoch)

In [37]:
train_model()

Epoch:  0  loss:  134.37811166719817  accuracy:  0.9977198697068403  Time cost:  405.87872862815857
Epoch  0  is done. Saving the model ...
Epoch:  1  loss:  123.0591306208011  accuracy:  0.9980456026058632  Time cost:  409.9509928226471
Epoch:  2  loss:  120.42586119213787  accuracy:  0.9980456026058632  Time cost:  409.7926757335663
Epoch  2  is done. Saving the model ...


# Test the Model

In [28]:
def generate_model():
    feats = tf.placeholder(tf.float32, [n_image, feature_dimension])
    sent_state = sent_LSTM.zero_state(batch_size=1, dtype=tf.float32)
    paragraph = []
    for i in range(n_image):
        project_vec = tf.nn.tanh(tf.matmul(feats[i:i+1, :], regionPooling_W) + regionPooling_b)

        sent_output, sent_state = sent_LSTM(project_vec, sent_state)
        hidden1 = tf.nn.relu( tf.matmul(sent_output, fc1_W) + fc1_b)
        sent_topic_vec = tf.nn.relu( tf.matmul(hidden1, fc2_W) + fc2_b)


        state = tf.nn.rnn_cell.LSTMStateTuple(sent_topic_vec[:, 0:512], sent_topic_vec[:, 512:])
        word_state = (state, state)

        image_embedding = tf.matmul(feats[i:i+1, :], img_embedding) + img_embedding_bias

        word_output, word_state = word_LSTM_cell(image_embedding, word_state)
        current_embed = tf.nn.embedding_lookup(W_embeddings, tf.zeros([1], dtype=tf.int64))
        generated_sent = []

        for j in range(0, N_max_word):
            tf.get_variable_scope().reuse_variables()

            word_output, word_state = word_LSTM_cell(current_embed, word_state)

            logit_words = tf.nn.xw_plus_b(word_output, embed_word_W, embed_word_b)

            max_prob_index = tf.argmax(logit_words, 1)[0]

            generated_sent.append(max_prob_index)
            with tf.device('/cpu:0'):
                current_embed = tf.nn.embedding_lookup(W_embeddings, max_prob_index)
                current_embed = tf.expand_dims(current_embed, 0)
        
        paragraph.append(generated_sent)

    return feats, paragraph
    

# Test Data

In [29]:
test_story_featues = dict()
k = 0
for key, value in story_features.items():
    test_story_featues[key] = value
    if k == 500:
        break
    k +=1

In [30]:
def format_sentance(generated_sentence):
        each_sent = []
        for sent in generated_sentence:
            for word_indices in sent:
                #print(word_indices)
                #print(word_indices)
                #for word_index in word_indices:
                    #print(idx2word[word_index])
                each_sent.append(idx2word[word_indices])
            
        current_sent = ''
        for each_word in each_sent:
            current_sent += each_word + ' '
        
        current_sent = current_sent.replace('<eos> ', '')
        current_sent = current_sent.replace('<pad> ', '')
        current_sent = current_sent + '.'
        current_sent = current_sent.replace(' .', '.')
        current_sent = current_sent.replace(' ,', ',')
        return current_sent
    

In [37]:

def test():
    #tf_feats, tf_generated_sent, tf_k_top = generate_model()
    start_time = time.time()

    model_path = './models_batch/model_partials-350'

    tf_feats, tf_generated_sent = generate_model()
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    saver.restore(sess, model_path)
    
    test_fd = open('HRNN_results.txt', 'w')
    for key, value in test_story_featues.items():
        features = []
        photo_ids = []
        descriptions = []
        for i in range(n_image):
            description = value[i]['description']
            _features = value[i]['features']
            photo_id = value[i]['photo_id']
            features.append(_features)
            photo_ids.append(photo_id)
            descriptions.append(description)
            
        features = np.array(features)
        features = np.asarray(np.reshape(features, (n_image, -1)))
        test_fd.write("story: " + key + '\n')
        test_fd.write("photo_ids: ")
        for ids in photo_ids:
            test_fd.write(ids + " ")
        test_fd.write("\n")
            
        
        each_paragraph = []
        current_paragraph = ""
        
        generated_sentence_indexes = sess.run([tf_generated_sent], feed_dict={
            tf_feats: features
        })
        
        for sentence_list in generated_sentence_indexes:
            for word_indices in sentence_list:
                each_sent = []
                for word_index in word_indices:
                    each_sent.append(idx2word[word_index])
                each_paragraph.append(each_sent)
            
        current_sent = ''
        for each_sent in each_paragraph:
            for each_word in each_sent:
                current_sent += each_word + ' '
            current_sent += '\n'
                
            current_sent = current_sent.replace('<eos> ', '')
            current_sent = current_sent.replace('<pad> ', '')
            current_sent = current_sent + '.'
            current_sent = current_sent.replace(' .', '.')
            current_sent = current_sent.replace(' ,', ',')
            
            
        test_fd.write(" generated text: " + current_sent + '\n')
        test_fd.write(" original text: " + '\n')
        for description in descriptions:
            test_fd.write(description + '\n')
        test_fd.write("\n")
        print(current_sent)
        print(descriptions)
            
    test_fd.close()
    print ("Time cost: " + str(time.time()-start_time))

In [38]:
test()

INFO:tensorflow:Restoring parameters from ./models_batch/model_partials-350
grocery shopping can sometimes feel like an overwhelming <unk> <unk> 
.this can be especially true if you are <unk> with <unk> displays meant to get you to make <unk> purchases <unk> 
.
['we went to organization to do some shopping .', 'somehow we ended up in the video game isle and bought a copy of location .']
we picked up a shopping cart at the market <unk> 
.we bought flowers, 
.
['when we entered the supermarket , we picked up a cart/trolley .', 'we ventured over and picked up a boquet of flowers .']
we picked up a shopping cart at the market <unk> 
.we bought flowers, 
.
['we picked up a shopping cart at the market .', 'we bought flowers ,']
grocery shopping can sometimes feel like an overwhelming <unk> <unk> 
.this can be especially true if you are <unk> with <unk> displays meant to get you to make <unk> purchases <unk> 
.
['the market was almost empty .', "they had things that are n't even food items to

<unk> we visited the ghost town of <unk> <unk> home of krampus <unk> 
.an eerie breeze came down the street as we admired the empty olden buildings <unk> 
.
['today we visited the town of location .', 'there were a lot of traditional american structures in the town .']
we start off by looking at an old train car <unk> 
.then we go to an old green building <unk> 
.
['we start off by looking at an old train car .', 'then we go to an old green building .']
<unk> we visited the ghost town of <unk> <unk> home of krampus <unk> 
.an eerie breeze came down the street as we admired the empty olden buildings <unk> 
.
['a marker informing visitors when location was incorporated .', 'the now defunct department store on main st .']
<unk> we visited the ghost town of <unk> <unk> home of krampus <unk> 
.an eerie breeze came down the street as we admired the empty olden buildings <unk> 
.
['during our vacation , we visited the village of location and walked around through the streets .', 'we saw lots 

[male] was leaving to go on vacation <unk> 
.he said goodbye to his friends and got in the car <unk> 
.
['[male] was leaving to go on vacation .', 'he said goodbye to his friends and got in the car .']
i had to go to an information center to get help this morning <unk> 
.i found three other people standing <unk> outside the door <unk> 
.
['our campus health clinic is pretty easy to use .', 'people get their early for fast service .']
i had to go to an information center to get help this morning <unk> 
.i found three other people standing <unk> outside the door <unk> 
.
['the group took a trip around the german town .', "they visited many places , finding new buildings they 'd never seen ."]
i had to go to an information center to get help this morning <unk> 
.i found three other people standing <unk> outside the door <unk> 
.
['i had to go to an information center to get help this morning .', 'i found three other people standing suspiciously outside the door .']
we painted the church t

this week i got to go sight-seeing in a big college town <unk> 
.they have stained glass artwork <unk> 
.
["the grand cathedral is the town 's piece de resistance .", 'the stain glass panels were carefully crafted by medieval artisans , entirely by hand .']
this week i got to go sight-seeing in a big college town <unk> 
.they have stained glass artwork <unk> 
.
['churches are truly beautiful .', 'they have stained glass artwork .']
this week i got to go sight-seeing in a big college town <unk> 
.there was strange design, including a creepy <unk> head on a building <unk> 
.
['a truly amazing landmark that one must visit .', 'wow , i can not even put into words the beauty we experience upon the viewing of this .']
this week i got to go sight-seeing in a big college town <unk> 
.there was strange design, including a creepy <unk> head on a building <unk> 
.
['this week i got to go sight-seeing in a big college town .', 'there was strange design , including a creepy seeming head on a buildi

a group of friends took a trip to location to see the sites <unk> 
.they start with a helicopter tour of the city to get a birds eye view <unk> 
.
['a group of friends took a trip to location to see the sites .', 'that all got to see the beautiful architecture around the city .']
the boy went on a road trip this summer <unk> 
.he met new friends <unk> 
.
['i was on vacation enjoying the sights .', 'my life long best friend came with me .']
while visiting location, we came across a large statue of a man holding a snake <unk> 
.we stopped to eat some of the local food <unk> 
.
['[male] and [male] met up for a day of fun .', 'first they went and had lunch at a new restaurant .']
the boy went on a road trip this summer <unk> 
.he met new friends <unk> 
.
['the boy went on a road trip this summer .', 'he met new friends .']
while visiting location, we came across a large statue of a man holding a snake <unk> 
.we stopped to eat some of the local food <unk> 
.
['two best friends decided to t

[male] and his friends are having a good time on the drive to the big game <unk> 
.[male] was excited to wear the team 's <unk> <unk> 
.
['a group of friends travel to see a football game .', "they talk about other games they 've been to ."]
[male] and his friends are having a good time on the drive to the big game <unk> 
.[male] was excited to wear the team 's <unk> <unk> 
.
['we took a bus to the away home gam .', "[male] was excited to wear the team 's jersey ."]
[male] and his friends are having a good time on the drive to the big game <unk> 
.[male] and his friends decide to make some grilled food for the players before the game starts <unk> 
.
['today we went to go see the organization organization organization play football .', 'we cooked all kinds of foods at the tailgate party .']
[male] and his friends are having a good time on the drive to the big game <unk> 
.[male] and his friends decide to make some grilled food for the players before the game starts <unk> 
.
['[male] and

the church sign has been cleaned <unk> 
.people are playing music in the graveyard <unk> 
.
["after leaving organization organization organization a group of friends go to visit their friend [male] 's memorial site .", 'they gather together to sing songs to pay respect on their friends birthday .']
the church sign has been cleaned <unk> 
.people are playing music in the graveyard <unk> 
.
['we met at the organization organization organization for a celebration .', 'there was music .']
the bowl of lemonade punch had not been spiked yet, but it was only a matter of time <unk> 
.the cotton company doors were <unk> <unk> the town was celebrating its anniversary <unk> 
.
['the bowl of lemonade punch had not been spiked yet , but it was only a matter of time .', 'the cotton company doors were locked . the town was celebrating its anniversary .']
the church sign has been cleaned <unk> 
.people are playing music in the graveyard <unk> 
.
['the church sign has been cleaned .', 'people are playi

on the train going to the flower job 
.sees different flowers 
.
['on the train going to the flower job', 'sees different flowers']
i decided i wanted to visit the farmers market <unk> 
.i rode the subway down to the street it was on <unk> 
.
['on a trip through location .', 'here in the underground']
i decided i wanted to visit the farmers market <unk> 
.i rode the subway down to the street it was on <unk> 
.
['i decided i wanted to visit the farmers market .', 'i rode the subway down to the street it was on .']
this was a statue i took a picture of on my trip around the world <unk> 
.i found this tunnel interesting, and would not want to fall in <unk> 
.
['i woke up not knowing where i was . i stood up in front of this statue in a town square .', 'i stumbled down these stairs .']
the tree has been decorated <unk> 
.the building is pretty <unk> 
.
['it was just about christmas .', "this year the couple was n't going to have a traditional christmas ."]
the tree has been decorated <unk>

you can see all the old ruins of the city from the summit <unk> 
.the land is watched over by the pointing man <unk> 
.
['greek ruins in location .', "this guy must 've been important ."]
you can see all the old ruins of the city from the summit <unk> 
.the land is watched over by the pointing man <unk> 
.
['there is an old land of stone and brick .', 'the land is watched over by the pointing man .']
you can see all the old ruins of the city from the summit <unk> 
.the land is watched over by the pointing man <unk> 
.
['you can see all the old ruins of the city from the summit .', 'he stood atop on of the ancient columns .']
while i was traveling i had the <unk> to photograph a group of students <unk> 
.we assembled them on the steps of their school <unk> 
.
['the children were very excited for their first day of school .', 'all the children gathered outside the school .']
we took a walk through the empty chapel <unk> 
.we posed for a picture outside <unk> 
.
['we got to the church and

he had been waiting to take this vacation for years, and the lights of the city were amazing <unk> 
.he really loved how the tallest buildings were lit <unk> 
.
['when i got to location location it was already night time .', 'the city was very beautiful at night .']
he had been waiting to take this vacation for years, and the lights of the city were amazing <unk> 
.he really loved how the tallest buildings were lit <unk> 
.
['he had been waiting to take this vacation for years , and the lights of the city were amazing .', 'he really loved how the tallest buildings were lit .']
as a family, we went hiking often <unk> 
.this time we went to a remote <unk> forest <unk> 
.
['family hiking to the top of the mountain .', 'crossing the bridge .']
we had a great time on our vacation last weekend <unk> 
.we stayed in a small village at the foot of a mountain <unk> 
.
['we had a great time on our vacation last weekend .', 'we stayed in a small village at the foot of a mountain .']
as a family, w

the sky is cloudy today <unk> 
.the art on the walls looks nice <unk> 
.
['tall building reaching towards the sky .', 'graffiti in an urban setting .']
the sky is cloudy today <unk> 
.the art on the walls looks nice <unk> 
.
['the sky is cloudy today .', 'the art on the walls looks nice .']
at the end of the lane stood the old church <unk> 
.the meadow, surrounded by forest gave one a feeling of <unk> <unk> 
.
['today i took a lovely stroll up this road and past the church .', 'through the pasture and beyond the large trees .']
it was a wonderful day <unk> 
.so i went out in the forest <unk> 
.
['it was a wonderful day .', 'so i went out in the forest .']
at the end of the lane stood the old church <unk> 
.the meadow, surrounded by forest gave one a feeling of <unk> <unk> 
.
['location location has so many white steepled churches .', 'we passed by this lovely field .']
at the end of the lane stood the old church <unk> 
.the meadow, surrounded by forest gave one a feeling of <unk> <unk>

we went on vacation to the beach <unk> 
.we had a <unk> cabin <unk> 
.
['we went on vacation to the beach .', 'we had a sea-side cabin .']
the two woman were out to a nice meal <unk> 
.they both got delicious look entrees <unk> 
.
['the two woman were out to a nice meal .', 'they both got delicious look entrees .']
the local grocery store had their annual organization organization organization celebration last weekend <unk> 
.special displays to actual sauces and condiments were out for anyone to try and purchase <unk> 
.
['the entrance to the store had lots of catchy displays .', 'the salad bar looked amazing and very tasty .']
the first <unk> ' convention does n't draw a ton of attention <unk> 
.old lady [female] could n't resist : she 'd been <unk> in an ambulance a few times in her life <unk> 
.
["the first responders ' convention does n't draw a ton of attention .", "old lady [female] could n't resist : she 'd been passenger in an ambulance a few times in her life ."]
the local gr

it was [female] 's first school play ever <unk> 
.they were <unk> a mariachi band and [female] was the flag bearer <unk> 
.
["it was [female] 's first school play ever .", 'they were portraying a mariachi band and [female] was the flag bearer .']
[female] was looking for her mama because she did not want to be in the play <unk> 
.so she stood <unk> while her mom came around <unk> 
.
['[female] was looking for her mama because she did not want to be in the play .', 'so she stood idle while her mom came around .']
wearing a sun hat <unk> 
.she and her father get to play in the sand and build a castle <unk> 
.
['i am going to the park with my mom today .', 'she is going to help me build a sand castle .']
the playground is one of the most fun places, ever <unk> 
.but it 's always more enjoyable with those you love and who will play with you <unk> 
.
['the playground is one of the most fun places , ever .', "but it 's always more enjoyable with those you love and who will play with you ."]


[male] bought $ 30 worth of candy for the piãƒæ’ã‚â±ata <unk> 
.[male] helps me put the candy in our star pinata <unk> 
.
['all the candy we had to put inside the pinata .', '[male] helps me put the candy in our star pinata .']
[male] was ready for the party ! 
.[male] was out playing with the pinata <unk> 
.
['[male] was ready for the party !', '[male] was out playing with the pinata .']
there was a gathering of people outside of my office <unk> 
.naturally i looked to see what was going on <unk> 
.
['the parade started out with a motorcade of motorcycles', 'we could see dad in the bed of his red pickup truck making his way towards us']
the location truck makes it 's way through the city of location <unk> 
.he is really impressed with her sombrero <unk> 
.
["the location truck makes it 's way through the city of location .", 'he is really impressed with her sombrero .']
there was a gathering of people outside of my office <unk> 
.naturally i looked to see what was going on <unk> 
.
['

In [43]:
for k, value in story_features.items():
    for i in range(5):
        description = value[i]['description']
        features = value[i]['features']
        print(description)

NameError: name 'story_features' is not defined

# Test the model

In [38]:
idx2word[0]
    

'<bos>'

In [87]:
test_story_featues

{'0': [{'description': '[male] and i were excited to be in location location during the 4th of july .',
   'features': array([[0.       , 0.       , 0.       , ..., 0.       , 4.6991124,
           0.       ]], dtype=float32),
   'photo_id': '997622638'},
  {'description': 'there was a huge crowd of people already awaiting the firework show .',
   'features': array([[0.        , 1.2952434 , 0.        , ..., 0.        , 0.06413986,
           0.        ]], dtype=float32),
   'photo_id': '997623170'},
  {'description': 'we were lucky to find a nice spot on the grass to watch the show .',
   'features': array([[0.       , 0.       , 0.       , ..., 0.       , 9.678047 ,
           3.6567364]], dtype=float32),
   'photo_id': '997624052'},
  {'description': 'as the evening grew darker the crowd was gearing up to enjoy the show , with a great view of the location location .',
   'features': array([[0.        , 0.        , 7.9393945 , ..., 0.        , 0.15816762,
           0.        ]], dtyp

In [79]:
p1

{'30355': [{'description': 'our landmark tree in town was about to be destroyed and cleared for a new mall .',
   'features': array([[0.        , 0.56369215, 0.        , ..., 0.        , 0.        ,
           0.        ]], dtype=float32),
   'photo_id': '2627795780'},
  {'description': 'so we decided to take the day to go out and enjoy its beauty .',
   'features': array([[0.03684911, 0.        , 0.        , ..., 0.        , 3.446499  ,
           0.        ]], dtype=float32),
   'photo_id': '2626979987'},
  {'description': 'to see the final glimpse of the roots , extending out into the depths of the hill .',
   'features': array([[2.4281654, 0.       , 0.       , ..., 0.       , 0.       ,
           0.       ]], dtype=float32),
   'photo_id': '2626982337'},
  {'description': 'and its magnificent trunk , larger than life itself .',
   'features': array([[15.149117,  0.      ,  0.      , ...,  0.      ,  0.      ,
            0.      ]], dtype=float32),
   'photo_id': '2626983575'},
 

In [None]:
for key, value in test_story_featues.items():
        for i in range(5):
            description = value[i]['description']
            _features = value[i]['features']
            features = np.asarray(np.reshape(_features, (1, -1)))
            photo_id = value[i]['photo_id']
            
            test_fd.write("story: " + key + " photo_id: " + photo_id + '\n')

            each_paragraph = []
            current_paragraph = ""

            generated_sentence_indexes = sess.run([tf_generated_sent], feed_dict={
                tf_feats: features
            })

In [85]:
batch_size = 2

In [39]:
length = len(X_train)
for i in range(length):
    if length - i > batch_size:
        print(i)




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747


In [75]:
len(X_train)


5411

In [81]:

if 5411-5410>=batch_size:
    print(s)

In [112]:
a = X_train[0:5]

In [113]:
a = np.asarray(np.reshape(a, (5, -1)))

In [114]:
a.shape

(5, 4096)

In [110]:
a = np.reshape(a, (5, -1))

In [111]:
a.shape

(5, 4096)

In [42]:
a = X_validation[1]

In [44]:
a.shape

(1, 4096)

In [3]:
from math import log
from numpy import array
from numpy import argmax



In [4]:
# beam search
def beam_search_decoder(data, k):
    sequences = [[list(), 1.0]]
    # walk over each step in sequence
    for row in data:
        all_candidates = list()
        # expand each current candidate
        for i in range(len(sequences)):
            seq, score = sequences[i]
            for j in range(len(row)):
                candidate = [seq + [j], score * -log(row[j])]
                all_candidates.append(candidate)
        # order all candidates by score
        ordered = sorted(all_candidates, key=lambda tup:tup[1])
        # select k best
        sequences = ordered[:k]
    return sequences

In [5]:


# define a sequence of 10 words over a vocab of 5 words
data = [[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1],
		[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1],
		[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1],
		[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1],
		[0.1, 0.2, 0.3, 0.4, 0.5],
		[0.5, 0.4, 0.3, 0.2, 0.1]]
data = array(data)
# decode sequence
result = beam_search_decoder(data, 3)
# print result
for seq in result:
	print(seq)

[[4, 0, 4, 0, 4, 0, 4, 0, 4, 0], 0.025600863289563108]
[[4, 0, 4, 0, 4, 0, 4, 0, 4, 1], 0.03384250043584397]
[[4, 0, 4, 0, 4, 0, 4, 0, 3, 0], 0.03384250043584397]


In [6]:
sequences = [[list(), 1.0]]

In [7]:
sequences

[[[], 1.0]]

In [17]:
    k = 3
    sequences = [[list(), 1.0]]
    # walk over each step in sequence
    for row in data:
        all_candidates = list()
        # expand each current candidate
        print(len(sequences))
        for i in range(len(sequences)):
            seq, score = sequences[i]
            print("seq: ", seq)
            print("score: ", score)
            for j in range(len(row)):
                
                candidate = [seq + [j], score * -log(row[j])]
                print("candidate: ", candidate)
                all_candidates.append(candidate)
        # order all candidates by score
        ordered = sorted(all_candidates, key=lambda tup:tup[1])
        # select k best
        sequences = ordered[:k]

1
seq:  []
score:  1.0
candidate:  [[0], 2.3025850929940455]
candidate:  [[1], 1.6094379124341003]
candidate:  [[2], 1.2039728043259361]
candidate:  [[3], 0.916290731874155]
candidate:  [[4], 0.6931471805599453]
3
seq:  [4]
score:  0.6931471805599453
candidate:  [[4, 0], 0.4804530139182014]
candidate:  [[4, 1], 0.6351243373717793]
candidate:  [[4, 2], 0.8345303547893733]
candidate:  [[4, 3], 1.1155773512899807]
candidate:  [[4, 4], 1.596030365208182]
seq:  [3]
score:  0.916290731874155
candidate:  [[3, 0], 0.6351243373717793]
candidate:  [[3, 1], 0.8395887053184746]
candidate:  [[3, 2], 1.1031891220323908]
candidate:  [[3, 3], 1.474713042690254]
candidate:  [[3, 4], 2.109837380062033]
seq:  [2]
score:  1.2039728043259361
candidate:  [[2, 0], 0.8345303547893733]
candidate:  [[2, 1], 1.1031891220323908]
candidate:  [[2, 2], 1.4495505135564588]
candidate:  [[2, 3], 1.937719476821764]
candidate:  [[2, 4], 2.7722498316111372]
3
seq:  [4, 0]
score:  0.4804530139182014
candidate:  [[4, 0, 0],

In [36]:
start = [word2idx["<unk>"]]
    
start_word = [[start, 0.0]]

In [37]:
start_word

[[[3], 0.0]]

In [38]:
len(start_word[0][0])

1

In [41]:
for s in start_word:
    print(s[0][:])

[3]


In [42]:
word2idx['class']

1106