<b>Section One – Image Captioning with TensorFlow</b>

In [None]:
# load essential libraries
import math
import os

import tensorflow as tf

%pylab inline

In [None]:
# load Tensorflow/Google Brain base code
# https://github.com/tensorflow/models/tree/master/research/im2txt

from im2txt import configuration
from im2txt import inference_wrapper
from im2txt.inference_utils import caption_generator
from im2txt.inference_utils import vocabulary

In [None]:
# Where gen_caption looks for the trained model and its vocabulary file.
# The vocabulary file sits inside the checkpoint directory, so its path is
# built from checkpoint_path.
checkpoint_path = './model'
vocab_file = checkpoint_path + '/word_counts.txt'

In [None]:
# this is the function we'll call to produce our captions 
#    given input file name(s) -- separate file names by a ,
#                                 if more than one

def gen_caption(input_files):
    """Generate and print captions for one or more image files.

    Reads the module-level ``checkpoint_path`` and ``vocab_file`` to locate
    the trained model and its vocabulary.

    Args:
        input_files: A string of file names or glob patterns separated by
            commas. Whitespace around each entry is ignored and empty
            entries are skipped, so ``"a.jpg, b.jpg"`` works as expected.

    Returns:
        A flat list of caption strings: for every matched file, in order,
        each caption produced by the beam search (begin/end words removed).

    Side effects:
        Sets the TensorFlow log verbosity to FATAL and prints every caption
        together with its probability.
    """
    # only print serious log messages
    tf.logging.set_verbosity(tf.logging.FATAL)

    # load our pretrained model
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(vocab_file)

    # Expand every comma-separated entry into concrete file names. Entries
    # are stripped first: without this, "a.jpg, b.jpg" would glob " b.jpg"
    # (leading space) and the second image would be silently dropped.
    filenames = []
    for file_pattern in input_files.split(","):
        file_pattern = file_pattern.strip()
        if not file_pattern:
            continue
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), input_files)

    captionlist = []

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the
        # default beam search parameters. See caption_generator.py for a
        # description of the available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            print("Captions for image %s:" % os.path.basename(filename))
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
                sentence = " ".join(sentence)
                print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
                captionlist.append(sentence)
    return captionlist

In [None]:
# Single-image demo: show the picture, then caption it.
testfile = 'test_images/ballons.jpeg'

# figure/imshow/imread are not imported explicitly anywhere in this notebook;
# presumably they come from the `%pylab inline` magic in the first cell.
figure()
imshow(imread(testfile))

# gen_caption prints each caption and returns them as a list of strings.
capts = gen_caption(testfile)

In [None]:
# Caption every demo image in one call. gen_caption expects a single
# comma-separated string, so the individual paths are joined here.
input_files = ','.join(
    'test_images/%s.jpeg' % stem
    for stem in (
        'ballons',
        'bike',
        'dog',
        'fireworks',
        'football',
        'giraffes',
        'headphones',
        'laughing',
        'objects',
        'snowboard',
        'surfing',
    )
)

capts = gen_caption(input_files)




<p><p><p><p><p>
<b>Retraining the image captioner</b>

In [1]:
# First download pretrained Inception (v3) model

import webbrowser 
# Hands the archive URL to the system web browser, so the download itself
# happens in the browser and not inside this notebook.
webbrowser.open("http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz")

# Completely unzip tar.gz file to get inception_v3.ckpt,
# --recommend storing in im2txt/data directory
# (the training cells further down point inception_checkpoint_file at
#  'im2txt/data/inception_v3.ckpt').

True

In [None]:
# Now gather and prepare the mscoco data

# Comment out cd magic command if already in data directory
%cd im2txt/data
# This command will take an hour or more to run typically.
# Note, you will need a lot of HD space (>100 GB)!
%run build_mscoco_data.py

# At this point you have files in im2txt/data/mscoco/raw-data that you can train
#   on, or you can substitute your own data

%cd ..

In [2]:
# load needed modules

import tensorflow as tf

from im2txt import configuration
from im2txt import show_and_tell_model

In [3]:
# Define (but don't run yet) our captioning training function
def train():
    """Train the Show-and-Tell captioning model from notebook-level settings.

    Takes no arguments. The following module-level names must be assigned
    before calling:

        input_file_pattern         -- file pattern of the training input shards
        inception_checkpoint_file  -- path of the Inception v3 checkpoint
        train_dir                  -- directory handed to the training loop
                                      (created if it does not exist)
        train_inception            -- whether the Inception weights are trained
        number_of_steps            -- number of training steps
        log_every_n_steps          -- logging frequency, in steps

    Returns:
        None. Training is run through ``tf.contrib.slim.learning.train``.
    """
    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = input_file_pattern
    model_config.inception_checkpoint_file = inception_checkpoint_file
    training_config = configuration.TrainingConfig()

    # Create training directory.
    # The module-level ``train_dir`` is read through a differently named
    # local on purpose: writing ``train_dir = train_dir`` here would make
    # ``train_dir`` local to this function and raise UnboundLocalError.
    output_dir = train_dir
    if not tf.gfile.IsDirectory(output_dir):
        tf.logging.info("Creating training directory: %s", output_dir)
        tf.gfile.MakeDirs(output_dir)

    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = show_and_tell_model.ShowAndTellModel(
            model_config, mode="train", train_inception=train_inception)
        model.build()

        # Set up the learning rate.
        learning_rate_decay_fn = None
        if train_inception:
            # Fine-tuning uses its own constant learning rate, with no decay.
            learning_rate = tf.constant(training_config.train_inception_learning_rate)
        else:
            learning_rate = tf.constant(training_config.initial_learning_rate)
            if training_config.learning_rate_decay_factor > 0:
                num_batches_per_epoch = (training_config.num_examples_per_epoch /
                                         model_config.batch_size)
                decay_steps = int(num_batches_per_epoch *
                                  training_config.num_epochs_per_decay)

                def _learning_rate_decay_fn(learning_rate, global_step):
                    return tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps=decay_steps,
                        decay_rate=training_config.learning_rate_decay_factor,
                        staircase=True)

                learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

    # Run training.
    tf.contrib.slim.learning.train(
        train_op,
        output_dir,
        log_every_n_steps=log_every_n_steps,
        graph=g,
        global_step=model.global_step,
        number_of_steps=number_of_steps,
        init_fn=model.init_fn,
        saver=saver)



In [None]:
# Initial training
# train() takes no arguments: it reads every setting below from these
# notebook-level variables, so they must all be assigned before the call.
input_file_pattern = 'im2txt/data/mscoco/train-?????-of-00256'

# change these if you put your stuff somewhere else
inception_checkpoint_file = 'im2txt/data/inception_v3.ckpt'
train_dir = 'im2txt/model'

# Don't train inception for initial run
train_inception = False
number_of_steps = 1000000
log_every_n_steps = 1

# Now run the training (warning: takes days-to-weeks!!!)
train()

In [None]:
# Fine tuning
# Same variables as the initial-training cell (train() reads them as
# notebook-level names); only train_inception and number_of_steps differ.
input_file_pattern = 'im2txt/data/mscoco/train-?????-of-00256'

# change these if you put your stuff somewhere else
inception_checkpoint_file = 'im2txt/data/inception_v3.ckpt'
# Same directory as the initial-training cell.
train_dir = 'im2txt/model'

# This will refine our results
train_inception = True
number_of_steps = 3000000
log_every_n_steps = 1

# Now run the training (warning: takes even longer than initial training!!!)
train()

In [None]:
# If you completed this, you can go back to the start of this notebook and 
#   point checkpoint_path and vocab_file to your generated files.