Permalink
Branch: master
Find file Copy path
36c4049 Oct 27, 2016
1 contributor

Users who have contributed to this file

626 lines (456 sloc) 21.7 KB
########################################################################
#
# The Inception Model v3 for TensorFlow.
#
# This is a pre-trained Deep Neural Network for classifying images.
# You provide an image or filename for a jpeg-file which will be
# loaded and input to the Inception model, which will then output
# an array of numbers indicating how likely it is that the
# input-image is of each class.
#
# See the example code at the bottom of this file or in the
# accompanying Python Notebooks.
#
# Tutorial #07 shows how to use the Inception model.
# Tutorial #08 shows how to use it for Transfer Learning.
#
# What is Transfer Learning?
#
# Transfer Learning is the use of a Neural Network for classifying
# images from another data-set than it was trained on. For example,
# the Inception model was trained on the ImageNet data-set using
# a very powerful and expensive computer. But the Inception model
# can be re-used on data-sets it was not trained on without having
# to re-train the entire model, even though the number of classes
# are different for the two data-sets. This allows you to use the
# Inception model on your own data-sets without the need for a
# very powerful and expensive computer to train it.
#
# The last layer of the Inception model before the softmax-classifier
# is called the Transfer Layer because the output of that layer will
# be used as the input in your new softmax-classifier (or as the
# input for another neural network), which will then be trained on
# your own data-set.
#
# The output values of the Transfer Layer are called Transfer Values.
# These are the actual values that will be input to your new
# softmax-classifier or to another neural network that you create.
#
# The word 'bottleneck' is also sometimes used to refer to the
# Transfer Layer or Transfer Values, but it is a confusing word
# that is not used here.
#
# Implemented in Python 3.5 with TensorFlow v0.10.0rc0
#
########################################################################
#
# This file is part of the TensorFlow Tutorials available at:
#
# https://github.com/Hvass-Labs/TensorFlow-Tutorials
#
# Published under the MIT License. See the file LICENSE for details.
#
# Copyright 2016 by Magnus Erik Hvass Pedersen
#
########################################################################
import numpy as np
import tensorflow as tf
import download
from cache import cache
import os
import sys
########################################################################
# Various directories and file-names.
# Internet URL for the tar-file with the Inception model.
# Note that this might change in the future and will need to be updated.
data_url = "http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz"
# Directory to store the downloaded data.
data_dir = "inception/"
# File containing the mappings between class-number and uid. (Downloaded)
path_uid_to_cls = "imagenet_2012_challenge_label_map_proto.pbtxt"
# File containing the mappings between uid and string. (Downloaded)
path_uid_to_name = "imagenet_synset_to_human_label_map.txt"
# File containing the TensorFlow graph definition. (Downloaded)
path_graph_def = "classify_image_graph_def.pb"
########################################################################
def maybe_download():
"""
Download the Inception model from the internet if it does not already
exist in the data_dir. The file is about 85 MB.
"""
print("Downloading Inception v3 Model ...")
download.maybe_download_and_extract(url=data_url, download_dir=data_dir)
########################################################################
class NameLookup:
"""
Used for looking up the name associated with a class-number.
This is used to print the name of a class instead of its number,
e.g. "plant" or "horse".
Maps between:
- cls is the class-number as an integer between 1 and 1000 (inclusive).
- uid is a class-id as a string from the ImageNet data-set, e.g. "n00017222".
- name is the class-name as a string, e.g. "plant, flora, plant life"
There are actually 1008 output classes of the Inception model
but there are only 1000 named classes in these mapping-files.
The remaining 8 output classes of the model should not be used.
"""
def __init__(self):
# Mappings between uid, cls and name are dicts, where insertions and
# lookup have O(1) time-usage on average, but may be O(n) in worst case.
self._uid_to_cls = {} # Map from uid to cls.
self._uid_to_name = {} # Map from uid to name.
self._cls_to_uid = {} # Map from cls to uid.
# Read the uid-to-name mappings from file.
path = os.path.join(data_dir, path_uid_to_name)
with open(file=path, mode='r') as file:
# Read all lines from the file.
lines = file.readlines()
for line in lines:
# Remove newlines.
line = line.replace("\n", "")
# Split the line on tabs.
elements = line.split("\t")
# Get the uid.
uid = elements[0]
# Get the class-name.
name = elements[1]
# Insert into the lookup-dict.
self._uid_to_name[uid] = name
# Read the uid-to-cls mappings from file.
path = os.path.join(data_dir, path_uid_to_cls)
with open(file=path, mode='r') as file:
# Read all lines from the file.
lines = file.readlines()
for line in lines:
# We assume the file is in the proper format,
# so the following lines come in pairs. Other lines are ignored.
if line.startswith(" target_class: "):
# This line must be the class-number as an integer.
# Split the line.
elements = line.split(": ")
# Get the class-number as an integer.
cls = int(elements[1])
elif line.startswith(" target_class_string: "):
# This line must be the uid as a string.
# Split the line.
elements = line.split(": ")
# Get the uid as a string e.g. "n01494475"
uid = elements[1]
# Remove the enclosing "" from the string.
uid = uid[1:-2]
# Insert into the lookup-dicts for both ways between uid and cls.
self._uid_to_cls[uid] = cls
self._cls_to_uid[cls] = uid
def uid_to_cls(self, uid):
"""
Return the class-number as an integer for the given uid-string.
"""
return self._uid_to_cls[uid]
def uid_to_name(self, uid, only_first_name=False):
"""
Return the class-name for the given uid string.
Some class-names are lists of names, if you only want the first name,
then set only_first_name=True.
"""
# Lookup the name from the uid.
name = self._uid_to_name[uid]
# Only use the first name in the list?
if only_first_name:
name = name.split(",")[0]
return name
def cls_to_name(self, cls, only_first_name=False):
"""
Return the class-name from the integer class-number.
Some class-names are lists of names, if you only want the first name,
then set only_first_name=True.
"""
# Lookup the uid from the cls.
uid = self._cls_to_uid[cls]
# Lookup the name from the uid.
name = self.uid_to_name(uid=uid, only_first_name=only_first_name)
return name
########################################################################
class Inception:
"""
The Inception model is a Deep Neural Network which has already been
trained for classifying images into 1000 different categories.
When you create a new instance of this class, the Inception model
will be loaded and can be used immediately without training.
The Inception model can also be used for Transfer Learning.
"""
# Name of the tensor for feeding the input image as jpeg.
tensor_name_input_jpeg = "DecodeJpeg/contents:0"
# Name of the tensor for feeding the decoded input image.
# Use this for feeding images in other formats than jpeg.
tensor_name_input_image = "DecodeJpeg:0"
# Name of the tensor for the resized input image.
# This is used to retrieve the image after it has been resized.
tensor_name_resized_image = "ResizeBilinear:0"
# Name of the tensor for the output of the softmax-classifier.
# This is used for classifying images with the Inception model.
tensor_name_softmax = "softmax:0"
# Name of the tensor for the unscaled outputs of the softmax-classifier (aka. logits).
tensor_name_softmax_logits = "softmax/logits:0"
# Name of the tensor for the output of the Inception model.
# This is used for Transfer Learning.
tensor_name_transfer_layer = "pool_3:0"
def __init__(self):
# Mappings between class-numbers and class-names.
# Used to print the class-name as a string e.g. "horse" or "plant".
self.name_lookup = NameLookup()
# Now load the Inception model from file. The way TensorFlow
# does this is confusing and requires several steps.
# Create a new TensorFlow computational graph.
self.graph = tf.Graph()
# Set the new graph as the default.
with self.graph.as_default():
# TensorFlow graphs are saved to disk as so-called Protocol Buffers
# aka. proto-bufs which is a file-format that works on multiple
# platforms. In this case it is saved as a binary file.
# Open the graph-def file for binary reading.
path = os.path.join(data_dir, path_graph_def)
with tf.gfile.FastGFile(path, 'rb') as file:
# The graph-def is a saved copy of a TensorFlow graph.
# First we need to create an empty graph-def.
graph_def = tf.GraphDef()
# Then we load the proto-buf file into the graph-def.
graph_def.ParseFromString(file.read())
# Finally we import the graph-def to the default TensorFlow graph.
tf.import_graph_def(graph_def, name='')
# Now self.graph holds the Inception model from the proto-buf file.
# Get the output of the Inception model by looking up the tensor
# with the appropriate name for the output of the softmax-classifier.
self.y_pred = self.graph.get_tensor_by_name(self.tensor_name_softmax)
# Get the unscaled outputs for the Inception model (aka. softmax-logits).
self.y_logits = self.graph.get_tensor_by_name(self.tensor_name_softmax_logits)
# Get the tensor for the resized image that is input to the neural network.
self.resized_image = self.graph.get_tensor_by_name(self.tensor_name_resized_image)
# Get the tensor for the last layer of the graph, aka. the transfer-layer.
self.transfer_layer = self.graph.get_tensor_by_name(self.tensor_name_transfer_layer)
# Get the number of elements in the transfer-layer.
self.transfer_len = self.transfer_layer.get_shape()[3]
# Create a TensorFlow session for executing the graph.
self.session = tf.Session(graph=self.graph)
def close(self):
"""
Call this function when you are done using the Inception model.
It closes the TensorFlow session to release its resources.
"""
self.session.close()
def _write_summary(self, logdir='summary/'):
"""
Write graph to summary-file so it can be shown in TensorBoard.
This function is used for debugging and may be changed or removed in the future.
:param logdir:
Directory for writing the summary-files.
:return:
Nothing.
"""
writer = tf.train.SummaryWriter(logdir=logdir, graph=self.graph)
writer.close()
def _create_feed_dict(self, image_path=None, image=None):
"""
Create and return a feed-dict with an image.
:param image_path:
The input image is a jpeg-file with this file-path.
:param image:
The input image is a 3-dim array which is already decoded.
The pixels MUST be values between 0 and 255 (float or int).
:return:
Dict for feeding to the Inception graph in TensorFlow.
"""
if image is not None:
# Image is passed in as a 3-dim array that is already decoded.
feed_dict = {self.tensor_name_input_image: image}
elif image_path is not None:
# Read the jpeg-image as an array of bytes.
image_data = tf.gfile.FastGFile(image_path, 'rb').read()
# Image is passed in as a jpeg-encoded image.
feed_dict = {self.tensor_name_input_jpeg: image_data}
else:
raise ValueError("Either image or image_path must be set.")
return feed_dict
def classify(self, image_path=None, image=None):
"""
Use the Inception model to classify a single image.
The image will be resized automatically to 299 x 299 pixels,
see the discussion in the Python Notebook for Tutorial #07.
:param image_path:
The input image is a jpeg-file with this file-path.
:param image:
The input image is a 3-dim array which is already decoded.
The pixels MUST be values between 0 and 255 (float or int).
:return:
Array of floats (aka. softmax-array) indicating how likely
the Inception model thinks the image is of each given class.
"""
# Create a feed-dict for the TensorFlow graph with the input image.
feed_dict = self._create_feed_dict(image_path=image_path, image=image)
# Execute the TensorFlow session to get the predicted labels.
pred = self.session.run(self.y_pred, feed_dict=feed_dict)
# Reduce the array to a single dimension.
pred = np.squeeze(pred)
return pred
def get_resized_image(self, image_path=None, image=None):
"""
Input an image to the Inception model and return
the resized image. The resized image can be plotted so
we can see what the neural network sees as its input.
:param image_path:
The input image is a jpeg-file with this file-path.
:param image:
The input image is a 3-dim array which is already decoded.
The pixels MUST be values between 0 and 255 (float or int).
:return:
A 3-dim array holding the image.
"""
# Create a feed-dict for the TensorFlow graph with the input image.
feed_dict = self._create_feed_dict(image_path=image_path, image=image)
# Execute the TensorFlow session to get the predicted labels.
resized_image = self.session.run(self.resized_image, feed_dict=feed_dict)
# Remove the 1st dimension of the 4-dim tensor.
resized_image = resized_image.squeeze(axis=0)
# Scale pixels to be between 0.0 and 1.0
resized_image = resized_image.astype(float) / 255.0
return resized_image
def print_scores(self, pred, k=10, only_first_name=True):
"""
Print the scores (or probabilities) for the top-k predicted classes.
:param pred:
Predicted class-labels returned from the predict() function.
:param k:
How many classes to print.
:param only_first_name:
Some class-names are lists of names, if you only want the first name,
then set only_first_name=True.
:return:
Nothing.
"""
# Get a sorted index for the pred-array.
idx = pred.argsort()
# The index is sorted lowest-to-highest values. Take the last k.
top_k = idx[-k:]
# Iterate the top-k classes in reversed order (i.e. highest first).
for cls in reversed(top_k):
# Lookup the class-name.
name = self.name_lookup.cls_to_name(cls=cls, only_first_name=only_first_name)
# Predicted score (or probability) for this class.
score = pred[cls]
# Print the score and class-name.
print("{0:>6.2%} : {1}".format(score, name))
def transfer_values(self, image_path=None, image=None):
"""
Calculate the transfer-values for the given image.
These are the values of the last layer of the Inception model before
the softmax-layer, when inputting the image to the Inception model.
The transfer-values allow us to use the Inception model in so-called
Transfer Learning for other data-sets and different classifications.
It may take several hours or more to calculate the transfer-values
for all images in a data-set. It is therefore useful to cache the
results using the function transfer_values_cache() below.
:param image_path:
The input image is a jpeg-file with this file-path.
:param image:
The input image is a 3-dim array which is already decoded.
The pixels MUST be values between 0 and 255 (float or int).
:return:
The transfer-values for those images.
"""
# Create a feed-dict for the TensorFlow graph with the input image.
feed_dict = self._create_feed_dict(image_path=image_path, image=image)
# Use TensorFlow to run the graph for the Inception model.
# This calculates the values for the last layer of the Inception model
# prior to the softmax-classification, which we call transfer-values.
transfer_values = self.session.run(self.transfer_layer, feed_dict=feed_dict)
# Reduce to a 1-dim array.
transfer_values = np.squeeze(transfer_values)
return transfer_values
########################################################################
# Batch-processing.
def process_images(fn, images=None, image_paths=None):
"""
Call the function fn() for each image, e.g. transfer_values() from
the Inception model above. All the results are concatenated and returned.
:param fn:
Function to be called for each image.
:param images:
List of images to process.
:param image_paths:
List of file-paths for the images to process.
:return:
Numpy array with the results.
"""
# Are we using images or image_paths?
using_images = images is not None
# Number of images.
if using_images:
num_images = len(images)
else:
num_images = len(image_paths)
# Pre-allocate list for the results.
# This holds references to other arrays. Initially the references are None.
result = [None] * num_images
# For each input image.
for i in range(num_images):
# Status-message. Note the \r which means the line should overwrite itself.
msg = "\r- Processing image: {0:>6} / {1}".format(i+1, num_images)
# Print the status message.
sys.stdout.write(msg)
sys.stdout.flush()
# Process the image and store the result for later use.
if using_images:
result[i] = fn(image=images[i])
else:
result[i] = fn(image_path=image_paths[i])
# Print newline.
print()
# Convert the result to a numpy array.
result = np.array(result)
return result
########################################################################
def transfer_values_cache(cache_path, model, images=None, image_paths=None):
"""
This function either loads the transfer-values if they have
already been calculated, otherwise it calculates the values
and saves them to a file that can be re-loaded again later.
Because the transfer-values can be expensive to compute, it can
be useful to cache the values through this function instead
of calling transfer_values() directly on the Inception model.
See Tutorial #08 for an example on how to use this function.
:param cache_path:
File containing the cached transfer-values for the images.
:param model:
Instance of the Inception model.
:param images:
4-dim array with images. [image_number, height, width, colour_channel]
:param image_paths:
Array of file-paths for images (must be jpeg-format).
:return:
The transfer-values from the Inception model for those images.
"""
# Helper-function for processing the images if the cache-file does not exist.
# This is needed because we cannot supply both fn=process_images
# and fn=model.transfer_values to the cache()-function.
def fn():
return process_images(fn=model.transfer_values, images=images, image_paths=image_paths)
# Read the transfer-values from a cache-file, or calculate them if the file does not exist.
transfer_values = cache(cache_path=cache_path, fn=fn)
return transfer_values
########################################################################
# Example usage.
if __name__ == '__main__':
print(tf.__version__)
# Download Inception model if not already done.
maybe_download()
# Load the Inception model so it is ready for classifying images.
model = Inception()
# Path for a jpeg-image that is included in the downloaded data.
image_path = os.path.join(data_dir, 'cropped_panda.jpg')
# Use the Inception model to classify the image.
pred = model.classify(image_path=image_path)
# Print the scores and names for the top-10 predictions.
model.print_scores(pred=pred, k=10)
# Close the TensorFlow session.
model.close()
# Transfer Learning is demonstrated in Tutorial #08.
########################################################################