# Prerequisite packages

In [1]:
import sys
import os
import time
import cPickle
sys.path.append('./models/slim')

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
from skimage.transform import resize
from sklearn.decomposition import PCA
import tqdm

from preprocessing import inception_preprocessing 
from nets.inception_v4 import inception_v4_arg_scope, inception_v4 

### Hardcoded paths
* provide full paths to the required files

In [2]:
# Pre-trained Inception v4 checkpoint that gets restored into the session.
model_checkpoint_path = '/home/user/FER/AVSP/inception_v4.ckpt'
# Root folder that is walked recursively to collect the input image files.
root_image_folder = '/home/user/Open_image/images_2016_08/train/images'
# Receives the raw (unreduced) feature-vector chunks and the matching path chunks.
raw_result_folder = '/home/user/FER/AVSP/raw_results'
# Receives the PCA-reduced vector chunks.
temp_vector_result_folder = '/home/user/FER/AVSP/temp_results'
# Receives the pickled PCA model and the final combined path/vector arrays.
final_result_folder = '/home/user/FER/AVSP/final_results'

### Creating folders if they don't exist

In [3]:
# Make sure every output directory exists before anything is written into it.
for result_folder in (raw_result_folder, temp_vector_result_folder, final_result_folder):
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)

# Utils

In [4]:
def get_img(path):
    """Read the image at ``path`` and resize it to 229x229.

    The resize uses ``mode='constant'``, the same setting as the resize call in
    the feature-extraction loop of this notebook, so a previewed image is
    resized the same way as the images fed to the network.

    Returns the resized image as produced by ``skimage.transform.resize``.
    """
    img = io.imread(path)
    return resize(img, (229, 229), mode='constant')

def show_img(path):
    """Plot the image stored at ``path`` (loaded through ``get_img``) on a 16x12 figure."""
    plt.figure(figsize=(16, 12))
    plt.imshow(get_img(path))
    plt.show()
    
def maxpool2d(x, k=2, stride=2, padding='SAME'):
    """Apply ``tf.nn.max_pool`` to ``x`` with a k-by-k window on the two middle axes.

    ``k`` is the window size, ``stride`` the step along those same two axes and
    ``padding`` is passed through unchanged. The first and last axes are pooled
    with size 1 / stride 1, i.e. left untouched.
    """
    window = [1, k, k, 1]
    step = [1, stride, stride, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding)

# This creates a concatenated vector with the result of max pooling all the endpoints with maximum size kernel for them
# for example an output of convolution is 12x12x1024, then max pool will be with kernel [1,12,12,1] and padding VALID
# the resulting vector will be 1,1,1024 for each image
def create_prediction(endpoints):
    """Build one feature tensor by globally max-pooling every endpoint and concatenating.

    Args:
        endpoints: mapping from endpoint name to its output tensor. Entries whose
            name contains 'Pred' or 'Log' (the prediction / logits outputs) are skipped.

    Returns:
        The pooled endpoint tensors concatenated along axis 3, taken in sorted
        endpoint-name order.
    """
    values_to_concat = []
    for end in sorted(endpoints):
        if 'Pred' in end or 'Log' in end:
            continue
        # Read from the argument, not from a module-level variable, so the function
        # works for whatever mapping the caller passes in.
        layer_output = endpoints[end]
        # The window size equals the layer's size along axis 1, so with VALID padding
        # each channel collapses to a single value.
        all_channels_maxpool = maxpool2d(layer_output, layer_output.shape[1], padding='VALID')
        values_to_concat.append(all_channels_maxpool)
    return tf.concat(values_to_concat, 3)

# Model initialization

In [5]:
# Short alias for TF-Slim; it is used again further down when selecting the variables to restore.
slim = tf.contrib.slim
# Network input: a batch (any size) of 229x229 images with 3 channels.
images = tf.placeholder(tf.float32, [None, 229,229,3])

# Build Inception v4 with is_training=False under the model's own arg scope.
# end_points maps endpoint names to their output tensors.
with slim.arg_scope(inception_v4_arg_scope()):
    logits, end_points = inception_v4(images, num_classes = 1001, is_training = False)

### Excluding unnecessary variables
These layers must be excluded from the restored variables; otherwise an error is thrown because they are not present in the checkpoint.

In [6]:
# Only the auxiliary-logits scope is left out of the restore list.
# (A broader list that also named 'InceptionV4/Logits' was assigned first, but it was
# overwritten on the very next line and never took effect, so that dead assignment is gone.)
exclude = ['InceptionV4/AuxLogits']
variables_to_restore = slim.get_variables_to_restore(exclude = exclude)

### Various endpoints contained in the model that will be used for feature extraction

In [7]:
# Display the endpoint-name -> tensor mapping returned by inception_v4.
end_points

{'AuxLogits': <tf.Tensor 'InceptionV4/AuxLogits/Aux_logits/BiasAdd:0' shape=(?, 1001) dtype=float32>,
 'Conv2d_1a_3x3': <tf.Tensor 'InceptionV4/InceptionV4/Conv2d_1a_3x3/Relu:0' shape=(?, 114, 114, 32) dtype=float32>,
 'Conv2d_2a_3x3': <tf.Tensor 'InceptionV4/InceptionV4/Conv2d_2a_3x3/Relu:0' shape=(?, 112, 112, 32) dtype=float32>,
 'Conv2d_2b_3x3': <tf.Tensor 'InceptionV4/InceptionV4/Conv2d_2b_3x3/Relu:0' shape=(?, 112, 112, 64) dtype=float32>,
 'Logits': <tf.Tensor 'InceptionV4/Logits/Logits/BiasAdd:0' shape=(?, 1001) dtype=float32>,
 'Mixed_3a': <tf.Tensor 'InceptionV4/InceptionV4/Mixed_3a/concat:0' shape=(?, 55, 55, 160) dtype=float32>,
 'Mixed_4a': <tf.Tensor 'InceptionV4/InceptionV4/Mixed_4a/concat:0' shape=(?, 53, 53, 192) dtype=float32>,
 'Mixed_5a': <tf.Tensor 'InceptionV4/InceptionV4/Mixed_5a/concat:0' shape=(?, 26, 26, 384) dtype=float32>,
 'Mixed_5b': <tf.Tensor 'InceptionV4/InceptionV4/Mixed_5b/concat:0' shape=(?, 26, 26, 384) dtype=float32>,
 'Mixed_5c': <tf.Tensor 'Incep

# Model restoration

In [8]:
sess = tf.Session()
# Initialise every variable first: the saver below only covers variables_to_restore,
# so variables excluded from that list keep their initial values.
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, model_checkpoint_path)

# Creating the prediction endpoint by maxpooling all the layers
pred = create_prediction(end_points)

# Inputs for preprocessing: a single rank-3 float image of any size.
placeholder_input = tf.placeholder(tf.float32, [None,None,None])
# Preprocess to 229x229; the trailing False is presumably is_training
# (evaluation-mode preprocessing) - confirm against inception_preprocessing.
preprocessed_img = inception_preprocessing.preprocess_image(placeholder_input, 229, 229, False)

## Loading all paths to images
* The only reason to do this beforehand is to get a better progress bar from tqdm later; the alternative would be to process the images directly without caching the paths first.

In [9]:
# Collect the full path of every file below root_image_folder, in os.walk order.
paths_iterate = []

for folder, _subfolders, filenames in os.walk(root_image_folder):
    paths_iterate.extend(os.path.join(folder, filename) for filename in filenames)

## Processing images and saving partial outputs
* change save_every to more/less depending on your RAM
* grayscale images are skipped; they could be used if you preprocess them and stack them three times to form an RGB image in which all channels have the same value
* progress bar with tqdm provides better insight than printing every x iterations
* later the chunks will be combined for easier access

In [11]:
def _save_chunk(chunk_num, chunk_vectors, chunk_paths):
    """Store one chunk of feature vectors together with the matching image paths.

    Writes ``vectors_NNN.npy`` and ``paths_NNN.npy`` into ``raw_result_folder``,
    where NNN is ``chunk_num`` zero-padded to three digits.
    """
    vector_location = os.path.join(raw_result_folder, 'vectors_%03d' % chunk_num)
    np.save(vector_location, np.array(chunk_vectors))

    path_location = os.path.join(raw_result_folder, 'paths_%03d' % chunk_num)
    np.save(path_location, np.array(chunk_paths))


paths = []
vectors = []
part_num = 1
save_every = 100000  # number of processed images per saved chunk

for file_name in tqdm.tqdm_notebook(paths_iterate):
    input_img = io.imread(file_name)
    # Only rank-3 images with exactly 3 channels fit the `images` placeholder.
    # Grayscale images (rank 2) are skipped, and so is anything with a different
    # channel count (e.g. RGBA). The check runs before the resize so that skipped
    # images are not resized for nothing.
    if input_img.ndim != 3 or input_img.shape[2] != 3:
        continue
    input_img = resize(input_img, (229,229), mode = 'constant')

    preprocessed = sess.run(preprocessed_img, feed_dict={placeholder_input:input_img})
    res = sess.run(pred,feed_dict={images:[preprocessed]})[0].flatten()
    paths.append(file_name)
    vectors.append(res)

    # Flush to disk every `save_every` processed images to keep memory bounded.
    if len(vectors) == save_every:
        _save_chunk(part_num, vectors, paths)
        part_num += 1
        vectors = []
        paths = []

# Saving the last, partially filled chunk. Skipped when nothing is left over, so that
# no zero-length chunk file ends up among the results.
if vectors:
    _save_chunk(part_num, vectors, paths)

# Drop the references so the memory can be reclaimed.
paths_iterate = []
vectors = []
paths = []

918157/|/ 99%|| 918157/931295 [9:57:06<08:32, 25.63it/s]s]


# Fitting PCA model
* comment or uncomment the fitting/saving part depending on whether you still need to train the model or you already have it and are restoring it from file

In [None]:
#Partial vectors for PCA model training, memory requirements are around 13Gb for these so you can
#combine more parts if you have more memory and train on those
# Only part of the raw vectors is used to train the PCA model: a single chunk already
# needs around 13Gb of memory. With more memory available, several chunks can be
# combined and used for training instead.
vector_location = os.path.join(raw_result_folder,'vectors_001.npy')
vectors = np.load(vector_location)

PCA_location = os.path.join(final_result_folder,'PCA_model_300.pkl')

# Careful: comment the fitting and saving parts out again after training, so that
# running the whole notebook at once does not overwrite the stored model.
PCA_model = PCA(n_components=300).fit(vectors)

with open(PCA_location, 'wb') as model_file:
    cPickle.dump(PCA_model, model_file)

# Reducing all the raw results with PCA and storing them

### Reducing vectors

In [9]:
PCA_location = os.path.join(final_result_folder,'PCA_model_300.pkl')

# NOTE: unpickling can execute arbitrary code - only load a model file that you
# produced yourself.
with open(PCA_location, 'rb') as f:
    PCA_model = cPickle.load(f)

# Reduce every raw vector chunk with the loaded PCA model and store the result under
# the same file name in temp_vector_result_folder.
for root, dirs, files in os.walk(raw_result_folder):
    for name in sorted(files):
        if 'vector' not in name:
            continue
        file_name = os.path.join(root, name)
        print(file_name)
        # The raw chunk is not bound to a name, so it can be freed right after the
        # transform instead of staying alive while the next chunk is loaded.
        vectors = PCA_model.transform(np.load(file_name))
        np.save(os.path.join(temp_vector_result_folder, name), vectors)

/home/user/FER/AVSP/raw_results/vectors_001.npy
/home/user/FER/AVSP/raw_results/vectors_002.npy
/home/user/FER/AVSP/raw_results/vectors_003.npy
/home/user/FER/AVSP/raw_results/vectors_004.npy
/home/user/FER/AVSP/raw_results/vectors_005.npy
/home/user/FER/AVSP/raw_results/vectors_006.npy
/home/user/FER/AVSP/raw_results/vectors_007.npy
/home/user/FER/AVSP/raw_results/vectors_008.npy
/home/user/FER/AVSP/raw_results/vectors_009.npy
/home/user/FER/AVSP/raw_results/vectors_010.npy


# Combining the reduced results and storing it as a single numpy matrix for easier usage

### Combining paths to single npy

In [20]:
# Load every path chunk (in sorted file-name order per directory) and join them.
path_chunks = []
for root, dirs, files in os.walk(raw_result_folder):
    for name in sorted(files):
        if 'path' not in name:
            continue
        file_name = os.path.join(root, name)
        print(file_name)
        path_chunks.append(np.load(file_name))

# A single concatenation at the end avoids re-copying the growing array once per chunk.
# If no chunk file was found, paths is None.
paths = np.concatenate(path_chunks, axis = 0) if path_chunks else None
final_result_path = os.path.join(final_result_folder, 'full_paths')
np.save(final_result_path, paths)

/home/user/FER/AVSP/raw_results/paths_001.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_002.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_003.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_004.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_005.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_006.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_007.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_008.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_009.npy
(100000,)
/home/user/FER/AVSP/raw_results/paths_010.npy
(12891,)
(912891,)


### Combining vectors to single npy

In [12]:
# Load every PCA-reduced vector chunk (in sorted file-name order per directory) and join them.
vector_chunks = []
for root, dirs, files in os.walk(temp_vector_result_folder):
    for name in sorted(files):
        if 'vector' not in name:
            continue
        file_name = os.path.join(root, name)
        print(file_name)
        vector_chunks.append(np.load(file_name))

# A single concatenation at the end avoids re-copying the growing matrix once per chunk.
# If no chunk file was found, vectors is None.
vectors = np.concatenate(vector_chunks, axis = 0) if vector_chunks else None

final_result_path = os.path.join(final_result_folder, 'full_vectors')
np.save(final_result_path,vectors)

/home/user/FER/AVSP/temp_results/vectors_001.npy
/home/user/FER/AVSP/temp_results/vectors_002.npy
/home/user/FER/AVSP/temp_results/vectors_003.npy
/home/user/FER/AVSP/temp_results/vectors_004.npy
/home/user/FER/AVSP/temp_results/vectors_005.npy
/home/user/FER/AVSP/temp_results/vectors_006.npy
/home/user/FER/AVSP/temp_results/vectors_007.npy
/home/user/FER/AVSP/temp_results/vectors_008.npy
/home/user/FER/AVSP/temp_results/vectors_009.npy
/home/user/FER/AVSP/temp_results/vectors_010.npy
