In [1]:
%matplotlib inline
import os
import math
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import h5py

from PIL import Image
from vgg16 import vgg16
from dataLoader import *


In [2]:
# Per-split settings handed to dataLoader: caption JSON, HDF5 file and the
# directory holding the images (the same directory for both splits).
train_params = dict(
    json_path='resource/train.json',
    h5py_path='resource/train.h5',
    img_dir='resource/224x224_mscoco_images/',
)
test_params = dict(
    json_path='resource/test.json',
    h5py_path='resource/test.h5',
    img_dir='resource/224x224_mscoco_images/',
)

# One data loader per split, keyed by split name (train is built first).
loaders = {
    'train': dataLoader(train_params),
    'test': dataLoader(test_params),
}

# Model hyper-parameters. Vocabulary mapping, vocabulary size and the maximum
# caption length are all taken from the training loader.
model_params = dict(
    wtoi=loaders['train'].getWtoi(),
    vgg16_weight_file='resource/vgg16_weights.npz',
    dim_image=4096,  # dimension of vgg16 network output
    dim_embed=512,
    dim_hidden=512,
    batch_size=50,
    n_lstm_steps=loaders['train'].getMaxCaptionLength(),
    n_words=loaders['train'].getVocabSize(),
)

In [3]:
sess = tf.InteractiveSession()

# The network is fed one image at a time (height, width, channels).
image = tf.placeholder(tf.float32, [224,224,3])
# image_id -> fc7 activation; also used to skip images that were already processed.
vgg_feat = {}

vgg = vgg16(image, model_params['vgg16_weight_file'], sess)
fc7 = vgg.fc7

# NOTE(review): vgg16(...) is given both the weight file and the session, and the
# recorded cell output lists every weight tensor, so it presumably assigns the
# pretrained weights inside its constructor -- confirm in vgg16.py.
# Running tf.global_variables_initializer() at this point would re-run every
# variable's initializer and throw those assigned values away, so only variables
# that are still uninitialized get initialized here.
uninitialized_vars = [
    var for var in tf.global_variables()
    if not sess.run(tf.is_variable_initialized(var))
]
if uninitialized_vars:
    sess.run(tf.variables_initializer(uninitialized_vars))


def extract_fc7_features(loader, out_file, cache):
    """Compute the fc7 activation for every not-yet-seen image of ``loader``.

    Args:
        loader: data loader exposing ``num_captions``, ``image_ids``,
            ``image_path``, ``img_dir`` and ``vgg_mean`` (one mean per channel).
        out_file: open, writable ``h5py.File``; one dataset per image is
            created, named ``str(image_id)``.
        cache: dict mapping image_id -> feature. Ids already present are
            skipped; newly computed features are added to it.

    Uses the module-level ``sess``, ``fc7`` and ``image`` tensors.
    """
    for i in range(loader.num_captions):
        image_id = loader.image_ids[i]
        # Several captions can refer to the same image; extract each image once.
        if image_id in cache:
            continue
        img = Image.open(loader.img_dir + loader.image_path[i]).convert('RGB') #(224,224,3)
        img = np.array(img, 'f')
        # Subtract the per-channel mean supplied by the loader.
        for channel in range(3):
            img[:,:,channel] = img[:,:,channel] - loader.vgg_mean[channel]
        # Fetch the tensor itself (not a one-element list) so that every split
        # stores features with the same shape.
        cache[image_id] = sess.run(fc7, feed_dict={image: img})
        out_file.create_dataset(str(image_id), data=cache[image_id], dtype=np.float32)


# The context manager closes the HDF5 file even if extraction fails part-way.
with h5py.File('vgg_feat.h5','w') as save_f:
    extract_fc7_features(loaders['train'], save_f, vgg_feat)
    extract_fc7_features(loaders['test'], save_f, vgg_feat)

(0, 'conv1_1_W', (3, 3, 3, 64))
(1, 'conv1_1_b', (64,))
(2, 'conv1_2_W', (3, 3, 64, 64))
(3, 'conv1_2_b', (64,))
(4, 'conv2_1_W', (3, 3, 64, 128))
(5, 'conv2_1_b', (128,))
(6, 'conv2_2_W', (3, 3, 128, 128))
(7, 'conv2_2_b', (128,))
(8, 'conv3_1_W', (3, 3, 128, 256))
(9, 'conv3_1_b', (256,))
(10, 'conv3_2_W', (3, 3, 256, 256))
(11, 'conv3_2_b', (256,))
(12, 'conv3_3_W', (3, 3, 256, 256))
(13, 'conv3_3_b', (256,))
(14, 'conv4_1_W', (3, 3, 256, 512))
(15, 'conv4_1_b', (512,))
(16, 'conv4_2_W', (3, 3, 512, 512))
(17, 'conv4_2_b', (512,))
(18, 'conv4_3_W', (3, 3, 512, 512))
(19, 'conv4_3_b', (512,))
(20, 'conv5_1_W', (3, 3, 512, 512))
(21, 'conv5_1_b', (512,))
(22, 'conv5_2_W', (3, 3, 512, 512))
(23, 'conv5_2_b', (512,))
(24, 'conv5_3_W', (3, 3, 512, 512))
(25, 'conv5_3_b', (512,))
(26, 'fc6_W', (25088, 4096))
(27, 'fc6_b', (4096,))
(28, 'fc7_W', (4096, 4096))
(29, 'fc7_b', (4096,))
57870
384029
222016
520950
69675
547471
122688
392136
398494
90570
504616
161919
457732
44404
4428
170558
405

In [1]:
# Close the HDF5 feature file (`save_f`) created by the feature-extraction cell.
# NOTE(review): `save_f` exists only in the kernel session that ran that cell;
# the NameError recorded for this cell is consistent with it having been executed
# in a fresh kernel (execution count 1) -- confirm and re-run in order.
save_f.close()

NameError: name 'save_f' is not defined