/
extract_visual_features.py
193 lines (143 loc) · 7.16 KB
/
extract_visual_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import caffe
import numpy as np
import os
import tables
import params_cub as params
# Select Caffe's CPU mode at import time, before any network is built.
caffe.set_mode_cpu()
def normalize(x):
    """Scale ``x`` by its root-mean-square value.

    A small constant (1e-5) is added to the RMS so an all-zero input does not
    divide by zero.

    :param x: numeric array (or anything NumPy can square and average).
    :return: ``x`` divided by ``rms(x) + 1e-5``; same shape as ``x``.
    """
    root_mean_square = np.sqrt(np.mean(np.square(x)))
    return x / (root_mean_square + 1e-5)
def softmax(x):
    """Compute a softmax of ``x`` normalised along axis 0.

    The global maximum of ``x`` is subtracted before exponentiating so that
    large inputs do not overflow.

    :param x: numeric array.
    :return: array of the same shape whose entries sum to 1 along axis 0.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum(axis=0)
def grad_cam(input_model, category_index, layer_name, final_layer="fc8_cub", data_layer='data'):
    """Return the averaged gradient weights of ``layer_name`` for one class.

    This is a thin wrapper around :func:`get_gradients_and_weights`: that
    function runs the backward pass and returns both the normalised gradients
    and their spatial mean; only the mean is returned here.

    :param input_model: network exposing ``blobs``, ``outputs`` and
        ``backward`` (a ``caffe.Net`` elsewhere in this file).
    :param category_index: index of the class whose score is back-propagated.
    :param layer_name: name of the blob whose gradients are averaged.
    :param final_layer: name of the blob whose shape is used to build the
        one-hot seed for the backward pass.
    :param data_layer: name of the input blob, also requested from
        ``backward``.
    :return: mean over the last two axes of the first sample's gradient at
        ``layer_name`` (one value per entry of that blob's second axis).
    """
    _, weights = get_gradients_and_weights(
        input_model, category_index, layer_name, final_layer, data_layer)
    return weights
def get_gradients_and_weights(input_model, category_index, layer_name, final_layer="fc8_cub", data_layer_name='data'):
    """Back-propagate one class through the network and summarise the result.

    A one-hot array shaped like the ``final_layer`` blob is fed to
    ``input_model.backward`` as the diff of the network's first output. The
    gradient that arrives at ``layer_name`` is RMS-normalised with
    :func:`normalize`, the first sample is taken, and its mean over the last
    two axes is computed.

    :param input_model: network exposing ``blobs``, ``outputs`` and
        ``backward`` (a ``caffe.Net`` elsewhere in this file).
    :param category_index: index of the class to back-propagate.
    :param layer_name: name of the blob whose gradient is wanted.
    :param final_layer: name of the blob whose shape defines the one-hot seed.
    :param data_layer_name: name of the input blob, also requested from
        ``backward``.
    :return: tuple ``(gradients, weights)``: the normalised gradient of the
        first sample and its mean over axes ``(1, 2)``.
    """
    # One-hot seed selecting the requested class in the first sample.
    one_hot = np.zeros(input_model.blobs[final_layer].shape)
    one_hot[0, category_index] = 1

    # backward() takes the seed as a keyword named after the output blob.
    seed_by_output = {input_model.outputs[0]: one_hot}
    diffs = input_model.backward(diffs=[data_layer_name, layer_name], **seed_by_output)

    layer_gradients = normalize(diffs[layer_name])[0, :, :, :]
    averaged = np.mean(layer_gradients, axis=(1, 2))
    return layer_gradients, averaged
def extract_visual_feats_and_gradients(all_image_name_file, prototxt_file, weight_file, image_width, image_height, image_folder, layer_name, save_folder_path, final_layer, data_layer):
    """Save per-image activations and predicted-class gradients of one layer.

    Every image listed in ``all_image_name_file`` is pushed through the Caffe
    classifier. For each image two compressed ``.npz`` files (array key
    ``x``) are written below ``save_folder_path``:

    * ``activations/<stem>.npz`` - the ``layer_name`` blob of the first sample;
    * ``grad_pred/<stem>.npz`` - the normalised gradient of the predicted
      class with respect to ``layer_name``.

    ``<stem>`` is the image file name truncated at its first ``'.'``.

    :param all_image_name_file: text file with one relative image path per
        line, expected in the form ``<folder>/<image file>``; blank lines are
        ignored.
    :param prototxt_file: Caffe network definition.
    :param weight_file: trained Caffe weights.
    :param image_width: width the input blob is reshaped to.
    :param image_height: height the input blob is reshaped to.
    :param image_folder: folder the relative image paths are resolved against.
    :param layer_name: name of the blob whose activations/gradients are saved.
    :param save_folder_path: root folder for the two output sub-folders.
    :param final_layer: name of the blob holding the class scores; used both
        to pick the predicted class and to seed the backward pass.
    :param data_layer: name of the network's input blob.
    :return: None
    """
    # ------- Get image names --------
    with open(all_image_name_file, 'rb') as f:
        decoded_lines = (line.rstrip().decode('utf-8') for line in f)
        # A blank line would otherwise fail on the "<folder>/<file>" split.
        images = [line for line in decoded_lines if line]

    # -------- Make sure the output folders exist --------
    activation_dir = os.path.join(save_folder_path, "activations")
    gradient_dir = os.path.join(save_folder_path, "grad_pred")
    for folder in (activation_dir, gradient_dir):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # -------- Load the Caffe model --------
    net = caffe.Net(prototxt_file, weight_file, caffe.TEST)
    # Caffe blobs are laid out (N, C, H, W): height comes before width.
    # Reshape first so the transformer below sees the final input shape.
    net.blobs[data_layer].reshape(1, 3, image_height, image_width)

    # -------- Configure pre-processing --------
    transformer = caffe.io.Transformer({data_layer: net.blobs[data_layer].data.shape})
    transformer.set_transpose(data_layer, (2, 0, 1))      # HWC -> CHW
    transformer.set_channel_swap(data_layer, (2, 1, 0))   # reverse channel order
    transformer.set_raw_scale(data_layer, 255.0)          # load_image yields [0, 1]

    # -------- Extract image features --------
    total = len(images)
    for k, relative_path in enumerate(images):
        image_name = relative_path.split("/")[1]
        file_stem = image_name.split('.')[0]

        image = caffe.io.load_image(os.path.join(image_folder, relative_path))
        net.blobs[data_layer].data[...] = transformer.preprocess(data_layer, image)
        net.forward()

        # The class scores live in ``final_layer``; ``layer_name`` is the
        # feature layer and is not among the blobs returned by forward().
        predicted_class = net.blobs[final_layer].data[0].argmax()

        if k % 100 == 0:
            print('Completed : %d / %d' % (k, total))

        activations = net.blobs[layer_name].data[0, :, :, :]
        np.savez_compressed(os.path.join(activation_dir, file_stem + '.npz'), x=activations)

        # -------- Get gradients for the predicted class --------
        gradients, _ = get_gradients_and_weights(net, predicted_class, layer_name, final_layer, data_layer)
        np.savez_compressed(os.path.join(gradient_dir, file_stem + '.npz'), x=gradients)
def create_final_features(visual_feature_file_name, feature_vector_size, image_name_file, no_sent_per_image, save_folder_path, is_testing=False):
    """Build gradient-weighted image features and store them in an HDF5 file.

    For every image in ``image_name_file`` the saved activations and
    gradients are loaded from ``save_folder_path``. The gradients are averaged
    over their last two axes and clipped at zero, and each activation map is
    scaled by the matching weight. The result is flattened to
    ``feature_vector_size`` values and appended as a row of the extendable
    array ``/data`` (Float64) in ``visual_feature_file_name``.

    :param visual_feature_file_name: HDF5 file to create (overwritten).
    :param feature_vector_size: length of one flattened feature vector.
    :param image_name_file: text file with one ``<folder>/<image name>`` entry
        per line; blank lines are ignored.
    :param no_sent_per_image: ``.npy`` file holding a pickled mapping from
        image name to the number of sentences for that image. Only read when
        ``is_testing`` is false.
    :param save_folder_path: folder containing the ``activations`` and
        ``grad_pred`` sub-folders.
    :param is_testing: when true each image contributes exactly one row;
        otherwise its row is repeated once per sentence.
    :return: None
    """
    with open(image_name_file, 'rb') as fk:
        decoded_lines = (line.rstrip().decode('utf-8') for line in fk)
        images = [line for line in decoded_lines if line]

    # -------- Load number of sentences per image --------
    num_for_each_image = None
    if not is_testing:
        # NOTE(review): allow_pickle=True executes pickle data from the file;
        # only point this at trusted files.
        num_for_each_image = np.load(no_sent_per_image, allow_pickle=True).item()

    activation_dir = os.path.join(save_folder_path, "activations")
    gradient_dir = os.path.join(save_folder_path, "grad_pred")

    f = tables.open_file(visual_feature_file_name, mode='w')
    try:
        atom = tables.Float64Atom()
        array_c = f.create_earray(f.root, 'data', atom, (0, feature_vector_size))

        # -------- Create image features --------
        total = len(images)
        for k, relative_path in enumerate(images):
            image_name = relative_path.split("/")[1]
            if k % 100 == 0:
                print('Completed : %d / %d' % (k, total))

            # The .npz files are written under the image name truncated at
            # its first '.', so look them up the same way.
            file_stem = image_name.split('.')[0]
            with np.load(os.path.join(activation_dir, file_stem + '.npz')) as archive:
                activations = archive['x']
            with np.load(os.path.join(gradient_dir, file_stem + '.npz')) as archive:
                gradients = archive['x']

            # Keep only non-negative averaged gradients as weights.
            weights = np.maximum(np.mean(gradients, axis=(1, 2)), 0)

            # --------- Create weighted visual features --------
            weighted = activations * weights[:, np.newaxis, np.newaxis]

            # -------- Reshape to a single row --------
            feature_row = weighted.reshape(1, feature_vector_size)

            # -------- Repeat the row once per sentence (once when testing) --------
            repeats = 1 if is_testing else num_for_each_image[image_name]
            for _ in range(repeats):
                array_c.append(feature_row)
    finally:
        # Always close so the data is flushed and the handle released.
        f.close()
def main():
    """Run the full pipeline using the settings in the ``params`` module.

    First the activations and gradients of every image are extracted and
    saved, then the weighted feature files for the train, val and test splits
    are created in that order. The test split is built with
    ``is_testing=True``.
    """
    # -------- Shared settings --------
    vector_size = params.FEATURE_VECTOR_SIZE
    output_root = params.IMG_FEATURE_SAVE_FOLDER_PATH
    image_list = params.ALL_IMAGE_NAME_FILE
    prototxt = params.CLASSIFIER_PROTOTXT_FILE
    caffemodel = params.CLASSIFIER_WEIGHT_FILE
    width = params.IMAGE_WIDTH
    height = params.IMAGE_HEIGHT
    image_root = params.IMAGE_FOLDER
    feature_layer = params.LAYER_NAME
    score_layer = params.FINAL_LAYER_NAME
    input_layer = params.DATA_LAYER_NAME

    if not os.path.exists(output_root):
        os.makedirs(output_root)

    extract_visual_feats_and_gradients(
        image_list, prototxt, caffemodel, width, height,
        image_root, feature_layer, output_root, score_layer, input_layer
    )

    # -------- For train images --------
    create_final_features(
        params.TRAIN_VISUAL_FEATURE_FILE,
        vector_size,
        params.TRAIN_IMAGE_NAMES,
        params.TRAIN_NO_SENT_PER_IMAGE,
        output_root
    )

    # -------- For val images --------
    create_final_features(
        params.VAL_VISUAL_FEATURE_FILE,
        vector_size,
        params.VAL_IMAGE_NAMES,
        params.VAL_NO_SENT_PER_IMAGE,
        output_root
    )

    # -------- For test images --------
    create_final_features(
        params.TEST_VISUAL_FEATURE_FILE,
        vector_size,
        params.TEST_IMAGE_NAMES,
        params.TEST_NO_SENT_PER_IMAGE,
        output_root,
        is_testing=True
    )
# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()