Here we download a pre-trained Resnet 50-layer model on Imagenet. 

In [1]:
import os, urllib
def download(url):
    filename = url.split("/")[-1]
    if not os.path.exists(filename):
        urllib.urlretrieve(url, filename)
def get_model(prefix, epoch):
    download(prefix+'-symbol.json')
    download(prefix+'-%04d.params' % (epoch,))

get_model('http://data.dmlc.ml/mxnet/models/imagenet/inception-bn/Inception-BN', 0)

Initialization

We first load the model into memory with load_checkpoint. It returns the symbol (see symbol.ipynb) definition of the neural network, and parameters.


In [None]:
import mxnet as mx
sym, arg_params, aux_params = mx.model.load_checkpoint('Inception-BN', 0)

Next we create an executable module on CPU.

In [48]:
mod = mx.mod.Module(symbol=sym, context=mx.cpu())

The ResNet is trained with RGB images of size 224 x 224. The training data is feed by the variable data. We bind the module with the input shape and specify that it is only for predicting. The number 1 added before the image shape (3x224x224) means that we will only predict one image each time. Next we set the loaded parameters. Now the module is ready to run. 

In [49]:
mod.bind(for_training = False,
         data_shapes=[('data', (1,3,224,224))])
mod.set_params(arg_params, aux_params)

Preparing data

In [50]:
download('http://data.mxnet.io/models/imagenet/resnet/synset1.txt')
with open('synset1.txt') as f:
    synsets = [l.rstrip() for l in f]
print(synsets)

['<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">', '<html><head>', '<title>404 Not Found</title>', '</head><body>', '<h1>Not Found</h1>', '<p>The requested URL /models/imagenet/resnet/synset1.txt was not found on this server.</p>', '<hr>', '<address>Apache/2.4.7 (Ubuntu) Server at data.mxnet.io Port 80</address>', '</body></html>']


In [51]:
import tarfile
import os
#download('http://data.mxnet.io/data/val_1000.tar')
tfile_flower = tarfile.open('flowers.tar.gz')
tfile_flower.extractall()
val_label = [0 for f in os.listdir('flowers')]


tfile_bird = tarfile.open('birds.tar.gz')
tfile_bird.extractall()
val_label = [0 for f in os.listdir('birds')]


In [52]:
%matplotlib inline
import matplotlib
matplotlib.rc("savefig", dpi=100)
import matplotlib.pyplot as plt
import cv2


Next we define a function that reads one image each time and convert to a format can be used by the model. Here we use a naive way that resizes the original image into the desired shape, and change the data layout. 

In [53]:
import numpy as np
import cv2
def get_image(filename):
    img = cv2.imread(filename)  # read image in b,g,r order
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # change to r,g,b order
    img = cv2.resize(img, (224, 224))  # resize to 224*224 to fit model
    img = np.swapaxes(img, 0, 2)
    img = np.swapaxes(img, 1, 2)  # change to (channel, height, width)
    img = img[np.newaxis, :]  # extend to (example, channel, heigth, width)
    return img

In [55]:
files = os.listdir('flowers')
flower_img=[]
flower_img_rep=[]
flower_lbl=[]
i=0
for file in files:
    
    img = get_image('flowers/'+file)
   
    flower_img_rep.append(cv2.imread(file, cv2.COLOR_BGR2RGB))
    flower_img.append(img)
    flower_lbl.append('flower')
    i=i+1
files = os.listdir('birds')
bird_img=[]
bird_img_rep=[]
bird_lbl=[]
i=0
for file in files:
    img = get_image('birds/'+file)
    
    bird_img_rep.append(cv2.imread(file, cv2.COLOR_BGR2RGB))
    bird_img.append(img)
    bird_lbl.append('bird')
    i=i+1
files = os.listdir('food')
food_img=[]
food_img_rep=[]
food_lbl=[]
i=0
for file in files:
    img = get_image('food/'+file)
   
    food_img_rep.append(cv2.imread(file, cv2.COLOR_BGR2RGB))
    food_img.append(img)
    food_lbl.append('food')
    i=i+1
print len(bird_img)
print len(flower_img)
print len(food_img)

1000
1000
1000


In [56]:
train_img = []
train_lbl = []
test_img = []
test_lbl = []
train_img = flower_img[:len(flower_img)*7/10] 
train_lbl = flower_lbl[:len(flower_lbl)*7/10]
test_img = flower_img[len(flower_img)*7/10:] 
test_lbl = flower_lbl[len(flower_lbl)*7/10:]  
test_img_rep = flower_img_rep[len(flower_img)*7/10:]
print len(train_lbl)
train_img.extend(bird_img[:len(bird_img)*7/10]) 
train_lbl.extend(bird_lbl[:len(bird_lbl)*7/10])
test_img.extend(bird_img[len(bird_img)*7/10:]) 
test_lbl.extend(bird_lbl[len(bird_lbl)*7/10:])  
test_img_rep = bird_img_rep[len(bird_img)*7/10:]
print len(train_lbl)
train_img.extend(food_img[:len(bird_img)*7/10]) 
train_lbl.extend(food_lbl[:len(bird_lbl)*7/10])
test_img.extend(food_img[len(bird_img)*7/10:]) 
test_lbl.extend(food_lbl[len(bird_lbl)*7/10:])  
test_img_rep = food_img_rep[len(bird_img)*7/10:]
print len(train_lbl)

700
1400
2100


Finally we define a input data structure which is acceptable by mxnet. The field data is used for the input data, which is a list of NDArrays. 

In [57]:
from collections import namedtuple
Batch = namedtuple('Batch', ['data'])

Extract Features

Sometime we want the internal outputs from a neural network rather than then final predicted probabilities. In this way, the neural network works as a feature extraction module to other applications.

A loaded symbol in default only returns the last layer as output. But we can get all internal layers by get_internals, which returns a new symbol outputting all internal layers. The following codes print the last 10 layer names.

We can also use mx.viz.plot_network(sym) to visually find the name of the layer we want to use. The name conventions of the output is the layer name with _output as the postfix.


In [13]:
all_layers = sym.get_internals()
all_layers.list_outputs()[-10:-1]

['bn1_moving_var',
 'bn1_output',
 'relu1_output',
 'pool1_output',
 'flatten0_output',
 'fc1_weight',
 'fc1_bias',
 'fc1_output',
 'softmax_label']

Often we want to use the output before the last fully connected layers, which may return semantic features of the raw images but not too fitting to the label yet. In the ResNet case, it is the flatten layer with name flatten0 before the last fullc layer. The following codes get the new symbol sym3 which use the flatten layer as the last output layer, and initialize a new module.

In [14]:
all_layers = sym.get_internals()
sym3 = all_layers['flatten0_output']
mod3 = mx.mod.Module(symbol=sym3, context=mx.cpu())
mod3.bind(for_training=False, data_shapes=[('data', (1,3,224,224))])
mod3.set_params(arg_params, aux_params)


In [15]:
length = len(train_img)
featuremap=[]
for i in range(0,length):
    img = train_img[i]
    mod3.forward(Batch([mx.nd.array(img)]))
    out = mod3.get_outputs()[0].asnumpy()
    featuremap.append(out.flatten())
    print i
train_images = np.array(featuremap)
print(train_images.shape)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

KeyboardInterrupt: 

In [309]:
from sklearn import svm
from sklearn.neural_network import MLPClassifier

print train_images.shape
print len(train_lbl)
mlp = MLPClassifier(hidden_layer_sizes=(10),solver='adam',learning_rate_init=0.01,max_iter=500)

mlp.fit(train_images, train_lbl)


(2100, 2048)
2100


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=10, learning_rate='constant',
       learning_rate_init=0.01, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

Testing and calculating accuracy of the classifier

In [310]:
from sklearn.metrics import accuracy_score
predict_lbl = []
print len(test_lbl)
length = len(test_img)
for i in range(0,length):
    img = test_img[i]
    mod3.forward(Batch([mx.nd.array(img)]))
    out = mod3.get_outputs()[0].asnumpy()
    predict_lbl.append(mlp.predict([out.flatten()])[0])
    print(predict_lbl[i],test_lbl[i])
    
accuracy_score(test_lbl, predict_lbl)


900
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('food', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 'flower')
('flower', 

0.98777777777777775