Here we download a CaffeNet model pre-trained on ImageNet.

In [9]:
import os, urllib
def download(url):
    """Fetch ``url`` into the current working directory unless already present.

    The local file name is the last ``/``-separated component of ``url``.
    If a file with that name already exists, nothing is downloaded.
    """
    local_name = url.rsplit("/", 1)[-1]
    if os.path.exists(local_name):
        # Already on disk from a previous run; skip the network round trip.
        return
    urllib.urlretrieve(url, local_name)
def get_model(prefix, epoch):
    """Download the symbol file and one parameter file of a saved model.

    ``prefix`` is the URL prefix shared by both files; ``epoch`` selects the
    parameter file ``<prefix>-<epoch, zero-padded to 4 digits>.params``.
    The parameter URL is printed before anything is downloaded.
    """
    symbol_url = prefix + '-symbol.json'
    params_url = prefix + '-%04d.params' % (epoch,)
    print(params_url)
    for url in (symbol_url, params_url):
        download(url)



In [10]:
# Download the CaffeNet symbol JSON and the epoch-0 parameter file into the
# current directory (files that already exist locally are not re-downloaded).
get_model('http://data.dmlc.ml/mxnet/models/imagenet/caffenet/caffenet', 0)

http://data.dmlc.ml/mxnet/models/imagenet/caffenet/caffenet-0000.params


Initialization

We first load the model into memory with load_checkpoint. It returns the symbol (see symbol.ipynb) definition of the neural network, and parameters.


In [11]:
import mxnet as mx
# Load the checkpoint saved under prefix 'caffenet' at epoch 0 (presumably the
# caffenet-symbol.json / caffenet-0000.params files downloaded earlier).
# Yields the network symbol plus its argument and auxiliary parameters.
sym, arg_params, aux_params = mx.model.load_checkpoint('caffenet', 0)

Next we create an executable module on CPU.

In [14]:
# Wrap the loaded symbol in a Module that executes on the CPU.
mod = mx.mod.Module(symbol=sym, context=mx.cpu())
# Echo the symbol; the recorded output shows its head is named "prob".
print sym

<Symbol prob>


The model takes RGB images of size 224 x 224 as input. The input is fed through the variable named data. We bind the module with the input shape and specify that it is only for prediction. The number 1 added before the image shape (3x224x224) means that we will only predict one image at a time. Next we set the loaded parameters. Now the module is ready to run.

In [15]:
# Bind for inference only, with a single input named 'data' of shape
# (1 image, 3 channels, 224, 224).
mod.bind(for_training = False,
         data_shapes=[('data', (1,3,224,224))])
# NOTE(review): a placeholder 'prob_label' entry is injected here, presumably
# because set_params expects a value for every symbol argument including the
# label input -- confirm against the MXNet version in use.
arg_params['prob_label'] = mx.nd.array([0])
mod.set_params(arg_params, aux_params)

Preparing data

In [19]:
# IPython magic selecting matplotlib's inline backend.
%matplotlib inline
import matplotlib
# Set dpi=100 in the "savefig" rc group.
matplotlib.rc("savefig", dpi=100)
import matplotlib.pyplot as plt
import cv2
import os

Next we define a function that reads one image at a time and converts it to a format that can be used by the model. Here we use a naive approach that resizes the original image to the desired shape and changes the data layout.

In [20]:
import numpy as np
import cv2
def get_image(filename):
    """Read an image file and return it as a (1, 3, 224, 224) RGB array.

    The image is decoded with OpenCV, converted from BGR to RGB, resized to
    224 x 224 without preserving the aspect ratio, and rearranged from
    (height, width, channel) to (example, channel, height, width).

    Raises:
        IOError: if OpenCV cannot read ``filename`` (missing file or a file
            that is not a decodable image).
    """
    img = cv2.imread(filename)  # read image in b,g,r order
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise IOError("could not read image file: %s" % (filename,))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # change to r,g,b order
    img = cv2.resize(img, (224, 224))  # resize to 224*224 to fit model
    img = np.transpose(img, (2, 0, 1))  # (height, width, channel) -> (channel, height, width)
    return img[np.newaxis, :]  # extend to (example, channel, height, width)

In [21]:
def _load_class(directory, label, limit=101):
    """Load up to ``limit`` images from ``directory`` for one class.

    Returns three parallel lists: the model-ready arrays produced by
    ``get_image``, the full-size RGB images kept for display, and one copy
    of ``label`` per image.
    """
    tensors, previews, labels = [], [], []
    # Sort so the same files are picked (and later split) on every run;
    # os.listdir returns entries in arbitrary order.
    for name in sorted(os.listdir(directory))[:limit]:
        path = os.path.join(directory, name)
        tensors.append(get_image(path))
        # Read from the class directory (not the bare file name) and convert
        # the colour order explicitly: imread's second argument is a read
        # flag, not a colour-conversion code.
        previews.append(cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB))
        labels.append(label)
    return tensors, previews, labels


flower_img, flower_img_rep, flower_lbl = _load_class('flowers', 'flower')
bird_img, bird_img_rep, bird_lbl = _load_class('birds', 'bird')
food_img, food_img_rep, food_lbl = _load_class('food', 'food')
print(len(bird_img))
print(len(flower_img))
print(len(food_img))

101
101
101


In [22]:
# Split every class 70/30 into training and test sets, keeping the class order
# flower, bird, food. The first 70% of each class goes to training.
train_img, train_lbl = [], []
test_img, test_lbl, test_img_rep = [], [], []
for imgs, lbls, reps in ((flower_img, flower_lbl, flower_img_rep),
                         (bird_img, bird_lbl, bird_img_rep),
                         (food_img, food_lbl, food_img_rep)):
    # Cut point is taken from this class's own length; floor division keeps
    # the index an integer on both Python 2 and Python 3.
    cut = len(imgs) * 7 // 10
    train_img.extend(imgs[:cut])
    train_lbl.extend(lbls[:cut])
    test_img.extend(imgs[cut:])
    test_lbl.extend(lbls[cut:])
    # Accumulate the display images so test_img_rep stays aligned with
    # test_img instead of holding only the last class.
    test_img_rep.extend(reps[cut:])
    print(len(train_lbl))

70
140
210


In [23]:
from sklearn.utils import shuffle 

# Shuffle images and labels together so each image keeps its own label.
# NOTE(review): no random_state is passed, so presumably the order differs
# between runs -- pass one if reproducible training is needed.
train_img, train_lbl = shuffle(train_img, train_lbl)


Finally we define an input data structure that is accepted by mxnet. The field data holds the input data, which is a list of NDArrays.

In [24]:
from collections import namedtuple
# Minimal batch container with a single `data` field; instances are what the
# later cells pass to Module.forward.
Batch = namedtuple('Batch', ['data'])

Extract Features

Sometimes we want the internal outputs from a neural network rather than the final predicted probabilities. In this way, the neural network works as a feature extraction module for other applications.

By default a loaded symbol only returns the last layer as output. But we can get all internal layers with get_internals, which returns a new symbol outputting all internal layers. The following code prints the 19 output names that precede the final one.

We can also use mx.viz.plot_network(sym) to visually find the name of the layer we want to use. By convention, the name of an output is the layer name with _output as the suffix.


In [25]:
# Symbol that exposes every internal output of the network.
all_layers = sym.get_internals()
# Show the 19 names before the final one ([-20:-1] excludes the last entry).
all_layers.list_outputs()[-20:-1]

['conv5_bias',
 'conv5_output',
 'relu5_output',
 'pool5_output',
 'flatten_0_output',
 'fc6_weight',
 'fc6_bias',
 'fc6_output',
 'relu6_output',
 'drop6_output',
 'fc7_weight',
 'fc7_bias',
 'fc7_output',
 'relu7_output',
 'drop7_output',
 'fc8_weight',
 'fc8_bias',
 'fc8_output',
 'prob_label']

Often we want to use an output before the fully connected layers, which may return semantic features of the raw images without being fitted too closely to the labels yet. For this model we take the flatten layer, whose output is named flatten_0_output and which comes right before the first fully connected layer (fc6). The following code builds a new symbol sym3 that uses the flatten layer as its output, and initializes a new module mod3 from it.

In [27]:
# Build a feature-extraction module that stops at the flatten layer.
all_layers = sym.get_internals()
# Select the internal output named 'flatten_0_output' as the new head symbol.
sym3 = all_layers['flatten_0_output']
mod3 = mx.mod.Module(symbol=sym3, context=mx.cpu())
# Same inference-only binding and input shape as the full model.
mod3.bind(for_training=False, data_shapes=[('data', (1,3,224,224))])
# Reuse the parameters loaded from the checkpoint.
mod3.set_params(arg_params, aux_params)


In [28]:
import timeit
# Run every training image through the truncated network (mod3) and keep the
# flattened output as that image's feature vector, timing the whole pass.
length = len(train_img)
featuremap = []
start_time = timeit.default_timer()
batch_size = 150  # not used by the loop below; images are fed one at a time
for i, img in enumerate(train_img):
    mod3.forward(Batch([mx.nd.array(img)]))
    out = mod3.get_outputs()[0].asnumpy()
    featuremap.append(out.flatten())
elapsed = timeit.default_timer() - start_time
# Stack the per-image vectors into one array, one row per image.
train_images = np.array(featuremap)
print(train_images.shape)
print("Time taken : ")


(210, 9216)
Time taken : 


In [29]:
# Seconds measured with timeit.default_timer around the feature-extraction loop.
print elapsed

420.213258982


In [32]:
from sklearn import svm
from sklearn.neural_network import MLPClassifier

# Sanity check: the number of feature rows must match the number of labels.
print train_images.shape
print len(train_lbl)
# Small MLP (two hidden layers of 10 units) trained on the extracted features.
# NOTE(review): in the recorded run every test image is predicted as 'food'
# (accuracy 1/3), so this fit looks degenerate. The features are passed in
# unscaled; standardising them or lowering learning_rate_init may be worth
# trying -- unverified.
mlp = MLPClassifier(hidden_layer_sizes=(10,10),solver='adam',learning_rate_init=0.01,max_iter=500)

mlp.fit(train_images, train_lbl)


(210, 9216)
210


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(10, 10), learning_rate='constant',
       learning_rate_init=0.01, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

Testing and calculating accuracy of the classifier

In [33]:
from sklearn.metrics import accuracy_score
# Classify each test image from its mod3 features, print the
# (predicted, expected) pair, then score the predictions.
predict_lbl = []
print(len(test_lbl))
length = len(test_img)
for i, img in enumerate(test_img):
    mod3.forward(Batch([mx.nd.array(img)]))
    out = mod3.get_outputs()[0].asnumpy()
    # predict expects a 2-D batch, so wrap the single feature vector in a list.
    predict_lbl.append(mlp.predict([out.flatten()])[0])
    # Print the pair as one tuple, e.g. ('food', 'flower').
    print((predict_lbl[-1], test_lbl[i]))

accuracy_score(test_lbl, predict_lbl)


93
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'flower')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')
('food', 'bird')


0.33333333333333331

In [None]:
# Number of training images.
print len(train_img)