## Using pre-trained NN

In [38]:
import numpy as np
import theano
import theano.tensor as T
import lasagne
import pickle
import os
import matplotlib.pyplot as plt
%matplotlib inline
import scipy
from scipy.misc import imread, imsave, imresize
from lasagne.utils import floatX
import pandas as pd

In [2]:
from lasagne.layers import InputLayer
from lasagne.layers import DenseLayer
from lasagne.layers import NonlinearityLayer
from lasagne.layers import DropoutLayer
from lasagne.layers import Pool2DLayer as PoolLayer
from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.nonlinearities import softmax


def build_model():
    """Assemble the 16-weight-layer network as a dict of named Lasagne layers.

    The network is five convolutional stages (each a run of 3x3, pad-1
    convolutions followed by a size-2 pooling layer), then three dense layers
    with dropout after the first two, and a final softmax.

    Returns:
        dict: maps layer names ('input', 'conv1_1', ..., 'pool5', 'fc6',
        'fc6_dropout', 'fc7', 'fc7_dropout', 'fc8', 'prob') to layer objects.
        Each layer takes the previously created layer as its input.
    """
    # (stage number, convolutions in the stage, filters per convolution)
    conv_stages = (
        (1, 2, 64),
        (2, 2, 128),
        (3, 3, 256),
        (4, 3, 512),
        (5, 3, 512),
    )

    layers = {}
    top = InputLayer((None, 3, 224, 224))
    layers['input'] = top

    for stage, n_convs, n_filters in conv_stages:
        for position in range(1, n_convs + 1):
            top = ConvLayer(top, n_filters, 3, pad=1, flip_filters=False)
            layers['conv{}_{}'.format(stage, position)] = top
        top = PoolLayer(top, 2)
        layers['pool{}'.format(stage)] = top

    # Two 4096-unit dense layers, each followed by 50% dropout.
    for dense_name in ('fc6', 'fc7'):
        top = DenseLayer(top, num_units=4096)
        layers[dense_name] = top
        top = DropoutLayer(top, p=0.5)
        layers[dense_name + '_dropout'] = top

    # The last dense layer is linear; softmax is applied as a separate layer.
    layers['fc8'] = DenseLayer(top, num_units=1000, nonlinearity=None)
    layers['prob'] = NonlinearityLayer(layers['fc8'], softmax)

    return layers

In [3]:
# Class names are stored here.
# NOTE: unpickling can execute arbitrary code; only load a classes.pkl you trust.
# The context manager closes the file handle as soon as loading finishes.
with open('classes.pkl', 'rb') as classes_file:
    classes = pickle.load(classes_file)
# For example, the 10th class (index 9) is ostrich:
print(classes[9])

ostrich, Struthio camelus


In [4]:
# Per-channel offsets subtracted after the channel axis has been reversed.
MEAN_VALUES = np.array([104, 117, 123])
# Side length (in pixels) that images are resized to before preprocessing.
IMAGE_W = 224

def preprocess(img):
    """Turn an (height, width, channels) image into a mean-centred network input.

    Steps, in order: cast to float32 (on a copy, so ``img`` is left untouched),
    reverse the channel axis, subtract ``MEAN_VALUES`` from the first three
    channels, then reorder the axes and add a leading batch axis.

    Args:
        img: numpy array indexed as [row, column, channel].

    Returns:
        float32 numpy array of shape (1, channels, height, width).
    """
    pixels = img.astype(np.float32, copy=True)

    # Reverse the last axis (e.g. RGB -> BGR); this is a view onto `pixels`.
    reversed_channels = pixels[:, :, ::-1]

    # Centre the first three channels in a single vectorised step.
    reversed_channels[:, :, :3] -= MEAN_VALUES[:3].astype(np.float32)

    # (height, width, channels) -> (1, channels, height, width)
    channels_first = reversed_channels.transpose(2, 0, 1)
    return channels_first[np.newaxis]

0.0


### Deploy the network

In [6]:
# Build the layer dictionary; the pre-trained parameter values are loaded into it separately.
net = build_model()

In [7]:
# Load the stored parameter values and copy them into every layer feeding net['prob'].
# `pickle` is already imported in the notebook's first cell, so it is not re-imported here.
# NOTE: unpickling can execute arbitrary code; only load a vgg16.pkl from a trusted source.
# NOTE(review): encoding='latin1' is presumably needed because the pickle was written
# by Python 2 -- confirm against the file's origin.
with open('vgg16.pkl', 'rb') as weights_file:
    weights = pickle.load(weights_file, encoding='latin1')

lasagne.layers.set_all_param_values(net['prob'], weights['param values'])

## Images to features
This part creates 25 .csv files in the 'features/' directory. Each of the 25 files has 1000 rows, corresponding to 1000 images. Each row has 4096 (+2) elements: the 4096 features of the image obtained from the 'fc6' DenseLayer, preceded by two extra columns, namely the row index written by pandas and the label (True for a cat, False for a dog).

Because our problem is similar to the one the network was originally trained on, we can reuse the output of its dense layers as features. I think that for a different problem (e.g. different image sizes, or a very different dataset) we could not use the dense layers.

In [72]:
from tqdm import tqdm

# Number of images (rows) written to each CSV file.
CHUNK_SIZE = 1000

# Compile a function that maps a preprocessed image batch to the 'fc6' activations.
# deterministic=True asks Lasagne for inference-mode outputs.
input_image = T.tensor4('input')
output = lasagne.layers.get_output(net['fc6'], input_image, deterministic=True)
prob = theano.function([input_image], output)

# Make sure the output directory exists before the first chunk is written.
os.makedirs('features', exist_ok=True)


def _save_chunk(rows, chunk_index):
    """Write one chunk of [label, feature_0, ...] rows to features/<chunk_index>.csv."""
    pd.DataFrame(rows).to_csv('features/{}.csv'.format(chunk_index))


# Extract features from the images.
# This may be a tedious process. If so, store the results in some pickle and re-use them.
data = []
j = 0  # index of the next CSV file to write

# os.listdir returns entries in arbitrary order; sorting makes the chunk contents reproducible.
for fname in tqdm(sorted(os.listdir('train/'))):
    y = fname.startswith("cat")  # label: True for cats, False otherwise
    img = plt.imread("train/" + fname)

    img = preprocess(imresize(img, (IMAGE_W, IMAGE_W)))
    feature = prob(img)
    data.append([y] + feature[0].tolist())

    if len(data) == CHUNK_SIZE:
        print(j)
        _save_chunk(data, j)
        data = []
        j += 1

# Flush the final partial chunk so no image is dropped when the number of
# files is not an exact multiple of CHUNK_SIZE.
if data:
    print(j)
    _save_chunk(data, j)
    data = []
    j += 1
    



  0%|          | 0/25000 [00:00<?, ?it/s][A
  0%|          | 1/25000 [00:00<2:41:32,  2.58it/s][A
  0%|          | 2/25000 [00:00<2:40:05,  2.60it/s][A
  0%|          | 3/25000 [00:01<2:39:50,  2.61it/s][A
  0%|          | 4/25000 [00:01<2:39:51,  2.61it/s][A
  0%|          | 5/25000 [00:01<2:39:03,  2.62it/s][A
  0%|          | 6/25000 [00:02<2:38:42,  2.62it/s][A
  0%|          | 7/25000 [00:02<2:37:33,  2.64it/s][A
  0%|          | 8/25000 [00:03<2:44:11,  2.54it/s][A
  0%|          | 9/25000 [00:03<2:41:53,  2.57it/s][A
  0%|          | 10/25000 [00:03<2:40:21,  2.60it/s][A
  0%|          | 11/25000 [00:04<2:39:14,  2.62it/s][A
  0%|          | 12/25000 [00:04<2:38:19,  2.63it/s][A
  0%|          | 13/25000 [00:04<2:37:34,  2.64it/s][A
  0%|          | 14/25000 [00:05<2:37:44,  2.64it/s][A
  0%|          | 15/25000 [00:05<2:46:57,  2.49it/s][A
  0%|          | 16/25000 [00:06<2:45:03,  2.52it/s][A
  0%|          | 17/25000 [00:06<2:42:29,  2.56it/s][A
  0%|      

0


  8%|▊         | 1999/25000 [13:24<2:30:56,  2.54it/s]

1


 12%|█▏        | 2999/25000 [20:10<2:22:07,  2.58it/s]

2


 16%|█▌        | 3999/25000 [27:00<2:42:15,  2.16it/s]

3


 20%|█▉        | 4999/25000 [33:42<2:03:46,  2.69it/s]

4


 24%|██▍       | 5999/25000 [40:26<2:00:27,  2.63it/s]

5


 28%|██▊       | 6999/25000 [47:05<1:52:22,  2.67it/s]

6


 32%|███▏      | 7999/25000 [53:47<2:02:15,  2.32it/s]

7


 36%|███▌      | 8999/25000 [1:00:27<1:38:50,  2.70it/s]

8


 40%|███▉      | 9999/25000 [1:07:06<1:33:57,  2.66it/s]

9


 44%|████▍     | 10999/25000 [1:13:44<1:28:43,  2.63it/s]

10


 48%|████▊     | 11999/25000 [1:20:27<1:38:53,  2.19it/s]

11


 52%|█████▏    | 12999/25000 [1:27:05<1:27:18,  2.29it/s]

12


 56%|█████▌    | 13999/25000 [1:33:45<1:10:07,  2.61it/s]

13


 60%|█████▉    | 14999/25000 [1:40:21<1:01:42,  2.70it/s]

14


 64%|██████▍   | 15999/25000 [1:46:57<1:02:07,  2.41it/s]

15


 68%|██████▊   | 16999/25000 [1:53:31<51:08,  2.61it/s]  

16


 72%|███████▏  | 17999/25000 [2:00:06<44:42,  2.61it/s]  

17


 76%|███████▌  | 18999/25000 [2:06:50<52:01,  1.92it/s]  

18


 80%|███████▉  | 19999/25000 [2:13:38<31:53,  2.61it/s]  

19


 84%|████████▍ | 20999/25000 [2:20:17<27:49,  2.40it/s]  

20


 88%|████████▊ | 21999/25000 [2:27:00<19:15,  2.60it/s]  

21


 92%|█████████▏| 22999/25000 [2:33:41<12:40,  2.63it/s]  

22


 96%|█████████▌| 23999/25000 [2:40:21<06:27,  2.58it/s]

23


100%|█████████▉| 24999/25000 [2:47:11<00:00,  1.72it/s]

24


100%|██████████| 25000/25000 [2:47:15<00:00,  1.81s/it]


## Then, please switch to <a href="https://github.com/DanielBaghdasaryan/Session2/blob/master/RF_Dog_Cat.ipynb">RF_Dog_cat.ipynb</a>