In [1]:
import bson
import numpy as np
import pandas as pd
import os
import io
from scipy.misc import imread, imresize
from keras.models import Model, load_model
from keras.applications.xception import Xception, preprocess_input

Using TensorFlow backend.


In [None]:
# Multi-GPU helpers from the third-party keras_exp package.
from keras_exp.multigpu._multigpu import get_available_gpus
# NOTE(review): a later cell defines its own make_parallel(); once that cell
# runs, it shadows the function imported here.
from keras_exp.multigpu._multigpu import make_parallel
# Presumably the GPU devices visible to this process (TODO confirm return type);
# gdev_list is not referenced again in the visible cells.
gdev_list = get_available_gpus()

In [2]:
from keras.layers import merge
from keras.layers.core import Lambda
from keras.models import Model

import tensorflow as tf

def make_parallel(model, gpu_count):
    """Build a data-parallel wrapper around `model` spanning `gpu_count` GPUs.

    Every input batch is cut along axis 0 into `gpu_count` slices, `model` is
    applied to one slice per GPU (weights are shared because the same model
    object is called each time), and the per-tower outputs are concatenated
    back along axis 0 on the CPU.

    Args:
        model: Keras model to replicate.
        gpu_count: number of GPU towers to create; must be >= 1.

    Returns:
        A new Keras `Model` with the same inputs as `model`. Its `save`
        method is patched so that it saves the underlying single-device
        `model` rather than the parallel wrapper.

    Raises:
        ValueError: if `gpu_count` is smaller than 1.
    """
    # Local import: replaces the legacy `merge(..., mode='concat')` call, which
    # is deprecated in Keras 2.
    from keras.layers import concatenate

    if gpu_count < 1:
        raise ValueError('gpu_count must be >= 1, got %r' % (gpu_count,))

    def get_slice(data, idx, parts):
        """Return slice number `idx` (of `parts`) of `data` along axis 0."""
        shape = tf.shape(data)
        batch = shape[:1]
        step = batch // parts
        if idx == parts - 1:
            # The last tower also takes the remainder, so no samples are
            # dropped when the batch size is not divisible by `parts`.
            length = batch - step * idx
        else:
            length = step
        size = tf.concat([length, shape[1:]], axis=0)
        # Only the batch axis is offset; every other axis starts at 0.
        stride = tf.concat([step, shape[1:] * 0], axis=0)
        start = stride * idx
        return tf.slice(data, start, size)

    # One list of per-tower tensors for each output of the model.
    outputs_all = [[] for _ in model.outputs]

    # Place a copy of the model on each GPU, each getting a slice of the batch
    for i in range(gpu_count):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('tower_%d' % i):
                inputs = []
                # Slice each input into a piece for processing on this GPU
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_n = Lambda(get_slice, output_shape=input_shape,
                                     arguments={'idx': i, 'parts': gpu_count})(x)
                    inputs.append(slice_n)
                outputs = model(inputs)

                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save all the outputs for merging back together later
                for l in range(len(outputs)):
                    outputs_all[l].append(outputs[l])

    # merge outputs on CPU
    with tf.device('/cpu:0'):
        merged = []
        for outputs in outputs_all:
            if len(outputs) == 1:
                # A single tower has nothing to concatenate.
                merged.append(outputs[0])
            else:
                merged.append(concatenate(outputs, axis=0))

        new_model = Model(inputs=model.inputs, outputs=merged)
        ## to save initial model
        funcType = type(model.save)
        # monkeypatch the save to save just the underlying model
        def new_save(self_, filepath, overwrite=True):
            model.save(filepath, overwrite)
        new_model.save = funcType(new_save, new_model)
        return new_model

In [3]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

num_classes = 1054
def add_new_last_layer(base_model, nb_classes):
    """Attach a global-average-pooling + softmax head to `base_model`.

    Args:
        base_model: Keras model whose `output` feeds the new head.
        nb_classes: number of units in the new softmax layer.

    Returns:
        A Keras `Model` mapping `base_model.input` to the class probabilities.
    """
    x = base_model.output
    x = GlobalAveragePooling2D()(x)  # new global pooling layer
    # An optional Dense(1024)/Dropout(0.3)/Dense(512)/Dropout(0.3) stack was
    # tried here and is currently disabled.
    # Use the argument rather than the module-level `num_classes`, so the
    # function honours what the caller asks for.
    predictions = Dense(nb_classes, activation='softmax')(x)  # new softmax layer
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

In [4]:
from keras.models import Model
from keras.optimizers import RMSprop

# Xception backbone with ImageNet weights and no classification top, for 80x80 RGB inputs.
base_model = Xception(weights='imagenet', include_top=False, input_shape = (80, 80, 3)) 
# Add the pooling + softmax head sized for num_classes categories.
model = add_new_last_layer(base_model, num_classes)
rmsprop = RMSprop(lr=0.0005, rho=0.9, epsilon=1e-08, decay=0.0)
# Wrap the model so each batch is split across 2 GPU towers (the count is hard-coded here).
model = make_parallel(model, 2)
model.compile(optimizer=rmsprop, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

  from ipykernel import kernelapp as app


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 80, 80, 3)     0                                            
____________________________________________________________________________________________________
lambda_1 (Lambda)                (None, 80, 80, 3)     0           input_1[0][0]                    
____________________________________________________________________________________________________
lambda_2 (Lambda)                (None, 80, 80, 3)     0           input_1[0][0]                    
____________________________________________________________________________________________________
model_1 (Model)                  (None, 1054)          23021126    lambda_1[0][0]                   
                                                                   lambda_2[0][0]          

  name=name)


In [3]:
from keras.layers import merge
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
batch_size = 512
num_classes = 1054

# Small CNN: five Conv2D(2x2, 'same', relu) + MaxPooling2D(2) stages with a
# doubling filter count, then global average pooling and a softmax classifier.
model = Sequential()
for stage, n_filters in enumerate((32, 64, 128, 256, 512)):
    conv_kwargs = dict(filters=n_filters, kernel_size=2, padding='same', activation='relu')
    if stage == 0:
        # Only the first layer declares the input shape (80x80 RGB).
        conv_kwargs['input_shape'] = (80, 80, 3)
    model.add(Conv2D(**conv_kwargs))
    model.add(MaxPooling2D(pool_size=2))
model.add(GlobalAveragePooling2D(data_format=None))
model.add(Dense(num_classes, activation='softmax'))

# Multi-GPU wrapping is currently disabled for this model:
#model = make_parallel(model, 2)

model.summary()
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 80, 80, 32)        416       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 40, 40, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 40, 40, 64)        8256      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 20, 20, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 20, 20, 128)       32896     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 10, 10, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 10, 10, 256)       131328    
__________

In [4]:
# Restore previously saved weights into the current `model`.
# NOTE(review): the path begins with '.models_checkpoints' (a dot-prefixed
# directory, no slash after the dot) — confirm it is not a typo for './models_checkpoints'.
model.load_weights('.models_checkpoints/mine_weights_1.hdf5')

In [3]:
# Model used below to predict from a product's first image.
# `tf` is handed over via custom_objects, presumably so layers that reference
# TensorFlow can be deserialized — TODO confirm.
model1 = load_model('models/xception_small_single.h5', custom_objects={'tf':tf})



In [4]:
# Model used below to predict from a product's additional (non-first) images.
# `tf` is handed over via custom_objects, presumably so layers that reference
# TensorFlow can be deserialized — TODO confirm.
model2 = load_model('models/xception_small_additional.h5', custom_objects={'tf':tf})



## Pre-test single and additional models combined on train data

In [5]:
def get_image(pic):
    """Decode one picture record into a preprocessed batch of a single image.

    `pic['picture']` holds the encoded image bytes. The image is decoded,
    resized to 80x80, converted to float32, given a leading batch axis and
    passed through `preprocess_input`.
    """
    encoded = io.BytesIO(pic['picture'])
    pixels = imresize(imread(encoded), (80, 80))
    single_batch = np.array(pixels, np.float32)[np.newaxis, ...]
    return preprocess_input(single_batch)

In [None]:
from glob import glob
def indices(folder='./data/files/train_small'):
    """Map class indices to category names taken from sub-directory names.

    Args:
        folder: directory whose immediate sub-directories are named after
            the categories. Defaults to the small training set.

    Returns:
        dict mapping 0..N-1 to the sub-directory names, in the sorted order
        of their paths. Plain files inside `folder` are ignored.
    """
    # A trailing separator in the pattern makes glob match directories only.
    pattern = os.path.join(folder, '*', '')
    # Take the directory name from each match instead of slicing by a
    # hard-coded prefix length, so any `folder` works.
    categories = [os.path.basename(os.path.dirname(path))
                  for path in sorted(glob(pattern))]
    return dict(enumerate(categories))

In [9]:
from keras.preprocessing import image
from tqdm import tqdm 

# Compare three predictors on training products (skipping the first 20000
# records): model1 on the first image, model2 averaged over the additional
# images, and the sum of both probability vectors.
with open('./data/train.bson', 'rb') as fbson:
    data = bson.decode_file_iter(fbson)
    ids = []
    results = []
    results_1 = []
    results_2 = []
    categories = []
    j = 0
    indices2class = indices()
    # A set gives constant-time membership tests for every product.
    categories_small = set(os.listdir('./data/files/train_small'))

    def _as_frame():
        """Pack everything gathered so far into one int32 DataFrame."""
        frame = pd.DataFrame(
            np.column_stack((ids, results_1, results_2, results, categories)),
            dtype='int32')
        frame.columns = ['_id', 'predict_1', 'predict_2', 'predict', 'category_id']
        return frame

    for c, d in enumerate(data):
        if c < 20000:
            continue
        _id = d['_id']
        category = d['category_id']
        if str(category) not in categories_small:
            continue

        ids.append(_id)
        categories.append(category)

        # The first image goes to model1; any further images go to model2.
        # Collect them in a list and concatenate once instead of growing an
        # array with np.append on every image.
        extra_images = []
        for e, pic in enumerate(d['imgs']):
            if e == 0:
                img_0 = get_image(pic)
            else:
                extra_images.append(get_image(pic))
        n_extra = len(extra_images)

        # predicting on model 2 - additional images
        if n_extra != 0:
            probabilities_2 = model2.predict(np.concatenate(extra_images, axis=0))
            probabilities_sum_2 = np.sum(probabilities_2, axis=0) / n_extra
            prediction_2 = np.argmax(probabilities_sum_2)
            results_2.append(int(indices2class[prediction_2]))
        else:
            # No additional images: record 0 as a placeholder prediction.
            results_2.append(0)

        # predicting on model 1 - single image
        probabilities_1 = model1.predict(img_0)
        prediction_1 = np.argmax(probabilities_1)
        results_1.append(int(indices2class[prediction_1]))

        # predicting on combined model 1&2
        if n_extra != 0:
            probabilities = probabilities_1 + probabilities_sum_2
        else:
            probabilities = probabilities_1
        prediction = np.argmax(probabilities)
        results.append(int(indices2class[prediction]))

        j += 1
        if j > 5000:
            # Checkpoint: fires once 5001 matching products have been seen
            # since the previous checkpoint, so `result` is usable even if
            # the cell is interrupted afterwards.
            result = _as_frame()
            #result.to_csv("result.csv", index = False)
            print("done")
            j = 0

    # Include the final partial chunk when the loop runs to completion.
    result = _as_frame()

done


KeyboardInterrupt: 

In [10]:
# Per-column count of rows where the combined prediction matches the true category.
result[result['predict'].eq(result['category_id'])].count()

_id            4350
predict_1      4350
predict_2      4350
predict        4350
category_id    4350
dtype: int64

In [11]:
# Per-column count of rows where the first-image model (predict_1) matches the true category.
result[result['predict_1'].eq(result['category_id'])].count()

_id            4276
predict_1      4276
predict_2      4276
predict        4276
category_id    4276
dtype: int64

In [12]:
# Per-column count of rows where the additional-images model (predict_2) matches the true category.
result[result['predict_2'].eq(result['category_id'])].count()

_id            938
predict_1      938
predict_2      938
predict        938
category_id    938
dtype: int64

## Test the whole model

In [9]:
from keras.preprocessing import image
from tqdm import tqdm 
import time

# Predict one category per test product by summing the class probabilities
# over all of its images, then write the submission file.
with open('./data/test.bson', 'rb') as fbson:
    data = bson.decode_file_iter(fbson)
    ids = []
    results = []
    indices2class = indices()
    for d in tqdm(data):
        _id = d['_id']
        # Reuse get_image() for decoding/resizing/preprocessing, and build the
        # batch once instead of growing it with np.append per image.
        images = [get_image(pic) for pic in d['imgs']]
        if images:
            batch = np.concatenate(images, axis=0)
        else:
            batch = np.empty((0, 80, 80, 3))
        # NOTE(review): `model3` is not defined in any visible cell and this
        # cell failed with a NameError when last run — define or load the
        # intended model before executing.
        probabilities = model3.predict(batch)
        probabilities_sum = np.sum(probabilities, axis=0)
        prediction = np.argmax(probabilities_sum)
        predict = int(indices2class[prediction])
        ids.append(_id)
        results.append(predict)
    result = pd.DataFrame(np.column_stack((ids, results)), dtype='int32')
    result.columns = ['_id', 'category_id']
    result.to_csv("result.csv", index=False)
    print("saved")

0it [00:00, ?it/s]


NameError: name 'model3' is not defined