In [2]:
import cv2
import pandas as pd
import numpy as np
import os
from tqdm import tqdm, tqdm_notebook

train_df = pd.read_csv('train.csv')
img_size = 256
batch_size = 16

In [3]:
pet_ids = train_df['PetID'].values
n_batches = len(pet_ids) // batch_size + 1

In [4]:
from keras.applications.densenet import preprocess_input, DenseNet121


Using TensorFlow backend.


In [6]:
def resize_to_square(im):
    old_size = im.shape[:2] # old_size is in (height, width) format
    ratio = float(img_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])
    # new_size should be in (width, height) format
    im = cv2.resize(im, (new_size[1], new_size[0]))
    delta_w = img_size - new_size[1]
    delta_h = img_size - new_size[0]
    top, bottom = delta_h//2, delta_h-(delta_h//2)
    left, right = delta_w//2, delta_w-(delta_w//2)
    color = [0, 0, 0]
    new_im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT,value=color)
    return new_im

def load_image(path, pet_id):
    image = cv2.imread(f'{path}{pet_id}-1.jpg')
    new_image = resize_to_square(image)
    new_image = preprocess_input(new_image)
    return new_image

In [7]:
from keras.models import Model
from keras.layers import GlobalAveragePooling2D, Input, Lambda, AveragePooling1D
import keras.backend as K
inp = Input((256,256,3))
backbone = DenseNet121(input_tensor = inp, include_top = False)
x = backbone.output
x = GlobalAveragePooling2D()(x)
x = Lambda(lambda x: K.expand_dims(x,axis = -1))(x)
x = AveragePooling1D(4)(x)
out = Lambda(lambda x: x[:,:,0])(x)

m = Model(inp,out)

Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/keras-team/keras-applications/releases/download/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5


In [12]:
features = {}
for b in tqdm_notebook(range(n_batches)):
    start = b*batch_size
    end = (b+1)*batch_size
    batch_pets = pet_ids[start:end]
    batch_images = np.zeros((len(batch_pets),img_size,img_size,3))
    for i,pet_id in enumerate(batch_pets):
        try:
            batch_images[i] = load_image("train_images/", pet_id)
        except:
            pass
    batch_preds = m.predict(batch_images)
    for i,pet_id in enumerate(batch_pets):
        features[pet_id] = batch_preds[i]

HBox(children=(IntProgress(value=0, max=938), HTML(value='')))




In [13]:
train_feats = pd.DataFrame.from_dict(features, orient='index')

In [14]:
train_feats.to_csv('train_img_features.csv')
train_feats.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
86e1089a3,0.001913,0.200123,0.034662,0.011279,0.22124,0.002751,0.001853,0.004002,0.114432,0.029524,...,0.017692,0.004894,0.045289,0.03007,0.013002,0.045237,0.051465,0.071165,0.063994,0.055685
6296e909a,0.001913,0.200123,0.034662,0.011279,0.22124,0.002751,0.001853,0.004002,0.114432,0.029524,...,0.017692,0.004894,0.045289,0.03007,0.013002,0.045237,0.051465,0.071165,0.063994,0.055685
3422e4906,0.001913,0.200123,0.034662,0.011279,0.22124,0.002751,0.001853,0.004002,0.114432,0.029524,...,0.017692,0.004894,0.045289,0.03007,0.013002,0.045237,0.051465,0.071165,0.063994,0.055685
5842f1ff5,0.001913,0.200123,0.034662,0.011279,0.22124,0.002751,0.001853,0.004002,0.114432,0.029524,...,0.017692,0.004894,0.045289,0.03007,0.013002,0.045237,0.051465,0.071165,0.063994,0.055685
850a43f90,0.001913,0.200123,0.034662,0.011279,0.22124,0.002751,0.001853,0.004002,0.114432,0.029524,...,0.017692,0.004894,0.045289,0.03007,0.013002,0.045237,0.051465,0.071165,0.063994,0.055685


In [17]:
load_image("train_images/", pet_id)

AttributeError: 'NoneType' object has no attribute 'shape'