In [4]:
import os
import pickle
import json
import numpy as np
from keras.layers import Input,Dense
from keras.models import Model
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input
from keras.applications.inception_v3 import decode_predictions

from keras.layers import AveragePooling2D
from keras.preprocessing import image
from keras.models import Model
import tensorflow as tf
sess = tf.Session()
from keras import backend as K
K.set_session(sess)
from keras.layers.normalization import BatchNormalization

In [6]:
num_labels = 228 + 1

In [13]:
#getting annotations from file
with open('train.json') as train_json:
    train_data = json.load(train_json)
label_mapping = {}
for obj in train_data['annotations']:
    label_mapping[obj['imageId']] = obj['labelId']

In [14]:
#getting annotations from validation file
with open('validation.json') as val_json:
    train_data = json.load(val_json)
validation_label_mapping = {}
for obj in train_data['annotations']:
    validation_label_mapping[obj['imageId']] = obj['labelId']

In [15]:
#getting annotations from file
with open('test.json') as test_json:
    test_data = json.load(test_json)
len(test_data['images'])

39706

In [7]:
base_model = InceptionV3(
    input_shape=(299,299,3),
    weights='imagenet',
    include_top=True
)

model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('avg_pool').output
)

In [17]:
def extract_image(image_path,model):
        img = image.load_img(image_path, target_size=(299, 299))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        
        return model.predict(x)

In [8]:
image_input = Input(shape=(2048,))
middle = Dense(2048,activation='relu')(image_input)
middle = BatchNormalization()(image_input)
middle = Dense(2048,activation='relu')(image_input)
middle = BatchNormalization()(middle)
output = Dense(num_labels,activation='sigmoid')(middle)
model_dense = Model(image_input, output)
model_dense.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

In [48]:
def extract_features_test(folder):
    if os.path.isfile(folder+"_data.pckl"):
        with open(folder+"_data.pckl",'rb') as write_file:
            image_features,img_ids = pickle.load(write_file)
    else:
        with open(folder+"_data.pckl",'wb') as output:
            with open('test.json') as test_json:
                test_data = json.load(test_json)
            num_test_images = len(test_data['images'])

            image_features = []
            img_ids = []
            for i in range(num_test_images):
                print(i)
                try:
                    features = extract_image('data/test_images/'+str(i+1)+".jpg",model)
                except:
                    features = np.random.randn(2048)
                features = np.array(features).reshape(1,-1)
                image_features.append(features)
                img_ids.append(i+1)
            pickle.dump((image_features,img_ids),output)
    return image_features,img_ids

In [20]:
def extract_features(folder,validation_label_mapping):
    if os.path.isfile(folder+"_data.pckl"):
        with open(folder+"_data.pckl",'rb') as write_file:
            validation_features,validation_labels = pickle.load(write_file)
    else:
        with open(folder+"_data.pckl",'wb') as output:
            validation_files = os.listdir('data/'+folder+'_images/')
            num_validation_files = len(validation_files)
            batch_sz = 100
            num_batches = int(num_validation_files/batch_sz)

            print("Number of batches",num_batches)
            validation_features = []
            validation_labels = []
            for idx,file in enumerate(validation_files):
                print(idx)
                image_feature = extract_image('data/'+folder+'_images/'+file,model)
                validation_features.append(image_feature)
                labels = np.zeros(num_labels)
                for label in validation_label_mapping[file.split(".")[0]]:
                    labels[int(label)] = 1
                validation_labels.append(np.array(labels))
            pickle.dump((validation_features,validation_labels),output)
    return validation_features,validation_labels

In [26]:
validation_features,validation_labels = extract_features('validation',validation_label_mapping)

In [25]:
train_features,train_labels = extract_features('train',label_mapping)

In [51]:
test_features,test_ids = extract_features_test('test')

In [41]:
train_files = os.listdir('data/train_images/')
num_train_files = len(train_files)
batch_sz = 100
num_batches = int(num_train_files/batch_sz)

print("Number of batches",num_batches)
for i in range(num_batches):
    print("Batch number",i)
    batch_features = np.squeeze(np.array(train_features),axis=1)[i*batch_sz:i*batch_sz+batch_sz,:]
    batch_labels = np.array(train_labels)[i*batch_sz:i*batch_sz+batch_sz,:]
    model_dense.fit(batch_features,batch_labels,verbose=0)
    if i%10==0:
        print(model_dense.evaluate(np.squeeze(np.array(validation_features),axis=1),np.array(validation_labels)))

Number of batches 640
Batch number 0
[0.09574040513068989, 0.9710374766676741]
Batch number 1
Batch number 2
Batch number 3
Batch number 4
Batch number 5
Batch number 6
Batch number 7
Batch number 8
Batch number 9
Batch number 10
[0.10290178083386989, 0.9700703097013701]
Batch number 11
Batch number 12
Batch number 13
Batch number 14
Batch number 15
Batch number 16
Batch number 17
Batch number 18
Batch number 19
Batch number 20
[0.09719762581956619, 0.9702710664981758]
Batch number 21
Batch number 22
Batch number 23
Batch number 24
Batch number 25
Batch number 26
Batch number 27
Batch number 28
Batch number 29
Batch number 30
[0.0957012731569505, 0.9705759508990153]
Batch number 31
Batch number 32
Batch number 33
Batch number 34
Batch number 35
Batch number 36
Batch number 37
Batch number 38
Batch number 39
Batch number 40
[0.09677194732436029, 0.970995560533846]
Batch number 41
Batch number 42
Batch number 43
Batch number 44
Batch number 45
Batch number 46
Batch number 47
Batch number

Batch number 306
Batch number 307
Batch number 308
Batch number 309
Batch number 310
[0.0948688047667507, 0.9710502730326976]
Batch number 311
Batch number 312
Batch number 313
Batch number 314
Batch number 315
Batch number 316
Batch number 317
Batch number 318
Batch number 319
Batch number 320
[0.09322810295968871, 0.9712042612500464]
Batch number 321
Batch number 322
Batch number 323
Batch number 324
Batch number 325
Batch number 326
Batch number 327
Batch number 328
Batch number 329
Batch number 330
[0.09429675698238177, 0.9712885329973171]
Batch number 331
Batch number 332
Batch number 333
Batch number 334
Batch number 335
Batch number 336
Batch number 337
Batch number 338
Batch number 339
Batch number 340
[0.0944851774470522, 0.9709898225945801]
Batch number 341
Batch number 342
Batch number 343
Batch number 344
Batch number 345
Batch number 346
Batch number 347
Batch number 348
Batch number 349
Batch number 350
[0.09452949672925348, 0.9707449441294821]
Batch number 351
Batch numb

In [42]:
outputs = []
for test_image in test_features:
    output = model_dense.predict(test_image)
    #print(output)
    output[output>=0.5] = 1
    output[output<0.5] = 0    
    for i in range(output.shape[0]):
        arr = np.where(output[i]==1)[0]
        outputs.append(" ".join(map(str, arr.tolist())))    

In [43]:
import pandas as pd

In [44]:
df = pd.DataFrame()

In [45]:
df['image_id'] = test_ids
df['label_id'] = outputs

In [46]:
df.to_csv("predictions.csv",index=False)

In [40]:
train_files = os.listdir('data/train_images/')
len(train_files)

64097