In [4]:
import os
import pickle
import json
import numpy as np
from keras.layers import Input,Dense
from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.applications.resnet50 import decode_predictions

from keras.layers import AveragePooling2D
from keras.preprocessing import image
from keras.models import Model
import tensorflow as tf
sess = tf.Session()
from keras import backend as K
K.set_session(sess)
from keras.layers.normalization import BatchNormalization
from sklearn.metrics import f1_score

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [5]:
num_labels = 228 + 1

In [6]:
#getting annotations from file
with open('train.json') as train_json:
    train_data = json.load(train_json)
label_mapping = {}
for obj in train_data['annotations']:
    label_mapping[obj['imageId']] = obj['labelId']

In [4]:
#getting annotations from validation file
with open('validation.json') as val_json:
    validation_data = json.load(val_json)
validation_label_mapping = {}
for obj in train_data['annotations']:
    validation_label_mapping[obj['imageId']] = obj['labelId']

In [5]:
#getting annotations from file
with open('test.json') as test_json:
    test_data = json.load(test_json)
len(test_data['images'])

39706

In [6]:
base_model = ResNet50(
    input_shape=(224,224,3),
    weights='imagenet',
    include_top=True
)

model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('avg_pool').output
)

In [7]:
def extract_image(image_path,model):
        img = image.load_img(image_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        
        return model.predict(x)

In [8]:
image_input = Input(shape=(2048,))
middle = Dense(2048,activation='relu')(image_input)
middle = BatchNormalization()(image_input)
middle = Dense(2048,activation='relu')(image_input)
middle = BatchNormalization()(middle)
output = Dense(num_labels,activation='sigmoid')(middle)
model_dense = Model(image_input, output)
model_dense.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

In [9]:
def extract_features_test(folder):
    if os.path.isfile(folder+"_data_resnet50.pckl"):
        with open(folder+"_data_resnet50.pckl",'rb') as write_file:
            image_features,img_ids = pickle.load(write_file)
    else:
        with open(folder+"_data_resnet50.pckl",'wb') as output:
            with open('test.json') as test_json:
                test_data = json.load(test_json)
            num_test_images = len(test_data['images'])

            image_features = []
            img_ids = []
            for i in range(num_test_images):
                print(i)
                try:
                    features = extract_image('data/test_images/'+str(i+1)+".jpg",model)
                except:
                    features = np.random.randn(2048)
                features = np.array(features).reshape(1,-1)
                image_features.append(features)
                img_ids.append(i+1)
            pickle.dump((image_features,img_ids),output)
    return image_features,img_ids

In [10]:
def extract_features(folder,validation_label_mapping):
    if os.path.isfile(folder+"_data_resnet50.pckl"):
        with open(folder+"_data_resnet50.pckl",'rb') as write_file:
            validation_features,validation_labels = pickle.load(write_file)
    else:
        with open(folder+"_data_resnet50.pckl",'wb') as output:
            validation_files = os.listdir('data/'+folder+'_images/')
            num_validation_files = len(validation_files)
            batch_sz = 100
            num_batches = int(num_validation_files/batch_sz)

            print("Number of batches",num_batches)
            validation_features = []
            validation_labels = []
            for idx,file in enumerate(validation_files):
                print(idx)
                image_feature = extract_image('data/'+folder+'_images/'+file,model)
                validation_features.append(image_feature)
                labels = np.zeros(num_labels)
                for label in validation_label_mapping[file.split(".")[0]]:
                    labels[int(label)] = 1
                validation_labels.append(np.array(labels))
            pickle.dump((validation_features,validation_labels),output)
    return validation_features,validation_labels

In [11]:
validation_features,validation_labels = extract_features('validation',validation_label_mapping)

In [12]:
train_features,train_labels = extract_features('train',label_mapping)

In [13]:
test_features,test_ids = extract_features_test('test')

In [14]:
train_features = np.squeeze(np.squeeze(np.squeeze(np.array(train_features),axis=1),axis=1),axis=1)
test_features = np.squeeze(np.array(test_features),axis=1)
validation_features = np.squeeze(np.squeeze(np.squeeze(np.array(validation_features),axis=1),axis=1),axis=1)

In [18]:
np.array(validation_labels).shape

(9897, 229)

In [20]:
train_files = os.listdir('data/train_images/')
num_train_files = len(train_files)
batch_sz = 100
num_batches = int(num_train_files/batch_sz)

print("Number of batches",num_batches)
for i in range(num_batches):
    print("Batch number",i)
    batch_features = train_features[i*batch_sz:i*batch_sz+batch_sz,:]
    batch_labels = np.array(train_labels)[i*batch_sz:i*batch_sz+batch_sz,:]
    model_dense.fit(batch_features,batch_labels,verbose=0)
    if i%10==0:
        predicted = model_dense.predict(validation_features)
        predicted[predicted>=0.5] = 1
        predicted[predicted<0.5] = 0
        print(f1_score(np.array(validation_labels),predicted,average='micro'))
        print(model_dense.evaluate(validation_features,np.array(validation_labels)))

Number of batches 640
Batch number 0
0.08375144050359042
[0.8927846016264256, 0.4962392056290641]
Batch number 1
Batch number 2
Batch number 3
Batch number 4
Batch number 5
Batch number 6
Batch number 7
Batch number 8
Batch number 9
Batch number 10
0.2124291679999352
[0.3127943170369364, 0.9142076089380485]
Batch number 11
Batch number 12
Batch number 13
Batch number 14
Batch number 15
Batch number 16
Batch number 17
Batch number 18
Batch number 19
Batch number 20
0.35467074480674354
[0.11464472069645261, 0.9687206006066973]
Batch number 21
Batch number 22
Batch number 23
Batch number 24
Batch number 25
Batch number 26
Batch number 27
Batch number 28
Batch number 29
Batch number 30
0.3576884301898399
[0.11437093693116729, 0.9668286245339228]
Batch number 31
Batch number 32
Batch number 33
Batch number 34
Batch number 35
Batch number 36
Batch number 37
Batch number 38
Batch number 39
Batch number 40
0.3494003426613364
[0.10950602328456917, 0.9698413147810216]
Batch number 41
Batch numbe

Batch number 284
Batch number 285
Batch number 286
Batch number 287
Batch number 288
Batch number 289
Batch number 290
0.3940689545739045
[0.10191001004946043, 0.969672323784901]
Batch number 291
Batch number 292
Batch number 293
Batch number 294
Batch number 295
Batch number 296
Batch number 297
Batch number 298
Batch number 299
Batch number 300
0.3932766193453739
[0.09906895154185809, 0.9700738426361516]
Batch number 301
Batch number 302
Batch number 303
Batch number 304
Batch number 305
Batch number 306
Batch number 307
Batch number 308
Batch number 309
Batch number 310
0.4095996880455879
[0.0969913278535013, 0.9706059600881245]
Batch number 311
Batch number 312
Batch number 313
Batch number 314
Batch number 315
Batch number 316
Batch number 317
Batch number 318
Batch number 319
Batch number 320
0.4170058875941229
[0.09774578595096395, 0.9705525714363175]
Batch number 321
Batch number 322
Batch number 323
Batch number 324
Batch number 325
Batch number 326
Batch number 327
Batch numb

0.41081866227993413
[0.09587337495754425, 0.970290478792072]
Batch number 571
Batch number 572
Batch number 573
Batch number 574
Batch number 575
Batch number 576
Batch number 577
Batch number 578
Batch number 579
Batch number 580
0.3948698577158995
[0.09756308623208558, 0.9703134242194921]
Batch number 581
Batch number 582
Batch number 583
Batch number 584
Batch number 585
Batch number 586
Batch number 587
Batch number 588
Batch number 589
Batch number 590
0.4142212189616253
[0.0963537168614826, 0.9702304780296549]
Batch number 591
Batch number 592
Batch number 593
Batch number 594
Batch number 595
Batch number 596
Batch number 597
Batch number 598
Batch number 599
Batch number 600
0.36158181351631324
[0.10220053399709471, 0.9703606372230328]
Batch number 601
Batch number 602
Batch number 603
Batch number 604
Batch number 605
Batch number 606
Batch number 607
Batch number 608
Batch number 609
Batch number 610
0.4144616190725701
[0.09610636543297726, 0.9714019311381383]
Batch number 61

In [None]:
outputs = []
for test_image in test_features:
    output = model_dense.predict(test_image)
    #print(output)
    output[output>=0.5] = 1
    output[output<0.5] = 0    
    for i in range(output.shape[0]):
        arr = np.where(output[i]==1)[0]
        outputs.append(" ".join(map(str, arr.tolist())))    

In [None]:
import pandas as pd

In [None]:
df = pd.DataFrame()

In [None]:
df['image_id'] = test_ids
df['label_id'] = outputs

In [None]:
df.to_csv("predictions.csv",index=False)

In [None]:
train_files = os.listdir('data/train_images/')
len(train_files)

In [22]:
validation_predictions = model_dense.predict(validation_features)
test_predictions = model_dense.predict(test_features)
with open("resnet_results.pckl",'wb') as write_file:
    pickle.dump((validation_predictions,validation_labels,test_predictions),write_file)

In [None]:
import pandas as pd
df = pd.DataFrame()
df['image_id'] = test_ids
df['label_id'] = outputs

In [None]:
def extract_features_test(folder):
    if os.path.isfile(folder+"_data_vgg19.pckl"):
        with open(folder+"_data_vgg19.pckl",'rb') as write_file:
            image_features,img_ids = pickle.load(write_file)
    else:
        with open(folder+"_data_vgg19.pckl",'wb') as output:
            with open('test.json') as test_json:
                test_data = json.load(test_json)
            num_test_images = len(test_data['images'])

            image_features = []
            img_ids = []
            for i in range(num_test_images):
                print(i)
                try:
                    features = extract_image('data/test_images/'+str(i+1)+".jpg",model)
                except:
                    features = np.random.randn(4096)
                features = np.array(features).reshape(1,-1)
                image_features.append(features)
                img_ids.append(i+1)
            pickle.dump((image_features,img_ids),output)
    return image_features,img_ids
test_features,test_ids = extract_features_test('test')