In [1]:
import os
import pickle
import json
import numpy as np
from keras.layers import Input,Dense
from keras.models import Model
from keras.applications.nasnet import NASNetMobile
from keras.applications.nasnet import preprocess_input
from keras.applications.nasnet import decode_predictions

from keras.layers import AveragePooling2D
from keras.preprocessing import image
from keras.models import Model
import tensorflow as tf
sess = tf.Session()
from keras import backend as K
K.set_session(sess)
from keras.layers.normalization import BatchNormalization
from sklearn.metrics import f1_score

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
num_labels = 228 + 1

In [3]:
#getting annotations from file
with open('train.json') as train_json:
    train_data = json.load(train_json)
label_mapping = {}
for obj in train_data['annotations']:
    label_mapping[obj['imageId']] = obj['labelId']

In [4]:
#getting annotations from validation file
with open('validation.json') as val_json:
    train_data = json.load(val_json)
validation_label_mapping = {}
for obj in train_data['annotations']:
    validation_label_mapping[obj['imageId']] = obj['labelId']

In [5]:
#getting annotations from file
with open('test.json') as test_json:
    test_data = json.load(test_json)
len(test_data['images'])

39706

In [6]:
base_model = NASNetMobile(
    input_shape=(224,224,3),
    weights='imagenet',
    include_top=True
)

model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer(index=749).output
)

In [7]:
def extract_image(image_path,model):
        img = image.load_img(image_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        
        return model.predict(x)

In [8]:
def extract_features_test(folder):
    if os.path.isfile(folder+"_data_nasnet.pckl"):
        with open(folder+"_data_nasnet.pckl",'rb') as write_file:
            image_features,img_ids = pickle.load(write_file)
    else:
        with open(folder+"_data_nasnet.pckl",'wb') as output:
            with open('test.json') as test_json:
                test_data = json.load(test_json)
            num_test_images = len(test_data['images'])

            image_features = []
            img_ids = []
            for i in range(num_test_images):
                print(i)
                try:
                    features = extract_image('data/test_images/'+str(i+1)+".jpg",model)
                except:
                    features = np.random.randn(1056)
                features = np.array(features).reshape(1,-1)
                image_features.append(features)
                img_ids.append(i+1)
            pickle.dump((image_features,img_ids),output)
    return image_features,img_ids

In [9]:
def extract_features(folder,validation_label_mapping):
    if os.path.isfile(folder+"_data_nasnet.pckl"):
        with open(folder+"_data_nasnet.pckl",'rb') as write_file:
            validation_features,validation_labels = pickle.load(write_file)
    else:
        with open(folder+"_data_nasnet.pckl",'wb') as output:
            validation_files = os.listdir('data/'+folder+'_images/')
            num_validation_files = len(validation_files)
            batch_sz = 100
            num_batches = int(num_validation_files/batch_sz)

            print("Number of batches",num_batches)
            validation_features = []
            validation_labels = []
            for idx,file in enumerate(validation_files):
                print(idx)
                image_feature = extract_image('data/'+folder+'_images/'+file,model)
                validation_features.append(image_feature)
                labels = np.zeros(num_labels)
                for label in validation_label_mapping[file.split(".")[0]]:
                    labels[int(label)] = 1
                validation_labels.append(np.array(labels))
            pickle.dump((validation_features,validation_labels),output)
    return validation_features,validation_labels

In [10]:
validation_features,validation_labels = extract_features('validation',validation_label_mapping)

In [11]:
train_features,train_labels = extract_features('train',label_mapping)

In [12]:
test_features,test_ids = extract_features_test('test')

In [13]:
np.array(train_features).shape

(64097, 1, 1056)

In [16]:
image_input = Input(shape=(1056,))
middle = BatchNormalization()(image_input)
middle = Dense(2048,activation='sigmoid')(middle)
middle = BatchNormalization()(image_input)
middle = Dense(2048,activation='sigmoid')(image_input)
middle = BatchNormalization()(middle)
output = Dense(num_labels,activation='sigmoid')(middle)
model_dense = Model(image_input, output)
model_dense.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['acc'])

In [17]:
train_files = os.listdir('data/train_images/')
num_train_files = len(train_files)
batch_sz = 100
num_batches = int(num_train_files/batch_sz)
epochs = 2
print("Number of batches",num_batches)
for j in range(epochs):
    for i in range(num_batches):
        print("Batch number",i)
        batch_features = np.squeeze(np.array(train_features),axis=1)[i*batch_sz:i*batch_sz+batch_sz,:]
        batch_labels = np.array(train_labels)[i*batch_sz:i*batch_sz+batch_sz,:]
        model_dense.fit(batch_features,batch_labels,verbose=0)
        if i%10==0:
            predicted = model_dense.predict(np.squeeze(np.array(validation_features),axis=1))
            predicted[predicted>=0.3] = 1
            predicted[predicted<0.3] = 0
            print(f1_score(np.array(validation_labels),predicted,average='micro'))
            #print(model_dense.evaluat)e(np.squeeze(np.array(validation_features),axis=1),np.array(validation_labels)))

Number of batches 640
Batch number 0
0.06637024332193005
Batch number 1
Batch number 2
Batch number 3
Batch number 4
Batch number 5
Batch number 6
Batch number 7
Batch number 8
Batch number 9
Batch number 10
0.11287821584596298
Batch number 11
Batch number 12
Batch number 13
Batch number 14
Batch number 15
Batch number 16
Batch number 17
Batch number 18
Batch number 19
Batch number 20
0.12104148403155936
Batch number 21
Batch number 22
Batch number 23
Batch number 24
Batch number 25
Batch number 26
Batch number 27
Batch number 28
Batch number 29
Batch number 30
0.1128553547325668
Batch number 31
Batch number 32
Batch number 33
Batch number 34
Batch number 35
Batch number 36
Batch number 37
Batch number 38
Batch number 39
Batch number 40
0.07098794092549023
Batch number 41
Batch number 42
Batch number 43
Batch number 44
Batch number 45
Batch number 46
Batch number 47
Batch number 48
Batch number 49
Batch number 50
0.14220187891106378
Batch number 51
Batch number 52
Batch number 53
Batch

Batch number 437
Batch number 438
Batch number 439
Batch number 440
0.38070803744209875
Batch number 441
Batch number 442
Batch number 443
Batch number 444
Batch number 445
Batch number 446
Batch number 447
Batch number 448
Batch number 449
Batch number 450
0.42803723046563225
Batch number 451
Batch number 452
Batch number 453
Batch number 454
Batch number 455
Batch number 456
Batch number 457
Batch number 458
Batch number 459
Batch number 460
0.4168111659733292
Batch number 461
Batch number 462
Batch number 463
Batch number 464
Batch number 465
Batch number 466
Batch number 467
Batch number 468
Batch number 469
Batch number 470
0.42195472783708077
Batch number 471
Batch number 472
Batch number 473
Batch number 474
Batch number 475
Batch number 476
Batch number 477
Batch number 478
Batch number 479
Batch number 480
0.4345924662423637
Batch number 481
Batch number 482
Batch number 483
Batch number 484
Batch number 485
Batch number 486
Batch number 487
Batch number 488
Batch number 489
B

Batch number 236
Batch number 237
Batch number 238
Batch number 239
Batch number 240
0.43519675678970954
Batch number 241
Batch number 242
Batch number 243
Batch number 244
Batch number 245
Batch number 246
Batch number 247
Batch number 248
Batch number 249
Batch number 250
0.4107229165228199
Batch number 251
Batch number 252
Batch number 253
Batch number 254
Batch number 255
Batch number 256
Batch number 257
Batch number 258
Batch number 259
Batch number 260
0.44064106172144624
Batch number 261
Batch number 262
Batch number 263
Batch number 264
Batch number 265
Batch number 266
Batch number 267
Batch number 268
Batch number 269
Batch number 270
0.4391516975453776
Batch number 271
Batch number 272
Batch number 273
Batch number 274
Batch number 275
Batch number 276
Batch number 277
Batch number 278
Batch number 279
Batch number 280
0.4286460373416895
Batch number 281
Batch number 282
Batch number 283
Batch number 284
Batch number 285
Batch number 286
Batch number 287
Batch number 288
Ba

In [18]:
outputs = []
for test_image in test_features:
    output = model_dense.predict(test_image)
    #print(output)
    output[output>=0.3] = 1
    output[output<0.3] = 0    
    for i in range(output.shape[0]):
        arr = np.where(output[i]==1)[0]
        outputs.append(" ".join(map(str, arr.tolist())))    

KeyboardInterrupt: 

In [None]:
import pandas as pd

In [None]:
df = pd.DataFrame()

In [None]:
df['image_id'] = test_ids
df['label_id'] = outputs

In [None]:
df.to_csv("predictions.csv",index=False)

In [None]:
train_files = os.listdir('data/train_images/')
len(train_files)

In [21]:
validation_predictions = model_dense.predict(np.squeeze(np.array(validation_features),axis=1))
test_predictions = model_dense.predict(np.squeeze(np.array(test_features),axis=1))
with open("nasnet_results.pckl",'wb') as write_file:
    pickle.dump((validation_predictions,validation_labels,test_predictions),write_file)

In [22]:
with open("nasnet_results.pckl",'rb') as file:
    a = pickle.load(file)