In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc

import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.models import load_model

import cv2
import platform
from tqdm import tqdm
import h5py

In [4]:
# load keras model
model = load_model('../models/simple_cnn_weights_09_0.15.hdf5')
image_size = (150,150)

In [5]:
# get the file names for all test images
file_names = os.listdir('../data/test-jpg')
test_images = [name.split('.')[0] for name in file_names]

In [6]:
# load test data function
def load_test_images(test_images):
    x_test = []
    for f in tqdm(test_images, miniters=10):
        img = cv2.imread('../data/test-jpg/{}.jpg'.format(f))
        x_test.append(cv2.resize(img, image_size))
    x_test = np.array(x_test, np.float16)/255.
    return x_test   

# test load_test_images
x_test = load_test_images(test_images[:10])
print('x_test shape:      {}'.format(x_test.shape))

100%|██████████| 10/10 [00:00<00:00, 87.05it/s]

x_test shape:      (10, 150, 150, 3)





In [7]:
# get inv_label_map
df_train = pd.read_csv('../data/train_v2.csv')

flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

In [8]:
def create_submission_file(test_images, p_test, threshold):
    p_test_threshold = p_test > threshold
    p_tags = []
    for p_single in p_test_threshold:
        p_tags.append(" ".join([inv_label_map[i] for i, tag_pred in enumerate(p_single) if tag_pred == 1]))
    submission_df = pd.DataFrame({'image_name': test_images})
    submission_df['tags'] = p_tags
    return submission_df
    
# test... requires x_test from above
p_test = model.predict(x_test, batch_size=10)
submission_df = create_submission_file(test_images[:10], p_test, 0.2)

submission_df

Unnamed: 0,image_name,tags
0,test_38951,blooming primary clear
1,test_24774,road slash_burn partly_cloudy primary blow_dow...
2,test_3232,blooming primary clear
3,test_237,clear water
4,file_2684,road slash_burn partly_cloudy primary blow_dow...
5,test_36293,road slash_burn partly_cloudy primary blow_dow...
6,test_18892,road primary clear
7,test_18828,road clear water
8,test_26789,selective_logging
9,file_3965,road slash_burn partly_cloudy primary blow_dow...


In [10]:
chunk_size = 12800
test_image_chunks = [test_images[x:x+chunk_size] for x in range(0, len(test_images), chunk_size)]
sub_list = []
for test_image_chunk in test_image_chunks:
    x_test = load_test_images(test_image_chunk)
    p_test = model.predict(x_test, batch_size=128)
    submission_df = create_submission_file(test_image_chunk, p_test, 0.2)
    sub_list.append(submission_df)
submission_df_final = pd.concat(sub_list)
submission_df_final.head()

100%|██████████| 12800/12800 [00:22<00:00, 568.54it/s]
100%|██████████| 12800/12800 [00:22<00:00, 574.20it/s]
100%|██████████| 12800/12800 [00:22<00:00, 580.93it/s]
100%|██████████| 12800/12800 [00:29<00:00, 436.13it/s]
100%|██████████| 9991/9991 [00:24<00:00, 408.69it/s]


Unnamed: 0,image_name,tags
0,test_38951,blooming primary clear
1,test_24774,road slash_burn partly_cloudy primary blow_dow...
2,test_3232,blooming primary clear
3,test_237,clear water
4,file_2684,road slash_burn partly_cloudy primary blow_dow...


In [19]:
submission_df_final.to_csv('../submissions/submission_aws_1.csv', index=False)

In [18]:
len(submission_df_final)

61191

In [None]:
# attempt 1 - 0.839 (ankora simple keras)
# https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
# attempt 2 - 0.863 (keras blog link above)
