In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc

import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.models import load_model

import cv2
import platform
from tqdm import tqdm
import h5py
import yaml

Using TensorFlow backend.


In [2]:
# load keras model
model = load_model('../models/simple_cnn_weights_00_0.20.hdf5')
image_size = (150,150)

In [3]:
# get the file names for all test images
file_names = os.listdir('../data/test-jpg')
test_images = [name.split('.')[0] for name in file_names]

In [4]:
# load test data function
def load_test_images(test_images):
    x_test = []
    for f in tqdm(test_images, miniters=10):
        img = cv2.imread('../data/test-jpg/{}.jpg'.format(f))
        x_test.append(cv2.resize(img, image_size))
    x_test = np.array(x_test, np.float16)/255.
    return x_test   

# test load_test_images
x_test = load_test_images(test_images[:10])
print('x_test shape:      {}'.format(x_test.shape))

100%|██████████| 10/10 [00:00<00:00, 87.43it/s]

x_test shape:      (10, 150, 150, 3)





In [5]:
with open('../label_maps.yml', 'r') as lablels:
    label_data = yaml.load(lablels)
inv_label_map = label_data['inv_label_map']
label_map = label_data['label_map']

In [6]:
def create_submission_file(test_images, p_test, threshold):
    p_test_threshold = p_test > threshold
    p_tags = []
    for p_single in p_test_threshold:
        p_tags.append(" ".join([inv_label_map[i] for i, tag_pred in enumerate(p_single) if tag_pred == 1]))
    submission_df = pd.DataFrame({'image_name': test_images})
    submission_df['tags'] = p_tags
    return submission_df
    
# test... requires x_test from above
p_test = model.predict(x_test, batch_size=10)
submission_df = create_submission_file(test_images[:10], p_test, 0.2)

submission_df

Unnamed: 0,image_name,tags
0,test_38951,road agriculture water clear partly_cloudy pri...
1,test_24774,road agriculture water clear partly_cloudy pri...
2,test_3232,road agriculture water haze clear partly_cloud...
3,test_237,road agriculture water clear partly_cloudy pri...
4,file_2684,agriculture clear primary
5,test_36293,road agriculture water clear partly_cloudy pri...
6,test_18892,road agriculture water clear partly_cloudy pri...
7,test_18828,road agriculture water clear partly_cloudy pri...
8,test_26789,cloudy partly_cloudy primary
9,file_3965,road agriculture water clear partly_cloudy pri...


In [7]:
chunk_size = 12800
test_image_chunks = [test_images[x:x+chunk_size] for x in range(0, len(test_images), chunk_size)]
sub_list = []
for test_image_chunk in test_image_chunks:
    x_test = load_test_images(test_image_chunk)
    p_test = model.predict(x_test, batch_size=128)
    submission_df = create_submission_file(test_image_chunk, p_test, 0.2)
    sub_list.append(submission_df)
submission_df_final = pd.concat(sub_list)
submission_df_final.head()

100%|██████████| 12800/12800 [00:31<00:00, 401.16it/s]
100%|██████████| 12800/12800 [00:31<00:00, 410.73it/s]
100%|██████████| 12800/12800 [00:31<00:00, 405.64it/s]
100%|██████████| 12800/12800 [00:31<00:00, 409.56it/s]
100%|██████████| 9991/9991 [00:24<00:00, 402.46it/s]


Unnamed: 0,image_name,tags
0,test_38951,road agriculture water clear partly_cloudy pri...
1,test_24774,road agriculture water clear partly_cloudy pri...
2,test_3232,road agriculture water haze clear partly_cloud...
3,test_237,road agriculture water clear partly_cloudy pri...
4,file_2684,agriculture clear primary


In [8]:
submission_df_final.to_csv('../submissions/submission_aws_1.csv', index=False)

In [9]:
len(submission_df_final)

61191

In [None]:
# attempt 1 - 0.839 (ankora simple keras)
# https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
# attempt 2 - 0.863 (keras blog link above)
