In [3]:
import datetime
import os

import h5py
import numpy as np
import pandas as pd

# Star imports are kept in their original relative order: a later one can
# shadow names exported by an earlier one.
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
from keras.layers.normalization import *

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
np.random.seed(2017)

In [4]:
def finetune_model(input_shape):
    """Build a binary classification head on top of pre-extracted features.

    The network is ``Input -> BatchNormalization -> Dense(1, sigmoid)``.
    No dropout is applied.

    Args:
        input_shape: shape of one feature vector (without the batch axis),
            passed straight to ``Input``.

    Returns:
        An uncompiled ``Model`` mapping the input tensor to a single
        sigmoid output.
    """
    input_tensor = Input(input_shape)
    # Start the layer chain from the input tensor. Previously ``x`` was read
    # before it was ever assigned, so every call raised UnboundLocalError.
    x = input_tensor
    x = BatchNormalization()(x)
    x = Dense(1, activation='sigmoid')(x)
    model = Model(input_tensor, x)

    return model

    

In [5]:
# Settings reused by later cells.
batch_size = 128
epochs = 20

# One HDF5 file per network; each is read for its 'train', 'valid', 'label'
# and 'valid_label' datasets. The order of this list fixes the column layout
# of the concatenated feature matrix.
file_features = [
    "features/finetune_ResNet50BN_160.h5",
    "features/finetune_VGG16BN_15.h5",
    "features/finetune_InceptionV3BN_200.h5"
]

train_parts, valid_parts = [], []
for filename in file_features:
    with h5py.File(filename, 'r') as h:
        train_parts.append(np.array(h['train']))
        valid_parts.append(np.array(h['valid']))
        # Labels are re-read from every file; the values from the last file
        # are the ones that remain bound after the loop.
        y_train = np.array(h['label'])
        y_valid = np.array(h['valid_label'])

# Join the per-network features side by side (one row per sample).
X_train = np.concatenate(train_parts, axis=1)
X_valid = np.concatenate(valid_parts, axis=1)

# Shuffle features and labels together, then one-hot encode the labels by
# indexing rows of a 10x10 identity matrix.
one_hot_rows = np.eye(10)
X_train, y_train = shuffle(X_train, y_train)
y_train = one_hot_rows[y_train]
X_valid, y_valid = shuffle(X_valid, y_valid)
y_valid = one_hot_rows[y_valid]

print(X_train.shape)
print(y_train.shape)

(20787, 4608)
(20787, 10)


In [6]:
def make_model(input_shape, num_classes=10):
    """Build a softmax classifier over pre-extracted feature vectors.

    The network is ``Input -> BatchNormalization -> Dense(num_classes, softmax)``.
    No dropout is applied.

    Args:
        input_shape: shape of one feature vector (without the batch axis),
            passed straight to ``Input``.
        num_classes: number of output units of the softmax layer. Defaults
            to 10, the value that used to be hard-coded.

    Returns:
        An uncompiled ``Model`` mapping the input tensor to the softmax output.
    """
    input_tensor = Input(input_shape)
    x = input_tensor
    x = BatchNormalization()(x)
    x = Dense(num_classes, activation='softmax')(x)
    model = Model(input_tensor, x)

    return model

# Classifier over the concatenated feature vectors; the input shape is the
# per-sample shape of X_train.
model_merged = make_model(X_train.shape[1:])

print("Adam")
model_merged.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# NOTE: this trains for a literal 10 epochs; the module-level `epochs`
# setting is not used here.
model_merged.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
    batch_size=batch_size,
)
model_merged.save("models/mixed-model.h5")
print("model save successed")

Adam
Train on 20787 samples, validate on 1637 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
model save successed


In [11]:
def gen_kaggle_csv(model, X_test,  model_image_size, csv_name):
    """Predict on ``X_test`` and write a submission CSV sorted by image name.

    The CSV has an ``img`` column followed by one ``c<k>`` column per
    predicted class, with probabilities clipped to [0.005, 0.995] and
    written with three decimals.

    Args:
        model: object exposing ``predict(X, verbose=...)`` that returns a
            2-D array of per-class probabilities (one row per sample).
        X_test: features handed to ``model.predict``.
        model_image_size: side length used as the (square) target size of
            the directory generator. The generator is only used for its
            file listing.
        csv_name: path of the CSV file to write.

    Raises:
        ValueError: if the number of files found under
            ``dataset/to_prediction/`` differs from the number of
            prediction rows.
    """
    data_root = "dataset"  # renamed from `dir`, which shadowed the builtin
    y_pred = model.predict(X_test, verbose=1)
    print(y_pred[:3])
    y_pred = y_pred.clip(min=0.005, max=0.995)
    print()
    print(y_pred[:3])

    gen = ImageDataGenerator()
    # shuffle=False is passed so the listing is not permuted; presumably it
    # then matches the order in which the test features were extracted --
    # verify against the feature-extraction notebook.
    test_generator = gen.flow_from_directory(data_root + "/to_prediction/", (model_image_size, model_image_size), shuffle=False,
                                             batch_size=16, class_mode=None)

    names = [os.path.basename(fname) for fname in test_generator.filenames]
    if len(names) != len(y_pred):
        raise ValueError(
            "found %d test images but %d prediction rows" % (len(names), len(y_pred))
        )

    # Build the frame straight from the float array. The previous version
    # packed each row as [name, <array>] (two elements) and then indexed
    # columns 1..10, which cannot work, and would have stringified the
    # probabilities so that float_format was ignored.
    class_columns = ['c%d' % k for k in range(y_pred.shape[1])]
    df = pd.DataFrame(y_pred, columns=class_columns)
    df.insert(0, 'img', names)
    df = df.sort_values(by='img')
    df.to_csv(csv_name, index=False, float_format='%.3f')

print("done")

done


In [9]:
def make_predictions(MODEL, X_test, image_size, batch_size):
    """Run ``MODEL`` on ``X_test`` and list the base names of the test images.

    Args:
        MODEL: object exposing ``predict(X, verbose=...)``.
        X_test: features handed to ``MODEL.predict``.
        image_size: target size forwarded to the directory generator.
        batch_size: batch size forwarded to the directory generator.

    Returns:
        A ``(y_pred, test_id)`` pair: the raw (unclipped) output of
        ``MODEL.predict`` and a list with the base file name of every entry
        the generator found under ``dataset/to_prediction``.
    """
    scores = MODEL.predict(X_test, verbose=1)

    # The generator is only consulted for its file listing. shuffle=False is
    # passed; presumably this keeps the names aligned with the rows of
    # X_test -- verify against the feature-extraction step.
    listing = ImageDataGenerator().flow_from_directory(
        'dataset/to_prediction',
        image_size,
        shuffle=False,
        batch_size=batch_size,
        class_mode=None,
    )
    image_names = [os.path.basename(path) for path in listing.filenames]

    return scores, image_names

def create_submission(predictions, test_id):
    """Write predictions to ``subm/submission_<timestamp>.csv``.

    Args:
        predictions: 2-D data with ten columns, stored as ``c0`` .. ``c9``.
        test_id: one image identifier per prediction row, stored in the
            ``img`` column (appended after the class columns).

    The ``subm`` directory is created if it does not exist. The timestamp has
    minute resolution, so a second call within the same minute overwrites the
    first file. Nothing is returned.
    """
    class_columns = ['c%d' % k for k in range(10)]
    frame = pd.DataFrame(predictions, columns=class_columns)
    frame['img'] = pd.Series(test_id, index=frame.index)

    stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")

    out_dir = 'subm'
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    target = os.path.join(out_dir, 'submission_' + stamp + '.csv')
    frame.to_csv(target, index=False)

In [11]:
# Test-set feature files, listed in the SAME network order as the training
# feature list (ResNet50, VGG16, InceptionV3). The features are concatenated
# column-wise, so a different order here would feed every column block to
# weights trained on another network's features -- without any shape error,
# because the total width stays the same.
test_features = [
    "features/finetune_test_ResNet50_BN160.h5",
    "features/finetune_test_VGG16_BN15.h5",
    "features/finetune_test_InceptionV3BN_200.h5"
]
X_test = []
for filename in test_features:
    with h5py.File(filename, 'r') as h:
        X_test.append(np.array(h['test']))

# Join the per-network features side by side (one row per test sample).
X_test = np.concatenate(X_test, axis=1)
predictions, test_id = make_predictions(model_merged, X_test, (299, 299), batch_size)



In [12]:
# Write the predictions returned by make_predictions to a timestamped CSV under subm/.
create_submission(predictions, test_id)