## Load model

In [1]:
import h5py
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


Set `bottleneck` below to indicate whether you followed the bottleneck approach (2A) or the `fit_generator` approach (2, 3).

In [2]:
# Selects the inference path used in the cells below:
#   True  -> load the saved head model ("head_model.h5") and predict from the
#            "test" array stored in the bottlenecks HDF5 file.
#   False -> rebuild the full ResNet50_GAP network, load "best_model_aug.h5"
#            and predict directly from the image generator.
bottleneck = True

## Build model:

In [3]:
if not bottleneck:
    # Full-network path: ResNet50_GAP body topped with a 2-way softmax layer,
    # then the weights saved in "best_model_aug.h5" are loaded into it.
    from resnet50_GAP import ResNet50_GAP
    from keras.models import Model
    from keras.layers import Dense

    print("BUILDING BODY...")
    body = ResNet50_GAP(input_shape=(300,300,3))
    head = Dense(2, activation="softmax")(body.output)
    model = Model(body.input, head)

    print("LOADING WEIGHTS ...")
    model.load_weights("best_model_aug.h5")
else:
    # Bottleneck path: only the saved head model is needed, because its inputs
    # are read from the bottlenecks file further down.
    from keras.models import load_model

    model = load_model("head_model.h5")

## Get test batches for filenames (and for prediction in the non-bottleneck approach)

In [5]:
# Directory handed to flow_from_directory below to enumerate the test images.
# NOTE(review): machine-specific absolute path; adjust for your environment.
TEST_PATH = "D:/GitHub/Kaggle/redux/test"

In [6]:
BATCH_SIZE = 8
gen = ImageDataGenerator()

# shuffle=False keeps a fixed file order, so test_batches.filenames can later be
# matched row-by-row against the predictions. class_mode=None requests image
# batches without labels.
# NOTE(review): target_size is taken from model.input_shape; when the loaded
# model is the bottleneck head this may not be an image size. Confirm this
# generator is only used for its filenames in that mode.
test_batches = gen.flow_from_directory(
    TEST_PATH,
    target_size=model.input_shape[1:3],
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False,
)

Found 12500 images belonging to 1 classes.


## Generate predictions

In [8]:
# HDF5 file whose "test" dataset is fed to the head model when bottleneck is True.
# NOTE(review): machine-specific absolute path; adjust for your environment.
BOTTLENECKS_PATH = "D:/GitHub/Kaggle/redux/300_bottlenecks.h5"

In [9]:
if bottleneck:
    # Open read-only explicitly: h5py releases before 3.0 default to append
    # mode, which would silently create an empty file for a mistyped path.
    with h5py.File(BOTTLENECKS_PATH, "r") as hf:
        X_test = hf["test"][:]
    y_test = model.predict(X_test)

else:
    # Round the number of steps UP so the final, partially filled batch is also
    # predicted. Floor division (12500 // 8 = 1562 steps) would yield only
    # 12496 predictions and leave the last 4 test images without a label.
    n_steps = (test_batches.samples + test_batches.batch_size - 1) // test_batches.batch_size
    y_test = model.predict_generator(test_batches, n_steps)

## Load sample submission

In [11]:
import pandas as pd

In [13]:
# CSV template read below; its "id" and "label" columns are reused for the submission.
# NOTE(review): machine-specific absolute path; adjust for your environment.
SAMPLE_SUBMISSION = "D:/GitHub/Kaggle/redux/sample_submission.csv"

In [14]:
# Template frame: the cells below look rows up by `id` and overwrite `label`.
subm = pd.read_csv(SAMPLE_SUBMISSION)

## Extract ordered ids from ordered bottlenecks

In [15]:
# Each entry of test_batches.filenames is a relative path such as
# "<subdir>\<id>.<ext>" on Windows or "<subdir>/<id>.<ext>" elsewhere.
# Normalise the separator before taking the file name, so the id extraction
# does not raise IndexError on platforms that use "/" as the path separator.
ids = [
    int(x.replace("\\", "/").rsplit("/", 1)[-1].split(".")[0])
    for x in test_batches.filenames
]

## Fill the sample submission

In [16]:
# Fill `label` with the second prediction column (y_test[:, 1]) for every id,
# in one vectorised pass instead of one full-column scan per id (the per-id
# loop is quadratic in the number of test images).
if len(y_test) != len(ids):
    # Predictions and ids are matched by position; a length mismatch means the
    # two are misaligned and the submission would be wrong.
    raise ValueError(
        "got %d predictions for %d test ids" % (len(y_test), len(ids))
    )

# Cast to float64 so the stored values match what an element-wise assignment
# into a float64 column would produce.
label_by_id = dict(zip(ids, y_test[:, 1].astype("float64")))

# Only rows whose id has a prediction are touched; other rows keep their label.
has_pred = subm["id"].isin(ids)
subm.loc[has_pred, "label"] = subm.loc[has_pred, "id"].map(label_by_id)

## Write the new submission

In [17]:
# Write the filled frame to "submission.csv" in the working directory, without the index column.
subm.to_csv("submission.csv", index=False)

In [18]:
# Quick visual check of the first rows after filling in the labels.
subm.head()

Unnamed: 0,id,label
0,1,0.999819
1,2,0.999999
2,3,0.999869
3,4,0.999389
4,5,3.8e-05


## Try clipping

In [19]:
# Bound every predicted value to the interval [0.02, 0.98], element-wise over
# the whole prediction array; y_test itself is left unchanged.
clipped = y_test.clip(0.02, 0.98)

In [20]:
# Overwrite `label` with the clipped second prediction column (clipped[:, 1]),
# in one vectorised pass instead of one full-column scan per id (the per-id
# loop is quadratic in the number of test images).
if len(clipped) != len(ids):
    # Predictions and ids are matched by position; a length mismatch means the
    # two are misaligned and the submission would be wrong.
    raise ValueError(
        "got %d predictions for %d test ids" % (len(clipped), len(ids))
    )

# Cast to float64 so the stored values match what an element-wise assignment
# into a float64 column would produce.
clipped_by_id = dict(zip(ids, clipped[:, 1].astype("float64")))

# Only rows whose id has a prediction are touched; other rows keep their label.
has_pred = subm["id"].isin(ids)
subm.loc[has_pred, "label"] = subm.loc[has_pred, "id"].map(clipped_by_id)

In [21]:
# Save the clipped variant under its own file name so "submission.csv" is not overwritten.
subm.to_csv("submission_clipped.csv", index=False)

In [22]:
# Quick visual check: labels should now lie within the clipping bounds [0.02, 0.98].
subm.head()

Unnamed: 0,id,label
0,1,0.98
1,2,0.98
2,3,0.98
3,4,0.98
4,5,0.02
