## Load model

In [1]:
from keras.models import load_model

In [2]:
# Load the Keras model stored in the HDF5 file next to this notebook.
head_model_path = "head_model.h5"
model = load_model(head_model_path)

## Load test bottlenecks

In [3]:
import h5py

In [4]:
%%time
with h5py.File("300_bottlenecks.h5") as hf:
    X_test = hf["test"][:]

## Generate predictions

In [5]:
%%time
y_test = model.predict(X_test)

Wall time: 21.3 s


## Load sample submission

In [6]:
import pandas as pd

In [7]:
# Template frame: later cells match rows on its "id" column and overwrite
# its "label" column with model predictions.
sample_submission_path = "sample_submission.csv"
subm = pd.read_csv(sample_submission_path)

## Get the order of the bottlenecks

In [8]:
from keras.preprocessing.image import ImageDataGenerator

# This iterator is only used for its `.filenames` listing (no batches are
# drawn from it in this notebook). shuffle=False keeps the listing in a fixed
# order -- presumably the same order in which the bottlenecks were computed;
# TODO confirm against the notebook that produced 300_bottlenecks.h5.
gen = ImageDataGenerator()
test_batches = gen.flow_from_directory(
    directory="test",
    target_size=model.input_shape[1:3],
    class_mode=None,
    shuffle=False,
)

Found 12500 images belonging to 1 classes.


## Extract ordered ids from ordered bottlenecks

In [9]:
from pathlib import PureWindowsPath

# Each entry of `filenames` is a relative path such as "<subdir>\<id>.<ext>"
# on Windows or "<subdir>/<id>.<ext>" elsewhere. PureWindowsPath accepts both
# separators, so the numeric id is extracted the same way on any OS instead
# of relying on a hard-coded backslash.
ids = [
    int(PureWindowsPath(filename).name.split(".")[0])
    for filename in test_batches.filenames
]

## Fill the sample submission

In [10]:
# Predictions are aligned with `ids` by position, so the two must have the
# same length; otherwise labels would be attached to the wrong images.
assert len(ids) == len(y_test), "number of ids and predictions differ"

# Map each submission row's id to its predicted value in one vectorized pass
# (the previous per-id boolean-mask loop rescanned the whole frame for every
# id). Cast to float64 so the values match the column dtype; rows whose id
# has no prediction keep their existing label.
label_by_id = dict(zip(ids, y_test[:, 1].astype("float64")))
subm["label"] = subm["id"].map(label_by_id).fillna(subm["label"])

## Write the new submission

In [11]:
# Write the filled-in frame without the pandas row index.
submission_path = "submission2.csv"
subm.to_csv(submission_path, index=False)

In [12]:
# Show the first five rows as a quick check of the filled-in labels.
subm.head(n=5)

Unnamed: 0,id,label
0,1,0.999919
1,2,0.999996
2,3,0.999931
3,4,0.999368
4,5,3.1e-05


## Try clipping

In [13]:
# Bound every prediction to [0.02, 0.98] -- presumably to limit the penalty
# for confident mistakes under the competition metric; TODO confirm.
clip_low, clip_high = 0.02, 0.98
clipped = y_test.clip(clip_low, clip_high)

In [14]:
# Clipped predictions are aligned with `ids` by position, so the two must
# have the same length; otherwise labels would go to the wrong images.
assert len(ids) == len(clipped), "number of ids and predictions differ"

# Overwrite the labels with the clipped values in one vectorized pass (the
# previous per-id boolean-mask loop rescanned the whole frame for every id).
# Cast to float64 so the values match the column dtype; rows whose id has no
# prediction keep their existing label.
clipped_by_id = dict(zip(ids, clipped[:, 1].astype("float64")))
subm["label"] = subm["id"].map(clipped_by_id).fillna(subm["label"])

In [15]:
# Write the clipped variant to its own file, without the pandas row index.
clipped_submission_path = "submission2_clipped.csv"
subm.to_csv(clipped_submission_path, index=False)

In [16]:
# Show the first five rows to confirm the labels now sit inside the clip range.
subm.head(n=5)

Unnamed: 0,id,label
0,1,0.98
1,2,0.98
2,3,0.98
3,4,0.98
4,5,0.02
