In [1]:
import pandas as pd
import numpy as np

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization, Activation

from keras.preprocessing.image import ImageDataGenerator 
#from keras.applications import VGG19

import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [2]:
train = pd.read_csv("../input/train.csv")
train.head()

Unnamed: 0,id,has_cactus
0,0004be2cfeaba1c0361d39e2b000257b.jpg,1
1,000c8a36845c0208e833c79c1bffedd1.jpg,1
2,000d1e9a533f62e55c289303b072733d.jpg,1
3,0011485b40695e9138e92d0b3fb55128.jpg,1
4,0014d7a11e90b62848904c1418fc8cf2.jpg,1


In [3]:
train["has_cactus"] = train["has_cactus"].map(lambda x:str(x))
train.shape

(17500, 2)

In [4]:
%%time
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.1, 
                                  horizontal_flip=True, vertical_flip=True)

train_generator = train_datagen.flow_from_dataframe(dataframe=train,
                                                   directory = "../input/train/train",
                                                   x_col="id", y_col="has_cactus",
                                                   batch_size=32, shuffle=True,
                                                   class_mode="binary",
                                                   target_size=(32, 32),
                                                   subset="training")

Found 15750 validated image filenames belonging to 2 classes.
CPU times: user 240 ms, sys: 364 ms, total: 604 ms
Wall time: 2.28 s


In [5]:
%%time
val_generator = train_datagen.flow_from_dataframe(dataframe=train,
                                                 directory = "../input/train/train",
                                                 x_col="id", y_col="has_cactus",
                                                 batch_size=32, shuffle=True,
                                                 class_mode="binary",
                                                 target_size=(32, 32),
                                                 subset="validation")

Found 1750 validated image filenames belonging to 2 classes.
CPU times: user 108 ms, sys: 36 ms, total: 144 ms
Wall time: 144 ms


In [6]:
from keras import applications

In [7]:
base_model = applications.VGG16(weights='imagenet', 
                     include_top=False, 
                     input_shape=(32, 32, 3))

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [8]:
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, use_bias=True))
model.add(BatchNormalization())
model.add(Activation("relu"))
model.add(Dropout(0.5))
model.add(Dense(256,activation='relu'))
model.add(BatchNormalization())
model.add(Dense(16, activation='tanh'))
model.add(Dense(1, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 1, 1, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1024      
_________________________________________________________________
activation_1 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
__________

In [9]:
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

In [10]:
Model = model.fit_generator(generator=train_generator,
                           validation_data=val_generator,
                           validation_steps=int(train.shape[0]/32),
                           steps_per_epoch=int(train.shape[0]/32),
                           epochs=20, verbose=2)

Epoch 1/20
 - 40s - loss: 0.2251 - acc: 0.9059 - val_loss: 0.9795 - val_acc: 0.7393
Epoch 2/20
 - 25s - loss: 0.1065 - acc: 0.9632 - val_loss: 3.2857 - val_acc: 0.2684
Epoch 3/20
 - 24s - loss: 0.0983 - acc: 0.9659 - val_loss: 0.0944 - val_acc: 0.9670
Epoch 4/20
 - 24s - loss: 0.0884 - acc: 0.9689 - val_loss: 0.7709 - val_acc: 0.7832
Epoch 5/20
 - 25s - loss: 0.0802 - acc: 0.9707 - val_loss: 1.1948 - val_acc: 0.7430
Epoch 6/20
 - 24s - loss: 0.0719 - acc: 0.9742 - val_loss: 0.1049 - val_acc: 0.9622
Epoch 7/20
 - 24s - loss: 0.0679 - acc: 0.9774 - val_loss: 1.1327 - val_acc: 0.7429
Epoch 8/20
 - 24s - loss: 0.0655 - acc: 0.9772 - val_loss: 0.2005 - val_acc: 0.9183
Epoch 9/20
 - 25s - loss: 0.0574 - acc: 0.9806 - val_loss: 1.6775 - val_acc: 0.6089
Epoch 10/20
 - 24s - loss: 0.0612 - acc: 0.9784 - val_loss: 0.4185 - val_acc: 0.8705
Epoch 11/20
 - 24s - loss: 0.0516 - acc: 0.9831 - val_loss: 0.0364 - val_acc: 0.9894
Epoch 12/20
 - 24s - loss: 0.0463 - acc: 0.9845 - val_loss: 0.0574 - val_a

In [11]:
test_dir="../input/test/test/"

In [12]:
import os
import cv2
from tqdm import tqdm, tqdm_notebook

X_test = []
X_image = []

for image in tqdm_notebook(os.listdir(test_dir)):
    X_test.append(cv2.imread(test_dir+image))
    X_image.append(image)
X_test = np.array(X_test)
X_test = X_test/255.0

HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))




In [13]:
testPredict = model.predict(X_test)

In [14]:
submission=pd.DataFrame(testPredict,columns=['has_cactus'])

In [15]:
submission['id'] = ''
cols=list(submission.columns)
cols = cols[-1:] + cols[:-1]
submission=submission[cols]
for i, img in enumerate(X_image):
    submission.set_value(i,'id',img)

In [16]:
submission.to_csv('submission.csv',index=False)