In [56]:
import pandas as pd
import numpy as np

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization, Activation

from keras.preprocessing.image import ImageDataGenerator 

import warnings
warnings.filterwarnings("ignore")

In [57]:
train = pd.read_csv("../input/train.csv")

In [58]:
train.head()

Unnamed: 0,id,has_cactus
0,0004be2cfeaba1c0361d39e2b000257b.jpg,1
1,000c8a36845c0208e833c79c1bffedd1.jpg,1
2,000d1e9a533f62e55c289303b072733d.jpg,1
3,0011485b40695e9138e92d0b3fb55128.jpg,1
4,0014d7a11e90b62848904c1418fc8cf2.jpg,1


In [59]:
train["has_cactus"] = train["has_cactus"].map(lambda x:str(x))
train.shape

(17500, 2)

The dataset being relatively small, data augmentation is very important to generalise and learn what a cactus look like. Based on the fact that cactus detection seems like an easy problem and we're dealing with a small amount of data, the batch size is kept small as training will be quick anyway.

### Image Data Generator

In [60]:
%%time
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.1, 
                                  horizontal_flip=True, vertical_flip=True)

train_generator = train_datagen.flow_from_dataframe(dataframe=train,
                                                   directory = "../input/train/train",
                                                   x_col="id", y_col="has_cactus",
                                                   batch_size=32, shuffle=True,
                                                   class_mode="binary",
                                                   target_size=(32, 32),
                                                   subset="training")

Found 15750 images belonging to 2 classes.
CPU times: user 240 ms, sys: 316 ms, total: 556 ms
Wall time: 2.03 s


In [61]:
%%time
val_generator = train_datagen.flow_from_dataframe(dataframe=train,
                                                 directory = "../input/train/train",
                                                 x_col="id", y_col="has_cactus",
                                                 batch_size=32, shuffle=True,
                                                 class_mode="binary",
                                                 target_size=(32, 32),
                                                 subset="validation")

Found 1750 images belonging to 2 classes.
CPU times: user 108 ms, sys: 72 ms, total: 180 ms
Wall time: 179 ms


In [62]:
input_shape = (32, 32, 3)
num_classes = 2

In [63]:
model = Sequential()

model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dropout(0.6))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.6))

model.add(Dense(1))
model.add(Activation('sigmoid'))

In [64]:
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

In [65]:
Model = model.fit_generator(generator=train_generator,
                           validation_data=val_generator,
                           validation_steps=int(train.shape[0]/32),
                           steps_per_epoch=int(train.shape[0]/32),
                           epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [66]:
test_dir="../input/test/test/"

In [67]:
import os
import cv2
from tqdm import tqdm, tqdm_notebook

X_test = []
X_image = []

for image in tqdm_notebook(os.listdir(test_dir)):
    X_test.append(cv2.imread(test_dir+image))
    X_image.append(image)
X_test = np.array(X_test)
X_test = X_test/255.0

HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))




In [68]:
testPredict = model.predict(X_test)

In [69]:
submission=pd.DataFrame(testPredict,columns=['has_cactus'])

In [70]:
submission['id'] = ''
cols=list(submission.columns)
cols = cols[-1:] + cols[:-1]
submission=submission[cols]
for i, img in enumerate(X_image):
    submission.set_value(i,'id',img)
print(submission)

                                        id  has_cactus
0     6a571b6df250e9575fb82f8904c325a1.jpg    1.000000
1     2edb0bf826248b088d57e22799464c41.jpg    1.000000
2     ea9422f63363a362ba6f482617006e76.jpg    1.000000
3     1021509e308bf12f71a01cac2ddca97f.jpg    1.000000
4     5eacbb413e5cd4e73cb7b1936758abf1.jpg    1.000000
5     861dccb4950b74108760daae0a1e016b.jpg    1.000000
6     6472fab8708bcd522836a9f1c6e9aae6.jpg    0.000000
7     3a77f9113b60c62b7d30c5f41828ab6b.jpg    1.000000
8     305d9cefe442e30abae64d84ecc8340e.jpg    1.000000
9     71957d3a60ca371e441fb6ff5ee6379f.jpg    0.921122
10    cf86a7bd7d483c530ec9bb805f5fd15a.jpg    0.000320
11    028c67154cbac90ff396f41aebe58656.jpg    1.000000
12    cb35339d2d9fd1717f06e3e7f89b17a4.jpg    1.000000
13    da7a0e4e5bbb277efd612bf9e3b507e6.jpg    0.000000
14    a715480e25a3178372affa70f34612d6.jpg    1.000000
15    dc36fd4e43f8646c07dd3cc4481c1792.jpg    1.000000
16    7ffbc679faca1197297ed482d398a32d.jpg    1.000000
17    8abc

In [71]:
submission.to_csv('submission.csv',index=False)

In [None]:
#train_dir=r"../input/train/train/"
#test_dir="../input/test/test/"
#test_datagen = ImageDataGenerator(rescale=1./255)

#test_generator = test_datagen.flow_from_directory(directory = test_dir,
                                       #          target_size=(32, 32),
                                       #         batch_size=1,
                                        #         class_mode='binary',
                                        #         shuffle=False)

In [None]:
#Pred = model.fit_generator(test_generator, steps_per_epoch = len(test_generator.filenames), verbose=1)
#PredBinary = [0 if value<0.50 else 1 for value in Pred]