In [90]:
import numpy as np
import pandas as pd 
from keras.preprocessing.image import ImageDataGenerator, load_img
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential
from keras.callbacks import EarlyStopping,ReduceLROnPlateau
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization
import matplotlib.pyplot as plt
import random
import os
# List the entries of the dataset root directory and print them, as a quick
# check of what is available before any data is loaded.
download_list=os.listdir("Downloads/input")
print(download_list)

['sample_submission.csv', 'test', 'train']


In [91]:
# --- Run configuration ---
# FAST_RUN is defined here but not read by any of the cells that follow.
FAST_RUN = False

# Images are resized to a square of this edge length with 3 colour channels.
IMAGE_WIDTH = IMAGE_HEIGHT = 128
IMAGE_CHANNELS = 3

# Size tuple handed to the data generators as `target_size`.
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)

In [92]:
# Derive a binary label from each training file name: the text before the
# first '.' is the class prefix; 'dog' maps to 1 and everything else to 0.
filenames = os.listdir("Downloads/input/train/train/")
categories = [1 if name.split('.')[0] == 'dog' else 0 for name in filenames]

In [93]:
# One row per training image: its file name and its 0/1 label.
df = pd.DataFrame(dict(filename=filenames, category=categories))

In [94]:
# Preview the first rows of the filename/label table.
df.head()

Unnamed: 0,filename,category
0,cat.0.jpg,0
1,cat.1.jpg,0
2,cat.10.jpg,0
3,cat.100.jpg,0
4,cat.1000.jpg,0


In [95]:
# Preview the last rows of the filename/label table.
df.tail()

Unnamed: 0,filename,category
24995,dog.9995.jpg,1
24996,dog.9996.jpg,1
24997,dog.9997.jpg,1
24998,dog.9998.jpg,1
24999,dog.9999.jpg,1


In [96]:
# Convolutional classifier: three conv stages with a growing number of
# filters, followed by a dense head ending in a 2-way softmax.
model = Sequential()

for stage, filters in enumerate((32, 64, 128)):
    # Only the very first layer declares the input shape.
    first_layer_args = (
        {'input_shape': (IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)} if stage == 0 else {}
    )
    model.add(Conv2D(filters, (3, 3), activation='relu', **first_layer_args))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Dense classification head.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))  # 2 because we have cat and dog classes

model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [97]:
# Training callbacks.

# Stop training once the monitored quantity (left at the callback's default)
# has not improved for 10 consecutive epochs.
earlystop = EarlyStopping(patience=10)

# Halve the learning rate when validation accuracy has not improved for
# 2 epochs, never going below 1e-5.
# NOTE(review): the log key is 'val_acc' on the Keras release this notebook was
# run with; newer releases report it as 'val_accuracy' when the model is
# compiled with metrics=['accuracy'] -- confirm against the installed version.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)
callbacks = [earlystop, learning_rate_reduction]

# Mini-batch size. The data generators read `batch_size`; `batchsize` is kept
# only as an alias for the original spelling.
batch_size = 15
batchsize = batch_size

In [98]:
# Convert the numeric labels (0/1) into the strings 'cat'/'dog'.
# NOTE(review): presumably required because the generators below are created
# with class_mode='categorical', which works on string class labels -- confirm.
df["category"] = df["category"].replace({0: 'cat', 1: 'dog'}) 

In [99]:
# Draw a fixed-seed sample of 500 training rows and 500 validation rows from
# the full table; the remaining rows are not used.
train_df, validate_df = train_test_split(
    df,
    train_size=500,
    test_size=500,
    random_state=42,
)

In [100]:
# Drop the index carried over from `df` so the training rows are numbered from 0.
train_df = train_df.reset_index(drop=True)

In [101]:
# Display the training split.
train_df

Unnamed: 0,filename,category
0,cat.9801.jpg,cat
1,dog.366.jpg,dog
2,cat.5511.jpg,cat
3,cat.1648.jpg,cat
4,dog.4433.jpg,dog
5,dog.3730.jpg,dog
6,dog.11797.jpg,dog
7,dog.1491.jpg,dog
8,dog.1650.jpg,dog
9,dog.7056.jpg,dog


In [102]:
# Drop the index carried over from `df` so the validation rows are numbered from 0.
validate_df = validate_df.reset_index(drop=True)

In [103]:
# Display the validation split.
validate_df

Unnamed: 0,filename,category
0,cat.493.jpg,cat
1,dog.9112.jpg,dog
2,cat.745.jpg,cat
3,dog.11022.jpg,dog
4,dog.11363.jpg,dog
5,cat.5488.jpg,cat
6,cat.3972.jpg,cat
7,dog.10365.jpg,dog
8,cat.1796.jpg,cat
9,cat.7359.jpg,cat


In [104]:
# Number of training rows; used later to compute steps per epoch.
total_train = len(train_df)

In [105]:
# Show the training row count.
total_train

500

In [106]:
# Number of validation rows; used later to compute validation steps.
total_validate = len(validate_df)

In [107]:
# Show the validation row count.
total_validate

500

In [116]:
# Mini-batch size shared by the training, validation and test generators, and
# used to derive the step counts for fitting and prediction.
batch_size=15

In [117]:
# Training-time image pipeline: scale pixel values by 1/255 and apply random
# geometric augmentation to every batch.
train_datagen = ImageDataGenerator(
    # intensity scaling
    rescale=1./255,
    # random geometric transforms
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

In [118]:
# Stream augmented training batches from disk, taking file names and string
# labels from train_df and resizing every image to IMAGE_SIZE.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="Downloads/input/train/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
)


Found 500 validated image filenames belonging to 2 classes.


In [119]:
# Validation pipeline: only the 1/255 rescale is applied (no augmentation).
# Files are read from the same directory as the training images, selected by
# the rows of validate_df.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="Downloads/input/train/train/",
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
)

Found 500 validated image filenames belonging to 2 classes.


In [120]:
# Train for a fixed number of epochs. Step counts use floor division, so a
# trailing partial batch is not counted for either split.
epochs = 5
history = model.fit_generator(
    train_generator,
    steps_per_epoch=total_train // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=total_validate // batch_size,
    callbacks=callbacks,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 5/5


In [121]:
# One row per test image; there is no label column because these are the
# images to be predicted.
test_filenames = os.listdir("Downloads/input/test/test")
test_df = pd.DataFrame(dict(filename=test_filenames))

In [122]:
# Number of test images; used to compute how many prediction steps are needed.
nb_samples = len(test_df)

In [123]:
# Show the test image count.
nb_samples

12500

In [124]:
# Preview the first rows of the test file table.
test_df.head()

Unnamed: 0,filename
0,1.jpg
1,10.jpg
2,100.jpg
3,1000.jpg
4,10000.jpg


In [125]:
# Test pipeline: rescale only, no labels, and no shuffling so that the order of
# the predictions matches the row order of test_df.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory="Downloads/input/test/test/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    shuffle=False,
)

Found 12500 validated image filenames.


In [126]:
# Run inference over the whole test set. The step count is rounded up so the
# final, partial batch is included, and cast to int because np.ceil returns a
# float while `steps` is meant to be an integer number of batches.
predict = model.predict_generator(test_generator, steps=int(np.ceil(nb_samples / batch_size)))

In [127]:
# Inspect the raw model outputs (one row of two class scores per test image).
predict

array([[0.17790066, 0.8220993 ],
       [0.5601273 , 0.43987274],
       [0.5055365 , 0.49446347],
       ...,
       [0.7096074 , 0.29039258],
       [0.50769395, 0.49230605],
       [0.7214751 , 0.27852485]], dtype=float32)

In [128]:
# Take the index of the larger of the two model outputs as the predicted class.
# NOTE(review): index 0/1 presumably corresponds to 'cat'/'dog' (the training
# generator's class ordering) -- confirm via train_generator.class_indices.
test_df['category'] = np.argmax(predict, axis=-1)

In [130]:
# Preview the first ten predicted class indices.
test_df['category'].head(10)

0    1
1    0
2    0
3    0
4    0
5    1
6    1
7    0
8    1
9    0
Name: category, dtype: int64

In [131]:
# Build the submission table: `id` is the part of each test file name before
# the first '.', `label` is the predicted class index. Write it out as CSV
# without the index column.
image_ids = test_df['filename'].str.split('.').str[0]
submission = pd.DataFrame({'id': image_ids, 'label': test_df['category']})
submission.to_csv('submission20.csv', index=False)

In [132]:
# Preview the first rows of the submission table.
submission.head()

Unnamed: 0,id,label
0,1,1
1,10,0
2,100,0
3,1000,0
4,10000,0
