In [34]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels.h5
/kaggle/input/resnet50/imagenet_class_index.json
/kaggle/input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip
/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv
/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip


In [35]:
import cv2
from tqdm import tqdm          #Used for the purpose of showing iterations getting loaded in bar kind of form
from random import shuffle
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

weight_loc = '../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'  
We'll not be using this path directly because it will cause some dimension error instead we will download weights of imagenet.
Here is the explanation:
https://stackoverflow.com/questions/60119041/failed-to-load-keras-resnet50-model-offline-using-weight-file

In [36]:
path = '../input/dogs-vs-cats-redux-kernels-edition/'

In [37]:
train_img_path = os.path.join(path,'train.zip')
test_img_path = os.path.join(path,'test.zip')

In [38]:
from zipfile import ZipFile
with ZipFile(train_img_path,'r') as zip:
    zip.extractall('.')

with ZipFile(test_img_path,'r') as zip:
    zip.extractall('.')

In [39]:
print(os.listdir('../'))
print(os.listdir('../working'))

['lib', 'input', 'working']
['__notebook_source__.ipynb', 'test', 'train']


In [40]:
#One hot encoding labels: dog as 1 and cat as 0
def label_image(img):
    category = img.split('.')[-3]
    if category == 'cat': return [1,0]
    elif category == 'dog': return [0,1]

In [41]:
#this function takes the image data, directory in which data is stored, and boolean wheather it's training data or test data as arguments
def process_data(data, data_dir, isTrain=True):
    data_df = []                                   
    for img in tqdm(data):
        path = os.path.join(data_dir,img)   #Assigning path to images by concatenating directory and images 
        if(isTrain):
            label = label_image(img)        #Calling label_image to assign labels to image present in training directory
        else:
            label = img.split('.')[0]
        img = cv2.imread(path,cv2.IMREAD_COLOR)
        img = cv2.resize(img, (224,224))
        data_df.append([np.array(img),np.array(label)])     #append image and labels as numpy array in data_df list
    return data_df

In [42]:
train = process_data(os.listdir('./train/')[0:2000], './train/')       #we are slicing here for decreasing memory consumption
#show_img(train)

100%|██████████| 2000/2000 [00:04<00:00, 431.19it/s]


In [43]:
test = process_data(os.listdir('./test')[0:2000], './test', False)
#show_img(test, True)

100%|██████████| 2000/2000 [00:04<00:00, 432.62it/s]


In [44]:
filename = os.listdir('../working/train')
labels = []
for file in filename:
    category = file.split('.')[0]
    if category == 'cat':
        labels.append('cat')
    else:
        labels.append('dog')
        
df = pd.DataFrame({'filename': filename, 'label': labels})
df.head()

Unnamed: 0,filename,label
0,cat.5412.jpg,cat
1,dog.1541.jpg,dog
2,cat.4069.jpg,cat
3,cat.12243.jpg,cat
4,cat.12483.jpg,cat


In [45]:
train_df, valid_df = train_test_split(df, test_size = 0.2, stratify = df['label'], random_state = 123)
train_df = train_df.reset_index(drop = True)
valid_df = valid_df.reset_index(drop = True)

In [46]:
print(len(train_df))
print(len(valid_df))

20000
5000


In [47]:
datagen = ImageDataGenerator(preprocessing_function = preprocess_input)

In [48]:
train_gen = datagen.flow_from_dataframe(train_df, directory = '../working/train', x_col = 'filename', y_col = 'label', target_size = (224,224), batch_size = 64)
valid_gen = datagen.flow_from_dataframe(valid_df, directory = '../working/train', x_col = 'filename', y_col = 'label', target_size = (224,224), batch_size = 64)

Found 20000 validated image filenames belonging to 2 classes.
Found 5000 validated image filenames belonging to 2 classes.


In [49]:
model = Sequential()
model.add(ResNet50(include_top = False, pooling = 'max', weights = 'imagenet'))
model.add(Dense(1, activation = 'softmax'))
model.layers[0].trainable = False

In [50]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 2049      
Total params: 23,589,761
Trainable params: 2,049
Non-trainable params: 23,587,712
_________________________________________________________________


In [51]:
model.compile(optimizer = 'adam', metrics = ['accuracy'], loss = 'binary_crossentropy')

In [None]:
model.fit_generator(train_gen, epochs = 10, validation_data = valid_gen)

Epoch 1/10
Epoch 3/10