In [1]:
!pip install livelossplot

In [2]:
import os
import random
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img
from livelossplot import PlotLossesKeras
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers,models

In [3]:
# List the files available in the competition input directory.
!ls ../input/dogs-vs-cats


In [4]:
!mkdir datasets

In [5]:
# Extract the train and test archives into the working datasets directory
# (-q: quiet output, -d: destination directory).
!unzip -q ../input/dogs-vs-cats/train.zip -d /kaggle/working/datasets/
!unzip -q ../input/dogs-vs-cats/test1.zip -d /kaggle/working/datasets/

In [6]:
!mkdir datasets/train/dog
!mkdir datasets/train/cat


In [7]:
# Geometry of the images fed to the network.
IMAGE_SIZE = 128  # height and width, in pixels
CHANNELS = 3      # colour channels

In [8]:
# Sort the extracted training images into one sub-directory per class,
# based on the class name contained in each file name.
#
# Files are moved with shutil instead of spawning a shell `mv` per file:
# file names are never interpreted by a shell, and a failed move raises
# instead of being silently ignored.
TRAIN_DIR = "./datasets/train"
CLASS_NAMES = ("cat", "dog")  # checked in this order, like the original if/elif

data_list = os.listdir(TRAIN_DIR)

for word in data_list:
    # Skip the class directories themselves.
    if word in CLASS_NAMES:
        continue
    for class_name in CLASS_NAMES:
        if class_name in word:
            class_dir = os.path.join(TRAIN_DIR, class_name)
            os.makedirs(class_dir, exist_ok=True)
            # Give the full destination path so an existing file is replaced,
            # which is what `mv` did.
            shutil.move(os.path.join(TRAIN_DIR, word), os.path.join(class_dir, word))
            break



In [9]:
# Build two parallel lists: image paths and their integer labels.
# The mapping is explicit so that only the known class directories are read;
# previously anything in the train directory that was not "cat" was labelled 1,
# and a stray file there would have crashed os.listdir.
class_to_label = {"cat": 0, "dog": 1}

input_path = []
label = []

for class_name, class_label in class_to_label.items():
    class_dir = os.path.join("./datasets/train/", class_name)
    # os.listdir returns entries in arbitrary order; sort for a stable listing.
    for path in sorted(os.listdir(class_dir)):
        input_path.append(os.path.join(class_dir, path))
        label.append(class_label)
print(input_path[0], label[0])

In [10]:
# One row per training image: file path plus integer label.
df = pd.DataFrame({'images': input_path, 'label': label})
# Shuffle the rows so both classes are interleaved. The fixed seed (the same
# value used for the train/validation split) makes the order reproducible.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
df.head()

In [11]:
# Sanity check: print every path that does not contain '.jpg'.
non_jpg_paths = [image_path for image_path in df['images'] if '.jpg' not in image_path]
for image_path in non_jpg_paths:
    print(image_path)

In [12]:
# Show a 5x5 grid of dog images (label 1) taken from a random window of df.
plt.figure(figsize=(25,25))
temp = df[df['label']==1]['images']
# random.randint includes both endpoints, so cap the start so that a full
# window of 25 images always fits (a start of len(temp) gave an empty grid).
start = random.randint(0, max(0, len(temp) - 25))
# .iloc makes the slice explicitly positional; the filtered Series still
# carries the row labels of df.
files = temp.iloc[start:start+25]

for index, file in enumerate(files):
    plt.subplot(5,5, index+1)
    img = load_img(file)
    img = np.array(img)
    plt.imshow(img)
    plt.title('Dogs')
    plt.axis('off')

In [13]:
# Preprocessing pipeline: resize to IMAGE_SIZE x IMAGE_SIZE, then scale pixel
# values by 1/255.
rescale_and_resize = tf.keras.Sequential()
rescale_and_resize.add(layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE))
rescale_and_resize.add(layers.experimental.preprocessing.Rescaling(1./255))

In [14]:
# Show a 5x5 grid of cat images (label 0) taken from a random window of df.
plt.figure(figsize=(25,25))
temp = df[df['label']==0]['images']
# random.randint includes both endpoints, so cap the start so that a full
# window of 25 images always fits (a start of len(temp) gave an empty grid).
start = random.randint(0, max(0, len(temp) - 25))
# .iloc makes the slice explicitly positional; the filtered Series still
# carries the row labels of df.
files = temp.iloc[start:start+25]

for index, file in enumerate(files):
    plt.subplot(5,5, index+1)
    img = load_img(file)
    img = np.array(img)
    plt.imshow(img)
    plt.title('Cats')
    plt.axis('off')

In [15]:
# Class balance: number of images per label.
# The column is passed by keyword: current seaborn treats the first
# positional argument as `data`, not as the variable to count.
sns.countplot(x='label', data=df);

In [16]:
# Store the labels as strings; the data generators below read this column
# with class_mode="binary".
df = df.astype({'label': str})

In [17]:
# Preview the frame after the label conversion.
df.head()

In [18]:
# Hold out 20% of the labelled images; `test` is used as the validation set
# during training. stratify keeps the cat/dog ratio the same in both parts,
# and the fixed seed makes the split reproducible.
train, test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

In [19]:
batch_size = 64
# (height, width) the generators resize every image to. It must agree with
# the model's input layer, which is built from IMAGE_SIZE, so it is derived
# from the same constant instead of a separate hard-coded 224.
input_shape = (IMAGE_SIZE, IMAGE_SIZE)

# Training images: rescaled to [0, 1] and randomly augmented to reduce
# overfitting.
train_generator = ImageDataGenerator(
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    rescale = 1./255,  # normalization of images
    rotation_range = 40,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    fill_mode = 'nearest'
)

# Validation images: rescaled only, never augmented.
val_generator = ImageDataGenerator(rescale = 1./255)

train_iterator = train_generator.flow_from_dataframe(
    train,
    x_col='images',
    y_col='label',
    target_size=input_shape,
    color_mode="rgb",
    class_mode="binary",
    batch_size=batch_size,
    shuffle=True,
    interpolation="lanczos",
)

val_iterator = val_generator.flow_from_dataframe(
    test,
    x_col='images',
    y_col='label',
    target_size=input_shape,
    color_mode="rgb",
    class_mode='binary',
    batch_size=batch_size,
    shuffle=False,  # evaluation does not need shuffling; keeps batch order stable
    interpolation="lanczos",
)

In [20]:
# Network input: IMAGE_SIZE x IMAGE_SIZE images with CHANNELS colour channels.
input_shape = ( IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 2  # not referenced by the model below: the output is one sigmoid unit

# Three convolution + max-pooling stages followed by a dense head.
# The single sigmoid output pairs with the binary cross-entropy loss.
model = models.Sequential([
    layers.Conv2D(32, kernel_size = (3,3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64,  kernel_size = (3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64,  kernel_size = (3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# build() expects the batch dimension too; None leaves the batch size open.
model.build(input_shape=(None, *input_shape))

In [21]:
# Print the layer-by-layer summary of the model.
model.summary()

In [22]:
# from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, TerminateOnNaN, EarlyStopping
# mcp = ModelCheckpoint(filepath='CnnNetB0-{epoch:02d}.ckpt', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', save_freq='epoch')
# log = CSVLogger(filename='CnnNetB0.csv', separator=',', append=False)
# ton = TerminateOnNaN()
# esl = EarlyStopping(monitor='val_loss', patience=50, mode='auto', restore_best_weights=True)
# esa = EarlyStopping(monitor='val_accuracy', patience=50, mode='auto', restore_best_weights=True)

In [23]:
# Binary classification: binary cross-entropy loss, Adam optimizer,
# accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [25]:
# Train for 50 epochs on the augmented training batches, evaluating on the
# validation iterator; the PlotLossesKeras callback comes from livelossplot.
live_plot = PlotLossesKeras()
history = model.fit(
    x=train_iterator,
    epochs=50,
    verbose=1,
    validation_data=val_iterator,
    callbacks=[live_plot],
)

In [None]:
# Load the sample submission shipped with the competition data.
submission_file = pd.read_csv("../input/dogs-vs-cats/sampleSubmission.csv")

In [None]:
# Preview the first rows of the sample submission.
submission_file.head()

In [None]:
# Collect the path of every entry in the extracted test directory.
input_path = [
    os.path.join("./datasets/test1/", file_name)
    for file_name in os.listdir("./datasets/test1")
]
print(input_path[0])

In [None]:
# One row per test image, in random order with a fresh 0..n-1 index.
test_df = (
    pd.DataFrame({'images': input_path})
    .sample(frac=1)
    .reset_index(drop=True)
)
test_df.head()

In [None]:
# Give every test row the placeholder label "Null" (the test set has no labels).
labels = ["Null"] * len(test_df)
test_df["label"] = labels

In [None]:
# Preview the test frame with its placeholder label column.
test_df.head()

In [None]:
# Inference-only generator: the same 1/255 rescaling as training, no augmentation.
test_generator = ImageDataGenerator(rescale = 1./255)

test_iterator = test_generator.flow_from_dataframe(
    test_df,
    x_col='images',
    y_col=None,                            # no targets at inference time
    target_size=(IMAGE_SIZE, IMAGE_SIZE),  # (height, width) only, without the channel axis
    color_mode="rgb",
    class_mode=None,                       # yield image batches only
    batch_size=batch_size,
    shuffle=False,                         # keep predictions aligned with the rows of test_df
    interpolation="lanczos",
)

In [None]:
# Run the trained model over the test batches; the final layer is a single
# sigmoid unit, so each image gets one score.
test_labels = model.predict(test_iterator)

In [None]:
# Flatten the predictions into a 1-D array.
test_labels = test_labels.ravel()

In [None]:
# Inspect the first five predicted scores.
test_labels[0:5]

In [None]:
# Turn the sigmoid scores into hard 0/1 labels by thresholding at 0.5.
# For scores in [0, 1] this matches the previous per-element round() (which
# also maps exactly 0.5 to 0), but it is vectorised and always yields plain
# Python ints, whatever round() returns for numpy floats.
test_labels_1 = (np.asarray(test_labels) > 0.5).astype(int).tolist()

In [None]:
# Inspect the first five rounded labels.
test_labels_1[0:5]

In [None]:
# Replace the placeholder labels with the predicted ones.
test_df["label"]=test_labels_1

In [None]:
# Preview the test frame with the predicted labels.
test_df.head()

In [None]:
# Look at one test path to see the format the ids are parsed from.
# (Named first_test_path rather than `id`, which shadowed the builtin.)
first_test_path = test_df["images"].iloc[0]
first_test_path

In [None]:
# Derive each id from the image file name: the part before the first dot.
# os.path.basename works for any directory depth (the old code assumed the
# file name was always the 4th "/"-separated component) and the builtin `id`
# is no longer shadowed.
test_ids = [
    os.path.basename(image_path).split('.')[0]
    for image_path in test_df["images"]
]

In [None]:
# Attach the parsed ids as a new column and preview the result.
test_df["id"]=test_ids
test_df.head()


In [None]:
# The file paths are no longer needed once the ids are extracted.
test_df = test_df.drop(columns=["images"])

In [None]:
# Preview the frame that will be written out as the submission.
test_df.head()

In [None]:
# Write the submission with the columns in a fixed id,label order
# (the frame itself holds them as label, id).
test_df.to_csv(
    "kaggle_submission.csv",
    columns=["id", "label"],
    index=False,
    header=True,
)