In [None]:
import pandas as pd
from pathlib import Path

In [None]:
import numpy as np
import os

import glob

import tensorflow as tf

import matplotlib.pyplot as plt

# Sentinel that lets tf.data choose parallelism / prefetch buffer sizes at
# runtime. Uses the stable `tf.data.AUTOTUNE` alias, matching the batching
# cell further down, instead of the deprecated `experimental` namespace.
AUTOTUNE = tf.data.AUTOTUNE

import torch


In [None]:
# Report how many GPUs TensorFlow can see, using the stable (non-experimental)
# device-listing API.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


In [None]:
import zipfile

# Unpack the competition archives into the Kaggle working directory.
# The `with` block guarantees each archive is closed even if extraction fails
# part-way (the previous explicit close() was skipped on error).
for archive_name in ("train.zip", "test.zip"):
    archive_path = "/kaggle/input/dogs-vs-cats-redux-kernels-edition/" + archive_name
    with zipfile.ZipFile(archive_path, 'r') as zip_df:
        zip_df.extractall("/kaggle/working/")


In [None]:
# Folder check: make sure ./dataset/{train,val,test} exist.
dataset_path = './dataset'

train_path = os.path.join(dataset_path, 'train')
val_path = os.path.join(dataset_path, 'val')
test_path = os.path.join(dataset_path, 'test')

# makedirs(exist_ok=True) creates the parent `dataset_path` as needed and is
# a no-op when the directory is already there, so the cell can be re-run
# safely without separate isdir() checks.
for folder in (train_path, val_path, test_path):
    os.makedirs(folder, exist_ok=True)


In [None]:
def get_path(path,name):
    """Return the files in directory `path` whose extension is `name`.

    The result is sorted because glob returns entries in arbitrary
    filesystem order; sorting keeps everything derived from the listing
    (e.g. a train/validation split) reproducible between runs.
    """
    return sorted(glob.glob(path+'/*.'+name))


def check(x):
    """Return 1 if the file name of path `x` starts with 'dog.', else 0.

    Only the base name is inspected, so the result does not depend on how
    the directory part is written ('./train/...', 'train/...', absolute, or
    a directory containing dots).
    """
    return 1 if os.path.basename(x).split('.')[0] == 'dog' else 0


In [None]:
# Collect every training JPEG and derive its label from the file name
# via `check` (1 for dog, 0 otherwise).
data_list = get_path('./train', 'jpg')
result = [check(image_path) for image_path in data_list]


In [None]:
# Class balance: `result` holds only 1 (dog) and 0 (cat), so the cat count is
# whatever is left after counting the dogs.
dog_total = result.count(1)
print('dogs:', dog_total, 'cats:', len(result) - dog_total)


In [None]:
# Partition the training paths by class using the same `check` labelling.
dogs_list = list(filter(check, data_list))
cats_list = [image_path for image_path in data_list if check(image_path) == 0]


In [None]:
# Fraction of each class that goes to the training set; the remainder is used
# for validation (see the split loop that follows).
split_ratio = 0.8


In [None]:
# Build class-balanced train/validation path lists by walking dog/cat pairs.
train_data = []
val_data = []

# Only as many pairs as both classes can supply: a hard-coded 12500 raised
# IndexError on any smaller or unbalanced listing.
n_pairs = min(len(dogs_list), len(cats_list))
# Pairs with index below this cutoff go to training, the rest to validation.
train_cutoff = n_pairs * split_ratio

for i in range(n_pairs):
    target = train_data if i < train_cutoff else val_data
    # Keep dog/cat interleaved so both lists stay balanced.
    target.append(dogs_list[i])
    target.append(cats_list[i])

# Labels are derived from the file names again (1 = dog, 0 = cat).
train_label = [check(image_path) for image_path in train_data]
val_label = [check(image_path) for image_path in val_data]


In [None]:
# Human-readable names indexed by class id. `check` encodes cat as 0 and dog
# as 1 (and the submission reads index 1 as the dog probability), so the list
# must be ordered cat, dog; the previous ['dog','cat'] was inverted.
class_label = ['cat','dog']


In [None]:
# Side length, in pixels, of the square images produced by preprocessing.
img_size = 224


def preprocess_image(image):
    """Decode raw JPEG bytes into a 3-channel image and resize it.

    Args:
        image: encoded JPEG content (as returned by `tf.io.read_file`).

    Returns:
        The decoded image resized to `img_size` x `img_size`. Pixel values
        are not rescaled here.
    """
    decoded = tf.image.decode_jpeg(image, channels=3)
    return tf.image.resize(decoded, [img_size, img_size])


In [None]:
def load_and_preprocess_image(path):
    """Read the file at `path` and run `preprocess_image` on its raw bytes.

    Args:
        path: location of a JPEG file.

    Returns:
        Whatever `preprocess_image` returns for the file's content.
    """
    raw_bytes = tf.io.read_file(path)
    processed = preprocess_image(raw_bytes)
    return processed


In [None]:
# Datasets of (path, integer label) pairs; images are loaded lazily by map().
ds_train = tf.data.Dataset.from_tensor_slices((train_data, train_label))
ds_val = tf.data.Dataset.from_tensor_slices((val_data, val_label))

# Shuffle the training examples while they are still cheap path strings, and
# reshuffle every epoch. Without this the model saw the exact same
# dog, cat, dog, cat ... sequence in every epoch.
# max(..., 1) keeps the buffer size valid even for an empty list.
ds_train = ds_train.shuffle(
    buffer_size=max(len(train_data), 1),
    reshuffle_each_iteration=True,
)


def load_and_preprocess_from_path_label(path, label):
    """Map a (path, label) pair to (preprocessed image, one-hot label).

    Args:
        path: location of the JPEG file.
        label: integer class id (0 or 1).

    Returns:
        Tuple of the image from `load_and_preprocess_image` and the label
        one-hot encoded over 2 classes.
    """
    return load_and_preprocess_image(path), tf.one_hot(label, 2)


# Decode/resize several images in parallel instead of one at a time; element
# order is still deterministic by default.
ds_train = ds_train.map(
    load_and_preprocess_from_path_label, num_parallel_calls=tf.data.AUTOTUNE
)
ds_val = ds_val.map(
    load_and_preprocess_from_path_label, num_parallel_calls=tf.data.AUTOTUNE
)


In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers


In [None]:
batch_size = 64

# Training batches: fixed size (the ragged last batch is dropped) and
# prefetched so input loading overlaps with the training step.
dsb_train = ds_train.batch(batch_size=batch_size, drop_remainder=True)
dsb_train = dsb_train.prefetch(tf.data.AUTOTUNE)

# Validation must see every example, so the final partial batch is kept;
# dropping it silently excluded samples from the reported metrics.
dsb_val = ds_val.batch(batch_size=batch_size, drop_remainder=False)
dsb_val = dsb_val.prefetch(tf.data.AUTOTUNE)


In [None]:
# Full EfficientNetB0 with ImageNet weights and its original classification
# head.
# NOTE(review): no later cell visible here references this `model`;
# `build_model` constructs its own backbone. Confirm whether this cell is
# still needed.
model = EfficientNetB0(
    drop_connect_rate=0.4,
    weights='imagenet',
)


In [None]:
# Random augmentation pipeline, assembled layer by layer:
# rotation -> translation -> flip -> contrast.
img_augmentation = Sequential(name="img_augmentation")
img_augmentation.add(layers.RandomRotation(factor=0.15))
img_augmentation.add(layers.RandomTranslation(height_factor=0.1, width_factor=0.1))
img_augmentation.add(layers.RandomFlip())
img_augmentation.add(layers.RandomContrast(factor=0.1))


In [None]:
def build_model(num_classes, learning_rate=1e-2, top_dropout_rate=0.2):
    """Build and compile a transfer-learning classifier on EfficientNetB0.

    The input is passed through `img_augmentation`, then a frozen
    ImageNet-pretrained EfficientNetB0 backbone (without its top), followed
    by a new head: global average pooling, batch normalization, dropout and
    a softmax dense layer.

    Args:
        num_classes: number of output classes (units of the softmax layer).
        learning_rate: Adam learning rate. Defaults to the previously
            hard-coded 1e-2.
        top_dropout_rate: dropout rate applied before the classifier.
            Defaults to the previously hard-coded 0.2.

    Returns:
        A compiled `tf.keras.Model` named "EfficientNet" using categorical
        cross-entropy loss and the accuracy metric.
    """
    inputs = layers.Input(shape=(img_size, img_size, 3))
    augmented = img_augmentation(inputs)
    backbone = EfficientNetB0(
        include_top=False, input_tensor=augmented, weights="imagenet"
    )

    # Freeze the pretrained weights so only the new head is trained.
    backbone.trainable = False

    # Rebuild top
    x = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(num_classes, activation="softmax", name="pred")(x)

    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
    )
    return model


In [None]:
# Create a MirroredStrategy object.
# NOTE(review): the only use visible in this notebook is the commented-out
# `strategy.scope()` line in the training cell, so this object currently has
# no effect on how the model is built or trained; confirm before removing.
strategy = tf.distribute.MirroredStrategy()


In [None]:
# Train the two-class model. The model is built outside `strategy.scope()`
# (that line is left disabled below), so `strategy` is not applied here.
#with strategy.scope():
epochs = 10
new_model = build_model(num_classes=2)

hist = new_model.fit(
    dsb_train,
    validation_data=dsb_val,
    epochs=epochs,
    verbose=2,
)


In [None]:
def plot_hist(hist):
    """Plot training and validation accuracy per epoch.

    Args:
        hist: object whose `history` mapping contains the "accuracy" and
            "val_accuracy" series (e.g. the value returned by `fit`).
    """
    # Training curve first, validation second, matching the legend order.
    for metric in ("accuracy", "val_accuracy"):
        plt.plot(hist.history[metric])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()


In [None]:
# Show the train vs. validation accuracy curves recorded by `fit` in `hist`.
plot_hist(hist)


In [None]:
# Paths of the test JPEGs.
test_list = get_path('./test','jpg')


def id_load(x):
    """Return the numeric image id encoded in a test file name.

    The id is the part of the base name before the first dot, e.g.
    './test/123.jpg' -> 123. `os.path.basename` is used so the result does
    not depend on a hard-coded '/' separator.
    """
    return int(os.path.basename(x).split('.')[0])


id_list = [id_load(image_path) for image_path in test_list]


In [None]:
# Dataset of (path, image id) pairs for inference.
ds_test = tf.data.Dataset.from_tensor_slices((test_list,id_list))


def test(image,id):
    """Map a (path, id) pair to (preprocessed image, id).

    Args:
        image: location of the JPEG file (despite the name, a path).
        id: numeric image id, passed through unchanged.
    """
    return load_and_preprocess_image(image),id


# Decode several images in parallel; element order stays deterministic.
ds_test = ds_test.map(test, num_parallel_calls=tf.data.AUTOTUNE)
# Keep the final partial batch: with drop_remainder=True any images beyond a
# multiple of 100 would silently be missing from the submission.
dsb_test = ds_test.batch(batch_size=100, drop_remainder=False)


In [None]:
# Accumulates one row per test image: its id and the predicted dog probability.
submission = {'id':[],'label':[]}


def dog_prediction(x):
    """Return the dog probability (class index 1) from one softmax row."""
    return x[1]


for images, image_ids in dsb_test:
    # predict_on_batch runs a single forward pass on the batch, avoiding the
    # per-call setup and progress output of predict() inside a Python loop.
    probabilities = new_model.predict_on_batch(images)

    # Named `image_ids` rather than `id` so the builtin `id` is not
    # overwritten at notebook level.
    submission['id'].extend(image_ids.numpy())
    submission['label'].extend(map(dog_prediction, probabilities))
    


In [None]:
import pandas as pd

# Turn the collected ids / dog probabilities into a table and write it as the
# submission file, without the DataFrame index column.
submission_df = pd.DataFrame(data=submission)
submission_df.to_csv(path_or_buf='submission.csv', index=False)
