In [1]:

import os
import zipfile

import numpy as np 
import pandas as pd 

import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.models import load_model


from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt


In [2]:

TRAIN_PATH = "../input/dogs-vs-cats-redux-kernels-edition/train.zip"
TEST_PATH = "../input/dogs-vs-cats-redux-kernels-edition/test.zip"

UNZIP_DATA = "../kaggle/files/unzipped/"
UNZIP_TRAIN = "../kaggle/files/unzipped/train"
UNZIP_TEST = "../kaggle/files/unzipped/test"

BATCH_SIZE = 32

SEED = 88888
IMG_SIZE = 224
EPOCHS = 100




In [3]:

with zipfile.ZipFile(TRAIN_PATH, 'r') as zipp:
    zipp.extractall(UNZIP_DATA)
    print('Done!')
    
with zipfile.ZipFile(TEST_PATH, 'r') as zipp:
    zipp.extractall(UNZIP_DATA)
    print('Done!')

Done!
Done!


In [4]:
training_images_files = os.listdir("../kaggle/files/unzipped/train")
test_image_files =os.listdir("../kaggle/files/unzipped/test")


In [5]:
#Creating DataFrame with file names, class names and paths  for training samples
classes = [name.split('.')[0] for name in training_images_files]

train = pd.DataFrame({
    'filename': training_images_files,
    'class': classes,
})

train['Path'] = train['filename'].apply(
    lambda filename: os.path.join('../kaggle/files/unzipped/test', filename))

display(train.head())


#Creating DataFrame with file names and  IDs  for test samples
test = pd.DataFrame(data = test_image_files, columns = ['filename'])
test['id'] = test['filename'].apply(lambda f: int(f.split('.')[0]))
test.sort_values(by = 'id', inplace = True, ignore_index = True)
test.head()



Unnamed: 0,filename,class,Path
0,dog.890.jpg,dog,../kaggle/files/unzipped/test/dog.890.jpg
1,dog.1178.jpg,dog,../kaggle/files/unzipped/test/dog.1178.jpg
2,dog.7845.jpg,dog,../kaggle/files/unzipped/test/dog.7845.jpg
3,dog.4632.jpg,dog,../kaggle/files/unzipped/test/dog.4632.jpg
4,cat.3660.jpg,cat,../kaggle/files/unzipped/test/cat.3660.jpg


Unnamed: 0,filename,id
0,1.jpg,1
1,2.jpg,2
2,3.jpg,3
3,4.jpg,4
4,5.jpg,5


In [6]:
#split train data tu train and validation sets
train_df, valid_df = train_test_split(train, 
                                      test_size = .10, 
                                      shuffle=True ,
                                      random_state=SEED)


In [7]:
# 
train_generator = ImageDataGenerator(preprocessing_function=preprocess_input,
                                    rotation_range=65,
                                    shear_range=0.1,
                                    zoom_range=0.2,
                                    horizontal_flip=False,
                                    vertical_flip=True,
                                    width_shift_range=0.2,
                                    height_shift_range=0.2
                                    )

    

train_generator = train_generator.flow_from_dataframe(
            train_df,
            UNZIP_TRAIN,
            x_col='filename',
            y_col='class',
            target_size=(IMG_SIZE,IMG_SIZE),
            batch_size=32,
            class_mode='binary')


validation_generator = ImageDataGenerator(preprocessing_function=preprocess_input)


validation_generator = validation_generator.flow_from_dataframe(
            valid_df,
            UNZIP_TRAIN,
            x_col = 'filename',
            y_col = 'class',
            target_size = (IMG_SIZE,IMG_SIZE),
            batch_size=32,
            class_mode='binary'

)


Found 22500 validated image filenames belonging to 2 classes.
Found 2500 validated image filenames belonging to 2 classes.


In [8]:
pre_trained_model = EfficientNetB0(input_shape = (IMG_SIZE, IMG_SIZE, 3),
                                include_top = False,
                                weights = 'imagenet')

for layer in pre_trained_model.layers:
    layer.trainable = False


2023-02-15 21:10:08.816954: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-15 21:10:08.940901: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-15 21:10:08.941908: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-15 21:10:08.943907: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


In [9]:
last_layer = pre_trained_model.get_layer('top_activation')

last_output=last_layer.output

def create_model(last_output):
    
    x=keras.layers.GlobalAveragePooling2D()(last_output)
    x=keras.layers.BatchNormalization()(x)
    x=keras.layers.Dense(1, activation='sigmoid')(x)

    model = Model(pre_trained_model.input, x)

    model.compile(optimizer = RMSprop(learning_rate=0.001),
                  loss = 'binary_crossentropy',
                  metrics = ['acc'])
    return model

model=create_model(last_output)
#model.summary()

In [10]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                            patience=5,
                                            verbose=1,
                                            factor=0.5,
                                            min_delt=0.001,
                                            min_lr=0.00001)

early_stopping = EarlyStopping(monitor = "val_loss",
                               patience = 20,
                               verbose = 1,
                               mode = "min"
                              )

model_save = ModelCheckpoint(filepath="/kaggle/working/", 
                             monitor = "val_loss",
                             verbose=1, 
                             save_best_only=True,
                             mode="min")


In [11]:
#history = model.fit(
 #   train_generator,
 #   validation_data = validation_generator,
 #   epochs = EPOCHS,
 #   callbacks = [learning_rate_reduction, early_stopping, model_save],
 #   )

In [12]:
model.save('./dog_cat_model')

2023-02-15 21:10:27.417611: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


In [13]:
model = load_model("/kaggle/input/trained-model/ModelCat_Doc")

In [14]:

def plot_hist(hist):
    plt.plot(hist.history["acc"])
    plt.plot(hist.history["val_acc"])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()


#plot_hist(history)

In [15]:

test_gen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_generator = test_gen.flow_from_dataframe(
    test, 
    UNZIP_TEST, 
    x_col='filename',
    #y_col=None,
    class_mode= None,
    target_size=(IMG_SIZE,IMG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False
)


predict = model.predict(test_generator, verbose = 1)

Found 12500 validated image filenames.


2023-02-15 21:11:01.693022: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2023-02-15 21:11:04.388329: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005




In [16]:

test["predict"] = predict
test["label"] = test["predict"]
result = test[["id", "label"]]

In [17]:
result.to_csv('submission.csv', index=False)