In [1]:
import tensorflow as tf
print(tf.__version__)
import h5py

2.4.1


In [2]:
train_data_directory = 'D:/data science/cats and dogs/train'
test_data_directory = 'D:/data science/cats and dogs/test'

In [3]:
def write_gap(MODEL, image_size, lambda_func=None):
    width = image_size[0]
    height = image_size[1]
    input_tensor = tf.keras.Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        x = lambda_func(x)
    
    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
    model = tf.keras.Model(base_model.input, tf.keras.layers.GlobalAveragePooling2D()(base_model.output))

    gen = tf.keras.preprocessing.image.ImageDataGenerator(validation_split=0.2)
    train_generator = gen.flow_from_directory(train_data_directory, image_size, shuffle=False, 
                                              batch_size=batch_size, class_mode='binary')
    test_generator = gen.flow_from_directory(test_data_directory, image_size, shuffle=False, 
                                             batch_size=batch_size, class_mode='binary')

    print('calculating training set')
    train = model.predict(train_generator, len(train_generator))
    print('calculating test set')
    test = model.predict(test_generator, len(test_generator))
    print('write file to disk')
    with h5py.File(f"C:/Users/jiahu/Documents/dogs vs cats/tensorflow/features/gap_{model_name}.h5", 'w') as h:
        h.create_dataset("train", data=train)
        h.create_dataset("test", data=test)
        h.create_dataset("label", data=train_generator.classes)

In [4]:
%%time
# NASNetLarge 
# nasnet.preprocess_input
image_size = (331,331)
batch_size = 8
model_name = 'NASNetLarge'
write_gap(tf.keras.applications.NASNetLarge, image_size, lambda_func=tf.keras.applications.nasnet.preprocess_input)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
calculating training set
calculating test set
write file to disk
Wall time: 10min 14s


In [4]:
%%time
# VGG16
# vgg16.preprocess_input
image_size = (224,224)
batch_size = 8
model_name = 'VGG16'
write_gap(tf.keras.applications.VGG16, image_size, tf.keras.applications.vgg16.preprocess_input)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
calculating training set
calculating test set
write file to disk
Wall time: 2min 43s


In [4]:
%%time
# Xception
# xception.preprocess_input
image_size = (299,299)
batch_size = 64
model_name = 'Xception'
write_gap(tf.keras.applications.Xception, image_size, tf.keras.applications.xception.preprocess_input)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
calculating training set
calculating test set
write file to disk
Wall time: 1min 42s


In [4]:
%%time
# InceptionV3
# inception_v3.preprocess_input
image_size = (299,299)
batch_size = 4
model_name = 'InceptionV3'
write_gap(tf.keras.applications.InceptionV3, image_size, tf.keras.applications.inception_v3.preprocess_input)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
calculating training set
calculating test set
write file to disk
Wall time: 3min 42s


In [4]:
%%time
#ResNet50
# resnet50.preprocess_input
image_size = (224,224)
batch_size = 8
model_name = 'ResNet50'
write_gap(tf.keras.applications.ResNet50, image_size, tf.keras.applications.resnet50.preprocess_input)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
calculating training set
calculating test set
write file to disk
Wall time: 1min 29s


In [1]:
import tensorflow as tf
import h5py
import numpy as np
from sklearn.utils import shuffle
np.random.seed(2021)

X_train = []
X_test = []

for filename in ["C:/Users/jiahu/Documents/dogs vs cats/tensorflow/features/gap_ResNet50.h5", 
                 "C:/Users/jiahu/Documents/dogs vs cats/tensorflow/features/gap_Xception.h5", 
                 "C:/Users/jiahu/Documents/dogs vs cats/tensorflow/features/gap_InceptionV3.h5", 
                 "C:/Users/jiahu/Documents/dogs vs cats/tensorflow/features/gap_VGG16.h5", 
                 "C:/Users/jiahu/Documents/dogs vs cats/tensorflow/features/gap_NASNetLarge.h5"]:
    with h5py.File(filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_test.append(np.array(h['test']))
        y_train = np.array(h['label'])

X_train = np.concatenate(X_train, axis=1)
X_test = np.concatenate(X_test, axis=1)

X_train, y_train = shuffle(X_train, y_train)

In [2]:
X_train.shape

(25000, 10688)

In [3]:
X_test.shape

(12500, 10688)

In [4]:
# np.random.seed(2021)

input_tensor = tf.keras.Input(X_train.shape[1:])
# x = tf.keras.layers.Dropout(0.99)(input_tensor)
x = tf.keras.layers.Dense(1, activation='sigmoid')(input_tensor)
model = tf.keras.Model(input_tensor, x)

model.compile(optimizer='Adam',
              loss='binary_crossentropy',
              metrics=['acc'])

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 10688)]           0         
_________________________________________________________________
dense (Dense)                (None, 1)                 10689     
Total params: 10,689
Trainable params: 10,689
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Callbacks
checkpoint = tf.keras.callbacks.ModelCheckpoint("C:/Users/jiahu/Documents/dogs vs cats/tensorflow/model/dog_cat_feature_extraction.h5", 
                             monitor='val_acc', 
                             verbose=1, 
                             save_best_only=True, 
                             save_weights_only=False, 
                             mode='auto', 
                             save_freq='epoch')
early = tf.keras.callbacks.EarlyStopping(monitor='val_acc', 
                      min_delta=0, 
                      patience=10, 
                      verbose=1, 
                      mode='auto')
learning_rate_reduction = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_acc',
                                            patience=2,
                                            verbose=1,
                                            factor=0.1,
                                            min_lr=0.000000001)

In [6]:
nb_epochs = 1000

history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs = nb_epochs,
    validation_split=0.2,
    callbacks=[checkpoint, early, learning_rate_reduction])

Epoch 1/1000

Epoch 00001: val_acc improved from -inf to 0.99220, saving model to C:/Users/jiahu/Documents/dogs vs cats/tensorflow/model\dog_cat_feature_extraction.h5
Epoch 2/1000

Epoch 00002: val_acc improved from 0.99220 to 0.99440, saving model to C:/Users/jiahu/Documents/dogs vs cats/tensorflow/model\dog_cat_feature_extraction.h5
Epoch 3/1000

Epoch 00003: val_acc did not improve from 0.99440
Epoch 4/1000

Epoch 00004: val_acc improved from 0.99440 to 0.99460, saving model to C:/Users/jiahu/Documents/dogs vs cats/tensorflow/model\dog_cat_feature_extraction.h5
Epoch 5/1000

Epoch 00005: val_acc did not improve from 0.99460
Epoch 6/1000

Epoch 00006: val_acc did not improve from 0.99460

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 7/1000

Epoch 00007: val_acc did not improve from 0.99460
Epoch 8/1000

Epoch 00008: val_acc did not improve from 0.99460

Epoch 00008: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 9/1

In [7]:
y_pred = model.predict(X_test, verbose=1)
y_pred = y_pred.clip(min=0.005, max=0.995)

y_pred.shape



(12500, 1)

In [9]:
import pandas as pd

df = pd.read_csv("D:/data science/cats and dogs/sampleSubmission.csv")

In [10]:
gen = tf.keras.preprocessing.image.ImageDataGenerator()
test_generator = gen.flow_from_directory("D:/data science/cats and dogs/test/", (224, 224), shuffle=False, 
                                         batch_size=16, class_mode=None)

Found 12500 images belonging to 1 classes.


In [21]:
for i, file_dir in enumerate(test_generator.filenames):
    file_name = file_dir.split('\\')[-1]
    file_name, file_ext = file_name.split('.')
    df.at[i-1, 'label'] = y_pred[i]

df.to_csv('pred.csv', index=None)
df.head(10)


Unnamed: 0,id,label
0,1.0,0.005
1,2.0,0.005
2,3.0,0.995
3,4.0,0.995
4,5.0,0.005
5,6.0,0.005
6,7.0,0.995
7,8.0,0.995
8,9.0,0.005
9,10.0,0.005
