In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd   
import os   
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input  
from tensorflow.keras.preprocessing.image import ImageDataGenerator  

In [6]:
seed = 13
tf.random.set_seed(seed)
np.random.seed(seed)

In [7]:
!pwd

/home/aquila/dev_ws/DeepLearning/src


In [8]:
train_df = pd.DataFrame({"file": os.listdir("../data/dogs-vs-cats/train")})
train_df["label"] = train_df["file"].apply(lambda x:x.split(".")[0])

train_df.head()

Unnamed: 0,file,label
0,cat.3136.jpg,cat
1,cat.719.jpg,cat
2,cat.55.jpg,cat
3,dog.6158.jpg,dog
4,cat.6574.jpg,cat


In [9]:
test_df = pd.DataFrame({"file": os.listdir("../data/dogs-vs-cats/test1")})
test_df.head()

Unnamed: 0,file
0,10933.jpg
1,6859.jpg
2,1849.jpg
3,8580.jpg
4,7862.jpg


In [10]:
from sklearn.model_selection import train_test_split  
train_data, val_data = train_test_split(train_df, test_size = 0.2,
                                        stratify = train_df["label"],
                                        random_state = 13)

In [11]:
train_datagen = ImageDataGenerator(
    rotation_range = 15,
    horizontal_flip = True,
    preprocessing_function = preprocess_input
)
val_datagen = ImageDataGenerator(preprocessing_function = preprocess_input)

In [12]:
FILES = '../data/dogs-vs-cats/'
batch_size = 160
train_generator = train_datagen.flow_from_dataframe(
    dataframe = train_data,
    directory = FILES + "train/",
    x_col = "file",
    y_col = "label",
    class_mode = "categorical",
    target_size = (224, 224),
    batch_size = batch_size,
    seed = 13
)

Found 20000 validated image filenames belonging to 2 classes.


In [14]:
val_generator = val_datagen.flow_from_dataframe(
    dataframe = val_data,
    directory = FILES + "train/",
    x_col = "file",
    y_col = "label",
    class_mode = "categorical",
    target_size = (224, 224),
    batch_size = batch_size,
    seed = 13,
    shuffle = False
)

Found 5000 validated image filenames belonging to 2 classes.


In [15]:
base_model = VGG16(
    weights = "imagenet",
    input_shape = (224, 224, 3),
    include_top = False
)

In [None]:
base_model.summary()

In [None]:
for layers in base_model.layers:
    layers.trainable = False
base_model.summary()

In [None]:
def vgg16_pretrained():
    model = Sequential(
        [
            base_model,
            GlobalAveragePooling2D(),
            Dense(100, activation = "relu"),
            Dropout(0.4),
            Dense(64, activation = "relu"),
            Dense(2, activation = "softmax")
        ]
    )    
    return model  

In [None]:
tf.keras.backend.clear_session()

In [None]:
model = vgg16
model.compile(loss = "categorical_crossentropy",
                  optimizer = "adam",
                  metrics = "accuracy"
                  )
model.summary()

In [None]:
reduce_lr = tf.keras.callbacks.ReduceROnPlateau(
    monitor = "val_accuracy",
    patience = 2,
    verbose = 1,
    factor = 0.5,
    min_lr = 0.000000001
)

In [None]:
early_stopping = tf.keras.callbacks.Earlystopping(
    monitor = "val_accuracy",
    patience = 5,
    verbose = 1,
    mode = "max"
)

In [None]:
checkpoint = tf.keras.cdallbacks.ModelCheckpoint(
    monitor = "val_accuracy",
    filepath = "catdog_vgg16_.{epoch:02d}-{val_accuracy:.6f}.hdf5",
    verbose = 1,
    save_best_only = True,
    save_weights_only = True
)

In [None]:
history = model.fit(
    train_generator,
    epochs = 10,
    validation_data = val_generator,
    validation_steps = val_data.shape[0]
    steps_per_epoch = train_data.shape[0]
    callbacks = [reduce_lr, early_stopping, cbheckpoint]
)

In [None]:
import matplotlib.pyplot as plt  
import seaborn as sns  

fig, axes = plt.subplots(1, 2, figsize = (12, 4))
sns.lineplot(x = range(len(history.history["loss"])),
             y = history.history["loss"], ax = axes[0]
             label = "Training Loss")
sns.lineplot(x = range(len(history.history["loss"])),
             y = history.history["loss"], ax = axes[0]
             label = "Validation Loss")
sns.lineplot(x = range(len(history.history["accuracy"])),
             y = history.history["accuracy"], ax = axes[1]
             label = "Training Accuracy")
sns.lineplot(x = range(len(history.history["accuracy"])),
             y = history.history["val_accuracy"], ax = axes[1]
             label = "Validation Accuracy")
axes[0].set_title("Loss"); axes[1].set_title("Accuracy")
sns.dispine()
plt.show()

In [None]:
val_pred = model.predict(val_generator,
                         steps = np.ceil(val_data.shape[0]/batch_size))
val_data.loc[:, "val_pred"] = np.argmax(val_pred, axis = 1)
labels = dict((v, k) for k , v in val_generator.class_indices.items())
val_data.loc[:, "val_pred"] = val_data.loc[:, "val_pred"].map(labels)

In [None]:
from sklearn.metrics import confusion_matrix  
from sklearn.metrics import ConfusionMatrixDisplay  

fig, ax = plt.subplots(figsize = (9, 6))
cm = confusion_matrix(val_data["label"], val_data["val_pred"])
disp = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = ["cat", "dog"])
disp.plot(cmap = plt.cm.Blues, ax = ax)

ax.set_title("Validation Set")
plt.show()

In [None]:
val_errors = val_data[(val_data.label) != (val_data.val_pred)].reset_index(drop = True)
val_errors

In [None]:
fig = plt.figure(1, figsize = (24, 20))
for i in range(81):
    if i == len(val_errors):
        break
    plt.subplot(9, 9, i+1)
    image = plt.imread("./dogs-vs-cats/train/" + val_errors.file[i])
    plt.imshow(image)
    plt.axis("off")
    plt.title(f"True Value: {val_errors['label'][i]} \nPrediction:
    {val_errors['val_pred'][i]}")
plt.tight_layout()
plt.show()

In [None]:
x