# Duck or Cat

DOC - Duck or Cat is a binary classification model. It classifies pictures of ducks and cats.


[GitHub link](https://github.com/GregoryHue/DuckOrCat)

# GPU Installation check

In [None]:
import os

# Must be set before TensorFlow is imported to take effect; level '2' filters
# out TensorFlow's INFO and WARNING log messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

print(tf.__version__)

# Diagnostics showing whether TensorFlow can see and use a GPU.
print('1: ', tf.config.list_physical_devices('GPU'))
# Call the function: printing the bare attribute would only show the function object.
print('2: ', tf.test.is_built_with_cuda())
print('3: ', tf.test.gpu_device_name())
print('4: ', tf.config.get_visible_devices())

# Imports

In [None]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.utils import to_categorical, load_img
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import random
import os

# Checking directory

In [None]:
# IPython shell escape: list the contents of the current working directory.
!ls

# Setting global variables

In [None]:
# Directories holding the training images and the test images.
TRAIN_PATH = "dataset/train/"
TEST_PATH = "dataset/test/"
# Class names; also matched case-insensitively against file names to label them.
CATEGORIES = ['Duck', 'Cat']
EPOCHS = 10
IMAGE_WIDTH = 128
IMAGE_HEIGHT = 128
# Keras `target_size` arguments are ordered (height, width), matching the
# (height, width, channels) layout used for the model's `input_shape`.
IMAGE_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)
IMAGE_CHANNELS = 3
BATCH_SIZE = 15

# Model structure

In [None]:
# Binary image classifier: three Conv2D stages (32, 64 and 128 filters), each
# followed by batch normalisation, 2x2 max pooling and dropout, then a dense
# head ending in a single output unit.
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# A single output unit needs a sigmoid to yield a probability for
# binary_crossentropy. Softmax normalises across the units of the layer, so
# with only one unit it would output a constant 1.0 for every input.
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

#model.summary()

# Data exploration

In [None]:
# Build a (filename, category) table from the training directory.
# A file is assigned to a category when the category name appears anywhere in
# its file name, ignoring case. Rows are grouped by category in CATEGORIES order.
folder = os.listdir(TRAIN_PATH)

labelled_files = [
    (entry, label)
    for label in CATEGORIES
    for entry in folder
    if label.casefold() in entry.casefold()
]
filenames = [entry for entry, _ in labelled_files]
classes = [label for _, label in labelled_files]

df = pd.DataFrame({'filename': filenames, 'category': classes})

## Structure

In [None]:
# Preview the first rows of the filename/category table.
df.head()

In [None]:
# Preview the last rows of the filename/category table.
df.tail()

## Category distribution

In [None]:
# Bar chart of the number of training files per category (shows class balance).
df['category'].value_counts().plot.bar()

## Data sample

In [None]:
# Show one randomly chosen training image as a visual sanity check.
random_file = random.choice(os.listdir(TRAIN_PATH))
# Join the path once so it does not depend on TRAIN_PATH ending with a separator.
random_path = os.path.join(TRAIN_PATH, random_file)
print(random_path)
image = load_img(random_path)
plt.imshow(image)

## Callbacks

### Early stop

In [None]:
# Callback that stops training once the monitored metric has stopped improving.
# NOTE(review): patience=10 equals EPOCHS (10), so this callback presumably can
# never trigger within one run — confirm the intended patience.
earlystop = EarlyStopping(patience=10)

### Learning rate reduction

In [None]:
# Halve the learning rate (factor=0.5) when validation accuracy has not
# improved for 2 epochs, never going below 1e-5; verbose=1 logs each reduction.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)

### Checkpoints

In [None]:
# Weights-only checkpoint written at the end of every epoch; the epoch number
# is substituted into the file name (cp-0001.ckpt, cp-0002.ckpt, ...).
checkpoint_path = 'checkpoints/cp-{epoch:04d}.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)

checkpoints = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_freq="epoch",
    verbose=1,
)

In [None]:
# Callbacks passed to model.fit: early stopping, learning-rate reduction and
# per-epoch weight checkpoints.
callbacks = [earlystop, learning_rate_reduction, checkpoints]

# Train and validate dataset

# Splitting datasets

In [None]:
# Hold out 20% of the labelled files for validation (fixed seed for
# reproducibility) and renumber both parts from 0.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=42)
)

# The test table only lists file names; it carries no category column.
test_filenames = os.listdir(TEST_PATH)
test_df = pd.DataFrame({'filename': test_filenames})

# Row counts, used later to derive the number of batches per epoch.
TOTAL_TRAIN = len(train_df)
TOTAL_VALIDATE = len(validate_df)
TOTAL_TEST = len(test_df)

## Generators

In [None]:
# Options shared by all three generators: where file names live in the
# dataframe, RGB decoding, resize target and batch size.
flow_options = dict(
    x_col='filename',
    color_mode='rgb',
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Training images: pixel values scaled by 1/255 plus random augmentation
# (rotation, shear, zoom, horizontal flip, width/height shifts).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    TRAIN_PATH,
    y_col='category',
    class_mode='binary',
    **flow_options
)

# Validation images: rescaling only, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    TRAIN_PATH,
    y_col='category',
    class_mode='binary',
    **flow_options
)

# Test images: no labels, and a fixed order so that predictions can be matched
# back to the rows of test_df.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df,
    TEST_PATH,
    y_col=None,
    class_mode=None,
    shuffle=False,
    **flow_options
)

## Model fitting

In [None]:


# Number of full batches per epoch; integer division leaves out any final
# partial batch.
train_steps = TOTAL_TRAIN // BATCH_SIZE
validation_steps = TOTAL_VALIDATE // BATCH_SIZE

# Train the model, validating after every epoch; `history` records the
# per-epoch loss and metric values.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    steps_per_epoch=train_steps,
    validation_steps=validation_steps,
    callbacks=callbacks,
)

In [None]:
# Persist the trained model to 'my_model.h5' in the working directory.
model.save('my_model.h5')

In [None]:
# Plot training vs. validation loss (top) and accuracy (bottom) per epoch.
# The x axis is derived from the recorded history rather than EPOCHS, so the
# ticks stay aligned with the data even if training ended early.
epochs_run = np.arange(1, len(history.history['loss']) + 1)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 6))
ax1.plot(epochs_run, history.history['loss'], color='b', label="Training loss")
ax1.plot(epochs_run, history.history['val_loss'], color='r', label="validation loss")
ax1.set_xticks(epochs_run)
ax1.set_yticks(np.arange(0, 1, 0.1))
# Each axes needs its own legend; plt.legend() would only label the last one.
ax1.legend(loc='best', shadow=True)

ax2.plot(epochs_run, history.history['accuracy'], color='b', label="Training accuracy")
ax2.plot(epochs_run, history.history['val_accuracy'], color='r', label="Validation accuracy")
ax2.set_xticks(epochs_run)

legend = ax2.legend(loc='best', shadow=True)
plt.tight_layout()
plt.show()

In [None]:
# Predict every test image. np.ceil returns a float, so convert it to the
# integer batch count that `steps` expects; rounding up includes the final
# partial batch.
predict = model.predict(test_generator, steps=int(np.ceil(TOTAL_TEST / BATCH_SIZE)))

In [None]:
# The model ends in a single output unit, so `predict` has one column and an
# argmax over the last axis would always be 0. Threshold the score at 0.5 to get
# the class index instead, and flatten to one value per test row.
test_df['category'] = (predict >= 0.5).astype(int).ravel()
print(train_generator.class_indices)
# Invert Keras' name -> index mapping to turn class indices back into names.
label_map = {index: name for name, index in train_generator.class_indices.items()}
test_df['category'] = test_df['category'].replace(label_map)
test_df['category'].value_counts().plot.bar()

In [None]:
# Show the first 18 test images in a 6x3 grid, each captioned with its file
# name and predicted category.
sample_test = test_df.head(18)
plt.figure(figsize=(6, 12))
# Subplot positions are 1-based; count them explicitly instead of relying on
# the DataFrame's index labels being 0..17.
for position, (_, row) in enumerate(sample_test.iterrows(), start=1):
    filename = row['filename']
    category = row['category']
    img = load_img(os.path.join(TEST_PATH, filename), target_size=IMAGE_SIZE)
    plt.subplot(6, 3, position)
    plt.imshow(img)
    plt.xlabel('{}({})'.format(filename, category))
plt.tight_layout()
plt.show()

In [None]:
# Evaluate on the held-out validation split (the test set has no labels).
# The generator already yields batches of BATCH_SIZE, so `batch_size` must not
# be passed to evaluate() as well.
results = model.evaluate(validation_generator)

# results[0] is the loss and results[1] the accuracy metric set in compile().
print("    Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

In [None]:

# Confusion matrix and classification report for the validation split.
#
# Predictions are compared position-by-position with the generator's labels,
# so images must be served in a fixed order. `validation_generator` is created
# without shuffle=False (flow_from_dataframe shuffles by default), so a
# dedicated unshuffled generator over the same dataframe is used here.
eval_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    TRAIN_PATH,
    x_col='filename',
    y_col='category',
    color_mode='rgb',
    target_size=IMAGE_SIZE,
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=False
)

# Threshold the single-unit output at 0.5 and flatten to a 1-D vector of class
# indices.
predictions = (model.predict(eval_generator) >= 0.5).astype(int).ravel()
true_labels = eval_generator.labels

# Order the class names by the index Keras assigned to them so that axis ticks
# and report rows line up with the integer labels; CATEGORIES may be listed in
# a different order.
class_indices = eval_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)
label_ids = list(range(len(class_names)))

cm = confusion_matrix(true_labels, predictions, labels=label_ids)
clr = classification_report(true_labels, predictions, labels=label_ids, target_names=class_names)

plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt='g', vmin=0, cmap='Blues', cbar=False)
# Heatmap cells are centred at i + 0.5.
tick_positions = [label_id + 0.5 for label_id in label_ids]
plt.xticks(ticks=tick_positions, labels=class_names)
plt.yticks(ticks=tick_positions, labels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

print("Classification Report:\n----------------------\n", clr)