In [None]:
%matplotlib inline

import random
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import cv2

from keras.models import Sequential, Model
from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, Input
from keras.layers import GlobalAveragePooling2D, Layer, Reshape, Conv2DTranspose, UpSampling2D
from keras.applications import vgg19, xception, inception_v3, resnet50
from keras.regularizers import l2
from keras.optimizers import Adam
from sklearn.datasets import load_files
from keras.preprocessing import image
from tensorflow import keras
from glob import glob
from pathlib import Path
from keras.utils import np_utils
from PIL import ImageFile
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix, accuracy_score

In [None]:
# Function to load the dataset files
def dataset_loader(filepath, num_classes=133):
    """Index an image folder and return its file paths with one-hot labels.

    Args:
        filepath: Directory passed to ``sklearn.datasets.load_files``
            (one sub-folder per class).
        num_classes: Width of the one-hot label vectors; defaults to 133.

    Returns:
        Tuple ``(filenames, labels)``: a NumPy array of file paths and the
        matching targets one-hot encoded with ``np_utils.to_categorical``.
    """
    # Only the paths and targets are used below, so do not let load_files
    # read the raw bytes of every image into memory (its default behaviour).
    dataset = load_files(filepath, load_content=False)
    filenames = np.asarray(dataset['filenames'])
    targets = np.asarray(dataset['target'])
    return filenames, np_utils.to_categorical(targets, num_classes)

train_files, train_labels = dataset_loader("D:/College/Courses/Spring 21/Neural Networks/final/dog-breed-prediction/dogs/train")
val_files, val_labels = dataset_loader("D:/College/Courses/Spring 21/Neural Networks/final/dog-breed-prediction/dogs/valid")
test_files, test_labels = dataset_loader("D:/College/Courses/Spring 21/Neural Networks/final/dog-breed-prediction/dogs/test")

In [None]:
# Make a lookup dictionary of label : dog breed
# glob returns matches in arbitrary order, whereas load_files numbers the class
# folders in sorted order; sort so that index k maps to the k-th class folder.
dog_breed_labels = sorted(glob("dogs/train/*/"))
# label[15:-1] drops the first 15 characters (the 11-character "dogs/train/"
# prefix plus 4 more, presumably a numeric "NNN." folder prefix; TODO confirm)
# and the trailing path separator.
dog_breed_labels = [label[15:-1] for label in dog_breed_labels]
dog_breed_labels_lookup = dict(zip(range(133), dog_breed_labels))

In [None]:
# Detect if dog in picture using resnet50
ResNet50_dog = resnet50.ResNet50(weights = 'imagenet')
def is_dog(model, filepath):
    """Tell whether `model` classifies the image at `filepath` as a dog.

    The image is loaded at 224x224, run through the ResNet50 input
    preprocessing and classified by `model`. The image counts as a dog when
    the highest-scoring class index lies in 151..268 (inclusive), the range
    this notebook treats as dog categories.

    Args:
        model: Classifier exposing ``predict`` (e.g. ``ResNet50_dog``).
        filepath: Path of the image file to check.

    Returns:
        bool: True when the predicted class index is between 151 and 268.
    """
    input_image = np.expand_dims(image.img_to_array(image.load_img(filepath, target_size = (224,224))), axis=0)
    resnet50_image = resnet50.preprocess_input(input_image)
    pred_label = int(np.argmax(model.predict(resnet50_image)))
    return 151 <= pred_label <= 268

In [None]:
# Detect if human in picture
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
def is_human(filepath):
    """Return True when the face cascade finds at least one detection in the image.

    Args:
        filepath: Path of the image file, read with ``cv2.imread``.

    Returns:
        bool: Whether ``face_cascade.detectMultiScale`` returned any detection
        on the grayscale version of the image.
    """
    bgr_image = cv2.imread(filepath)
    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    detections = face_cascade.detectMultiScale(gray_image)
    return len(detections) > 0

In [None]:
# Let PIL load image files that are truncated
ImageFile.LOAD_TRUNCATED_IMAGES = True

def _load_image_array(filepaths):
    """Load every path as a 224x224 image and return them as one NumPy array."""
    return np.asarray([
        image.img_to_array(image.load_img(path, target_size = (224,224)))
        for path in filepaths
    ])

# Load the datasets
train_data = _load_image_array(train_files)
val_data = _load_image_array(val_files)
test_data = _load_image_array(test_files)

In [None]:
# Augment the dataset with rotated, shifted, zoomed images
# NOTE(review): Keras interprets a float shift range >= 1 as a number of pixels
# rather than a fraction of the image size, so 1.3 shifts images by at most
# ~1 pixel. Confirm whether a fraction such as 0.3 was intended.
image_gen = image.ImageDataGenerator(rotation_range = 25, width_shift_range = 1.3, height_shift_range = 1.3, zoom_range = 0.2)

# Generated images are written to save_to_dir while the iterator is consumed;
# create the folder up front so saving does not fail when it is missing.
os.makedirs("augmented/", exist_ok = True)
augmented_iter = image_gen.flow(train_data, train_labels, save_to_dir = "augmented/", save_format = "jpeg")

In [None]:
# Compile and fit our CNN model
# (filters, strides) of each 3x3 convolution; each one is followed by 2x2 max-pooling
conv_stages = [(16, (2,2)), (32, (1,1)), (64, (1,1)), (128, (1,1))]

cnn_layers = [Input(shape=(224,224,3))]
for n_filters, conv_strides in conv_stages:
    cnn_layers.append(Conv2D(n_filters, kernel_size = (3,3), strides = conv_strides, activation = 'relu'))
    cnn_layers.append(MaxPooling2D(pool_size = (2,2)))

# Classification head: dropout, flatten, one hidden dense layer, 133-way softmax
cnn_layers.extend([
    Dropout(0.4),
    Flatten(),
    Dense(1000, activation = 'relu'),
    Dropout(0.4),
    Dense(133, activation = 'softmax'),
])

original_model = Sequential(cnn_layers)
original_model.summary()

original_model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'rmsprop',
    metrics = ['accuracy'],
)
original_model_hist = original_model.fit(
    train_data,
    train_labels,
    validation_data = (val_data, val_labels),
    epochs = 20,
)

In [None]:
# Plot training and validation accuracy
fig, ax = plt.subplots()
for history_key in ('accuracy', 'val_accuracy'):
    ax.plot(original_model_hist.history[history_key])
ax.set_title("Training Data Model Accuracy")
ax.set_ylabel("accuracy")
ax.set_xlabel("epochs")
ax.legend(["train", 'validation'])
# Fixed axis ranges: accuracy 0-0.3 over 20 epochs
ax.set_ylim([0.0, 0.3])
ax.set_xlim([0, 20])
plt.show()
# plt.savefig("Training.jpg")


In [None]:
# Evaluate model
original_model.evaluate(test_data, test_labels)
pred_labels_original = original_model.predict(test_data)

# Arg-max class of every prediction, and its one-hot encoding, computed once for all metrics
pred_classes_original = tf.math.argmax(pred_labels_original, axis = 1)
pred_onehot_original = tf.one_hot(pred_classes_original, depth = 133)

for metric_title, metric_fn in (
    ("f1 score:", f1_score),
    ("precision score:", precision_score),
    ("recall score:", recall_score),
):
    print(metric_title, metric_fn(test_labels, pred_onehot_original, average = 'weighted'))

original_model_CM = confusion_matrix(test_labels.argmax(axis=1), pred_classes_original, labels = list(range(133)))

In [None]:
# Get the top 5 accuracy
# Indices of the five highest-scoring classes for each test sample
top_5 = np.argsort(pred_labels_original, axis=1)[:, -5:]
test_labels_max = tf.math.argmax(test_labels, axis=1)
# A sample is a hit when its true class index appears among its five candidates
top_5_hits = (top_5 == test_labels_max.numpy()[:, np.newaxis]).any(axis=1)
top_5_accuracy = np.mean(top_5_hits.astype(int))
print("Top 5 accuracy:", top_5_accuracy)

In [None]:
# Get top 5 misclassified pairs
# One [true breed, predicted breed, count] entry per off-diagonal confusion-matrix cell
negative_preds = [
    [dog_breed_labels_lookup[true_idx], dog_breed_labels_lookup[pred_idx], original_model_CM[true_idx, pred_idx]]
    for true_idx in range(133)
    for pred_idx in range(133)
    if true_idx != pred_idx
]

# Most frequent confusions first
negative_preds = sorted(negative_preds, key=lambda pair: pair[2], reverse=True)
negative_preds[:5]

In [None]:
# Train model on augmented data as well
# (continues fitting the same original_model, now fed by the augmentation iterator)
original_model_aug_hist = original_model.fit(
    x = augmented_iter,
    validation_data = (val_data, val_labels),
    epochs = 20,
)

In [None]:
# Plot training and validation accuracy of the run on augmented data
fig, ax = plt.subplots()
for history_key in ('accuracy', 'val_accuracy'):
    ax.plot(original_model_aug_hist.history[history_key])
ax.set_title("Augmented Data Model Accuracy")
ax.set_ylabel("accuracy")
ax.set_xlabel("epochs")
ax.legend(["train", 'validation'])
# Fixed axis ranges: accuracy 0-0.4 over 20 epochs
ax.set_ylim([0.0, 0.4])
ax.set_xlim([0, 20])
plt.show()
# plt.savefig("Augmented.jpg")


In [None]:
# Random baseline
# Use a dedicated, seeded generator so the baseline numbers are reproducible
baseline_rng = np.random.default_rng(0)
random_classes = baseline_rng.integers(0, 133, len(test_labels))
pred_labels_random = tf.one_hot(random_classes, depth = 133)

# pred_labels_random is already one-hot, so it is scored directly
# (no arg-max / one-hot round trip needed)
print("f1 score:", f1_score(test_labels, pred_labels_random, average = 'weighted'))
print("precision score:", precision_score(test_labels, pred_labels_random, average = 'weighted'))
print("recall score:", recall_score(test_labels, pred_labels_random, average = 'weighted'))
print("accuracy score:", accuracy_score(test_labels, pred_labels_random))


In [None]:
# Get the top 5 accuracy for the random baseline
# Draw five *distinct* random classes per sample: arg-sorting a row of random
# scores yields a random permutation of the 133 classes, so its first five
# entries never repeat (independent integer draws could repeat a class and
# waste one of the five guesses). The generator is seeded for reproducibility.
random_scores = np.random.default_rng(0).random((len(test_labels), 133))
top_5 = np.argsort(random_scores, axis=1)[:, :5]
test_labels_max = tf.math.argmax(test_labels, axis=1)
# A sample is a hit when its true class index is among its five random guesses
top_5_hits = (top_5 == test_labels_max.numpy()[:, np.newaxis]).any(axis=1)
top_5_accuracy = np.mean(top_5_hits)
print("Top 5 accuracy:", top_5_accuracy)

In [None]:
# Evaluate model fitted on augmented data
original_model.evaluate(test_data, test_labels)
pred_labels_aug = original_model.predict(test_data)

# Arg-max class of every prediction, and its one-hot encoding, computed once for all metrics
pred_classes_aug = tf.math.argmax(pred_labels_aug, axis = 1)
pred_onehot_aug = tf.one_hot(pred_classes_aug, depth = 133)

for metric_title, metric_fn in (
    ("f1 score:", f1_score),
    ("precision score:", precision_score),
    ("recall score:", recall_score),
):
    print(metric_title, metric_fn(test_labels, pred_onehot_aug, average = 'weighted'))

aug_model_CM = confusion_matrix(test_labels.argmax(axis=1), pred_classes_aug, labels = list(range(133)))

In [None]:
# Get the top 5 accuracy for augmented model
# Indices of the five highest-scoring classes for each test sample
top_5 = np.argsort(pred_labels_aug, axis=1)[:, -5:]
test_labels_max = tf.math.argmax(test_labels, axis=1)
# A sample is a hit when its true class index appears among its five candidates
top_5_hits = (top_5 == test_labels_max.numpy()[:, np.newaxis]).any(axis=1)
top_5_accuracy = np.mean(top_5_hits.astype(int))
print("Top 5 accuracy:", top_5_accuracy)

In [None]:
# Get top 5 misclassified pairs of breeds (model fitted on augmented data)
# Read the confusion matrix of the augmented run (aug_model_CM); using
# original_model_CM here would just repeat the pre-augmentation results.
negative_preds = [
    [dog_breed_labels_lookup[true_idx], dog_breed_labels_lookup[pred_idx], aug_model_CM[true_idx, pred_idx]]
    for true_idx in range(133)
    for pred_idx in range(133)
    if true_idx != pred_idx
]

# Most frequent confusions first
negative_preds.sort(key=lambda pair: pair[2], reverse=True)
negative_preds[:5]

In [None]:
# VGG19-based model (experiment currently commented out)
# vgg19_features = vgg19.VGG19(include_top = False, input_shape = (224,224,3))
# global1 = GlobalAveragePooling2D()(vgg19_features.output)
# dropout1 = Dropout(0.3)(global1)
# output1 = Dense(133, activation = 'softmax')(dropout1)
# vgg19_model = Model(inputs = vgg19_features.inputs, outputs = output1)
# vgg19_model.compile(loss = 'categorical_crossentropy' , optimizer = 'rmsprop', metrics = ['accuracy'])
# vgg19_model_hist = vgg19_model.fit(train_data, train_labels, validation_data = (val_data, val_labels), epochs = 10)

In [None]:
# xception model
# xception_features = xception.Xception(include_top = False, input_shape = (224,224,3))
# global2 = GlobalAveragePooling2D()(xception_features.output)
# dropout2 = Dropout(0.3)(global2)
# output2 = Dense(133, activation = 'softmax')(dropout2)
# xception_model = Model(inputs = xception_features.inputs, outputs = output2)
# xception_model.compile(loss = 'categorical_crossentropy' , optimizer = 'rmsprop', metrics = ['accuracy'])
# xception_model_hist = xception_model.fit(train_data, train_labels, validation_data = (val_data, val_labels), epochs = 5)


In [None]:
# Function that takes in a test image, and prints the corresponding model output
def classify_image(img_path):
    """Print the breed predicted for the image at `img_path`.

    If `is_dog` accepts the image, the predicted breed is printed; otherwise,
    if `is_human` finds a face, the breed the human resembles is printed;
    otherwise an error message is printed. Nothing is returned.

    Args:
        img_path: Path of the image file to classify.
    """
    image_input = np.expand_dims(image.img_to_array(image.load_img(img_path, target_size = (224,224))), axis=0)

    def predicted_breed():
        # Called lazily so original_model only runs once a dog or human was detected
        class_index = original_model.predict(image_input).argmax(axis=1)[0]
        return dog_breed_labels_lookup[class_index]

    if is_dog(ResNet50_dog, img_path):
        print("The breed is:", predicted_breed())
        return
    if is_human(img_path):
        print("The human looks like:", predicted_breed())
        return
    print("Error, neither human nor dog found.")

In [None]:
# Add training data with lower image dimensions (64x64)
train_images_64 = [
    image.img_to_array(image.load_img(path, target_size = (64,64)))
    for path in train_files
]
# Normalize the values by dividing by 255
train_data_vae = np.asarray(train_images_64).astype('float32') / 255

# Add testing data in same fashion
test_images_64 = [
    image.img_to_array(image.load_img(path, target_size = (64,64)))
    for path in test_files
]
test_data_vae = np.asarray(test_images_64).astype('float32') / 255
    
# Encoder architecture
# Three conv + 2x2 max-pool stages (all 'same' padding) followed by a 1x1
# convolution that reduces the feature map to 32 channels.
input_img = Input(shape=(64, 64, 3))
x = Conv2D(48, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(96, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(192, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
encoded_layer = Conv2D(32, (1, 1), activation='relu', padding='same')(x)

# latent space dimensions
# (64x64 input halved by three poolings -> 8x8, with the 32 channels of encoded_layer)
latent_dim = (8,8,32)

# Decoder architecture
# Mirrors the encoder: three 2x upsampling stages bring 8x8 back to 64x64,
# and a final sigmoid convolution outputs 3 channels in [0, 1].
latent_input = Input(shape = latent_dim)
x = Conv2D(192, (1, 1), activation='relu', padding='same')(latent_input)
x = UpSampling2D((2, 2))(x)
x = Conv2D(192, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(96, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(48, (3, 3), activation='relu', padding='same')(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

# COMPILE
encoder = Model(input_img, encoded_layer)
decoder = Model(latent_input, decoded)
# Feed the encoder output (encoded_layer) through the decoder so the
# autoencoder maps an input image to its reconstruction.
autoencoder = Model(input_img, decoder(encoded_layer))

autoencoder.compile(optimizer='Adam', loss='binary_crossentropy')

In [None]:
# Fit autoencoder on training data
# (input and target are the same images: the model learns to reconstruct them)
autoencoder.fit(
    x = train_data_vae,
    y = train_data_vae,
    epochs = 5,
)

In [None]:
# Get decoded images
# Encode the test images to latent codes, then decode those codes back to images
test_latent_codes = encoder.predict(test_data_vae)
decoded_imgs = decoder.predict(test_latent_codes)

In [None]:
# Plot original and reconstructed images
n_columns = 10
# Row 0 shows the inputs, row 1 the autoencoder outputs for the same samples
image_rows = [("original", test_data_vae), ("reconstructed", decoded_imgs)]

plt.figure(figsize=(20, 4))
for column in range(n_columns):
    for row, (row_title, row_images) in enumerate(image_rows):
        ax = plt.subplot(2, n_columns, row * n_columns + column + 1)
        plt.imshow(row_images[column])
        plt.title(row_title)
        plt.gray()
        # Hide both axes so only the picture is shown
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

In [None]:
# Encode images to latent space
# (one latent code per test image, produced by the encoder half of the autoencoder)
encoded_imgs = encoder.predict(x = test_data_vae)

In [None]:
# Try to walk around in latent space: shift the first 10 latent codes by a constant offset
delta = 0.4
# Add delta out of place. encoded_imgs[:10] is a view, so an in-place `+=`
# would also modify encoded_imgs and accumulate each time this cell is re-run.
encoded_imgs_2 = encoded_imgs[:10] + delta

# Decode the shifted codes back to images
decoded_imgs_2 = decoder.predict(encoded_imgs_2)

In [None]:
# Plot new images
# Only one set of images is shown, so use a single-row grid; placing them in
# the second row of a 2x10 grid would leave the top half of the figure empty.
plt.figure(figsize=(20, 2))
for i, shifted_img in enumerate(decoded_imgs_2[:10]):
    # display reconstruction decoded from the shifted latent code
    ax = plt.subplot(1, 10, i + 1)
    plt.imshow(shifted_img)
    plt.title("reconstructed")
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)