# Set-Up

In [None]:
# Import required Libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing import image
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping

In [None]:
# Mount Google Drive at /content/gdrive (Colab-only helper). Later cells read
# the dataset archive and a saved model from paths under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
!unzip "/content/gdrive/My Drive/Group 3244-2010-0014/Kaggle Data/petfinder-adoption-prediction.zip"

# Prepare Dataset into Training, Validation and Testing Data

In [None]:
# Read the main CSV file
pets_raw = pd.read_csv("/content/train/train.csv")

# Drop rows without any photo and renumber the remaining rows 0..n-1.
# Building a fresh frame here (instead of assigning columns on a filtered
# slice) keeps the column assignments below free of chained-assignment issues.
df = pets_raw[pets_raw["PhotoAmt"] != 0].reset_index(drop=True)

# Append "-1.jpg" so PetID holds an image file name
df["PetID"] = df["PetID"] + "-1.jpg"

# Collapse AdoptionSpeed to a binary string label: "0" when the speed is 4,
# "1" for every other value
df["AdoptionSpeed"] = df["AdoptionSpeed"].map(lambda speed: "1" if speed != 4 else "0")

# Split into the two pet types (Type 1 feeds the dog model, Type 2 the cat
# model) and shuffle each subset with a fixed seed.
# DataFrame.sample returns a NEW frame, so its result must be assigned;
# calling it without assignment leaves the rows in their original CSV order
# and the later positional train/test split is then not random.
df_1 = df[df["Type"] == 1].sample(frac=1, random_state=6453).reset_index(drop=True)
df_2 = df[df["Type"] == 2].sample(frac=1, random_state=6453).reset_index(drop=True)

In [None]:
# Root directory handed to flow_from_dataframe when resolving image file names
train_dir = "/content/train_images"

# Side length used for target_size, and the matching (height, width, channels)
# tuple given to the Xception base model as input_shape
IMG_SIZE = 256
IMG_SHAPE = (IMG_SIZE, IMG_SIZE) + (3,)

# Batch size used by every data iterator, and the epoch limit passed to fit()
batch_size, epochs = 128, 50

In [None]:
# Random augmentations applied to every image drawn from the training generator
augmentation_options = dict(
    zoom_range=0.3,
    rotation_range=50,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Generator for the training/validation subsets: pixel values are scaled by
# 1/255 and 20% of the rows it is given are reserved for subset="validation".
# NOTE(review): the Keras docs pair the Xception ImageNet weights with
# xception.preprocess_input (inputs scaled to [-1, 1]); confirm that the
# [0, 1] scaling used here is intentional.
train_image_generator = ImageDataGenerator(rescale=1./255,
                                           validation_split=0.2,
                                           **augmentation_options)

# Generator for the held-out test rows: rescaling only, no augmentation
test_image_generator = ImageDataGenerator(rescale=1./255)

In [None]:
# Number of leading rows of each pet-type frame used for training/validation;
# the remaining rows (the last tenth) are held out for testing.
df_1_train_amt, df_2_train_amt = (int((9/10) * len(frame)) for frame in (df_1, df_2))

print(df_1_train_amt, df_2_train_amt)

In [None]:
# Rows of the Type-1 frame used for fitting, and the held-out remainder
fit_rows_1 = df_1[:df_1_train_amt]
test_rows_1 = df_1[df_1_train_amt:]

# Settings common to the labelled (training and validation) iterators
labelled_flow_options_1 = dict(
    directory=train_dir,
    x_col='PetID',
    y_col="AdoptionSpeed",
    batch_size=batch_size,
    shuffle=True,
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode='binary',
)

# Training subset of the fit rows
train_data_gen_1 = train_image_generator.flow_from_dataframe(
    fit_rows_1, subset="training", **labelled_flow_options_1)

# Validation subset of the same fit rows.
# NOTE(review): this iterator is built from train_image_generator, so the
# validation images pass through the same random augmentations as training.
validation_data_gen_1 = train_image_generator.flow_from_dataframe(
    fit_rows_1, subset="validation", **labelled_flow_options_1)

# Held-out test rows: unshuffled and unlabelled (class_mode=None) so that the
# predictions come back in dataframe row order
test_data_gen_1 = test_image_generator.flow_from_dataframe(
    test_rows_1,
    directory=train_dir,
    x_col='PetID',
    y_col="AdoptionSpeed",
    batch_size=batch_size,
    shuffle=False,
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode=None)

In [None]:
# Rows of the Type-2 frame used for fitting, and the held-out remainder
fit_rows_2 = df_2[:df_2_train_amt]
test_rows_2 = df_2[df_2_train_amt:]

# Settings common to the labelled (training and validation) iterators
labelled_flow_options_2 = dict(
    directory=train_dir,
    x_col='PetID',
    y_col="AdoptionSpeed",
    batch_size=batch_size,
    shuffle=True,
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode='binary',
)

# Training subset of the fit rows
train_data_gen_2 = train_image_generator.flow_from_dataframe(
    fit_rows_2, subset="training", **labelled_flow_options_2)

# Validation subset of the same fit rows.
# NOTE(review): this iterator is built from train_image_generator, so the
# validation images pass through the same random augmentations as training.
validation_data_gen_2 = train_image_generator.flow_from_dataframe(
    fit_rows_2, subset="validation", **labelled_flow_options_2)

# Held-out test rows: unshuffled and unlabelled (class_mode=None) so that the
# predictions come back in dataframe row order
test_data_gen_2 = test_image_generator.flow_from_dataframe(
    test_rows_2,
    directory=train_dir,
    x_col='PetID',
    y_col="AdoptionSpeed",
    batch_size=batch_size,
    shuffle=False,
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode=None)

# Load Previously Trained Model

In [None]:
# Load a previously saved dog model (HDF5 file) from the mounted Drive folder.
# NOTE(review): model_dog is assigned again in the "Set-up Model" section
# further down, so when the notebook is run top to bottom this loaded model is
# discarded before use. No matching load exists for model_cat.
model_dog = load_model(r"/content/gdrive/My Drive/Group 3244-2010-0014/Code/model_dog.h5")

# Set-up Model

In [None]:
# Xception backbone with ImageNet weights and without its classification head
xception_options = {
    "input_shape": IMG_SHAPE,
    "include_top": False,
    "weights": 'imagenet',
}
base_model = tf.keras.applications.Xception(**xception_options)

# Freeze the backbone so that only layers stacked on top of it are trained
base_model.trainable = False

base_model.summary()

In [None]:
# Pooling layer that reduces the backbone's feature maps to one vector per image
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

# Single sigmoid unit producing the binary prediction
prediction_layer = Dense(units=1, activation='sigmoid', name='output')

In [None]:
# Dog classifier: frozen Xception backbone + global average pooling + sigmoid.
# The pooling and output layers are created here rather than reusing shared
# layer instances: a Keras layer object owns its weights, so placing one Dense
# instance in two Sequential models makes both models train the very same
# weights. base_model is frozen, so sharing it does not couple training.
model_dog = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1, activation='sigmoid', name='output'),
])

# Use the tf.keras optimizer to match the tf.keras model, and the
# `learning_rate` keyword (`lr` is a deprecated alias).
model_dog.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

model_dog.summary()

In [None]:
# Cat classifier: frozen Xception backbone + global average pooling + sigmoid.
# The pooling and output layers are created here rather than reusing shared
# layer instances: a Keras layer object owns its weights, so placing one Dense
# instance in two Sequential models makes both models train the very same
# weights. base_model is frozen, so sharing it does not couple training.
model_cat = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1, activation='sigmoid', name='output'),
])

# Use the tf.keras optimizer to match the tf.keras model, and the
# `learning_rate` keyword (`lr` is a deprecated alias).
model_cat.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

model_cat.summary()

# Train Model

In [None]:
# Stop when val_loss has not improved for 10 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights of the last epoch run,
# i.e. up to `patience` epochs past its best point.
# The tf.keras callback is used so it matches the tf.keras model.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                                      patience=10, restore_best_weights=True)

# Fit the dog model; each epoch runs the number of full batches available in
# the training and validation iterators.
history = model_dog.fit(train_data_gen_1,
                        steps_per_epoch=train_data_gen_1.samples // batch_size,
                        validation_data=validation_data_gen_1,
                        validation_steps=validation_data_gen_1.samples // batch_size,
                        epochs=epochs,
                        callbacks=[es])

In [None]:
# Per-epoch metrics recorded while fitting the first model
acc, val_acc = (history.history[key] for key in ('accuracy', 'val_accuracy'))
loss, val_loss = (history.history[key] for key in ('loss', 'val_loss'))

# One x position per epoch that actually ran (early stopping may cut it short)
epochs_range = range(len(loss))

# Accuracy on the left panel, loss on the right panel
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy for 1')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss for 1')

plt.show()

In [None]:
# Stop when val_loss has not improved for 10 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights of the last epoch run,
# i.e. up to `patience` epochs past its best point.
# The tf.keras callback is used so it matches the tf.keras model.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                                      patience=10, restore_best_weights=True)

# Fit the cat model; each epoch runs the number of full batches available in
# the training and validation iterators.
history2 = model_cat.fit(train_data_gen_2,
                         steps_per_epoch=train_data_gen_2.samples // batch_size,
                         validation_data=validation_data_gen_2,
                         validation_steps=validation_data_gen_2.samples // batch_size,
                         epochs=epochs,
                         callbacks=[es])

In [None]:
# Per-epoch metrics recorded while fitting the second model
acc, val_acc = (history2.history[key] for key in ('accuracy', 'val_accuracy'))
loss, val_loss = (history2.history[key] for key in ('loss', 'val_loss'))

# One x position per epoch that actually ran (early stopping may cut it short)
epochs_range = range(len(loss))

# Accuracy on the left panel, loss on the right panel
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy for 2')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss for 2')

plt.show()

# Save Model

In [None]:
# Saving is currently disabled: both calls below are commented out, so running
# this cell writes nothing. Uncomment them to persist the trained models as
# HDF5 files in the current working directory.
#model_dog.save('model_dog.h5')
#model_cat.save('model_cat.h5')

# Test Model

In [None]:
# Get predictions for Dog: one sigmoid probability per held-out image
dog_prob = model_dog.predict(test_data_gen_1, verbose=1)

# Threshold each probability at 0.5 to obtain a class index (0 or 1)
dog_pred = [1 if row[0] >= 0.5 else 0 for row in dog_prob]

# Invert the iterator's {label: index} mapping so class indices can be
# translated back into the original label strings
labels = {index: name for name, index in train_data_gen_1.class_indices.items()}
predictions_1 = [labels[k] for k in dog_pred]

In [None]:
# Pair each prediction with the true label of the corresponding held-out row
predicted_col_1 = "Category 1 Predictions"
actual_col_1 = "Correct Category 1 Predictions"

result_1_df = pd.DataFrame({predicted_col_1: predictions_1,
                            actual_col_1: df_1[df_1_train_amt:]["AdoptionSpeed"]})

# Both columns hold label strings; convert them to integers before comparing
for column in (predicted_col_1, actual_col_1):
    result_1_df[column] = result_1_df[column].astype(int)

# Flag the rows where the prediction matches the true label
result_1_df["Correct"] = result_1_df[predicted_col_1] == result_1_df[actual_col_1]

# Accuracy on the held-out rows, as a percentage
accuracy_1 = int(result_1_df["Correct"].sum()) / len(result_1_df) * 100
accuracy_1

In [None]:
# Write the per-row dog test results to a CSV file in the working directory
result_1_df.to_csv(path_or_buf="dog_test.csv")

In [None]:
# Get predictions for Cat: one sigmoid probability per held-out image
cat_prob = model_cat.predict(test_data_gen_2, verbose=1)

# Threshold each probability at 0.5 to obtain a class index (0 or 1)
cat_pred = [1 if row[0] >= 0.5 else 0 for row in cat_prob]

# Invert the iterator's {label: index} mapping so class indices can be
# translated back into the original label strings
labels = {index: name for name, index in train_data_gen_2.class_indices.items()}
predictions_2 = [labels[k] for k in cat_pred]

In [None]:
# Pair each prediction with the true label of the corresponding held-out row
predicted_col_2 = "Category 2 Predictions"
actual_col_2 = "Correct Category 2 Predictions"

result_2_df = pd.DataFrame({predicted_col_2: predictions_2,
                            actual_col_2: df_2[df_2_train_amt:]["AdoptionSpeed"]})

# Both columns hold label strings; convert them to integers before comparing
for column in (predicted_col_2, actual_col_2):
    result_2_df[column] = result_2_df[column].astype(int)

# Flag the rows where the prediction matches the true label
result_2_df["Correct"] = result_2_df[predicted_col_2] == result_2_df[actual_col_2]

# Accuracy on the held-out rows, as a percentage
accuracy_2 = int(result_2_df["Correct"].sum()) / len(result_2_df) * 100
accuracy_2

In [None]:
# Write the per-row cat test results to a CSV file in the working directory
result_2_df.to_csv(path_or_buf="cat_test.csv")