# Importing libraries

In [None]:
import tensorflow as tf
import seaborn as sns
import numpy as np

from PIL import Image
import glob
from collections import defaultdict
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers

In [None]:
# Use a compact (4, 3) default figure size so Seaborn plots fit in screenshots
sns.set_theme(rc={"figure.figsize": (4, 3)})

# Preprocessing

In [4]:
# Final image size handed to PIL's resize, given as (width, height)
IMG_SIZE = (94, 125)


def pixels_from_path(file_path):
    """Load an image file and return its resized RGB pixel values.

    Args:
        file_path: Path of the image file to open.

    Returns:
        A NumPy array of shape (IMG_SIZE[1], IMG_SIZE[0], 3), i.e.
        (height, width, channels).
    """
    # The context manager releases the file handle once the pixels are read.
    with Image.open(file_path) as im:
        resized = im.resize(IMG_SIZE)
        # Force three channels so RGBA, palette and greyscale files all yield
        # arrays of one common shape that can be stacked together.
        return np.array(resized.convert("RGB"))

In [5]:
# Glob patterns matching every file in each class folder.
# If the folders are not in the working directory, put the parent path
# in front of "{animal}" in the pattern below.
cat_filepath, dog_filepath, tiger_filepath, lion_filepath = (
    f"{animal}/*" for animal in ("cat", "dog", "tiger", "lion")
)

In [6]:
# Tally how often each resized array shape occurs among the cat images
shape_counts = defaultdict(int)
for index, cat_path in enumerate(glob.glob(cat_filepath), start=1):
    # Progress marker on every 500th image; the printed value is zero-based
    if index % 500 == 0:
        print(index - 1)
    shape_key = str(pixels_from_path(cat_path).shape)
    shape_counts[shape_key] += 1

499
999
1499
1999
2499
2999
3499
3999
4499
4999
5499
5999
6499
6999
7499
7999
8499
8999
9499
9999
10499
10999
11499
11999
12499


In [7]:
# (shape, count) pairs ordered from the most to the least frequent shape
shape_items = sorted(shape_counts.items(), key=lambda pair: pair[1])[::-1]

In [8]:
# Dataset-wide settings
img_size = IMG_SIZE     # images are resized to 0.25x the most common shape (374x500)
num_channels = 3        # RGB
validation_size = 0.1   # 10% of the data will be used for validation
sample_size = 25000     # all of the training data

In [9]:
# Number of files matched by the cat glob pattern
len(glob.glob(cat_filepath))

12500

In [10]:
# Shape of the resized pixel array for one sample cat image (index 5 of the glob results)
pixels_from_path(glob.glob(cat_filepath)[5]).shape

(125, 94, 3)

In [None]:
# Number of images taken per class for the training and validation splits.
# The tuning (tiger/lion) values differ because that dataset has a different size.
SAMPLE_SIZE, valid_size = 11250, 1250             # cat/dog: training, validation
SAMPLE_SIZE_TUNING, valid_size_tuning = 200, 20   # tiger/lion: training, validation

In [None]:
# Build one pixel array per class from the first files that glob returns
print("loading training cat images...")
cat_train_set = np.asarray(
    list(map(pixels_from_path, glob.glob(cat_filepath)[:SAMPLE_SIZE]))
)
print("loading training dog images...")
dog_train_set = np.asarray(
    list(map(pixels_from_path, glob.glob(dog_filepath)[:SAMPLE_SIZE]))
)
print("loading training tiger images...")
tiger_train_set_rgba = np.asarray(
    list(map(pixels_from_path, glob.glob(tiger_filepath)[:SAMPLE_SIZE_TUNING]))
)
# Keep only the first three channels (RGB)
tiger_train_set = tiger_train_set_rgba[..., :3]
print("loading training lion images...")
lion_train_set_rgba = np.asarray(
    list(map(pixels_from_path, glob.glob(lion_filepath)[:SAMPLE_SIZE_TUNING]))
)
# Keep only the first three channels (RGB)
lion_train_set = lion_train_set_rgba[..., :3]

In [None]:
# Build one validation pixel array per class from the last files that glob returns.
# NOTE(review): a class folder holding fewer files than its training size plus
# its validation size would make these slices overlap the training slices - confirm.
print("loading validation cat images...")
cat_valid_set = np.asarray([pixels_from_path(cat) for cat in glob.glob(cat_filepath)[-valid_size:]])
print("loading validation dog images...")
dog_valid_set = np.asarray([pixels_from_path(dog) for dog in glob.glob(dog_filepath)[-valid_size:]])
# Progress messages now say "validation"; they previously announced "training"
print("loading validation tiger images...")
tiger_valid_set_rgba = np.asarray([pixels_from_path(tiger) for tiger in glob.glob(tiger_filepath)[-valid_size_tuning:]])
tiger_valid_set = tiger_valid_set_rgba[..., :3]     # rgb only
print("loading validation lion images...")
lion_valid_set_rgba = np.asarray([pixels_from_path(lion) for lion in glob.glob(lion_filepath)[-valid_size_tuning:]])
lion_valid_set = lion_valid_set_rgba[..., :3]       # rgb only

In [None]:
# Cat images first, then dog images, in a single training array
x_train = np.concatenate([cat_train_set, dog_train_set])
# The data are still ordered by class, so label by position: 1 = cat, 0 = dog.
# Sizing the labels from the arrays themselves keeps images and labels aligned
# even when a folder yields fewer than SAMPLE_SIZE files.
labels_train = np.concatenate([
    np.ones(len(cat_train_set), dtype=int),
    np.zeros(len(dog_train_set), dtype=int),
])

In [None]:
# Tiger images first, then lion images, in a single fine-tuning array
x_tune = np.concatenate([tiger_train_set, lion_train_set])
# The data are still ordered by class, so label by position: 1 = tiger, 0 = lion.
# Sizing the labels from the arrays themselves keeps images and labels aligned
# even when a folder yields fewer than SAMPLE_SIZE_TUNING files.
labels_tune = np.concatenate([
    np.ones(len(tiger_train_set), dtype=int),
    np.zeros(len(lion_train_set), dtype=int),
])

In [None]:
# Cat images first, then dog images, in a single validation array
x_valid = np.concatenate([cat_valid_set, dog_valid_set])
# The data are still ordered by class, so label by position: 1 = cat, 0 = dog.
# Sizing the labels from the arrays themselves keeps images and labels aligned
# even when a folder yields fewer than valid_size files.
labels_valid = np.concatenate([
    np.ones(len(cat_valid_set), dtype=int),
    np.zeros(len(dog_valid_set), dtype=int),
])

In [None]:
# Tiger images first, then lion images, in a single fine-tuning validation array
x_valid_tune = np.concatenate([tiger_valid_set, lion_valid_set])
# The data are still ordered by class, so label by position: 1 = tiger, 0 = lion.
# Sizing the labels from the arrays themselves keeps images and labels aligned
# even when a folder yields fewer than valid_size_tuning files.
labels_valid_tune = np.concatenate([
    np.ones(len(tiger_valid_set), dtype=int),
    np.zeros(len(lion_valid_set), dtype=int),
])

In [None]:
# Turn each label vector into a single-column array to match the model output
labels_train = np.reshape(labels_train, (-1, 1))
labels_valid = np.reshape(labels_valid, (-1, 1))

In [None]:
# Same single-column layout for the fine-tuning labels
labels_tune = np.reshape(labels_tune, (-1, 1))
labels_valid_tune = np.reshape(labels_valid_tune, (-1, 1))

In [None]:
# Display the dimensions of the stacked training images
x_train.shape

In [None]:
# Display the dimensions of the reshaped training labels
labels_train.shape

In [None]:
labels_train[:10]  # Checking the first ten labels to ensure they're not None

In [None]:
# A negative slice always selects the last ten labels, whatever SAMPLE_SIZE is
labels_train[-10:]  # Checking values to ensure they're not None

In [None]:
# Number of neurons in each fully connected hidden layer
fc_layer_size = 256

# CNN from A2

In [None]:
# Input: img_size is (width, height); the input shape is (height, width, 3)
conv_inputs = keras.Input(shape=(img_size[1], img_size[0], 3), name='ani_image')

# Two feature-extraction stages, each a 3x3 convolution followed by 2x2 max pooling
feature_stack = [
    layers.Conv2D(128, kernel_size=3, activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Conv2D(128, kernel_size=3, activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),
]
conv_layer = conv_inputs
for feature_layer in feature_stack:
    conv_layer = feature_layer(conv_layer)

# Turn the feature maps into a vector for the dense layers
conv_x = layers.Flatten(name='flattened_features')(conv_layer)

# Classification head: two ReLU layers, then one sigmoid unit
for hidden_name in ('first_layer', 'second_layer'):
    conv_x = layers.Dense(fc_layer_size, activation='relu', name=hidden_name)(conv_x)
conv_outputs = layers.Dense(1, activation='sigmoid', name='class')(conv_x)

catdog_model = keras.Model(inputs=conv_inputs, outputs=conv_outputs)

In [None]:
# Adam optimizer with a learning rate of 1e-6
customAdam = keras.optimizers.Adam(learning_rate=1e-6)
catdog_model.compile(
    optimizer=customAdam,
    loss="BinaryCrossentropy",  # loss function to minimize
    metrics=["BinaryCrossentropy", "MeanSquaredError", "accuracy"],  # metrics to monitor
)

In [None]:
print('# Fit model on training data')

# 30 shuffled epochs in batches of 64, validated on the cat/dog validation set
history = catdog_model.fit(
    x=x_train,
    y=labels_train,
    validation_data=(x_valid, labels_valid),
    epochs=30,
    batch_size=64,
    shuffle=True,
)

In [None]:
# Evaluating CNN model predictions on validation data

# Predictions have to be computed before anything can be derived from them
preds = catdog_model.predict(x_valid)
# Keep the first value of every prediction row to get a flat vector
preds = np.asarray([pred[0] for pred in preds])
labels_flat = np.asarray(labels_valid).flatten()

# Correlation matrix between the predictions and the true labels
np.corrcoef(preds, labels_flat)

In [None]:
# Saving the trained cat/dog model to 'untuned_model.keras'
catdog_model.save('untuned_model.keras')

# Fine-tuning model on big cats

In [None]:
# Loading the saved cat/dog model from 'untuned_model.keras' into a new variable
untuned_model = keras.models.load_model('untuned_model.keras')

In [None]:
# Freeze every convolutional layer of the loaded model
for layer in untuned_model.layers:
    if not isinstance(layer, layers.Conv2D):
        continue
    layer.trainable = False

In [None]:
# Remove the classification head by branching off the flattened features.
# The branch starts from untuned_model - the loaded copy whose Conv2D layers
# were frozen - so the new head is trained on top of the frozen layers.
conv_x = untuned_model.get_layer("flattened_features").output

# Add new dense layers for big cats
conv_x = layers.Dense(fc_layer_size, activation='relu', name='new_fc1')(conv_x)
conv_outputs = layers.Dense(1, activation='sigmoid', name='new_class')(conv_x)

# Create the new model, sharing the loaded model's input and convolutional layers
lion_tiger_model = keras.Model(inputs=untuned_model.input, outputs=conv_outputs)

In [None]:
# Learning rate for the first fine-tuning phase (1e-5)
customAdam = keras.optimizers.Adam(learning_rate=1e-5)
lion_tiger_model.compile(
    optimizer=customAdam,
    loss="BinaryCrossentropy",
    metrics=["accuracy"],
)

In [None]:
# First fine-tuning phase on the tiger/lion arrays built during preprocessing
# (x_tune / labels_tune for training, x_valid_tune / labels_valid_tune for validation)
history = lion_tiger_model.fit(x_tune,
                               labels_tune,
                               batch_size=64,
                               shuffle=True,
                               epochs=10,  # Start with fewer epochs
                               validation_data=(x_valid_tune, labels_valid_tune))

In [None]:
# Unfreeze the convolutional layers
for layer in lion_tiger_model.layers:
    if not isinstance(layer, layers.Conv2D):
        continue
    layer.trainable = True

In [None]:
# Recompile with a lower learning rate (1e-6) for the second fine-tuning phase
customAdam = keras.optimizers.Adam(learning_rate=1e-6)  # Even smaller learning rate
lion_tiger_model.compile(optimizer=customAdam,
                         loss="BinaryCrossentropy",
                         metrics=["accuracy"])

# Train on the tiger/lion arrays built during preprocessing
# (x_tune / labels_tune for training, x_valid_tune / labels_valid_tune for validation)
history_fine = lion_tiger_model.fit(x_tune,
                                    labels_tune,
                                    batch_size=64,
                                    shuffle=True,
                                    epochs=5,
                                    validation_data=(x_valid_tune, labels_valid_tune))

In [None]:
# Evaluating the fine-tuned model's predictions on the tiger/lion validation data
labels_flat = np.asarray(labels_valid_tune).flatten()
# lion_tiger_model is the fine-tuned model built and trained in this section
preds = lion_tiger_model.predict(x_valid_tune)
# Keep the first value of every prediction row to get a flat vector
preds = np.asarray([pred[0] for pred in preds])

# Correlation matrix between the predictions and the true labels
print(np.corrcoef(preds, labels_flat))

In [None]:
# The notebook builds no separate tiger/lion test split, so the model is scored
# on the held-out tuning validation images and the output is labelled accordingly.
test_loss, test_acc = lion_tiger_model.evaluate(x_valid_tune, labels_valid_tune)
print(f"Validation Accuracy: {test_acc}")