In [29]:
import os
import zipfile
import glob
import pandas as pd
from PIL import Image
import random
import tensorflow as tf
from tensorflow import keras
from keras import layers
import numpy as np
from keras.applications import VGG16, MobileNet, ResNet50
from keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization, Activation
from keras.models import Model
from keras.optimizers import Adam
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

# Dataset loading

In [3]:
# Set up Kaggle API Key
os.environ['KAGGLE_USERNAME'] = "XXXXXX"
os.environ['KAGGLE_KEY'] = "XXXXXX"

# Download the dataset
!kaggle datasets download -d samuelcortinhas/muffin-vs-chihuahua-image-classification

# Unzip the dataset
with zipfile.ZipFile("muffin-vs-chihuahua-image-classification.zip", "r") as zip_ref:
    zip_ref.extractall(".")

muffin-vs-chihuahua-image-classification.zip: Skipping, found more recently modified local copy (use --force to force download)


# Data preprocessing

### Reshape the images and convert to RGB

In [4]:
def load_images(image_dir, img_size):
    images = []
    for image_path in os.listdir(image_dir):
        img = Image.open(os.path.join(image_dir, image_path)).convert('RGB')  # Convert to RGB
        img_resized = img.resize((img_size, img_size))
        images.append(img_resized)
    return images


train_dir = 'train'
test_dir = 'test'
img_size = 224

# Load images
train_muffin_images = load_images(os.path.join(train_dir, 'muffin'), img_size)
train_chihuahua_images = load_images(os.path.join(train_dir, 'chihuahua'), img_size)
test_muffin_images = load_images(os.path.join(test_dir, 'muffin'), img_size)
test_chihuahua_images = load_images(os.path.join(test_dir, 'chihuahua'), img_size)


# Create DataFrames for train and test sets
train_data = {'image': train_muffin_images + train_chihuahua_images, 'label': ['muffin'] * len(train_muffin_images) + ['chihuahua'] * len(train_chihuahua_images)}
train_df = pd.DataFrame(train_data)

test_data = {'image': test_muffin_images + test_chihuahua_images, 'label': ['muffin'] * len(test_muffin_images) + ['chihuahua'] * len(test_chihuahua_images)}
test_df = pd.DataFrame(test_data)

# Print the DataFrames
print(train_df)
print(test_df)

                                                  image      label
0     <PIL.Image.Image image mode=RGB size=224x224 a...     muffin
1     <PIL.Image.Image image mode=RGB size=224x224 a...     muffin
2     <PIL.Image.Image image mode=RGB size=224x224 a...     muffin
3     <PIL.Image.Image image mode=RGB size=224x224 a...     muffin
4     <PIL.Image.Image image mode=RGB size=224x224 a...     muffin
...                                                 ...        ...
4728  <PIL.Image.Image image mode=RGB size=224x224 a...  chihuahua
4729  <PIL.Image.Image image mode=RGB size=224x224 a...  chihuahua
4730  <PIL.Image.Image image mode=RGB size=224x224 a...  chihuahua
4731  <PIL.Image.Image image mode=RGB size=224x224 a...  chihuahua
4732  <PIL.Image.Image image mode=RGB size=224x224 a...  chihuahua

[4733 rows x 2 columns]
                                                  image      label
0     <PIL.Image.Image image mode=RGB size=224x224 a...     muffin
1     <PIL.Image.Image image mode=RGB

### Convert the images into np arrays

In [5]:
def images_to_arrays(images):
    return np.stack([np.array(img) for img in images])

# vertical concatenatenation of arrays along the first axis
X_train = np.vstack((images_to_arrays(train_muffin_images), images_to_arrays(train_chihuahua_images)))
X_test = np.vstack((images_to_arrays(test_muffin_images), images_to_arrays(test_chihuahua_images)))

y_train = np.array([0] * len(train_muffin_images) + [1] * len(train_chihuahua_images))
y_test = np.array([0] * len(test_muffin_images) + [1] * len(test_chihuahua_images))

# Normalize the pixel values
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Network architectures

## Training the model with 5-Fold CV

During the model.fit() both loss and accuracy for the training and for the validation set are computed, in order to monitoring the possible cases of overfitting. If the loss value decreases while the val_loss value starts to increase, it may indicate that the model is overfitting to the training set and is not generalizing well to new data.

### General model creation and training

In [6]:
def create_model(base_model):
    for layer in base_model.layers: #transfer learning
        layer.trainable = False
    x = base_model.output
    x = GlobalAveragePooling2D()(x) #reduces the spatial dimensions of the output
    x = Dense(1024)(x) 
    x = BatchNormalization()(x) #tries to make the process faster
    x = Activation('relu')(x) #introduces non-linearity
    predictions = Dense(1, activation='sigmoid')(x) #produces the binary classification output
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

In [7]:
def train_and_evaluate_model(create_model, learning_rate, epochs, batch_size) : #these hyperparameters will be changed for experimenting
    # KFold cross-validation
    num_folds = 5
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    fold_scores = []
    fold_accuracies = []

    for train_index, val_index in kfold.split(X_train):
        X_train_fold, X_val = X_train[train_index], X_train[val_index]
        y_train_fold, y_val = y_train[train_index], y_train[val_index]

        # Create and compile the model
        model = create_model
        model.compile(optimizer=Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])

        # Train the model on the current fold
        history = model.fit(X_train_fold, y_train_fold, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val))

        # Evaluate the model on the test set and store the performance metric (using zero-one loss as required)
        y_pred = model.predict(X_test)
        y_pred = np.round(y_pred)
        zero_one_loss = 1 - np.mean(y_pred.flatten() == y_test)
        fold_scores.append(zero_one_loss)
        fold_accuracies.append(np.mean(history.history['val_accuracy']))

    average_zero_one_loss = np.mean(fold_scores)
    average_accuracy = np.mean(fold_accuracies)
    print(f"Average zero-one loss across {num_folds} folds: {average_zero_one_loss}")
    print(f"Average accuracy across {num_folds} folds: {average_accuracy}")

    return average_zero_one_loss, average_accuracy

### VGG16

In [63]:
base_vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))

In [65]:
vgg_loss_1, vgg_accuracy_1 = train_and_evaluate_model(create_model(base_vgg_model), learning_rate=0.001, epochs=4, batch_size=8)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Average zero-one loss across 5 folds: 0.022466216216216205
Average accuracy across 5 folds: 0.9684738248586655


In [66]:
vgg_loss_2, vgg_accuracy_2 = train_and_evaluate_model(create_model(base_vgg_model), learning_rate=0.0001, epochs=8, batch_size=16)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Average zero-one loss across 5 folds: 0.0153716216216216
Average accuracy across 5 folds: 0.9950369313359261


In [11]:
best_vgg_accuracy = max(vgg_accuracy_1, vgg_accuracy_2)
best_vgg_loss = min(vgg_loss_1, vgg_loss_2)

### MobileNet

In [9]:
base_mobilenet_model = MobileNet(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))

In [69]:
mobilenet_loss_1, mobilenet_accuracy_1 = train_and_evaluate_model(create_model(base_mobilenet_model), learning_rate=0.001, epochs=4, batch_size=8)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Average zero-one loss across 5 folds: 0.002195945945945965
Average accuracy across 5 folds: 0.9971488922834396


In [10]:
mobilenet_loss_2, mobilenet_accuracy_2 = train_and_evaluate_model(create_model(base_mobilenet_model), learning_rate=0.0001, epochs=8, batch_size=16)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Average zero-one loss across 5 folds: 0.002195945945945921
Average accuracy across 5 folds: 0.9983896523714065


In [12]:
best_mobilenet_accuracy = max(mobilenet_accuracy_1, mobilenet_accuracy_2)
best_mobilenet_loss = min(mobilenet_loss_1, mobilenet_loss_2)

### ResNet50

In [13]:
base_resnet_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))

In [14]:
resnet_loss_1, resnet_accuracy_1 = train_and_evaluate_model(create_model(base_resnet_model), learning_rate=0.001, epochs=4, batch_size=8)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Average zero-one loss across 5 folds: 0.266722972972973
Average accuracy across 5 folds: 0.7213862106204033


In [15]:
resnet_loss_2, resnet_accuracy_2 = train_and_evaluate_model(create_model(base_resnet_model), learning_rate=0.0001, epochs=8, batch_size=16)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Average zero-one loss across 5 folds: 0.1609797297297297
Average accuracy across 5 folds: 0.7815316691994667


In [16]:
best_resnet_accuracy = max(resnet_accuracy_1, resnet_accuracy_2)
best_resnet_loss = min(resnet_loss_1, resnet_loss_2)

## Results

In [35]:
print(f"Best VGG accuracy: {best_vgg_accuracy}\nBest MobileNet accuracy: {best_mobilenet_accuracy}\nBest ResNet accuracy: {best_resnet_accuracy}")

Best VGG accuracy: 0.9950369313359261
Best MobileNet accuracy: 0.9983896523714065
Best ResNet accuracy: 0.7815316691994667


In [36]:
print(f"Best VGG loss: {best_vgg_loss}\nBest MobileNet loss: {best_mobilenet_loss}\nBest ResNet loss: {best_resnet_loss}")

Best VGG loss: 0.0153716216216216
Best MobileNet loss: 0.002195945945945921
Best ResNet loss: 0.1609797297297297


In [27]:
best_accuracy = max(best_vgg_accuracy, best_mobilenet_accuracy, best_resnet_accuracy)
best_loss = min(best_vgg_loss, best_mobilenet_loss, best_resnet_loss)

# Define the architecture names
architecture_names = ['VGG', 'MobileNet', 'ResNet']

# Initialize variables to store the best accuracy and loss
best_accuracy = 0.0
best_loss = float('inf')
best_accuracy_architecture = ''
best_loss_architecture = ''

# Iterate over the architectures
for i, architecture in enumerate([best_vgg_accuracy, best_mobilenet_accuracy, best_resnet_accuracy]):
    # Check if the current architecture has the best accuracy
    if architecture > best_accuracy:
        best_accuracy = architecture
        best_accuracy_architecture = architecture_names[i]
        
for i, architecture_loss in enumerate([best_vgg_loss, best_mobilenet_loss, best_resnet_loss]):
    # Check if the current architecture has the best loss
    if architecture_loss < best_loss:
        best_loss = architecture_loss
        best_loss_architecture = architecture_names[i]

# Print the architecture with the best accuracy and best loss
print(f"Best accuracy: {best_accuracy} from {best_accuracy_architecture}")
print(f"Best loss: {best_loss} from {best_loss_architecture}")

Best accuracy: 0.9983896523714065 from MobileNet
Best loss: 0.002195945945945921 from MobileNet
