# Importing Packages

In [41]:
# Import necessary libraries and packages
import os
import tfcoreml
import coremltools
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split

# Reading Dataset

In [27]:
# Define the path to the dataset.
# The SHOE_DATASET_PATH environment variable, when set, overrides the default so
# the notebook can be run on a machine other than the original author's.
dataset_path = os.environ.get('SHOE_DATASET_PATH', 'C:/Users/Blake/Downloads/Dataset')

In [28]:
# Create lists to store image file paths and corresponding labels
image_filenames = []
labels = []

# Collect image file paths and labels from the dataset directory.
# Each sub-directory of dataset_path is one class; its name is used as the label.
# Entries are sorted because os.listdir returns names in arbitrary order, and the
# seeded train/validation/test split below is only reproducible if the input
# order is stable.
for class_name in sorted(os.listdir(dataset_path)):
    class_dir = os.path.join(dataset_path, class_name)
    # Skip stray top-level files (e.g. Thumbs.db, .DS_Store): they are not
    # classes and calling os.listdir on them would raise.
    if not os.path.isdir(class_dir):
        continue
    for filename in sorted(os.listdir(class_dir)):
        file_path = os.path.join(class_dir, filename)
        # Only regular files are candidate images; ignore nested directories.
        if not os.path.isfile(file_path):
            continue
        image_filenames.append(file_path)
        labels.append(class_name)

# Partitioning Data

In [29]:
# Split the data into training, validation and test sets.
# Both splits are stratified on the labels so that every subset keeps the class
# proportions of the full dataset; random_state makes the split repeatable.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    image_filenames,
    labels,
    test_size = 0.2, # 20% of the dataset is held out for validation and testing
    stratify = labels,
    random_state = 42
)

# The hold-out set gets its own names so X_test / y_test only ever refer to the
# final test partition.
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout,
    y_holdout,
    test_size = 0.5, # the held-out 20% is divided evenly between validation and testing
    stratify = y_holdout,
    random_state = 42
)

# Preprocessing Data

In [30]:
# Create an image data generator for data augmentation during training
train_datagen = ImageDataGenerator(
    rescale = 1.0 / 255.0,
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    horizontal_flip = True,
)

# Validation and test images are only rescaled, never augmented: random
# rotations/shifts/flips would make the reported metrics noisy and not
# comparable between epochs or with the test score.
val_datagen = ImageDataGenerator(rescale = 1.0 / 255.0)
test_datagen = ImageDataGenerator(rescale = 1.0 / 255.0)

# Create a training data generator
train_generator = train_datagen.flow_from_dataframe(
    pd.DataFrame({'image_path': X_train, 'label': y_train}),
    x_col = 'image_path',
    y_col = 'label',
    target_size = (224, 224),
    batch_size = 32,
    class_mode = 'categorical', # Classification task
    shuffle = True,
)

# Create a validation data generator (no augmentation, fixed order)
validation_generator = val_datagen.flow_from_dataframe(
    pd.DataFrame({'image_path': X_val, 'label': y_val}),
    x_col = 'image_path',
    y_col = 'label',
    target_size = (224, 224),
    batch_size = 32,
    class_mode = 'categorical',
    shuffle = False,
)

# Create a test data generator (no augmentation, fixed order)
test_generator = test_datagen.flow_from_dataframe(
    pd.DataFrame({'image_path': X_test, 'label': y_test}),
    x_col = 'image_path',
    y_col = 'label',
    target_size = (224, 224),
    batch_size = 32,
    class_mode = 'categorical',
    shuffle = False,
)

Found 3322 validated image filenames belonging to 2 classes.
Found 415 validated image filenames belonging to 2 classes.
Found 416 validated image filenames belonging to 2 classes.


# Setting Up Model

In [31]:
# Instantiate ResNet50 with ImageNet weights and without its original top
# (classification) layers, sized for 224x224 RGB inputs
base_model = ResNet50(
    include_top = False,
    weights = 'imagenet',
    input_shape = (224, 224, 3),
)

In [32]:
# Build the custom classification head on top of the base model.
# The output width is taken from the training generator's class mapping so it
# always matches the one-hot labels the generators emit (counting directory
# entries would also count any stray file in the dataset folder).
num_classes = len(train_generator.class_indices)

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation = 'relu')(x)
predictions = Dense(num_classes, activation = 'softmax')(x)

# Create the final model with the base model and custom head
model = Model(inputs = base_model.input, outputs = predictions)

In [33]:
# Configure training: Adam optimiser, categorical cross-entropy loss and
# accuracy as the reported metric
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

# Training Model

In [34]:
# Fit the model on the training generator for 10 epochs, scoring it against the
# validation generator at the end of each epoch
model.fit(
    train_generator,
    epochs = 10,
    validation_data = validation_generator,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x163880a8390>

# Evaluating Model

In [35]:
# Score the trained model on the held-out test generator; the result holds the
# loss followed by the compiled metric (accuracy)
eval_result = model.evaluate(test_generator)
test_loss, test_accuracy = eval_result[0], eval_result[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Test loss: 0.021004341542720795
Test accuracy: 0.9975961446762085


# Exporting Model
The model is exported in the `.keras` format so that its configuration is saved for later use, and as a `.mlmodel` so that it can be implemented within the app.

In [40]:
# Persist the trained model to disk so it can be reloaded later
keras_model_path = 'ResNet50.keras'
model.save(keras_model_path)

In [55]:
# Convert the trained model to Core ML for app implementation.
# Keras' model.save() cannot write Core ML files, so the model is converted with
# coremltools and the converted model is what gets saved.
coreml_model = coremltools.convert(
    model,
    source = 'tensorflow',
    # The single-file .mlmodel container requires the neural-network backend
    # (ML programs must be saved as .mlpackage instead).
    convert_to = 'neuralnetwork',
    # Accept an image directly and apply the same 1/255 rescaling that the
    # training generators used, so the app does not have to preprocess pixels.
    inputs = [coremltools.ImageType(shape = (1, 224, 224, 3), scale = 1.0 / 255.0)],
)
coreml_model.save('ResNet50.mlmodel')

# Live Testing
Since we'll probably need to demo the model after StuVac, we can use the section below to load the saved model, supply a photo of one of the shoes, preprocess the image and have the model predict which shoe it is (provided, of course, that the model has been trained on that shoe).

In [56]:
# Reload the saved Keras model from disk so the live test does not depend on
# the in-memory training session
saved_model_path = 'ResNet50.keras'
test_model = tf.keras.models.load_model(saved_model_path)

In [112]:
# Define the path to the shoe image
shoe_path = 'C:/Users/Blake/Downloads/Live Test/Converse 1.jpg'

# Preprocess the shoe image to ensure it's consistent with what the model was trained on:
# resize to 224x224, add a leading batch axis and rescale pixel values by 1/255
shoe_image = image.load_img(shoe_path, target_size = (224, 224))
shoe_image = image.img_to_array(shoe_image)
shoe_image = np.expand_dims(shoe_image, axis = 0)
shoe_image = shoe_image / 255.0

# Grab the shoe models that the ML model was trained on.
# The names are sorted because the Keras generators assign class indices in
# sorted label order, whereas os.listdir returns entries in arbitrary order;
# without sorting, index i of the prediction could map to the wrong shoe name.
# Only directories are kept, since each class is a sub-directory of the dataset.
shoe_labels = sorted(
    entry
    for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)

In [113]:
# Run the model on the preprocessed image and pick the highest-scoring class
prediction = test_model.predict(shoe_image)
predicted_index = np.argmax(prediction)
predicted_shoe = shoe_labels[predicted_index]

# Report the name of the predicted shoe
print('Predicted shoe:', predicted_shoe)

Predicted shoe: Converse Distrito 2.0 Canvas Low Sneaker
