In [9]:
import numpy as np
import idx2numpy
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

First we need to import the data from the given files, i tried to directly import from the keras datasets but it gave an error about not able to import such large files. After importing, we need to convert it in 244 x 244 from 28 x 28. Also, the images we get are in 1 channel i.e. grayscale, so we convert them to RGB so that we can be implemented in ResNet. ResNet is trained for data with three channels, although we don't need three colours we still need to convert the image to RGB for the model. 

In [10]:
x_train = idx2numpy.convert_from_file('train-images-idx3-ubyte')
y_train = idx2numpy.convert_from_file('train-labels-idx1-ubyte')
x_test = idx2numpy.convert_from_file('t10k-images-idx3-ubyte')
y_test = idx2numpy.convert_from_file('t10k-labels-idx1-ubyte')

x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

x_train = np.expand_dims(x_train, -1)  # (28,28,1)
x_test = np.expand_dims(x_test, -1)

x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.1, random_state=42
)


Now we will split the training data into training set and validation set. After that, we use the Resnet model with some changes, we exclude the top layer of the model because we want to modify the top layer to fit our use case and add our own classifier head. We also set the trainable parameter to false so that the model is frozen and it freezes the weights to reuse pretrained features without modifying them.

In [11]:
def preprocess(img):
    img = tf.image.grayscale_to_rgb(tf.convert_to_tensor(img, dtype=tf.float32))
    img = tf.image.resize(img, [224, 224])
    return img

train_gen = ImageDataGenerator(preprocessing_function=preprocess)
val_gen = ImageDataGenerator(preprocessing_function=preprocess)
test_gen = ImageDataGenerator(preprocessing_function=preprocess)

train_generator = train_gen.flow(x_train, y_train, batch_size=32, shuffle=True)
val_generator = val_gen.flow(x_val, y_val, batch_size=32, shuffle=False)
test_generator = test_gen.flow(x_test, y_test, batch_size=32, shuffle=False)

Now after making the model and removing the head, we make our own head for the Fashion set making 10 possible classifying choices by making a Dense layer of 10 neurons. At the end we again use softmax which is a very standard funciton used to make all the outputs between 0 and 1 as well as make the sum of all outputs equal to 1. For the middle Dense layer, we have set it to 128 neurons for now, it can be changed to reach maximum accuracy, i will optimize it. Another important part is Relu, it is a simple function which make it so that the output of that neuron is 0 if the output was negative or 0 and it keeps it the same if it is positive. 

In [12]:
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

inputs = Input(shape=(224, 224, 3))
x = base_model(inputs, training=False)
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(10, activation='softmax')(x)
model = Model(inputs, outputs)

In [13]:
model.compile(optimizer=Adam(1e-3), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5
)

test_loss, test_acc = model.evaluate(test_generator)
print(f"Test accuracy: {test_acc:.4f}, Test loss: {test_loss:.4f}")

ValueError: could not broadcast input array from shape (224,224,3) into shape (28,28,1)