Useful links:
- https://keras.io/guides/transfer_learning/

In [1]:
import os

# Backend/device selection has to happen BEFORE keras (and therefore
# TensorFlow) is imported: Keras reads KERAS_BACKEND at import time, so
# setting it afterwards has no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # hide CUDA devices (CPU-only run)
os.environ["KERAS_BACKEND"] = "tensorflow"

import numpy as np
import matplotlib.pyplot as plt
import cv2
import keras
import pandas as pd

seed = 42 # for testability purposes
keras.utils.set_random_seed(seed)
# Kept explicit so NumPy's global generator is seeded regardless of what
# the Keras helper does internally.
np.random.seed(seed)

  if not hasattr(np, "object"):


In [2]:
def size_to_change(img, size):
    """Return the ``dsize`` for ``cv2.resize`` that scales the shorter side to ``size``.

    The aspect ratio of ``img`` is preserved (the longer side is floored to
    an integer), so both sides of the resized image are at least ``size``.

    Args:
        img: array whose first two axes are (rows, columns).
        size: target length, in pixels, of the shorter side.

    Returns:
        A ``(width, height)`` tuple, i.e. the order ``cv2.resize`` expects
        for its ``dsize`` argument (NOT the ``(rows, cols)`` order of
        ``img.shape``).
    """
    height, width = img.shape[:2]

    if height <= width:
        # Height is the shorter side: pin it to `size`, scale the width.
        return (int(np.floor(size / height * width)), size)

    # Width is the shorter side: pin it to `size`, scale the height.
    return (size, int(np.floor(size / width * height)))

def get_random_crop(img, cropX=224, cropY=224):
    """Return a randomly positioned crop of ``img``.

    The offsets are drawn from NumPy's global random generator
    (``np.random``), so results follow ``np.random.seed``.

    Args:
        img: array whose first two axes are (rows, columns).
        cropX: crop extent along axis 0 (rows).
        cropY: crop extent along axis 1 (columns).

    Returns:
        The slice ``img[x: x + cropX, y: y + cropY]`` (a view of ``img``,
        not a copy).

    Raises:
        ValueError: if ``img`` is smaller than the requested crop.
    """
    max_x = img.shape[0] - cropX
    max_y = img.shape[1] - cropY

    if max_x < 0 or max_y < 0:
        raise ValueError(
            f"Crop of size ({cropX}, {cropY}) does not fit in an image of "
            f"shape {tuple(img.shape[:2])}"
        )

    # randint's upper bound is exclusive: +1 makes the last valid offset
    # reachable and lets an image exactly the size of the crop through.
    x = np.random.randint(0, max_x + 1)
    y = np.random.randint(0, max_y + 1)

    return img[x: x + cropX, y: y + cropY]

def load_data(url_directory, url_file, size=256, crop_dim=224, random_crop=False, central_crop=False, return_original=False):
    """Load the images and labels listed in a comma-separated index file.

    Every non-blank line of ``url_directory + url_file`` is split on commas:
    field 0 is the image path relative to ``url_directory`` and field 2 is
    the integer label. Each image is read with OpenCV, resized to the target
    size given by ``size_to_change(img, size)`` and cropped to
    ``crop_dim`` x ``crop_dim``.

    Args:
        url_directory: directory prefix, concatenated as-is with the file
            names (so it must end with a path separator).
        url_file: name of the index file inside ``url_directory``.
        size: value forwarded to ``size_to_change``; also the side of the
            top-left square kept when ``return_original`` is set.
        crop_dim: side of the square crop.
        random_crop: take a random crop (has priority over ``central_crop``).
        central_crop: take a crop centred on the image.
        return_original: additionally return the resized images cut to
            their top-left ``size`` x ``size`` region.

    Returns:
        ``(X, y)`` or ``(X, y, X_original)``: stacked crops, the label array
        and, if requested, the stacked resized images.

    Raises:
        OSError: if an image listed in the index cannot be read.
    """
    X = []
    y = []
    X_original = []

    with open(url_directory + url_file) as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            line_split = line.split(",")
            img_path = url_directory + line_split[0]

            # cv2.imread signals failure by returning None, not by raising.
            img = cv2.imread(img_path)
            if img is None:
                raise OSError(f"Could not read image: {img_path}")

            img_resized = cv2.resize(img, size_to_change(img, size))
            lbl = int(line_split[2].strip())

            if random_crop:
                # NOTE(review): is a random crop appropriate for val/test too?
                crop = get_random_crop(img_resized, crop_dim, crop_dim)
            elif central_crop:
                img_center = (img_resized.shape[0] // 2, img_resized.shape[1] // 2)
                crop_dim_half = crop_dim // 2
                # Start at centre - half and span crop_dim so that odd
                # crop sizes still produce exactly crop_dim pixels.
                crop = img_resized[
                    img_center[0] - crop_dim_half : img_center[0] - crop_dim_half + crop_dim,
                    img_center[1] - crop_dim_half : img_center[1] - crop_dim_half + crop_dim
                    ]
            else:
                # Default: top-left corner crop.
                crop = img_resized[:crop_dim, :crop_dim]

            X.append(crop)
            y.append(lbl)

            if return_original:
                img_resized_original = img_resized[:size, :size]
                X_original.append(img_resized_original)

    X = np.stack(X, axis=0)
    y = np.array(y)

    if return_original:
        return X, y, np.stack(X_original, axis=0)
    else:
        return X, y

In [3]:
def color_jittering(img, brightness_range=(-60, 60), saturation_range=(-60, 60)):
    """Randomly shift the saturation and brightness of an image.

    The image is converted BGR -> HSV, a random offset is added to the S and
    V channels (each clipped to [0, 255]) and the result is converted back
    to BGR. The hue channel is left untouched.

    Args:
        img: image in OpenCV BGR layout (assumed uint8 - TODO confirm).
        brightness_range: ``(low, high)`` for the V-channel offset; ``high``
            is exclusive.
        saturation_range: ``(low, high)`` for the S-channel offset; ``high``
            is exclusive.

    Returns:
        A new uint8 BGR image; ``img`` is not modified.

    Note:
        A fresh, unseeded ``np.random.default_rng()`` is used, so the result
        does not follow the global NumPy seed.
    """
    rng = np.random.default_rng()

    # Work in a signed, wider dtype: adding the offset directly to uint8
    # channels can wrap around (e.g. 250 + 30 -> 24) before np.clip runs,
    # depending on NumPy's promotion rules for the scalar.
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int32)

    brightness = int(rng.integers(brightness_range[0], brightness_range[1]))
    saturation = int(rng.integers(saturation_range[0], saturation_range[1]))

    img_hsv[:, :, 1] = np.clip(img_hsv[:, :, 1] + saturation, 0, 255)
    img_hsv[:, :, 2] = np.clip(img_hsv[:, :, 2] + brightness, 0, 255)

    # All channels are within [0, 255] here, so the narrowing cast is lossless.
    return cv2.cvtColor(img_hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)

def contrast_stretching(img, linear_contrast=True, gamma_correction=False, gamma_correction_parameter=1.0):
    """Enhance contrast by percentile stretching and/or gamma correction.

    Args:
        img: image array with values on a 0-255 scale.
        linear_contrast: if True, linearly map the 5th..95th percentile
            range (computed over the whole array) onto 0..255, clipping
            values outside it.
        gamma_correction: if True, apply ``255 * (v / 255) ** gamma``.
        gamma_correction_parameter: the exponent ``gamma``.

    Returns:
        A new array; ``img`` is not modified. The result is uint8 whenever
        one of the two transforms was actually applied.
    """
    img_copy = img.copy()

    if linear_contrast:
        p_min = np.percentile(img_copy, 5)
        p_max = np.percentile(img_copy, 95)

        # A (nearly) flat image has p_max == p_min; stretching would divide
        # by zero and fill the result with NaN-derived garbage, so skip it.
        if p_max > p_min:
            img_copy = np.clip((255/(p_max - p_min)) * (img_copy - p_min), 0, 255).astype(np.uint8)

    if gamma_correction:
        img_copy = np.clip(255 * np.power(img_copy/255, gamma_correction_parameter), 0, 255).astype(np.uint8)

    return img_copy

def cutout(img, size=64, color=50):
    """Return a copy of ``img`` with a random ``size`` x ``size`` patch painted over.

    Args:
        img: array whose first two axes are (rows, columns).
        size: side of the square patch, in pixels.
        color: value written to every element of the patch.

    Returns:
        A new array; ``img`` is not modified.

    Raises:
        ValueError: if ``size`` exceeds either of the first two dimensions
            (raised by the random generator for an empty range).

    Note:
        A fresh, unseeded ``np.random.default_rng()`` is used, so the patch
        position does not follow the global NumPy seed.
    """
    rng = np.random.default_rng()
    img_copy = img.copy()

    # endpoint=True makes the upper bound inclusive, so the patch can touch
    # the far border and size == image dimension is valid.
    x = rng.integers(0, img.shape[0] - size, endpoint=True)
    y = rng.integers(0, img.shape[1] - size, endpoint=True)

    img_copy[x : x + size, y : y + size] = color
    return img_copy

def zoom(img, zoom_center=None, zoom_pixel=50):
    """Zoom into ``img`` by stretching a window around a centre to full size.

    A window reaching ``half_side - zoom_pixel`` pixels on each side of the
    centre is warped onto the whole frame, so the output has the same size
    as the input.

    Args:
        img: array whose first two axes are (rows, columns).
        zoom_center: optional ``(x, y)`` centre in OpenCV point order
            (column, row); defaults to the image centre.
        zoom_pixel: how many pixels the window is shrunk on each side;
            presumably meant to be smaller than half the shorter side.

    Returns:
        The warped image as a new array; ``img`` is not modified.
    """
    height, width = img.shape[:2]

    # Half-extent of the source window along each axis.
    half_w = width // 2 - zoom_pixel
    half_h = height // 2 - zoom_pixel

    if zoom_center is None:
        cx = width // 2
        cy = height // 2
    else:
        cx = zoom_center[0]
        cy = zoom_center[1]

    # OpenCV points are (x, y) = (column, row); corners are listed
    # top-left, top-right, bottom-right, bottom-left in both sets.
    pts1 = np.float32([(cx - half_w, cy - half_h), (cx + half_w, cy - half_h), (cx + half_w, cy + half_h), (cx - half_w, cy + half_h)])
    pts2 = np.float32([(0, 0), (width, 0), (width, height), (0, height)])
    M = cv2.getPerspectiveTransform(pts1, pts2)

    # dsize is (width, height), unlike the (rows, cols) order of img.shape.
    dst = cv2.warpPerspective(img, M, (width, height))

    return dst

In [5]:
url_directory = "../GroceryStoreDataset/dataset/"

# Training split: random crops, plus the resized (uncropped to crop_dim)
# images that the augmentation step draws its own crops from.
X_train, y_train, X_train_original = load_data(url_directory, "train.txt", random_crop=True, return_original=True)

# Validation and test splits: deterministic central crops.
X_val, y_val = load_data(url_directory, "val.txt", central_crop=True)
X_test, y_test = load_data(url_directory, "test.txt", central_crop=True)

In [6]:
# Build the coarse-class lookup {class id -> class name} from classes.csv.
df_classes = pd.read_csv(url_directory + "classes.csv")

coarse_columns = ['Coarse Class Name (str)', 'Coarse Class ID (int)']
# One (name, id) row per distinct coarse class.
df_coarse = df_classes[coarse_columns].drop_duplicates().values

labels_coarse = {}
for coarse_name, coarse_id in df_coarse:
    labels_coarse[coarse_id] = coarse_name

n_classes_coarse = len(labels_coarse)

In [7]:
# Build the augmented training set. For every source image the samples are
# appended in this order:
#   base crop, zoomed base, flipped base, cutout of the flipped base,
#   then for each of the n_crop extra crops:
#   contrast-stretched + colour-jittered crop, its flip, cutout of that flip.
X_train_augmented = []
y_train_augmented = []
n_crop = 1
rng = np.random.default_rng()

for img, lbl in zip(X_train_original, y_train):
    img_base = get_random_crop(img)
    img_base_zoom = zoom(img_base)

    # Coin toss between a horizontal (1) and a vertical (0) flip.
    flip_code = 1 if rng.integers(2) == 0 else 0
    img_base_flip = cv2.flip(img_base, flip_code)
    img_base_cutout = cutout(img_base_flip)

    variants = [img_base, img_base_zoom, img_base_flip, img_base_cutout]

    for _ in range(n_crop):
        img_new = color_jittering(contrast_stretching(get_random_crop(img)))

        flip_code = 1 if rng.integers(2) == 0 else 0
        img_new_flip = cv2.flip(img_new, flip_code)
        img_new_cutout = cutout(img_new_flip)

        variants += [img_new, img_new_flip, img_new_cutout]

    # Every variant inherits the label of its source image.
    X_train_augmented.extend(variants)
    y_train_augmented.extend([lbl] * len(variants))


X_train_augmented = np.stack(X_train_augmented, axis=0)
y_train_augmented = np.array(y_train_augmented)
print(X_train_augmented.shape, y_train_augmented.shape)

(18480, 224, 224, 3) (18480,)


#### MODEL BUILDING

In [8]:
import keras_hub
import keras
from keras import layers

# Pre-trained ResNet-18 backbone, frozen so that only the new head trains.
base_model = keras_hub.models.ResNetBackbone.from_preset("resnet_18_imagenet")
base_model.trainable = False

# Classification model: rescale pixels to [0, 1] -> frozen backbone ->
# global average pooling -> softmax over the coarse classes.
inputs = keras.Input(shape=X_train_augmented.shape[1:])
rescaled = layers.Rescaling(scale=1./255)(inputs)

# training=False keeps the backbone in inference mode inside this model.
features = base_model(rescaled, training=False)
pooled = layers.GlobalAveragePooling2D()(features)
outputs = layers.Dense(units=n_classes_coarse, activation='softmax')(pooled)

model = keras.Model(inputs, outputs)
model.summary()

Downloading to C:\Users\Manuel\.cache\kagglehub\models\keras\resnetv1\keras\resnet_18_imagenet\3\config.json...


100%|█████████████████████████████████████████████████████████████████████████████████| 836/836 [00:00<00:00, 71.1kB/s]


Downloading to C:\Users\Manuel\.cache\kagglehub\models\keras\resnetv1\keras\resnet_18_imagenet\3\model.weights.h5...


100%|█████████████████████████████████████████████████████████████████████████████| 42.8M/42.8M [00:07<00:00, 6.26MB/s]


#### MODEL TRAINING

In [9]:
batch_size = 32  # original note: 128
epochs = 15

# First phase: train only the new classification head (backbone frozen).
# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy'],
)

model.fit(
    x=X_train_augmented,
    y=y_train_augmented,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
)

Epoch 1/15
578/578 ━━━━━━━━━━━━━━━━━━━━ 231s 396ms/step - accuracy: 0.5185 - loss: 1.8707 - val_accuracy: 0.5338 - val_loss: 1.4456
Epoch 2/15
578/578 ━━━━━━━━━━━━━━━━━━━━ 266s 403ms/step - accuracy: 0.7936 - loss: 0.9311 - val_accuracy: 0.6318 - val_loss: 1.1626
Epoch 3/15
578/578 ━━━━━━━━━━━━━━━━━━━━ 235s 406ms/step - accuracy: 0.8688 - loss: 0.6446 - val_accuracy: 0.6520 - val_loss: 1.0509
Epoch 4/15
578/578 ━━━━━━━━━━━━━━━━━━━━ 263s 407ms/step - accuracy: 0.9021 - loss: 0.4955 - val_accuracy: 0.6926 - val_loss: 0.9925
Epoch 5/15
578/578 ━━━━━━━━━━━━━━━━━━━━ 233s 403ms/step - accuracy: 0.9229 - loss: 0.4020 - val_accuracy: 0.7027 - val_loss: 0.9576
Epoch 6/15
578/578 ━━━━━━━━━━━━━━━━━━━━ 231s 399ms/step - accuracy: 0.9365 - loss: 0.3372 - val_accuracy: 0.7162 - val_loss: 0.9353
Epoc

<keras.src.callbacks.history.History at 0x1edd41bf6b0>

#### FINE TUNING

In [10]:
# Unfreeze the backbone for fine-tuning. It was frozen for the first
# training phase and is invoked with training=False inside the model.
# NOTE(review): per the Keras transfer-learning guide the model should be
# recompiled after changing `trainable`; the following cell does that.
base_model.trainable = True
# Print the summary again to inspect the parameter counts after unfreezing.
model.summary()

In [None]:
epochs_fine_tuning = 7

# Second phase: recompile with a low learning rate and continue training
# the whole model on the same augmented data.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(1e-5),  # Low learning rate
    metrics=['accuracy'],
)

model.fit(
    x=X_train_augmented,
    y=y_train_augmented,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs_fine_tuning,
)

Epoch 1/7
106/578 ━━━━━━━━━━━━━━━━━━━━ 10:10 1s/step - accuracy: 0.9807 - loss: 0.1240

#### MODEL EVALUATION