In [None]:
!cp -r /kaggle/input/mates-only-v3 /kaggle/working/mates-only-v3
!cp -r /kaggle/input/transfer-learning-base /kaggle/working/transfer-learning-base
!cp -r /kaggle/input/new-transfer-data /kaggle/working/new-transfer-data

In [None]:
%pip install keras==2.15.0

In [None]:
# Single seed used for the random number generators and shuffles in this notebook.
SHUFFLE_NUMBER = 42

In [None]:
import math
import os
import random
import re
import shutil
import sys
import time
from collections import Counter
from pathlib import Path
from typing import Any, Callable

import albumentations as A
import cv2
import imgaug
import keras
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tqdm
from keras import backend as K
from keras import layers, losses
from keras.applications import MobileNet
from keras.layers import (Activation, Conv2D, Dense, Dropout, GlobalAveragePooling2D, Flatten, Lambda,
                          MaxPooling2D)
from keras.models import Sequential, Model
from keras.utils import to_categorical
from PIL import Image
from rich import print
from sklearn.calibration import LabelEncoder
from sklearn.model_selection import train_test_split


In [None]:
def identity(x):
    """Return ``x`` unchanged.

    Used in this notebook as a silent stand-in for ``tqdm.tqdm`` and ``print``
    when verbose output is switched off.
    """
    return x

def swap(t):
    """Return the first two items of ``t`` as a tuple in reversed order.

    Handy for converting a ``(height, width)`` pair into the
    ``(width, height)`` order expected by ``PIL.Image.resize``.
    """
    first, second = t[0], t[1]
    return (second, first)

def count_fds(path):
    """Count every entry (files and directories alike) directly inside ``path``."""
    return sum(1 for _entry in Path(path).iterdir())

def count_dirs(path):
    """Count the immediate children of ``path`` that are directories."""
    return sum(1 for child in Path(path).iterdir() if child.is_dir())

In [None]:
# Seed every random number generator the notebook draws from, so that
# augmentation, shuffling and weight initialisation are repeatable from a
# fresh kernel.
random.seed(SHUFFLE_NUMBER)
imgaug.seed(SHUFFLE_NUMBER)
np.random.seed(SHUFFLE_NUMBER)
tf.random.set_seed(SHUFFLE_NUMBER)

In [None]:
# Default augmentation pipeline; augment_data falls back to it when no
# pipeline is passed explicitly. `p` is the probability with which each
# transform is applied (see the albumentations documentation for the exact
# semantics of the individual parameters).
default_transform = A.Compose(
    [
#         A.RandomCrop(width=200, height=200),
        # Trim a random amount from each border (limits of 0.1 per side).
        A.RandomCropFromBorders (crop_left=0.1, crop_right=0.1, crop_top=0.1, crop_bottom=0.1),
        A.HorizontalFlip(p=0.5),
        # A.RandomScale(scale_limit=(-0.5, 2.0), p=0.5),
        A.ShiftScaleRotate(p=0.5, shift_limit=0.1, scale_limit=0.2, rotate_limit=15),
        A.RandomBrightnessContrast(p=0.2),
        A.Blur(blur_limit=3, p=0.2),
        # p=1: this quality degradation is applied to every augmented image.
        A.Downscale(scale_min=0.70, scale_max=0.80, p=1)
    ]
)



def augment_data(
    img: np.ndarray,
    augmentation_count: int = 10,
    augmentation_pipeline=None,
    desired_shape: tuple[int, int] = (250, 250),
) -> list[np.ndarray]:
    """Produce ``augmentation_count`` augmented, resized variants of ``img``.

    Args:
        img: Source image array handed to the pipeline as ``image=img``.
        augmentation_count: Number of variants to generate.
        augmentation_pipeline: Callable returning a dict with an ``"image"``
            entry; ``default_transform`` is used when ``None``.
        desired_shape: Output size; its two entries are swapped before being
            passed to ``PIL.Image.resize``, i.e. it is given as
            ``(height, width)``.

    Returns:
        A list of ``augmentation_count`` image arrays.
    """
    pipeline = default_transform if augmentation_pipeline is None else augmentation_pipeline
    pil_size = swap(desired_shape)

    def _one_variant() -> np.ndarray:
        augmented = pipeline(image=img)["image"]
        return np.array(Image.fromarray(augmented).resize(pil_size))

    return [_one_variant() for _ in range(augmentation_count)]


In [None]:
class LazyData:
    """Lazily loaded contents of one file.

    The file is only read the first time the instance is called; the result
    is cached on the instance and returned by later calls.

    ``load_strategy`` selects how the file becomes data:

    * ``None`` -- the raw bytes of the file.
    * ``"image"`` -- the decoded image, resized to ``desired_shape`` if given.
    * ``"image_face"`` -- the decoded image cropped to a ``desired_shape``
      window centred on the first detected face (or on the image centre when
      no face is detected).
    * a callable -- called with the raw bytes; its return value is the data.

    Extra positional/keyword arguments are stored. The image strategies read
    ``desired_shape`` from the keyword arguments as ``(height, width)``.
    """

    # One Haar-cascade frontal-face detector shared by all instances.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    def __init__(
        self,
        filename: str,
        load_strategy: str | Callable[[str | bytes], Any] | None = None,
        *args,
        **kwargs,
    ):
        self._filename = filename
        self._data = None  # filled in by the first __call__
        self._load_strategy = load_strategy
        self._args = args
        self._kwargs = kwargs

    def __call__(self):
        """Return the loaded data, reading the file on first use."""
        if self._data is None:
            self._build_data()
        return self._data

    @staticmethod
    def _window_start(centre: int, size: int, limit: int) -> int:
        """Start of a ``size``-long window around ``centre`` kept inside ``[0, limit]``.

        Callers guarantee ``size <= limit``.
        """
        start = centre - size // 2
        return max(0, min(start, limit - size))

    def __load_strategy_image(self, buf) -> np.ndarray:
        """Decode the JPEG at ``buf`` and resize it to ``desired_shape`` if one was given."""
        img = mpimg.imread(buf, format="jpg")
        desired_shape = self._kwargs.get("desired_shape")
        if desired_shape is not None:
            # desired_shape is (height, width) everywhere else in this file,
            # whereas PIL expects (width, height). LANCZOS is the filter that
            # used to be called ANTIALIAS (the alias was removed in Pillow 10).
            pil_size = (desired_shape[1], desired_shape[0])
            img = np.array(Image.fromarray(img).resize(pil_size, Image.LANCZOS))
        return img

    def __load_strategy_image_face(self, buf) -> np.ndarray:
        """Decode the JPEG at ``buf`` and crop a face-centred ``desired_shape`` window.

        The image is returned untouched when no ``desired_shape`` was given or
        when the requested window is larger than the image in either
        dimension (callers then receive an image of a different shape).
        """
        img = mpimg.imread(buf, format="jpg")
        desired_shape = self._kwargs.get("desired_shape")
        if desired_shape is None:
            return img

        nh, nw = desired_shape[0], desired_shape[1]
        img_h, img_w = img.shape[0], img.shape[1]
        if nh > img_h or nw > img_w:
            return img

        faces = LazyData.face_cascade.detectMultiScale(
            img.copy(), scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )
        if len(faces) > 0:
            x, y, w, h = (int(v) for v in faces[0])
        else:
            # No detection: treat the whole image as the region of interest.
            x, y, w, h = 0, 0, img_w, img_h

        # Centre the window on the region and slide it back inside the image
        # if it would stick out over an edge.
        left = self._window_start(x + w // 2, nw, img_w)
        top = self._window_start(y + h // 2, nh, img_h)

        # The window is exactly (nh, nw), so no corrective resize is needed.
        # Copy so the full-size source image is not kept alive by the view.
        return np.array(img[top:top + nh, left:left + nw])

    def _build_data(self):
        """Load the file according to the configured strategy and cache the result."""
        if self._load_strategy == "image":
            self._data = self.__load_strategy_image(self._filename)
        elif self._load_strategy == "image_face":
            self._data = self.__load_strategy_image_face(self._filename)
        else:
            # Only the raw-bytes strategies need the file opened here; the
            # image strategies read it themselves from the filename.
            with open(self._filename, "rb") as f:
                buf = f.read()
            self._data = buf if self._load_strategy is None else self._load_strategy(buf)

In [None]:
def dataset(
    path: str,
    min_faces: int | None = 20,
    max_faces: int | None = None,
    hard_limit: bool = False,
    shuffle: bool = True,
    random_state: int | None = None,
    verbose: bool | None = True,
    desired_shape: tuple[int,int] = (250,250)
):
    """Index the ``<name>_<number>.jpg`` files under ``path`` as lazy samples.

    Args:
        path: Root directory; each sub-directory holds the images of one label.
        min_faces: Sub-directories with fewer entries are excluded
            (``None`` disables the check).
        max_faces: Sub-directories with more entries are either excluded
            (``hard_limit=True``) or capped at ``max_faces`` images
            (``hard_limit=False``). ``None`` disables the check.
        hard_limit: See ``max_faces``.
        shuffle: Shuffle the samples with NumPy's global RNG.
        random_state: If given, seeds NumPy's global RNG first.
        verbose: Show a ``tqdm`` progress indicator while walking the tree.
        desired_shape: Forwarded to every ``LazyData`` loader.

    Returns:
        An object array of shape ``(n, 2)`` whose rows are
        ``[LazyData, label]``; the label is the file-name part before the
        trailing ``_<number>.jpg``. The shape is ``(0, 2)`` when nothing
        matches, so column indexing by callers keeps working.
    """
    if random_state is not None:
        np.random.seed(random_state)

    # Sets give O(1) membership tests in the per-file loop below.
    excluded_dirs = set()
    capped_dirs = set()
    for direc in os.listdir(path):
        direc_path = os.path.join(path, direc)
        if not os.path.isdir(direc_path):
            continue
        entry_count = len(os.listdir(direc_path))
        if min_faces is not None and entry_count < min_faces:
            excluded_dirs.add(direc)
        if max_faces is not None and entry_count > max_faces:
            if hard_limit:
                excluded_dirs.add(direc)
            else:
                capped_dirs.add(direc)

    ds = []
    capped_counts = {}
    tracker = tqdm.tqdm if verbose else identity
    pattern = re.compile(r"(.*)_(?:\d+)\.jpg")

    for root, dirs, files in tracker(os.walk(path)):
        # os.walk yields entries in arbitrary order; sorting makes both the
        # max_faces cap and the seeded shuffle reproducible.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".jpg"):
                continue
            match = pattern.match(file)
            if not match:
                continue
            target = match.group(1)

            if target in excluded_dirs:
                continue

            if max_faces is not None and target in capped_dirs:
                capped_counts[target] = capped_counts.get(target, 0) + 1
                if capped_counts[target] > max_faces:
                    continue

            ds.append(
                [
                    LazyData(
                        os.path.abspath(os.path.join(root, file)),
                        load_strategy="image_face",
                        desired_shape=desired_shape
                    ),
                    target,
                ]
            )

    if shuffle:
        np.random.shuffle(ds)

    # Explicit dtype/shape so that an empty listing is still two-dimensional.
    return np.array(ds, dtype=object).reshape(-1, 2)


def fetch_lfw_people(
    path_to_dataset: str = "Dataset/Raw",
    min_faces: int | None = 20,
    max_faces: int | None = None,
    hard_limit: bool = False,
    shuffle: bool = True,
    random_state: int | None = None,
    verbose: bool = True,
    desired_shape: tuple[int,int] = (250, 250)
):
    """Index the dataset and return its loaders and labels separately.

    All arguments are forwarded unchanged to ``dataset``.

    Returns:
        ``(X, Y)``: the first and second column of the array returned by
        ``dataset`` (lazy loaders and their labels).
    """
    options = dict(
        min_faces=min_faces,
        max_faces=max_faces,
        hard_limit=hard_limit,
        shuffle=shuffle,
        random_state=random_state,
        verbose=verbose,
        desired_shape=desired_shape,
    )
    samples = dataset(path_to_dataset, **options)
    loaders, labels = samples[:, 0], samples[:, 1]
    return loaders, labels


In [None]:
def visualize_image(axes_array, images_array, labels_array, figure):
    """Draw one image per axis, each with a colour bar and its label as title.

    Args:
        axes_array: Array of axes; it is flattened and every axis is used.
        images_array: Images, at least as many as there are axes.
        labels_array: Titles, at least as many as there are axes.
        figure: Figure that owns the axes; used to attach the colour bars.
    """
    flat_axes = axes_array.flatten()
    axis_count = len(flat_axes)

    # There must be an image and a label for every axis.
    assert axis_count <= len(images_array)
    assert axis_count <= len(labels_array)

    for position in range(axis_count):
        axis = flat_axes[position]
        drawn = axis.imshow(images_array[position], cmap="gray")
        figure.colorbar(drawn, ax=axis)
        axis.set_title(labels_array[position])
        axis.axis("off")


In [None]:


# TODO: Documentation


def dump_test_files(x_test, y_test, path_prefix: str, verbose=True):
    """Write the test images as JPEGs, one folder per label, and zip the result.

    Any existing ``path_prefix`` directory and ``path_prefix + ".zip"`` file
    are removed first. Images are saved as
    ``<path_prefix>/<label>/<label>_<NNNN>.jpg``; the tree is then archived to
    ``<path_prefix>.zip`` and the uncompressed directory is deleted.

    Args:
        x_test: Image arrays accepted by ``PIL.Image.fromarray``.
        y_test: Labels, indexable in parallel with ``x_test``.
        path_prefix: Working directory path, also the archive name stem.
        verbose: Print a confirmation once the archive has been written.
    """
    archive_file = path_prefix + ".zip"

    # Start from a clean slate.
    if os.path.exists(path_prefix):
        shutil.rmtree(path_prefix)
    if os.path.exists(archive_file):
        os.remove(archive_file)
    os.makedirs(path_prefix)

    images_per_label = {}
    for index, image in enumerate(x_test):
        target = y_test[index]
        label_dir = os.path.join(path_prefix, str(target))
        if target not in images_per_label:
            images_per_label[target] = 0
            os.makedirs(label_dir)
        images_per_label[target] += 1

        # Running per-label counter gives each file a unique, ordered name.
        file_name = f"{target}_{images_per_label[target]:04}.jpg"
        Image.fromarray(image).save(os.path.join(label_dir, file_name))

    shutil.make_archive(path_prefix, "zip", path_prefix)
    if verbose:
        print("[bold green]Test files dumped successfully[/bold green]")

    shutil.rmtree(path_prefix)


def _augment_every_image(
    x_train, y_train, tracker, desired_shape, augmentation_count, augmentation_pipeline
):
    """Return ``[image, label]`` rows with ``augmentation_count`` augmented copies of each training image."""
    rows = []
    for i, x in tracker(enumerate(x_train)):
        augmented = augment_data(
            x,
            desired_shape=desired_shape,
            augmentation_count=augmentation_count,
            augmentation_pipeline=augmentation_pipeline,
        )
        rows.extend([image, y_train[i]] for image in augmented)
    return rows


def _augment_classes_up_to(
    x_train, y_train, tracker, per_class_target, desired_shape, augmentation_pipeline
):
    """Return ``[image, label]`` rows: augmented top-ups per class followed by all originals."""
    indices_by_label = {}
    for i, label in enumerate(y_train):
        indices_by_label.setdefault(label, []).append(i)

    rows = []
    for label, indices in indices_by_label.items():
        # range() of a non-positive number is empty, so classes that already
        # reach the target receive no extra images.
        for _ in range(per_class_target - len(indices)):
            source = np.random.choice(indices)
            augmented = augment_data(
                x_train[source],
                desired_shape=desired_shape,
                augmentation_count=1,
                augmentation_pipeline=augmentation_pipeline,
            )
            rows.append([augmented[0], label])

    for i, x in tracker(enumerate(x_train)):
        rows.append([x, y_train[i]])
    return rows


def _export_splits(export_path, x_train, y_train, x_test, y_test, experimental_export):
    """Write the splits as one compressed ``data.npz`` or as four pickled ``.npy`` dumps."""
    if experimental_export:
        np.savez_compressed(
            os.path.join(export_path, "data.npz"),
            x_train=x_train,
            y_train=y_train,
            x_test=x_test,
            y_test=y_test,
        )
        return
    splits = (("x_train", x_train), ("y_train", y_train), ("x_test", x_test), ("y_test", y_test))
    for name, split in splits:
        split.dump(os.path.join(export_path, name + ".npy"))


def export_dataset_objects(
    path_to_dataset: str = "Dataset/Raw",
    min_faces: int | None = 20,
    max_faces: int | None = None,
    hard_limit: bool = False,
    shuffle=True,
    random_state=None,
    test_size=0.2,
    verbose=True,
    augment=True,
    desired_shape: tuple[int, int] = (250, 250),
    augmentation_count: int = 10,
    augmentation_upto: int | None = None,
    augmentation_pipeline=None,
    experimental_export: bool = True,
    export: bool = True,
):
    """Load the image dataset, split it, optionally augment the training part and export it.

    Args:
        path_to_dataset: Dataset root, forwarded to ``fetch_lfw_people``.
            Exports are written to ``os.path.dirname(path_to_dataset)``.
        min_faces, max_faces, hard_limit: Forwarded to ``fetch_lfw_people``.
        shuffle: Forwarded to ``fetch_lfw_people``; also shuffles the
            augmented training rows.
        random_state: Seed forwarded to the loader and ``train_test_split``.
        test_size: Test fraction for the stratified ``train_test_split``.
        verbose: Emit progress bars and log messages.
        augment: Augment the training split (the test split is never augmented).
        desired_shape: ``(height, width)`` forwarded to loading and augmentation.
        augmentation_count: Variants per image when ``augmentation_upto`` is ``None``.
        augmentation_upto: ``None`` replaces every training image by
            ``augmentation_count`` augmented variants. ``0`` tops each class
            up to ``max_faces`` images and keeps the originals. A positive
            value tops each class up to that many images and keeps the
            originals.
        augmentation_pipeline: Forwarded to ``augment_data``.
        experimental_export: Write one compressed ``data.npz`` instead of
            four ``.npy`` dumps.
        export: Write the result to disk at all.

    Raises:
        Exception: If ``augmentation_upto`` is negative, or is ``0`` while
            ``max_faces`` is ``None``.
    """
    X, Y = fetch_lfw_people(
        path_to_dataset=path_to_dataset,
        min_faces=min_faces,
        max_faces=max_faces,
        hard_limit=hard_limit,
        shuffle=shuffle,
        random_state=random_state,
        verbose=verbose,
        desired_shape=desired_shape,
    )

    log = print if verbose else identity
    tracker = tqdm.tqdm if verbose else identity

    log("[bold green]Dataset loaded successfully[/bold green]")

    export_path = os.path.dirname(path_to_dataset)

    # Calling a loader reads and decodes its image. np.asarray is used rather
    # than np.array(..., copy=False), which raises on NumPy 2 whenever a copy
    # is unavoidable (as it is for a list).
    images = np.asarray([loader() for loader in tracker(X)])
    log("[bold green]Binary data loaded successfully[/bold green]")

    x_train, x_test, y_train, y_test = train_test_split(
        images,
        Y,
        test_size=test_size,
        random_state=random_state,
        stratify=Y,
    )
    log("[bold green]Dataset split successfully[/bold green]")

    if augment:
        if augmentation_upto is None:
            rows = _augment_every_image(
                x_train, y_train, tracker, desired_shape, augmentation_count, augmentation_pipeline
            )
            log("[bold green]Augmentation done successfully[/bold green]")
        else:
            if augmentation_upto < 0:
                raise Exception("augmentation_upto must be None or a non-negative int")
            if augmentation_upto == 0:
                if max_faces is None:
                    raise Exception("max_faces must be int if augmentation_upto is 0")
                per_class_target = max_faces
            else:
                per_class_target = augmentation_upto
            rows = _augment_classes_up_to(
                x_train, y_train, tracker, per_class_target, desired_shape, augmentation_pipeline
            )
            log("[bold green]Augmented and original images collected successfully[/bold green]")

        xy_data = np.asarray(rows, dtype=object)
        log("[bold green]Augmented data converted to numpy array[/bold green]")
        if shuffle:
            np.random.shuffle(xy_data)
            log("[bold green]Augmented data shuffled successfully[/bold green]")

        # Pixel values are exported as-is; scaling is left to the consumer.
        x_train = xy_data[:, 0]
        y_train = xy_data[:, 1]

    if export:
        _export_splits(export_path, x_train, y_train, x_test, y_test, experimental_export)
        log("[bold green]Dataset exported successfully[/bold green]")


if __name__ == "__main__":
    new_data_dir = "/kaggle/working/new-transfer-data/New-Data/"
    # A missing directory simply means there is no new data; later cells
    # branch on has_new, so this must not raise.
    has_new = os.path.isdir(new_data_dir) and count_dirs(new_data_dir) > 0
    if has_new:
        export_dataset_objects(
            path_to_dataset=new_data_dir,
            shuffle=True,
            random_state=SHUFFLE_NUMBER,
            min_faces=0,
            max_faces=60,
            hard_limit=False,
            augment=True,
            augmentation_upto=120,
            experimental_export=True,
            test_size=0.3,
            desired_shape=(848,464),
            export=True,
        )


In [None]:
# Load the previously exported base dataset, if it is present.
base_archive = Path("/kaggle/working/transfer-learning-base/Transfer-learning-data-v2/Transfer-learning-data/data.npz")
has_base = base_archive.exists()
if has_base:
    data = np.load(str(base_archive), allow_pickle=True)
    X, Y = data['x_train'], data['y_train']
    x_test, y_test = data['x_test'], data['y_test']

    # Per-identity sample counts of the training split.
    label_counts = Counter(Y)
    for label, count in label_counts.items():
        print(f"Label {label}: {count} images")

    unique_identities = np.unique(Y)
    n_unique_identities = len(unique_identities)

    print(f"There are {len(unique_identities)} unique identities.")

In [None]:
# Load the freshly exported new images into their own set of variables.
if has_new:
    new_data = np.load("/kaggle/working/new-transfer-data/New-Data/data.npz", allow_pickle=True)
    new_X, new_Y = new_data['x_train'], new_data['y_train']
    new_x_test, new_y_test = new_data['x_test'], new_data['y_test']

    # Per-identity sample counts of the new training split.
    new_label_counts = Counter(new_Y)
    for label, count in new_label_counts.items():
        print(f"Label {label}: {count} images")

    new_unique_identities = np.unique(new_Y)
    new_n_unique_identities = len(new_unique_identities)

    print(f"There are {len(new_unique_identities)} unique identities.")

In [None]:
# Shape of one new training image; new_X only exists when new data was loaded.
new_X[0].shape if has_new else None

In [None]:
# Augmenting and splitting the new images is already handled by
# export_dataset_objects above, so nothing more is needed here.

In [None]:
# merge the two datasets in train and test

In [None]:
# print(X.shape)
# print(new_X.shape)
def _pair_with_labels(samples, labels):
    """Return an object array whose rows are ``[sample, label]``."""
    # np.asarray instead of np.array(..., copy=False): the latter raises on
    # NumPy 2 because building an array from a list always needs a copy.
    return np.asarray(list(zip(samples, labels)), dtype=object)


if has_new and has_base:
    merged_X = np.concatenate((X, new_X))
    merged_Y = np.concatenate((Y, new_Y))
    merged_x_test = np.concatenate((x_test, new_x_test))
    merged_y_test = np.concatenate((y_test, new_y_test))
elif has_new:
    merged_X, merged_Y = new_X, new_Y
    merged_x_test, merged_y_test = new_x_test, new_y_test
elif has_base:
    merged_X, merged_Y = X, Y
    merged_x_test, merged_y_test = x_test, y_test
else:
    raise Exception("No data available: neither the base dataset nor any new data was found")

merged_XY = _pair_with_labels(merged_X, merged_Y)
merged_xy_test = _pair_with_labels(merged_x_test, merged_y_test)
print(merged_XY.shape)

In [None]:
# Re-seed NumPy's global RNG, then shuffle the rows of both paired arrays in place.
# NOTE(review): re-running this cell shuffles the already shuffled arrays again.
np.random.seed(SHUFFLE_NUMBER)
np.random.shuffle(merged_XY)
np.random.shuffle(merged_xy_test)

In [None]:
# Split the shuffled [sample, label] rows back into separate sample and label columns.
transfer_X = merged_XY[:, 0]
transfer_Y = merged_XY[:, 1]
transfer_x_test = merged_xy_test[:, 0]
transfer_y_test = merged_xy_test[:, 1]

In [None]:
# Distinct identities in the merged training labels; the count is used later
# as the number of output classes.
final_unique_identities = np.unique(transfer_Y)
final_n_unique_identities = len(final_unique_identities)

print(f"There are {final_n_unique_identities} unique identities.")

In [None]:
# Function to plot images
def plot_images(images, labels, title, count=5):
    """Show the first ``count`` images side by side with their labels.

    Args:
        images: Indexable collection of image arrays.
        labels: Indexable collection of labels, parallel to ``images``.
        title: Figure title.
        count: Maximum number of images to show. Fewer panels are drawn when
            fewer images or labels are available, instead of raising
            ``IndexError``.
    """
    shown = min(count, len(images), len(labels))
    # Always create at least one panel so an empty input still renders a figure.
    fig, axes = plt.subplots(1, max(shown, 1), figsize=(10, 10), squeeze=False)
    fig.suptitle(title)
    for i, ax in enumerate(axes[0]):
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)
        if i < shown:
            ax.imshow(images[i], cmap=plt.cm.binary)
            ax.set_xlabel(labels[i])
        else:
            ax.axis("off")
    fig.tight_layout()
    plt.show()

# Plot images from X
plot_images(transfer_X, transfer_Y, "X")

# Plot images from x_test
plot_images(transfer_x_test, transfer_y_test, "x_test")

In [None]:
import cv2
import matplotlib.pyplot as plt

def detect_and_crop_faces_in_image(input_image, face_cascade=None, output_size=(224, 224)):
    """Crop the first detected face out of ``input_image`` and resize it.

    Args:
        input_image: Image array; it is copied, never modified.
        face_cascade: Optional ``cv2.CascadeClassifier`` to use. By default a
            frontal-face Haar cascade is created once and reused across
            calls, instead of reloading the XML file for every image.
        output_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        ``(resized_image, cropped)`` where ``cropped`` is ``True`` if a face
        was found. When no face is found the whole image is resized instead.
    """
    if face_cascade is None:
        face_cascade = getattr(detect_and_crop_faces_in_image, "_cascade", None)
        if face_cascade is None:
            face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
            detect_and_crop_faces_in_image._cascade = face_cascade

    image = input_image.copy()

    faces = face_cascade.detectMultiScale(image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # If a face is detected, crop it out
    if len(faces) > 0:
        x, y, w, h = faces[0]
        cropped_face = image[y:y+h, x:x+w]
        cropped = True
    else:
        cropped = False
        cropped_face = image

    # Resize the cropped face (or the whole image) to the requested size.
    resized_image = cv2.resize(cropped_face, output_size)
    return resized_image, cropped

def _collect_detected_faces(images, labels):
    """Run face detection over ``images``; keep only the hits together with their labels."""
    kept_faces, kept_labels = [], []
    for image, label in tracker(list(zip(images, labels))):
        face, found = detect_and_crop_faces_in_image(image)
        if found:
            kept_faces.append(face)
            kept_labels.append(label)
    return kept_faces, kept_labels


tracker = tqdm.tqdm

# Samples without a detected face are dropped from both splits.
cropped_face_X, cropped_face_Y = _collect_detected_faces(transfer_X, transfer_Y)
cropped_face_x_test, cropped_face_y_test = _collect_detected_faces(transfer_x_test, transfer_y_test)

In [None]:
# Top row: first five training faces; bottom row: first five test faces.
fig, axes = plt.subplots(2, 5, figsize=(40, 10))
axes = axes.flatten()
for slot, ax in enumerate(axes):
    if slot < 5:
        ax.imshow(cropped_face_X[slot], cmap="gray")
    else:
        ax.imshow(cropped_face_x_test[slot - 5])
    ax.axis("off")
plt.tight_layout()
plt.show()

In [None]:
# Report whether TensorFlow can see a GPU.
gpu_name = tf.test.gpu_device_name()
if gpu_name:
    print('Default GPU Device: {}'.format(gpu_name))
else:
    print("Please install GPU version of TF")

# def normalize_images(image_array):
#     # Normalize the images 
#     for i, image in enumerate(image_array):
#         image = ((image - np.min(image)) / (np.max(image) - np.min(image)))
#         image_array[i] = image
#     return image_array

def normalize_image(image):
    """Min-max scale one image tensor to the range [0, 1].

    Integer inputs are cast to ``float32`` first. A constant image (maximum
    equals minimum) yields all zeros: ``divide_no_nan`` returns 0 where the
    denominator is 0, so no NaN is ever produced and no data-dependent Python
    branching is needed inside a ``tf.data`` pipeline.
    """
    image = tf.convert_to_tensor(image)
    if not image.dtype.is_floating:
        image = tf.cast(image, tf.float32)
    lowest = tf.math.reduce_min(image)
    value_range = tf.math.reduce_max(image) - lowest
    return tf.math.divide_no_nan(image - lowest, value_range)

# # Apply the function to our final_X and final_x_test 
# final_X = normalize_images(cropped_face_X)
# final_x_test = normalize_images(cropped_face_x_test)

# Convert lists to Numpy arrays
# final_X = np.array(final_X)
# final_x_test = np.array(final_x_test)


# X_data = tf.data.Dataset.from_generator(iter(cropped_face_X))
# x_test_data = tf.data.Dataset.from_generator(iter(cropped_face_x_test))

# final_X = X_data.apply(normalize_image)
# final_x_test = x_test_data.apply(normalize_image)

# Encode the identity labels as integers, then one-hot encode them.
le = LabelEncoder()
Y_int = le.fit_transform(cropped_face_Y)
print(Y_int)

Y_one_hot = to_categorical(Y_int, num_classes=final_n_unique_identities)

dataset_size = int(len(cropped_face_X))
train_size = int(0.9 * dataset_size)

train_val_data = tf.data.Dataset.from_tensor_slices((cropped_face_X, Y_one_hot))

# Shuffle ONCE with a fixed order before splitting. With the default
# reshuffle_each_iteration=True the order changes on every pass, so take()
# and skip() would pick different samples each epoch and validation samples
# would leak into training.
train_val_data = train_val_data.shuffle(
    buffer_size=max(dataset_size, 1),
    seed=SHUFFLE_NUMBER,
    reshuffle_each_iteration=False,
)

# Only the training part is reshuffled every epoch; normalisation is mapped
# after the split so the shuffle buffers hold the raw images.
train_data = (
    train_val_data.take(train_size)
    .shuffle(buffer_size=max(train_size, 1))
    .map(lambda x, y: (normalize_image(x), y))
    .batch(32)
)
val_data = (
    train_val_data.skip(train_size)
    .map(lambda x, y: (normalize_image(x), y))
    .batch(32)
)
print(f"train samples: {train_size}, validation samples: {dataset_size - train_size}")

In [None]:
%pip show keras

In [None]:
# Load the saved base model (HDF5 file) that the new classification head is attached to below.
model = keras.models.load_model('/kaggle/working/transfer-learning-base/Transfer-learning-data-v2/Transfer-learning-data/model_mobilenet-base.h5')

In [None]:
# %pip install keras==2.15.0

In [None]:
# %pip show tensorflow
# %pip show keras

In [None]:
# model.summary()

In [None]:
# destroy top


In [None]:
# Freeze the loaded model so its existing weights are not updated during training.
model.trainable = False

In [None]:
# Create the new top: a softmax classifier with one unit per identity, fed by
# the output of the loaded model's second-to-last layer (its last layer is bypassed).
new_layer = keras.layers.Dense(final_n_unique_identities, activation='softmax', name="dense_output")(model.layers[-2].output)

In [None]:
# Model that maps the loaded model's input to the new classification head.
transfer_model = keras.models.Model(inputs=model.input, outputs=new_layer)

In [None]:
# Print the layer-by-layer summary of the new model.
transfer_model.summary()

In [None]:
base_learning_rate = 0.00005
transfer_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=base_learning_rate),
    # The output layer already applies softmax, so the model emits
    # probabilities, not logits; the loss must be told so.
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=["accuracy"],
)

In [None]:
# base_learning_rate = 0.001
# model.compile(optimizer=keras.optimizers.Adam(learning_rate=base_learning_rate),
#                        loss=keras.losses.CategoricalCrossentropy(from_logits=False),
#                        metrics=["accuracy"])

In [None]:
# Train; stop early once the monitored validation metric has not improved for 5 epochs.
early_stopping = keras.callbacks.EarlyStopping(patience=5)
transfer_model.fit(train_data, epochs=50, validation_data=val_data, callbacks=[early_stopping])

In [None]:
# Encode the test labels with the encoder that was already fitted on the
# training labels, so both splits share the same label-to-index mapping, then
# one-hot encode them.
Y_int_test = le.transform(cropped_face_y_test)
Y_one_hot_test = to_categorical(Y_int_test, num_classes=final_n_unique_identities)

In [None]:
from sklearn.metrics import classification_report

# Build the test pipeline: images only, normalised the same way as the training data.
test_data = tf.data.Dataset.from_tensor_slices((cropped_face_x_test,))
test_data = test_data.map(lambda x: (normalize_image(x))).batch(32)

print(f"Model: {transfer_model}")
# NOTE: despite the "val_" prefix these are predictions on the test split.
val_predictions = transfer_model.predict(test_data)

# Convert the predicted class probabilities to class indices
val_predictions_classes = np.argmax(val_predictions, axis = 1)
# Convert the one-hot test labels back to class indices
val_true = np.argmax(Y_one_hot_test, axis = 1)

# Generate the classification report
report = classification_report(val_true, val_predictions_classes)
print(report)


In [None]:
!mkdir -p /kaggle/working/export

In [None]:
# Write the class names one per line, in the order of the label encoder's
# classes (line number = encoded class index).
labels_file = Path("/kaggle/working/export/labels.txt")
with open(labels_file, "w") as f:
    f.writelines(f"{c}\n" for c in le.classes_)

In [None]:
# Export the trained model; the file name carries the export time in whole
# seconds since the epoch so that successive exports do not overwrite each other.
export_time = math.floor(time.time())
transfer_model.save(f'/kaggle/working/export/transfer-model-mobilenet-25I-{export_time}.h5')

In [None]:
# Export the merged, shuffled train/test splits (the images as they were
# before the face-cropping step) under the same keys the loading cells read.
np.savez_compressed(
    "/kaggle/working/export/data.npz",
    x_train=transfer_X,
    y_train=transfer_Y,
    x_test=transfer_x_test,
    y_test=transfer_y_test
)

In [None]:
!rm -rf /kaggle/working/transfer-learning-base
!rm -rf /kaggle/working/mates-only-v3
!rm -rf /kaggle/working/new-transfer-data

In [None]:
# Zip the whole export directory into /kaggle/working/export_data.zip.
shutil.make_archive('/kaggle/working/export_data', 'zip', '/kaggle/working/export')

In [None]:
!rm -rf /kaggle/working/export