# 0. Preliminaries
**All code in Sections 0 and 1 must be run before any models can be built.**

## 0.1 Constants Declaration
**Constants that represent absolute paths should be changed to match the folder and file locations of the inputs.**

In [1]:
# Machine-specific absolute paths: edit these to point at the local copies of the inputs.
# Folder holding the face-verification data.
FACE_VERIFICATION_DIR = r"D:\ARCHIVED\University Files\VI\aml\a2\verification_data"
# Text file describing the verification pairs of the validation split.
FACE_VERIFICATION_TXT_PATH = r"D:\ARCHIVED\University Files\VI\aml\a2\verification_pairs_val.txt"

# The values below are not used by the code visible in this section.
# Presumably the size images are resized to - TODO confirm against the loading code.
IMAGE_SIZE = (256, 256)
# Presumably the batch size of the image datasets - TODO confirm.
IMAGES_PER_BATCH = 128
# Presumably the number of identities the classifier distinguishes - TODO confirm.
CLASSES = 1000

## 0.2 Simple ML Builder Class
This class is created entirely by hand, to simplify the process of building and training the model.

In [2]:
import sys, math

def smlb_log(*message, sep:str=" "):
	"""Prints the given values to standard output, prefixed with the "[SMLB]" tag.
	
	sep is placed between the tag and every value, exactly like print()'s sep.
	"""
	taggedMessage = ("[SMLB]",) + message
	print(*taggedMessage, sep=sep)

def smlb_log_error(*message, sep:str=" "):
	"""Prints the given values to standard error, prefixed with the "[SMLB]" tag.
	
	sep is placed between the tag and every value, exactly like print()'s sep.
	"""
	taggedMessage = ("[SMLB]", *message)
	print(*taggedMessage, sep=sep, file=sys.stderr)

# Each third-party import is announced before and after it happens, so a slow
# import (TensorFlow in particular) is not mistaken for a hang.
smlb_log("Loading TensorFlow... this will take a while.")
import tensorflow as TensorFlow
# Short alias for the Keras API that ships with TensorFlow; used throughout this file.
Keras = TensorFlow.keras
smlb_log("TensorFlow loaded! TensorFlow version is", TensorFlow.__version__ + ".")

smlb_log("Loading NumPy...")
import numpy as NumPy
smlb_log("NumPy loaded!")

smlb_log("Loading PyPlot...")
from matplotlib import pyplot as PyPlot
smlb_log("PyPlot loaded!")

smlb_log("All imports successful!")

class SimpleMLBuilder:
	"""Convenience wrapper for describing, compiling, training and using a Keras Sequential model.
	
	Layers are collected as plain dictionaries ("type" plus the keyword arguments
	of the matching Keras layer class) and are only turned into real Keras layers
	when build() is called, so the same template can be built more than once.
	"""
	
	def __init__(self, verbose:bool=False):
		"""Creates an empty builder.
		
		verbose = also print progress messages (results and errors are always printed).
		"""
		self.datasets = {"training": [None, None], "validation": None, "testing": [None, None]}
		self.layers = []
		# [feature class names, label class names]; either entry may be None.
		self.labels = [None, None]
		self.verbose = verbose
		self.history = None
		# Declared up front so "not set yet" is a plain None check instead of an AttributeError.
		self.lossFunction = None
		self.compiledModel = None
		self.log("Fully initialized!")
	
	def log(self, *message, sep:str=" ", nonVerbose:bool=False):
		"""Prints a message to standard output if the builder is verbose or nonVerbose is set."""
		if self.verbose or nonVerbose:
			smlb_log(*message, sep=sep)
	
	def log_error(self, *message, sep:str=" ", nonVerbose:bool=False):
		"""Prints a message to standard error if the builder is verbose or nonVerbose is set."""
		if self.verbose or nonVerbose:
			smlb_log_error(*message, sep=sep)
	
	def load_preset_dataset(self, preset:str):
		"""Loads a preset dataset with Keras.
		
		Built-in presets: MNIST & Fashion MNIST (the name is case-insensitive).
		Useful for testing the SMLB.
		Replaces the training set, the testing set and the class names.
		"""
		# preset key -> (display name, attribute of Keras.datasets, label class names)
		presets = {
			"mnist": ("MNIST", "mnist", None),
			"fashion mnist": ("Fashion MNIST", "fashion_mnist", [
				"T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
				"Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"
			]),
		}
		
		preset = preset.lower()
		if preset not in presets:
			self.log_error("Preset \"" + preset + "\" not found.", nonVerbose=True)
			return
		
		displayName, datasetName, labelClasses = presets[preset]
		self.log("Loading preset \"" + displayName + "\"...")
		
		(trainingXs, trainingYs), (testingXs, testingYs) = getattr(Keras.datasets, datasetName).load_data()
		# Always overwrite the class names so none are left over from a previously loaded preset.
		self.labels = [None, labelClasses]
		
		self.datasets["training"] = [trainingXs, trainingYs]
		self.log("Training set loaded from preset. Training images has shape of", str(trainingXs.shape) + ".")
		
		self.datasets["testing"] = [testingXs, testingYs]
		self.log("Testing set loaded from preset. Testing images has shape of", str(testingXs.shape) + ".")
		
		self.log("Preset \"" + displayName + "\" loaded successfully.")
	
	def set_training_features(self, features):
		"""Sets the features used for training.
		
		Features can be a list of entries or a dataset.
		"""
		self.datasets["training"][0] = features
		self.log("Training X values have been set!")
	
	def set_training_labels(self, labels=None):
		"""Sets the labels used for training.
		
		Labels should be a list of entries.
		Leave the labels unset (None) when a TensorFlow.data.Dataset is used for the training features.
		"""
		self.datasets["training"][1] = labels
		self.log("Training Y values have been set!")
	
	def set_testing_features(self, features):
		"""Sets the features used for testing.
		
		Features can be a list of entries or a dataset.
		"""
		self.datasets["testing"][0] = features
		self.log("Testing X values have been set!")
	
	def set_testing_labels(self, labels=None):
		"""Sets the labels used for testing.
		
		Labels should be a list of entries.
		Leave the labels unset (None) when a TensorFlow.data.Dataset is used for the testing features.
		"""
		self.datasets["testing"][1] = labels
		self.log("Testing Y values have been set!")
	
	def set_validation_dataset(self, dataset):
		"""Sets the dataset used for validation.
		
		The dataset is handed to Keras as validation_data when run() is called.
		"""
		self.datasets["validation"] = dataset
		self.log("Validation dataset has been set!")
	
	def get_training_features(self):
		"""Returns the training features (None if not set)."""
		return self.datasets["training"][0]
	
	def get_training_labels(self):
		"""Returns the training labels (None if not set)."""
		return self.datasets["training"][1]
	
	def get_testing_features(self):
		"""Returns the testing features (None if not set)."""
		return self.datasets["testing"][0]
	
	def get_testing_labels(self):
		"""Returns the testing labels (None if not set)."""
		return self.datasets["testing"][1]
	
	def _get_classes(self, index:int):
		"""Returns entry `index` of self.labels, or None when that entry does not exist."""
		return self.labels[index] if index < len(self.labels) else None
	
	def get_feature_classes(self):
		"""Returns the class names of the features, or None if there are none."""
		return self._get_classes(0)
	
	def get_label_classes(self, y:bool=False):
		"""Returns the class names of the labels, or None if there are none.
		
		y is unused; it is kept so existing calls keep working.
		"""
		return self._get_classes(1)
	
	def start_layering(self, inputShape:tuple=None):
		"""Starts the creation of a new model.
		
		Discards all previously added layers, then adds an input layer of the given shape.
		"""
		self.layers.clear()
		layer = {"type": "Input", "shape": inputShape}
		self.layers.append(layer)
		self.log("Layering started. Added layer:", layer)
	
	def _add_layer(self, layer:dict):
		"""Appends a layer description to the template and logs it."""
		self.layers.append(layer)
		self.log("Added layer:", layer)
	
	def add_dense_layer(self, neurons:int):
		"""Adds a densely-connected layer to the model.
		
		Dense layers are the bread and butter of any Deep Neural Network.
		"""
		self._add_layer({"type": "Dense", "units": neurons})
	
	def add_activation_layer(self, activation:str=None):
		"""Adds an activation to the model.
		
		activation is any activation name Keras accepts, e.g. relu.
		If the previous layer is a dense or convolution layer, that layer is modified
		to use the activation; otherwise a separate activation layer is added.
		"""
		previous = self.layers[-1] if self.layers else None
		if previous is not None and previous["type"] in ("Dense", "Conv2D"):
			previous["activation"] = activation
			self.log("Modified layer:", previous)
		else:
			# Keras.layers has no class named after the activation itself (e.g. "relu"),
			# so a standalone activation has to go through the generic Activation layer.
			self._add_layer({"type": "Activation", "activation": activation})
	
	def add_rescaling_layer(self, scale:float=1.0/255, offset:float=0.0):
		"""Adds a rescaling layer to the model.
		
		Used to multiply and then offset the values of the previous layer.
		Typically used as a preprocessing layer.
		"""
		self._add_layer({"type": "Rescaling", "scale": scale, "offset": offset})
	
	def add_regularization_sublayer(self, regularization:str=None, regAmount:float=0.0):
		"""Modifies the previous layer to use kernel regularization.
		
		Possible regularizations: l1, l2. Any other value leaves the layer unchanged.
		"""
		if not self.layers:
			self.log_error("Please add a layer before adding regularization.", nonVerbose=True)
			return
		
		regularizers = {"l1": Keras.regularizers.l1, "l2": Keras.regularizers.l2}
		layer = self.layers[-1]
		if regularization in regularizers:
			layer["kernel_regularizer"] = regularizers[regularization](regAmount)
		self.log("Modified layer:", layer)
	
	def add_flatten_layer(self):
		"""Adds a flattening layer to the model.
		
		Flattening layers collapse every dimension of each sample into a single vector.
		"""
		self._add_layer({"type": "Flatten"})
	
	def add_dropout_layer(self, probability:float):
		"""Adds a dropout layer to the model.
		
		During training, each value of the previous layer is replaced by 0 with the given probability.
		"""
		self._add_layer({"type": "Dropout", "rate": probability})
	
	def add_normalization_layer(self, axis:int=None):
		"""Adds a layer that normalizes its input to mean 0 and standard deviation 1.
		
		The statistics are computed from the training features when the model is built.
		"""
		self._add_layer({"type": "Normalization", "axis": axis})
	
	def add_convolution_layer(self, filters:int, filterSize:tuple, stride:tuple=(1,1), pad:bool=False):
		"""Adds a convolution layer to the model.
		
		Convolution layers help to get certain image data features of the previous layer.
		pad = use "same" padding instead of "valid".
		"""
		layer = {
			"type": "Conv2D",
			"filters": filters,
			"kernel_size": filterSize,
			"strides": stride,
			"padding": "same" if pad else "valid"
		}
		self._add_layer(layer)
	
	def add_pooling_layer(self, method:str="max", filterSize:tuple=(1,1), stride:tuple=(1,1)):
		"""Adds a pooling layer to the model.
		
		Pooling layers help to summarize image data of the previous layer.
		Possible methods: max. Any other method adds nothing and reports an error.
		"""
		if method != "max":
			self.log_error("Pooling method \"" + str(method) + "\" is not supported.", nonVerbose=True)
			return
		self._add_layer({"type": "MaxPool2D", "pool_size": filterSize, "strides": stride})
	
	def add_random_contrast_layer(self, minimum:float, maximum:float=None):
		"""Adds a layer that randomly changes the contrast of its input.
		
		With one value x, the contrast factor is picked from [1 - x, 1 + x].
		With two values, minimum is the (negative) largest reduction and maximum the
		largest increase, e.g. (-0.2, 0.3) picks the factor from [0.8, 1.3].
		"""
		if maximum is None:
			maximum = minimum
		else:
			# Keras expects the lower bound as a positive fraction that it subtracts from 1.
			minimum = -minimum
		
		self._add_layer({"type": "RandomContrast", "factor": (minimum, maximum)})
	
	def add_random_flip_layer(self, x:bool=False, y:bool=False):
		"""Adds a layer that randomly flips its input.
		
		x = allow horizontal flip, y = allow vertical flip.
		If neither is set, horizontal flipping is used.
		"""
		if x and y:
			flip = "horizontal_and_vertical"
		elif y:
			flip = "vertical"
		else:
			flip = "horizontal"
		
		self._add_layer({"type": "RandomFlip", "mode": flip})
	
	def add_random_rotation_layer(self, minimum:float, maximum:float=None):
		"""Adds a layer that randomly rotates its input around its center.
		
		Amounts are fractions of a full turn (2 pi), not radians.
		Positive rotates counter-clockwise, negative rotates clockwise.
		With one value x, the rotation is picked from [-x, x].
		"""
		if maximum is None:
			maximum = minimum
			minimum = -minimum
		
		self._add_layer({"type": "RandomRotation", "factor": (minimum, maximum)})
	
	def add_random_zoom_layer(self, minimum:float, maximum:float=None):
		"""Adds a layer that randomly zooms its input.
		
		Amounts are fractions of the image height.
		Positive zooms out, negative zooms in.
		With one value x, the zoom is picked from [-x, x].
		"""
		if maximum is None:
			maximum = minimum
			minimum = -minimum
		
		self._add_layer({"type": "RandomZoom", "height_factor": (minimum, maximum)})
	
	def get_layers(self) -> list:
		"""Returns the list of layer descriptions added so far (the live list, not a copy)."""
		return self.layers
	
	def set_scc_loss_function(self):
		"""Sets the loss function to TensorFlow.keras.losses.SparseCategoricalCrossentropy.
		
		The loss is created with from_logits=True, so the model should output raw logits.
		"""
		self.lossFunction = Keras.losses.SparseCategoricalCrossentropy(from_logits=True)
		self.log("Set loss function:", self.lossFunction)
	
	def set_bc_loss_function(self):
		"""Sets the loss function to TensorFlow.keras.losses.BinaryCrossentropy.
		
		The loss is created with from_logits=True, so the model should output raw logits.
		"""
		self.lossFunction = Keras.losses.BinaryCrossentropy(from_logits=True)
		self.log("Set loss function:", self.lossFunction)
	
	def set_mae_loss_function(self):
		"""Sets the loss function to the mean absolute error."""
		self.lossFunction = "mean_absolute_error"
		self.log("Set loss function:", self.lossFunction)
	
	def set_custom_loss_function(self, loss:Keras.losses):
		"""Sets the loss function to the passed value."""
		self.lossFunction = loss
		self.log("Set loss function:", self.lossFunction)
	
	def _create_layer(self, layerData:dict) -> Keras.layers.Layer:
		"""Turns one layer description into the matching Keras layer (or Keras.Input)."""
		# Work on a copy: removing "type" from the stored description would make
		# every later build() of the same template fail.
		arguments = dict(layerData)
		layerType = arguments.pop("type")
		if layerType == "Input":
			return Keras.Input(**arguments)
		
		layer = getattr(Keras.layers, layerType)(**arguments)
		if layerType == "Normalization":
			# The normalization statistics are taken from the training features.
			dataset = self.datasets["training"][0]
			if isinstance(dataset, TensorFlow.data.Dataset):
				# NOTE(review): a dataset that yields (features, labels) pairs may first
				# need to be mapped to its features only - confirm against the caller.
				layer.adapt(dataset)
			else:
				layer.adapt(NumPy.array(dataset))
		return layer
	
	def build(self, learningRate:float=0.001, additionalMetrics:list=None):
		"""Compiles the model based on the layers added.
		
		Uses the Adam optimizer with the given learning rate.
		"accuracy" is always tracked unless the loss is the mean absolute error;
		additionalMetrics are tracked on top of that.
		"""
		if not self.layers:
			self.log_error("Please add layers to the builder template before building.", nonVerbose=True)
			return
		if self.lossFunction is None:
			self.log_error("Please specify the loss function first.", nonVerbose=True)
			return
		
		self.log("Building model with learning rate = ", learningRate, "...", sep="")
		
		model = Keras.models.Sequential([self._create_layer(layer) for layer in self.layers])
		
		# Accuracy is not tracked for the mean absolute error loss.
		metrics = [] if self.lossFunction == "mean_absolute_error" else ["accuracy"]
		metrics = metrics + list(additionalMetrics or [])
		model.compile(
			optimizer=TensorFlow.optimizers.Adam(learning_rate=learningRate),
			loss=self.lossFunction,
			metrics=metrics
		)
		
		self.compiledModel = model
		self.log("Model built! Details:")
		model.summary()
	
	def destroy(self):
		"""Discards the current model, if any."""
		if self.compiledModel is None:
			self.log_error("No model to destroy!", nonVerbose=True)
			return
		self.compiledModel = None
		self.log("Model destroyed!")
	
	def get_model(self):
		"""Returns the built (or loaded) Keras model, or None if there is none."""
		return self.compiledModel
	
	def save(self, name:str="Unnamed"):
		"""Saves the current model under the given name/path."""
		if self.compiledModel is None:
			self.log_error("No model to save!", nonVerbose=True)
			return
		self.log("Saving model...")
		self.compiledModel.save(name)
		self.log("Save complete!")
	
	def load(self, name:str):
		"""Loads a previously saved model, replacing the current one."""
		self.log("Loading model...")
		self.compiledModel = Keras.models.load_model(name)
		self.log("Load complete!")
	
	def get_history(self):
		"""Returns the Keras History object of the last run() (None before the first run)."""
		return self.history
	
	def _create_early_stopping_callback(self, epochs:int, hasValidation:bool=False) -> Keras.callbacks.EarlyStopping:
		"""Creates an early-stopping callback with a patience of ceil(sqrt(epochs)) epochs.
		
		Monitors the validation loss if hasValidation is set, the training loss otherwise,
		and restores the best weights when training stops.
		"""
		return Keras.callbacks.EarlyStopping(
			monitor="val_loss" if hasValidation else "loss",
			mode="min",
			patience=math.ceil(epochs ** 0.5),
			restore_best_weights=True
		)
	
	def run(self, epochs:int, validationSplit:float=0.0, earlyStop:bool=False):
		"""Causes the model to start trying to fit to the training data.
		
		validationSplit is ignored if the validation dataset was specified via set_validation_dataset().
		earlyStop causes the model to stop training if the validation loss (or the training loss
		if there is no validation) does not improve after the square root amount of epochs, rounded up.
		"""
		if self.compiledModel is None:
			self.log_error("Please build the model first.", nonVerbose=True)
			return
		
		validationData = self.datasets["validation"]
		fitArguments = {
			"x": self.datasets["training"][0],
			"y": self.datasets["training"][1],
			"epochs": epochs,
			"validation_data": validationData,
			"validation_split": validationSplit
		}
		if earlyStop:
			hasValidation = validationData is not None or validationSplit > 0.0
			fitArguments["callbacks"] = [self._create_early_stopping_callback(epochs, hasValidation)]
		
		self.history = self.compiledModel.fit(**fitArguments)
		self.log("======== TRAINING DONE ========", nonVerbose=True)
	
	def plot(self):
		"""Plots the training progress via MatPlotLib.
		
		Draws one graph per recorded quantity (accuracy and loss); validation curves
		are added only if validation data was used during training.
		"""
		if self.history is None:
			self.log_error("Please run the model first.", nonVerbose=True)
			return
		
		self.log("Creating graphs, please wait...", nonVerbose=True)
		historyDict = self.history.history
		
		# (history key, graph title, training label, validation label)
		candidates = [
			("accuracy", "Accuracy", "Training Accuracy", "Validation Accuracy"),
			("loss", "Loss", "Training Loss", "Validation Loss"),
		]
		# Accuracy is not recorded for every model (e.g. mean absolute error loss).
		graphs = [graph for graph in candidates if graph[0] in historyDict]
		if not graphs:
			self.log_error("The training history is empty; nothing to plot.", nonVerbose=True)
			return
		
		PyPlot.figure(figsize=(12, 6))
		for position, (key, title, trainingLabel, validationLabel) in enumerate(graphs, start=1):
			values = historyDict[key]
			# Early stopping can end training before the requested number of epochs,
			# so the x axis follows the recorded values, not the requested epochs.
			epochsRange = range(len(values))
			
			PyPlot.subplot(1, len(graphs), position)
			PyPlot.plot(epochsRange, values, label=trainingLabel)
			if "val_" + key in historyDict:
				PyPlot.plot(epochsRange, historyDict["val_" + key], label=validationLabel)
			PyPlot.legend()
			PyPlot.title(title)
		PyPlot.show()
	
	def evaluate(self) -> tuple:
		"""Evaluates the model over the testing dataset.
		
		Returns whatever Keras' evaluate() returns, or None if there is no model.
		"""
		if self.compiledModel is None:
			self.log_error("Please build the model first.", nonVerbose=True)
			return None
		
		results = self.compiledModel.evaluate(self.datasets["testing"][0], self.datasets["testing"][1], verbose=2)
		self.log("======== TESTING DONE ========", nonVerbose=True)
		return results
	
	def predict(self, features) -> NumPy.ndarray:
		"""Makes the model do predictions over the given features.
		
		A softmax layer is appended to the model, so the predictions are probabilities.
		Returns None if there is no model.
		"""
		if self.compiledModel is None:
			self.log_error("Please build and train the model first.", nonVerbose=True)
			return None
		
		predictionModel = Keras.Sequential([self.compiledModel, Keras.layers.Softmax()])
		return predictionModel.predict(features)

# Reached only after the imports, the logging helpers and SimpleMLBuilder were all defined without error.
smlb_log("Initialization successful!")

[SMLB] Loading TensorFlow... this will take a while.
[SMLB] TensorFlow loaded! TensorFlow version is 2.7.0.
[SMLB] Loading NumPy...
[SMLB] NumPy loaded!
[SMLB] Loading PyPlot...
[SMLB] PyPlot loaded!
[SMLB] All imports successful!
[SMLB] Initialization successful!


# 1. Dataset Preparation
`verification_data.zip` is a large collection of face images. We can't use `tf.keras.utils.image_dataset_from_directory` here since the model needs two inputs, so we have to build the dataset from scratch again.
## 1.1 Image Loading

In [None]:
# Turn the classification image folder into a tf.data.Dataset object
# NOTE(review): this call passes no arguments, but image_dataset_from_directory
# needs at least the image directory, so it cannot run as written. It looks like
# an unfinished placeholder - supply the directory (and presumably the image
# size and batch size) before running. TODO confirm the intended folder.
Keras.utils.image_dataset_from_directory()