# Module imports and preparation

In [None]:
# Mount Google Drive at /content/drive; the project folder used by the
# following cells lives underneath that mount point.
from google.colab import drive

drive.mount("/content/drive")

In [None]:
WORK_DIR = "/content/drive/My Drive/Colab Notebooks/Writer_identification"
!pip uninstall -y opencv-python
!pip install -U opencv-contrib-python
% cd "/content/drive/My Drive/Colab Notebooks/Writer_identification"

In [None]:
import Clusterer, Global_feature_exractors, Local_features_extractor, Norms, Image, PCA_reduction, Distances
from Dataset_loader import train_test_IAM, train_test_ICDAR2013, train_test_TrigraphSlant
from Accuracy import accuracy
import numpy as np
import matplotlib.pyplot as plt
import pickle, json, os
import Norms, Local_features_extractor, Image, Autoencoder, Clusterer
from google.colab.patches import cv2_imshow

In [None]:
# Name of the training run. It selects the entry read from config_train.json
# and is also the folder (relative to the working directory) in which the
# artefacts of this run are stored.
training_session = "training_16Sep"

with open("./config_train.json") as config_file:
    configuration = json.load(config_file)["Sift_Encoder_Vlad_Pca"][training_session]

# Artefact locations for this run.
CLUSTERS_CENTERS_PATH = os.path.join(training_session, "centers_clusters_NN_VLAD.npy")
PCA_MODEL_PATH = os.path.join(training_session, "pca_model_NN_VLAD.pkl")
IMAGES_PATCHS_PATH = os.path.join(training_session, "pickle_patchs_NN_VLAD.dat")
ENCODER_MODEL = os.path.join(training_session, "Encoder_model.h5")

# Create the run folder if needed. exist_ok avoids the check-then-create race
# and makedirs also copes with a nested session name.
os.makedirs(training_session, exist_ok=True)

In [None]:
# Load the IAM train/test split; the split sizes come from the run configuration.
train, test = train_test_IAM(
    "./dataset/IAM",
    size_train=configuration["train_size"],
    size_test=configuration["test_size"],
)

# Every sample is a 3-tuple; transpose the sample lists into per-field lists.
# Only the third field (the images) of the training samples is kept.
_, _, images_train_set = [list(field) for field in zip(*train)]
writers_test_set, images_names_test_set, images_test_set = [list(field) for field in zip(*test)]

# Train Model

In [None]:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam

In [None]:
class Autoencoder_train:
    """Train a convolutional autoencoder on pickled image patches and save its encoder.

    The configuration mapping must provide the keys ``"shape_images"``,
    ``"autoencoder_test_ration"``, ``"EPOCHS"``, ``"BS"`` and ``"latentDim"``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, configuration, data_path, model_path):
        """Store the settings of a training run.

        Args:
            configuration: Mapping with the keys listed in the class docstring.
            data_path: Path of the pickle file holding the training patches.
            model_path: Path the trained encoder is written to.
        """
        self.configuration = configuration
        # Patches are squares whose side is twice configuration["shape_images"].
        side = configuration["shape_images"] * 2
        self.shape_images = (side, side)
        self.model_path = model_path
        self.data_path = data_path

    def train_network(self):
        """Run the whole training pipeline.

        Loads and splits the patches, scales them to [0, 1], builds and fits
        the autoencoder with an MSE loss, plots the loss curves, saves the
        encoder part to ``self.model_path`` and finally displays a few
        reconstructions. Returns ``None``.
        """
        test_ration = self.configuration["autoencoder_test_ration"]
        EPOCHS = self.configuration["EPOCHS"]
        BS = self.configuration["BS"]

        trainX, testX = self.load_data_patchs(self.data_path, test_ration)

        # Add a trailing channel axis to every patch, then scale the pixel
        # intensities from [0, 255] to [0, 1].
        trainX = np.expand_dims(trainX, axis=-1).astype("float32") / 255.0
        testX = np.expand_dims(testX, axis=-1).astype("float32") / 255.0

        # Construct the convolutional autoencoder.
        print("[INFO] building autoencoder...")
        (encoder, decoder, autoencoder) = self.build(self.shape_images[0], self.shape_images[1], 1)
        # `learning_rate` is the supported argument name; the `lr` alias is
        # deprecated and rejected by newer Keras releases.
        autoencoder.compile(loss="mse", optimizer=Adam(learning_rate=1e-3))

        # Train the autoencoder to reproduce its own input.
        H = autoencoder.fit(trainX, trainX,
                            validation_data=(testX, testX),
                            epochs=EPOCHS,
                            batch_size=BS)

        self.plot_loss_accuracy(H, EPOCHS)
        # Only the encoder half (input -> latent vector) is persisted.
        encoder = Model(inputs=autoencoder.input, outputs=autoencoder.get_layer("encoder").output)
        self.save_model(encoder, self.model_path)
        self.test_autoencoder(autoencoder, testX)

    def test_autoencoder(self, autoencoder, testX, max_samples=20):
        """Display original patches next to their reconstructions.

        Args:
            autoencoder: Trained model exposing ``predict``.
            testX: Array of scaled patches (values in [0, 1]).
            max_samples: Upper bound on the number of rows displayed. Fewer
                rows are shown when ``testX`` is smaller, instead of failing
                with an ``IndexError``.
        """
        print("[INFO] making predictions...")
        shown = min(max_samples, len(testX))
        if shown <= 0:
            # Nothing to display (empty validation split).
            return

        # Only the displayed patches need to be reconstructed.
        decoded = autoencoder.predict(testX[:shown])
        rows = []
        for i in range(shown):
            original = (testX[i] * 255).astype("uint8")
            recon = (decoded[i] * 255).astype("uint8")
            # Original on the left, reconstruction on the right.
            rows.append(np.hstack([original, recon]))
        cv2_imshow(np.vstack(rows))

    def load_data_patchs(self, PIK, test_ration):
        """Load the pickled patches and split them into train and test parts.

        The data is shuffled in place with NumPy's global random state, so the
        split is not reproducible unless the caller seeds ``np.random``.

        Args:
            PIK: Path of the pickle file containing the patches.
            test_ration: Fraction of the patches assigned to the test part.

        Returns:
            Tuple ``(train, test)`` of arrays; ``test`` holds the first
            ``int(len(data) * test_ration)`` shuffled patches.
        """
        # NOTE: pickle.load executes arbitrary code from the file; only load
        # patch files produced by this project.
        with open(PIK, 'rb') as f:
            data = np.array(pickle.load(f))

        size_test = int(len(data) * test_ration)
        np.random.shuffle(data)
        return data[size_test:], data[:size_test]

    def save_model(self, model, model_path):
        """Save ``model`` to ``model_path`` in HDF5 format without optimizer state."""
        save_format = "h5"
        print("[INFO] saving encoder...")
        model.save(model_path, save_format=save_format, include_optimizer=False)

    def plot_loss_accuracy(self, H, EPOCHS):
        """Plot training and validation loss per epoch.

        Args:
            H: History object returned by ``fit``; its ``history`` mapping must
                contain ``"loss"`` and ``"val_loss"``.
            EPOCHS: Number of epochs, used for the x axis.
        """
        N = np.arange(0, EPOCHS)
        plt.style.use("ggplot")
        plt.figure()
        plt.plot(N, H.history["loss"], label="train_loss")
        plt.plot(N, H.history["val_loss"], label="val_loss")
        plt.title("Training Loss and Accuracy")
        plt.xlabel("Epoch #")
        plt.ylabel("Loss/Accuracy")
        plt.legend(loc="lower left")

    def build(self, width, height, depth, filters=(32, 64)):
        """Build the encoder, the decoder and the combined autoencoder.

        Args:
            width: Width of the input patches.
            height: Height of the input patches.
            depth: Number of channels of the input patches.
            filters: Number of filters of each strided convolution block of
                the encoder; the decoder mirrors them in reverse order.

        Returns:
            Tuple ``(encoder, decoder, autoencoder)`` of Keras models. The
            latent size is ``configuration["latentDim"]``.
        """
        latentDim = self.configuration["latentDim"]
        inputShape = (height, width, depth)

        inputs = Input(shape=inputShape)
        x = inputs

        # Encoder: each block halves the spatial resolution (stride 2).
        for f in filters:
            x = Conv2D(f, (3, 3), strides=2, padding="same")(x)
            x = LeakyReLU(alpha=0.2)(x)
            x = BatchNormalization()(x)

        # Remember the feature-map shape so the decoder can restore it.
        volumeSize = K.int_shape(x)
        x = Flatten()(x)
        latent = Dense(latentDim)(x)
        encoder = Model(inputs, latent, name="encoder")

        # summary() prints by itself; wrapping it in print() only adds "None".
        encoder.summary()

        # Decoder: expand the latent vector back to the encoder's last volume.
        latentInputs = Input(shape=(latentDim,))
        x = Dense(np.prod(volumeSize[1:]))(latentInputs)
        x = Reshape((volumeSize[1], volumeSize[2], volumeSize[3]))(x)

        for f in filters[::-1]:
            x = Conv2DTranspose(f, (3, 3), strides=2, padding="same")(x)
            x = LeakyReLU(alpha=0.2)(x)
            x = BatchNormalization()(x)

        # Final layer restores the channel count; sigmoid keeps outputs in [0, 1].
        x = Conv2DTranspose(depth, (3, 3), padding="same")(x)
        outputs = Activation("sigmoid")(x)
        decoder = Model(latentInputs, outputs, name="decoder")

        autoencoder = Model(inputs, decoder(encoder(inputs)), name="autoencoder")
        return (encoder, decoder, autoencoder)

In [None]:
def generate_patchs(PatchsPickle, images, height):
    """Crop a square patch around every detected keypoint and pickle the patches.

    Every input is wrapped in ``Image.Image`` with a
    ``Local_features_extractor.Local_feature_exractor(Norms.Norm.No_norm)``
    extractor, and the ``key_points`` of the wrapped image drive the cropping.
    Keypoints whose patch would cross the image border are skipped.

    Args:
        PatchsPickle: Path of the file the list of patches is pickled to.
        images: Iterable of inputs accepted by ``Image.Image``.
        height: Half side length of a patch; crops are ``2 * height`` pixels
            along each axis.
    """
    extractor = Local_features_extractor.Local_feature_exractor(Norms.Norm.No_norm)
    wrapped_images = [Image.Image(item, local_feature_extractor=extractor) for item in images]

    patches = []
    for wrapped in wrapped_images:
        for kp in wrapped.key_points:
            # kp.pt[0] indexes the second array axis, kp.pt[1] the first one.
            col, row = int(kp.pt[0]), int(kp.pt[1])
            n_rows, n_cols = wrapped.image.shape
            fits_rows = height <= row <= n_rows - height
            fits_cols = height <= col <= n_cols - height
            if fits_rows and fits_cols:
                patch = wrapped.image[row - height:row + height, col - height:col + height]
                patches.append(patch)

    with open(PatchsPickle, "wb") as sink:
        pickle.dump(patches, sink)

In [None]:
def choose_number_clusters(images, model_path, max_no_improvement, shape_images, max_key_points, test_values=range(1, 500, 50)):
    """Run the k-means elbow analysis on encoder-based local descriptors.

    A saved encoder is loaded into ``Autoencoder.Encoder_NN`` and plugged into
    the local feature extractor; every image is wrapped with that extractor
    and the result is handed to ``Clusterer.Clusterer.elbow_method_kmeans``.

    Args:
        images: Iterable of inputs accepted by ``Image.Image``.
        model_path: Path of the saved encoder model.
        max_no_improvement: Forwarded to ``elbow_method_kmeans``.
        shape_images: Side length of the square patches fed to the encoder.
        max_key_points: Forwarded to ``Autoencoder.Encoder_NN``.
        test_values: Candidate cluster counts forwarded to ``elbow_method_kmeans``.
    """
    patch_shape = (shape_images, shape_images)
    encoder = Autoencoder.Encoder_NN(patch_shape, max_key_points)
    encoder.set_model(model_path=model_path)
    extractor = Local_features_extractor.Local_feature_exractor(
        Norms.Norm.No_norm,
        local_feature_extractor=encoder,
    )

    described_images = []
    for item in images:
        described_images.append(Image.Image(item, local_feature_extractor=extractor))

    Clusterer.Clusterer.elbow_method_kmeans(
        described_images,
        max_no_improvement=max_no_improvement,
        test_values=test_values,
    )

In [None]:
def save_new_clusters_centers(images, model_path, path_to_save, nb_clusters, max_no_improvement, shape_images, max_key_points):
    """Fit a new clustering on encoder-based local descriptors and save it.

    A saved encoder is loaded into ``Autoencoder.Encoder_NN`` and plugged into
    the local feature extractor; every image is wrapped with that extractor
    and the result is handed to ``Clusterer.Clusterer.fit_new_trainig``.

    Args:
        images: Iterable of inputs accepted by ``Image.Image``.
        model_path: Path of the saved encoder model.
        path_to_save: Destination path forwarded to ``fit_new_trainig``.
        nb_clusters: Number of clusters forwarded to ``fit_new_trainig``.
        max_no_improvement: Forwarded to ``fit_new_trainig``.
        shape_images: Side length of the square patches fed to the encoder.
        max_key_points: Forwarded to ``Autoencoder.Encoder_NN``.
    """
    patch_shape = (shape_images, shape_images)
    encoder = Autoencoder.Encoder_NN(patch_shape, max_key_points)
    encoder.set_model(model_path=model_path)
    extractor = Local_features_extractor.Local_feature_exractor(
        Norms.Norm.No_norm,
        local_feature_extractor=encoder,
    )

    described_images = []
    for item in images:
        described_images.append(Image.Image(item, local_feature_extractor=extractor))

    Clusterer.Clusterer.fit_new_trainig(
        described_images,
        path_to_save,
        nb_clusters=nb_clusters,
        max_no_improvement=max_no_improvement,
        verbose=0,
    )

In [None]:
def new_pca_components_NN_vlad(images, path_to_save, cluster_centers_path, encoder_model_path, percentage_variance, shape_images, max_key_points):
    """Compute VLAD descriptors of the images and create a PCA model from them.

    The saved encoder provides the local descriptors, the saved cluster
    centers parameterise ``Global_feature_exractors.VLAD``, and the resulting
    global descriptors are passed first to
    ``PCA_reduction.plot_variance_nbComponents`` and then to
    ``PCA_reduction.create_new_pca_model``.

    Args:
        images: Iterable of inputs accepted by ``Image.Image``.
        path_to_save: Destination path forwarded to ``create_new_pca_model``.
        cluster_centers_path: Path given to ``Clusterer.Clusterer.fit_ancient_data``.
        encoder_model_path: Path of the saved encoder model.
        percentage_variance: Forwarded to both PCA helpers.
        shape_images: Side length of the square patches fed to the encoder.
        max_key_points: Forwarded to ``Autoencoder.Encoder_NN``.
    """
    patch_shape = (shape_images, shape_images)
    encoder = Autoencoder.Encoder_NN(patch_shape, max_key_points)
    encoder.set_model(model_path=encoder_model_path)
    extractor = Local_features_extractor.Local_feature_exractor(
        Norms.Norm.No_norm,
        local_feature_extractor=encoder,
    )

    centers = Clusterer.Clusterer.fit_ancient_data(cluster_centers_path)
    vlad = Global_feature_exractors.VLAD(centers)

    # Wrap every image first, then collect the global descriptors.
    described_images = []
    for item in images:
        described_images.append(
            Image.Image(item, local_feature_extractor=extractor, global_feature_extractor=vlad)
        )
    vlad_vectors = []
    for described in described_images:
        vlad_vectors.append(described.global_descriptor)

    PCA_reduction.PCA_reduction.plot_variance_nbComponents(
        vlad_vectors,
        percentage_variance=percentage_variance,
    )

    PCA_reduction.PCA_reduction.create_new_pca_model(
        vectors=vlad_vectors,
        path_to_save=path_to_save,
        percentage_variance=percentage_variance,
    )

In [None]:
# Crop the training patches around the detected keypoints and pickle them.
# configuration["shape_images"] is the half side of a patch.
generate_patchs(
    PatchsPickle=IMAGES_PATCHS_PATH,
    images=images_train_set,
    height=configuration["shape_images"],
)

In [None]:
# train_network() saves the encoder to ENCODER_MODEL and has no return value,
# so keep a handle on the trainer rather than binding None to a variable
# that looks like a model.
autoencoder_trainer = Autoencoder_train(configuration, IMAGES_PATCHS_PATH, ENCODER_MODEL)
autoencoder_trainer.train_network()

In [None]:
# Elbow analysis over candidate cluster counts; the encoder consumes patches
# whose side is twice configuration["shape_images"].
choose_number_clusters(
    images_train_set,
    model_path=ENCODER_MODEL,
    max_no_improvement=configuration["max_no_improvement"],
    shape_images=configuration["shape_images"] * 2,
    max_key_points=configuration["max_key_points"],
    test_values=range(1, 300, 50),
)

In [None]:
# Number of k-means clusters used when fitting the cluster centers below.
# NOTE(review): presumably picked from the elbow analysis of the previous cell - confirm.
nb_clusters_kmeans = 50

In [None]:
# Fit the cluster centers on the training images and store them for this run.
save_new_clusters_centers(
    images=images_train_set,
    model_path=ENCODER_MODEL,
    path_to_save=CLUSTERS_CENTERS_PATH,
    nb_clusters=nb_clusters_kmeans,
    max_no_improvement=configuration["max_no_improvement"],
    shape_images=configuration["shape_images"] * 2,
    max_key_points=configuration["max_key_points"],
)

In [None]:
# Build the PCA model from the VLAD descriptors of the training images.
new_pca_components_NN_vlad(
    images_train_set,
    path_to_save=PCA_MODEL_PATH,
    cluster_centers_path=CLUSTERS_CENTERS_PATH,
    encoder_model_path=ENCODER_MODEL,
    percentage_variance=configuration["pca_percentage_variance"],
    shape_images=configuration["shape_images"] * 2,
    max_key_points=configuration["max_key_points"],
)

# Test Model

In [None]:
import importlib, Accuracy
importlib.reload(Accuracy)
from Accuracy import accuracy

In [None]:
def test_sift_NN_vlad(images, images_names, writers, pca_path, cluster_centers_path, model_path, max_key_points, accuracy_calculator, shape_images):
    """Evaluate the encoder + VLAD + PCA pipeline on a labelled test set.

    The saved encoder provides the local descriptors; the saved cluster
    centers and PCA model parameterise ``Global_feature_exractors.VLAD``. The
    wrapped images are scored by ``accuracy_calculator`` using the cosine
    distance from ``Distances.Distance``.

    Args:
        images: Iterable of inputs accepted by ``Image.Image``.
        images_names: Names paired one-to-one with ``images``.
        writers: Labels passed to ``accuracy_calculator`` as ``Y_test``.
        pca_path: Path given to ``PCA_reduction.PCA_reduction``.
        cluster_centers_path: Path given to ``Clusterer.Clusterer.fit_ancient_data``.
        model_path: Path of the saved encoder model.
        max_key_points: Forwarded to ``Autoencoder.Encoder_NN``.
        accuracy_calculator: Callable accepting ``X_test``, ``Y_test``,
            ``global_feature_extractor`` and ``distance_metric`` keywords.
        shape_images: Side length of the square patches fed to the encoder.

    Returns:
        Whatever ``accuracy_calculator`` returns.
    """
    patch_shape = (shape_images, shape_images)
    encoder = Autoencoder.Encoder_NN(patch_shape, max_key_points)
    encoder.set_model(model_path=model_path)
    extractor = Local_features_extractor.Local_feature_exractor(
        Norms.Norm.No_norm,
        local_feature_extractor=encoder,
    )

    centers = Clusterer.Clusterer.fit_ancient_data(cluster_centers_path)
    pca = PCA_reduction.PCA_reduction(pca_path)
    vlad = Global_feature_exractors.VLAD(centers, pca_instance=pca)

    described_images = []
    for item, item_name in zip(images, images_names):
        described_images.append(
            Image.Image(
                item,
                image_name=item_name,
                local_feature_extractor=extractor,
                global_feature_extractor=vlad,
            )
        )

    return accuracy_calculator(
        X_test=described_images,
        Y_test=writers,
        global_feature_extractor=vlad,
        distance_metric=Distances.Distance.cosine_distance,
    )

In [None]:
# Score the trained pipeline on the held-out test split.
accuracy_value = test_sift_NN_vlad(
    images_test_set,
    images_names=images_names_test_set,
    writers=writers_test_set,
    pca_path=PCA_MODEL_PATH,
    cluster_centers_path=CLUSTERS_CENTERS_PATH,
    model_path=ENCODER_MODEL,
    max_key_points=configuration["max_key_points"],
    accuracy_calculator=accuracy,
    shape_images=configuration["shape_images"] * 2,
)

# Blank line, then the accuracy as a percentage with two decimals.
print()
print(f"Accuracy value for <{training_session}> : {accuracy_value:.2%}")