# Module imports and setup

In [None]:
# Mount the user's Google Drive under /content/drive so that the project files stored
# there can be reached from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
WORK_DIR = "/content/drive/My Drive/Colab Notebooks/Writer_identification"
!pip uninstall -y opencv-python
!pip install -U opencv-contrib-python
% cd "/content/drive/My Drive/Colab Notebooks/Writer_identification"

In [None]:
import Clusterer, Global_feature_exractors, Local_features_extractor, Norms, Image, PCA_reduction, Distances
from Dataset_loader import train_test_IAM, train_test_ICDAR2013, train_test_TrigraphSlant
from Accuracy import accuracy_optimised
import json, os
from sklearn.neighbors import BallTree
import numpy as np
from google.colab.patches import cv2_imshow

Specify the key of the entry to use in the `config_train.json` file.

In [None]:
# Key of the entry to read under "Sift_Vlad_Pca" in config_train.json. It is also used
# as the name of the folder that receives the artifacts of this training session.
training_session = "training_16Sep"

with open("./config_train.json", encoding="utf-8") as config_file:
  configuration = json.load(config_file)["Sift_Vlad_Pca"][training_session]

# Locations of the cluster centers and of the PCA model, both inside the session folder.
CLUSTERS_CENTERS_PATH = training_session+"/centers_clusters_VLAD.npy"
PCA_MODEL_PATH = training_session+"/pca_model_VLAD.pkl"

# Create the session folder if needed. exist_ok=True makes the cell safely re-runnable
# without a separate existence check.
os.makedirs(training_session, exist_ok=True)

In [None]:
# Load the IAM train/test split, sized according to the selected configuration entry.
train, test = train_test_IAM("./dataset/IAM", size_train=configuration["train_size"], size_test=configuration["test_size"])
# Transpose each list of 3-item samples into three parallel lists. Going by the names
# used for the test split, a sample is presumably (writer, image name, image) - confirm
# against Dataset_loader. Only the third field is kept for the training split.
_, _, images_train_set = map(list, zip(*train))
writers_test_set, images_names_test_set, images_test_set = map(list, zip(*test))

# Train Model

In [None]:
def choose_number_clusters(images, max_no_improvement, test_values=range(1, 500, 50)):
  """Run the k-means elbow analysis on a collection of images.

  Every entry of `images` is wrapped in an `Image.Image` that uses a local feature
  extractor built with Hellinger normalisation; the wrapped images are then handed to
  `Clusterer.Clusterer.elbow_method_kmeans`.

  Args:
    images: iterable of inputs accepted by `Image.Image` (presumably image paths - confirm).
    max_no_improvement: forwarded unchanged to `elbow_method_kmeans`.
    test_values: candidate values forwarded unchanged to `elbow_method_kmeans`.

  Returns:
    None. Whatever `elbow_method_kmeans` returns is not propagated.
  """
  normalize = Norms.Norm.hellinger_normalization
  extractor = Local_features_extractor.Local_feature_exractor(normalize)

  wrapped_images = []
  for image_path in images:
    wrapped_images.append(
      Image.Image(image_path, local_feature_extractor=extractor, verbose=False)
    )

  Clusterer.Clusterer.elbow_method_kmeans(
    images=wrapped_images,
    max_no_improvement=max_no_improvement,
    test_values=test_values,
  )

In [None]:
def save_new_clusters_centers(images, path_to_save, nb_clusters, max_no_improvement):
  """Fit a new clustering on the given images and store it at `path_to_save`.

  Every entry of `images` is wrapped in an `Image.Image` that uses a local feature
  extractor built with Hellinger normalisation; the wrapped images are then handed to
  `Clusterer.Clusterer.fit_new_trainig` with verbosity disabled.

  Args:
    images: iterable of inputs accepted by `Image.Image` (presumably image paths - confirm).
    path_to_save: destination forwarded unchanged to `fit_new_trainig`.
    nb_clusters: number of clusters forwarded unchanged to `fit_new_trainig`.
    max_no_improvement: forwarded unchanged to `fit_new_trainig`.

  Returns:
    None. Whatever `fit_new_trainig` returns is not propagated.
  """
  normalize = Norms.Norm.hellinger_normalization
  extractor = Local_features_extractor.Local_feature_exractor(normalize)

  wrapped_images = []
  for image_path in images:
    wrapped_images.append(
      Image.Image(image_path, local_feature_extractor=extractor, verbose=False)
    )

  Clusterer.Clusterer.fit_new_trainig(
    images=wrapped_images,
    path_to_save=path_to_save,
    nb_clusters=nb_clusters,
    max_no_improvement=max_no_improvement,
    verbose=0,
  )

In [None]:
def new_pca_components_vlad(images, path_to_save_pca, cluster_centers_path, percentage_variance):
  """Build a PCA model from the VLAD descriptors of the given images.

  Steps, in order:
    1. Load the clustering stored at `cluster_centers_path` and create a VLAD encoder from it.
    2. Wrap every entry of `images` in an `Image.Image` configured with a local feature
       extractor (Hellinger normalisation) and the VLAD encoder.
    3. Collect the `global_descriptor` of every wrapped image.
    4. Plot variance against number of components, then create the PCA model.

  Args:
    images: iterable of inputs accepted by `Image.Image` (presumably image paths - confirm).
    path_to_save_pca: destination forwarded to `create_new_pca_model`.
    cluster_centers_path: location passed to `Clusterer.Clusterer.fit_ancient_data`.
    percentage_variance: forwarded to both the plotting call and `create_new_pca_model`.

  Returns:
    None.
  """
  centers = Clusterer.Clusterer.fit_ancient_data(cluster_centers_path)
  vlad_encoder = Global_feature_exractors.VLAD(centers)

  normalize = Norms.Norm.hellinger_normalization
  extractor = Local_features_extractor.Local_feature_exractor(normalize)

  # First build every image wrapper, then read the descriptors, as two separate passes.
  wrapped_images = []
  for image_path in images:
    wrapped_images.append(
      Image.Image(
        image_path,
        local_feature_extractor=extractor,
        global_feature_extractor=vlad_encoder,
        verbose=False,
      )
    )

  descriptors = []
  for wrapped in wrapped_images:
    descriptors.append(wrapped.global_descriptor)

  reducer = PCA_reduction.PCA_reduction
  reducer.plot_variance_nbComponents(descriptors, percentage_variance=percentage_variance)
  reducer.create_new_pca_model(
    vectors=descriptors,
    path_to_save=path_to_save_pca,
    percentage_variance=percentage_variance,
  )

In [None]:
# Run the elbow analysis on the training images for the candidate values 1, 51, ..., 251.
choose_number_clusters(
  images=images_train_set,
  max_no_improvement=configuration["max_no_improvement"],
  test_values=range(1, 300, 50),
)

In [None]:
# Number of k-means clusters used for this training session (presumably picked by
# reading the elbow analysis run beforehand - confirm).
nb_clusters_kmeans = 50

In [None]:
# Fit the clustering on the training images and store it in the session folder.
save_new_clusters_centers(
  images=images_train_set,
  path_to_save=CLUSTERS_CENTERS_PATH,
  nb_clusters=nb_clusters_kmeans,
  max_no_improvement=configuration["max_no_improvement"],
)

In [None]:
# Build the PCA model from the VLAD descriptors of the training images, reusing the
# clustering saved at CLUSTERS_CENTERS_PATH.
new_pca_components_vlad(
  images=images_train_set,
  path_to_save_pca=PCA_MODEL_PATH,
  cluster_centers_path=CLUSTERS_CENTERS_PATH,
  percentage_variance=configuration["pca_percentage_variance"],
)

# Test Model

In [None]:
# Development aid: re-execute the already imported Clusterer module so that changes made
# to its source since the first import take effect without restarting the kernel.
import importlib
importlib.reload(Clusterer)

In [None]:
def test_sift_descr_vlad(images, images_names, writers, cluster_centers_path, accuracy_calculator, pca_path=None):
  
  hellinger_normalization = Norms.Norm.hellinger_normalization
  sift = Local_features_extractor.Local_feature_exractor(hellinger_normalization)

  clusters_centers = Clusterer.Clusterer.fit_ancient_data(cluster_centers_path)
  if pca_path is not None: pca_instance = PCA_reduction.PCA_reduction(pca_path)
  else: pca_instance = None
  vlad = Global_feature_exractors.VLAD(clusters_centers, pca_instance=pca_instance)

  images_pre = [Image.Image(image, image_name=image_name, local_feature_extractor=sift, global_feature_extractor=vlad) for image, image_name in zip(images,images_names)]
  
  chi2_distance = Distances.Distance.chi2_distance
  accuracy_value = accuracy_calculator(X_test=np.array(images_pre), 
                                       Y_test=writers, 
                                       global_feature_extractor=vlad, 
                                       distance_metric=chi2_distance)
  return accuracy_value

In [None]:
# Evaluate the trained pipeline (saved clustering + PCA model) on the test split.
accuracy_value = test_sift_descr_vlad(
  images=images_test_set,
  images_names=images_names_test_set,
  writers=writers_test_set,
  cluster_centers_path=CLUSTERS_CENTERS_PATH,
  accuracy_calculator=accuracy_optimised,
  pca_path=PCA_MODEL_PATH,
)

# Blank line, then the accuracy formatted as a percentage with two decimals.
print()
print(
  "Accuracy value for <", training_session, "> : ",
  "{:.2%}".format(accuracy_value),
  sep="",
)