In [None]:
%cd "."

In [None]:
import Clusterer, Global_feature_extractor, Local_features_extractor, Norms, Image, PCA_reduction, Distances, Autoencoder_train, Encoder_NN
from Dataset_loader import load_dataset
from Accuracy import accuracy_optimised, accuracy
import json, os, cv2, pickle
import numpy as np

In [None]:
import importlib
# Re-execute Global_feature_extractor so that edits made to that module after
# its first import are picked up without restarting the kernel.
importlib.reload(Global_feature_extractor)
# NOTE(review): Autoencoder_train is already imported in the imports cell, so
# this statement does not refresh it; importlib.reload(Autoencoder_train)
# would be needed if a reload was intended.
import Autoencoder_train

In [None]:
# Candidate modules for every stage of the pipeline. A run picks one entry per
# stage by index (see `pipline`).
PIPELINE_PATHS = {
    # (keypoint detector, local patch representation) pairs.
    "local_patch_extraction_representation": [
        ("SIFT", "SIFT"),
        ("A-KAZE", "A-KAZE"),
        ("SIFT", "Autoencoder"),
        ("A-KAZE", "Autoencoder"),
    ],
    # Clustering algorithm used to build the codebook.
    "codebook_generation": [
        "MiniBatchKMeans",
        "KMedoids",
    ],
    # Encoding that turns local descriptors into one global descriptor.
    "feature_encoding_and_pooling": [
        "BoVW",
        "VLAD",
    ],
    # Optional reduction of the global descriptor (None disables it).
    "dimentionality_reduction": [
        None,
        "PCA",
    ],
}

# (dataset name, dataset-specific parameter) pairs; both values are forwarded
# to load_dataset as `dataset` and `parametre`.
DATASETS = [
    ("IAM", None),
    ("TrigraphSlant", False),
    ("TrigraphSlant", True),
    ("ICDAR", "en"),
    ("ICDAR", "ar"),
]

In [None]:
# Index of the module chosen at each PIPELINE_PATHS stage, in this order:
# [local patch extraction/representation, codebook generation,
#  feature encoding and pooling, dimensionality reduction].
pipline = [0, 0, 1, 1]

training_session = {
    "id": "Tanya",        # also the directory where this session's artifacts are written
    "datasets": [1,2],    # indices into DATASETS
    "training_size": 10,  # forwarded to load_dataset as size_train
    "testing_size": 1     # forwarded to load_dataset as size_test
}

# Create the session directory if needed. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race; it also fails immediately if a
# regular file already occupies that path instead of failing later on save.
os.makedirs(training_session["id"], exist_ok=True)

In [None]:
# Accumulate the train/test samples of every dataset selected for this session.
train_big_set = []
test_big_set = []

for choice in training_session["datasets"]:
    dataset_name = DATASETS[choice][0]
    dataset_parametre = DATASETS[choice][1]
    train_mini_set, test_mini_set = load_dataset(
        dataset=dataset_name,
        path="./dataset",
        size_train=training_session["training_size"],
        size_test=training_session["testing_size"],
        parametre=dataset_parametre,
    )
    train_big_set += train_mini_set
    test_big_set += test_mini_set

# Transpose the lists of 3-field samples into one list per field. Only the
# images are kept for training; the test set keeps all three fields.
_, _, images_train_set = [list(column) for column in zip(*train_big_set)]
writers_test_set, images_names_test_set, images_test_set = [list(column) for column in zip(*test_big_set)]

In [None]:
# Sanity check: report how many images ended up in each split.
print("Number of training images:",len(images_train_set))
print("Number of testing images:",len(images_test_set))

In [None]:
# (detector, representation) pair selected for the local patch stage.
modules_chosen = PIPELINE_PATHS["local_patch_extraction_representation"][pipline[0]]

In [None]:
def _create_local_features_detector(name):
    """Return a new OpenCV detector/descriptor for `name` ("SIFT" or "A-KAZE").

    Raises:
        ValueError: if `name` is not one of the supported detectors.
    """
    if name == "SIFT":
        # Prefer cv2.SIFT_create when the installed OpenCV exposes it; otherwise
        # fall back to the opencv-contrib `xfeatures2d` entry point.
        if hasattr(cv2, "SIFT_create"):
            return cv2.SIFT_create()
        return cv2.xfeatures2d.SIFT_create()
    if name == "A-KAZE":
        return cv2.AKAZE_create()
    raise ValueError("Unknown local feature detector: {!r}".format(name))


if modules_chosen in (("SIFT", "SIFT"), ("A-KAZE", "A-KAZE")):
    # The detector's own descriptors are used, with Hellinger normalization.
    hellinger_normalization = Norms.Norm.hellinger_normalization
    algo = _create_local_features_detector(modules_chosen[0])
    local_features_extractor_descriptor = Local_features_extractor.Local_feature_extractor(algorithm=algo, norm=hellinger_normalization)
else:
    # Autoencoder representation: keypoints come from the chosen detector and
    # are described by a trained encoder network.
    # NOTE(review): the '?' values look like placeholders to be filled in before
    # this branch can run — confirm the intended keys and model path.
    shape_images = '?'
    max_key_points = '?'
    model_path = '?'
    # Raises ValueError on an unknown detector name instead of silently reusing
    # a detector left over from a previous cell execution.
    local_features_detector = _create_local_features_detector(modules_chosen[0])
    # NOTE(review): `network_configuration` is not defined anywhere in the
    # visible notebook; unless it is created elsewhere this raises NameError.
    encoder = Encoder_NN.Encoder_NN((network_configuration[shape_images]*2, network_configuration[shape_images]*2),
                                     network_configuration[max_key_points],
                                     local_features_detector=local_features_detector)
    encoder.set_model(model_path=model_path)
    local_features_extractor_descriptor = Local_features_extractor.Local_feature_extractor(algorithm=encoder)

In [None]:
def get_descriptors(local_features_extractor_descriptor, images_train_set, mini_size_sample=12, rng=None):
    """Collect local descriptors from the training images.

    Each image is wrapped in an `Image.Image` built with the given local
    feature extractor, and its `local_descriptors` are gathered twice: once in
    full (in a random order) and once as a small random sample.

    Args:
        local_features_extractor_descriptor: extractor handed to `Image.Image`
            as `local_feature_extractor`.
        images_train_set: iterable of raw images.
        mini_size_sample: maximum number of descriptors sampled per image
            (without replacement) for the second returned list.
        rng: optional random source exposing `choice(a, size, replace=...)`,
            e.g. `np.random.RandomState(seed)` or `np.random.default_rng(seed)`.
            Defaults to NumPy's global random state, as before.

    Returns:
        A pair `(all_descriptors, sampled_descriptors)` of flat lists holding
        one descriptor (one row of the per-image array) per element.
    """
    sampler = np.random if rng is None else rng
    list_local_descriptors_all = []
    list_local_descriptors = []
    # Images are processed one at a time so the wrapped Image objects do not
    # all have to stay in memory together.
    for raw_image in images_train_set:
        image = Image.Image(raw_image, local_feature_extractor=local_features_extractor_descriptor)
        descriptors = image.local_descriptors
        # An image may yield no descriptors at all; skip it instead of failing
        # on the array indexing / sampling below.
        if descriptors is None or len(descriptors) == 0:
            continue
        descriptors = np.asarray(descriptors)
        nb_descriptors = descriptors.shape[0]
        shuffled_rows = sampler.choice(nb_descriptors, nb_descriptors, replace=False)
        list_local_descriptors_all.extend(descriptors[shuffled_rows])
        sampled_rows = sampler.choice(nb_descriptors, min(mini_size_sample, nb_descriptors), replace=False)
        list_local_descriptors.extend(descriptors[sampled_rows])
    return list_local_descriptors_all, list_local_descriptors

In [None]:
# Extract the local descriptors of every training image: the full set and a
# per-image random sample (default sample size of get_descriptors).
descriptors_all, descriptors_sample = get_descriptors(local_features_extractor_descriptor, images_train_set)
# Sizes of the sampled set, then of the full set.
print(len(descriptors_sample))
print(len(descriptors_all))

In [None]:
# Clustering algorithm selected for the codebook generation stage.
clustering_algo = PIPELINE_PATHS["codebook_generation"][pipline[1]]
# Passed to the Clusterer calls below as `max_no_improvement`.
# NOTE(review): presumably an early-stopping patience for the clustering — confirm in Clusterer.
max_no_improvement = 500
# Candidate numbers of clusters explored in the next cell: 2, 22, 42, 62, 82.
test_values=range(2, 100, 20)

In [None]:
# Explore the candidate cluster counts on the sampled descriptors only; the
# return value is not used here.
# NOTE(review): presumably reports a quality measure per value of test_values
# to help pick nb_clusters — confirm in Clusterer.
Clusterer.Clusterer.choose_number_clusters_clustering(vectors=descriptors_sample, 
                                                      algo=clustering_algo,
                                                      max_no_improvement=max_no_improvement, 
                                                      test_values=test_values,
                                                      verbose=1)

In [None]:
# Codebook size retained for this session.
nb_clusters = 50
# File, inside the session directory, where the cluster centers are stored.
clusters_centers_path = "/".join([training_session["id"], "Centers_clusters_" + str(nb_clusters) + "nb.npy"])

In [None]:
# Fit the codebook on the full descriptor set (the exploration cell above only
# used the per-image sample). clusters_centers_path is passed as the save
# location and is later given to the global feature extractors.
# NOTE(review): metric=None presumably selects the Clusterer default — confirm.
Clusterer.Clusterer.fit_new_trainig(vectors=descriptors_all,
                                    algo= clustering_algo,
                                    path_to_save=clusters_centers_path,
                                    nb_clusters=nb_clusters, 
                                    max_no_improvement=max_no_improvement,
                                    metric=None,
                                    verbose=0)

In [None]:
# Build the global feature extractor for the selected encoding, backed by the
# cluster centers stored at clusters_centers_path.
module_chosen = PIPELINE_PATHS["feature_encoding_and_pooling"][pipline[2]]
if module_chosen == "BoVW":
    global_feature_extractor = Global_feature_extractor.BOW(clusters_centers_path=clusters_centers_path)
elif module_chosen == "VLAD":
    global_feature_extractor = Global_feature_extractor.VLAD(clusters_centers_path=clusters_centers_path)
else:
    # Fail loudly rather than leaving global_feature_extractor undefined, or
    # bound to whatever a previous execution of this cell left in the kernel.
    raise ValueError("Unknown feature encoding and pooling module: {!r}".format(module_chosen))

In [None]:
# Variance threshold handed to the PCA helpers as `percentage_variance`.
percentage_variance = 0.98
# File, inside the session directory, given to the PCA helpers as the model path.
pca_model_path = "/".join([training_session["id"], "pca_model.pkl"])

In [None]:
# PCA is only applied when it is selected AND the encoding is VLAD; with BoVW
# a "PCA" selection is silently ignored by this condition.
if (PIPELINE_PATHS["dimentionality_reduction"][pipline[3]] == "PCA") and (module_chosen == "VLAD"):
    # Global descriptors of the training images, computed without PCA.
    # Note: `images_pre` holds training images here and is reassigned with the
    # test images in a later cell.
    images_pre = [Image.Image(image, local_feature_extractor=local_features_extractor_descriptor, global_feature_extractor=global_feature_extractor) for image in images_train_set]
    global_descriptors = [image.global_descriptor for image in images_pre]
    PCA_reduction.PCA_reduction.plot_variance_nbComponents(global_descriptors, percentage_variance=percentage_variance)
    # NOTE(review): presumably fits a PCA on the descriptors and saves it at
    # pca_model_path — confirm in PCA_reduction.
    PCA_reduction.PCA_reduction.create_new_pca_model(vectors=global_descriptors, 
                                                    path_to_save=pca_model_path, 
                                                    percentage_variance=percentage_variance)

    # Rebuild the VLAD extractor with a PCA instance created from pca_model_path.
    pca_instance = PCA_reduction.PCA_reduction(pca_model_path)
    global_feature_extractor = Global_feature_extractor.VLAD(clusters_centers_path=clusters_centers_path, pca_instance=pca_instance)

In [None]:
# The distance metric and the accuracy routine depend on how local patches are
# represented: the autoencoder representation uses the angular distance with
# `accuracy`, every other representation the chi2 distance with `accuracy_optimised`.
local_patch_representation = PIPELINE_PATHS["local_patch_extraction_representation"][pipline[0]][1]
uses_autoencoder = local_patch_representation == "Autoencoder"
distance_metric = Distances.Distance.angular_distance if uses_autoencoder else Distances.Distance.chi2_distance
accuracy_calculator = accuracy if uses_autoencoder else accuracy_optimised

In [None]:
# Wrap every test image, together with its name, using the extractors
# configured in the previous cells.
images_pre = [
    Image.Image(
        test_image,
        image_name=test_image_name,
        local_feature_extractor=local_features_extractor_descriptor,
        global_feature_extractor=global_feature_extractor,
    )
    for test_image, test_image_name in zip(images_test_set, images_names_test_set)
]

In [None]:
# Score the test images against their writers with the selected metric.
accuracy_value = accuracy_calculator(
    X_test=images_pre,
    Y_test=writers_test_set,
    distance_metric=distance_metric,
)

# Report the accuracy as a percentage next to the session settings.
print("*" * 24)
accuracy_report = "".join([
    "Accuracy value for <",
    str(training_session),
    "> : ",
    "{:.2%}".format(accuracy_value),
])
print(accuracy_report)