In [None]:
# Root folder of the dataset; the other dataset paths below are derived from it.
dataset_root = "../dataset/"
# Folder of images passed to extract_features() by compute_features().
images_dir = dataset_root + "complete/"
# Presumably the folder of handcrafted features; not referenced in the cells visible here.
features_dir = dataset_root + 'handcrafted/'

# When True, the feature-extraction cell recomputes the features and saves them to disk.
recompute_features = False
# When True, the classification cell runs the model on every batch and pickles the predictions.
classify_images = True

In [None]:
import os
import sys

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Project modules live one directory up; extend the search path before importing them.
sys.path.append("..")

from keras.applications.efficientnet_v2 import EfficientNetV2B0, preprocess_input
from feature_extraction import build_feature_extractor
from similarity_search.neural_similarity_search import extract_features, order_prediction
from handcrafted_extraction import load_all_features, load_features


# Classifier loaded from the tuned_models folder; the classification cell calls model.predict() on it.
model = keras.models.load_model('../classification/tuned_models/efficientnet_v2_noise_extended')
# Folder where compute_features() writes the feature arrays and their filename CSVs.
extracted_features_root = "./extracted_features/"

def builder_cosine():
    """Build a feature extractor from the saved 'efficientnet_v2_cosine' model.

    The model is loaded from disk and all of its layers except the last one
    are wrapped in a new ``keras.Sequential``.

    Returns:
        A ``keras.Sequential`` made of every layer of the loaded model but the last.
    """
    cosine_model = keras.models.load_model('../classification/tuned_models/efficientnet_v2_cosine')
    kept_layers = cosine_model.layers[:-1]
    return keras.Sequential(kept_layers)

def builder_base():
    """Build a feature extractor via ``build_feature_extractor``.

    Passes the ``EfficientNetV2B0`` constructor and the layer name
    'block6h_se_reduce' (presumably the layer whose output is used as the
    feature vector; see ``feature_extraction`` to confirm).

    Returns:
        Whatever ``build_feature_extractor`` returns for these arguments.
    """
    extractor = build_feature_extractor(EfficientNetV2B0, 'block6h_se_reduce')
    return extractor

In [None]:
# Batch size forwarded to extract_features() by compute_features().
batch_size = 128
# Image size forwarded to extract_features(); presumably (height, width) — TODO confirm.
image_size = (224, 224)

def align_features(source_name, target_name, data):
    """Reorder ``data`` so that its rows follow the order of ``target_name``.

    Row ``i`` of ``data`` is taken to belong to ``source_name[i]``.

    Args:
        source_name: iterable of names describing the current row order of ``data``.
        target_name: iterable of names giving the desired row order.
        data: object supporting indexing with a list of integers (e.g. a NumPy array).

    Returns:
        A tuple ``(aligned, order)``: ``aligned[j]`` is the row belonging to
        ``target_name[j]`` and ``order`` is the list of source positions, so
        indexing a NumPy array of source names with ``order`` yields the
        target names.

    Raises:
        ValueError: if a name of ``target_name`` does not occur in ``source_name``.
    """
    # Build the name -> position map once instead of calling list.index() per name.
    # setdefault keeps the first occurrence, like list.index() would.
    position = {}
    for idx, name in enumerate(source_name):
        position.setdefault(name, idx)

    # For every target slot, pick the source row carrying the same name. Looking
    # the source names up in the target instead would yield the inverse permutation.
    order = []
    for name in target_name:
        if name not in position:
            raise ValueError(f"{name!r} is not in source_name")
        order.append(position[name])

    return data[order], order

def check_name_aligned(names, other_names):
    """Tell whether two name sequences are identical element by element.

    Args:
        names: iterable of names.
        other_names: iterable of names to compare against.

    Returns:
        True when both sequences have the same length and every pair of
        names at the same position is equal, False otherwise.
    """
    names = list(names)
    other_names = list(other_names)
    # zip() stops at the shorter input, so a length mismatch must be rejected explicitly.
    if len(names) != len(other_names):
        return False
    return not any(n1 != n2 for n1, n2 in zip(names, other_names))

def compute_features(model, filename):
    """Extract features for the images in ``images_dir`` and store them on disk.

    Two files are written under ``extracted_features_root``: the feature
    array via ``np.save`` (which adds the ``.npy`` suffix) and a header-less
    CSV ``<filename>_filenames.csv`` listing the image filenames.

    Args:
        model: forwarded unchanged to ``extract_features``; the calls in this
            notebook pass builder functions (``builder_cosine`` / ``builder_base``).
        filename: base name (without extension) of the output files.
    """
    extraction = extract_features(
        model,
        images_dir,
        batch_size=batch_size,
        image_size=image_size,
        rebuild_interval=50,
        preprocessing=preprocess_input,
    )
    features, image_names = extraction

    os.makedirs(extracted_features_root, exist_ok=True)
    output_prefix = extracted_features_root + filename

    # header=None keeps the CSV header-less; the index column is still written,
    # so the names end up in the second column.
    pd.DataFrame({'filename': image_names}).to_csv(output_prefix + '_filenames.csv', header=None)
    np.save(output_prefix, features)

if recompute_features:
    # NOTE(review): builder_cosine loads a model from tuned_models/ yet its features are
    # saved under the "not_tuned" name, while the builder_base features are read back as
    # "tuned" below. The labels look swapped — confirm. They are left unchanged here
    # because other code may read these files by name.
    compute_features(builder_cosine, 'efficient_net_not_tuned_similarity')
    compute_features(builder_base, 'efficient_net_similarity')

    # The filename CSVs are header-less with the index in column 0 and the name in column 1.
    names_tuned = pd.read_csv(extracted_features_root + 'efficient_net_similarity_filenames.csv', header=None).iloc[:, 1].values
    names_not_tuned = pd.read_csv(extracted_features_root + 'efficient_net_not_tuned_similarity_filenames.csv', header=None).iloc[:, 1].values
    data_not_tuned = np.load(extracted_features_root + 'efficient_net_not_tuned_similarity.npy')

    # The order of the 'efficient_net_similarity' run is the common reference order.
    names_df = pd.DataFrame({'filename': names_tuned})
    names_df.to_csv(extracted_features_root + 'common_filenames.csv', header=None)

    # Re-align only when the two runs listed the files in a different order.
    if not check_name_aligned(names_tuned, names_not_tuned):
        print("need to align data with a one common filenames representation")
        data_not_tuned, order = align_features(names_not_tuned, names_tuned, data_not_tuned)
        names_not_tuned = names_not_tuned[order]

        # Verify before overwriting anything, so a failed alignment cannot corrupt the saved features.
        if not check_name_aligned(names_tuned, names_not_tuned):
            raise RuntimeError("feature alignment failed: filenames still differ from the common order")

        # Persist the re-ordered features and names so the files on disk match common_filenames.csv.
        np.save(extracted_features_root + 'efficient_net_not_tuned_similarity', data_not_tuned)
        pd.DataFrame({'filename': names_not_tuned}).to_csv(
            extracted_features_root + 'efficient_net_not_tuned_similarity_filenames.csv', header=None
        )
        
    

## Classify images for similarity search

In [None]:
from misc import load, load_class_labels
from data_loader import data_loader
from tqdm import tqdm

# Dataset locations used by this cell.
dataset_root = "../dataset/"
labels_path = dataset_root + 'labels/'
complete_dir = dataset_root + "complete/"

# Load the three label files; only the train file is loaded with the two extra
# positional arguments (1, 1) — see misc.load for their meaning.
train_info = load(labels_path + 'train.csv', 1, 1)
val_info = load(labels_path + 'validation.csv')
test_info = load(labels_path + 'test.csv')

# Stack train, validation and test rows (in that order) into a single table.
all_info = pd.concat([train_info, val_info, test_info], axis=0)

# Loader over every labelled image; 400 and (224, 224) are presumably the
# batch size and the image size — TODO confirm against data_loader.
dl = data_loader(all_info, complete_dir, 400, (224, 224))

In [None]:
import os
# Hide every CUDA device from this process so that computation runs on the CPU.
# NOTE(review): TensorFlow was already imported and a model loaded in an earlier
# cell, so this may be set too late to take effect — confirm, and consider
# setting it before TensorFlow is first imported.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


In [None]:
# Per-image results, filled in parallel (index i of each list describes the same image).
all_filenames = []
all_labels = []
all_confidence = []

# Threshold forwarded to order_prediction() for every image.
significance_threshold = 0.9

if classify_images:
    for batch in tqdm(range(dl.number_of_batch())):
        images, _, filenames = dl.get_batch(batch, preprocessing=preprocess_input)
        # Model.predict() accepts no `device` argument (passing one raises TypeError);
        # inference is pinned to the CPU with a device scope instead.
        with tf.device('/CPU:0'):
            predictions = model.predict(images, verbose=0)
        for filename, prediction in zip(filenames, predictions):
            prediction, ordered_classes = order_prediction(prediction, significance_threshold)

            all_filenames.append(filename)
            all_confidence.append(prediction)
            all_labels.append(ordered_classes)

    # Plain lists are used as columns: wrapping them in np.array() fails in the
    # DataFrame constructor when order_prediction returns sequences (2-D or ragged data).
    df = pd.DataFrame({'filenames': all_filenames, 'labels': all_labels, 'confidence': all_confidence})
    # The output folder only exists if features were computed before; create it if needed.
    os.makedirs(extracted_features_root, exist_ok=True)
    df.to_pickle(extracted_features_root + 'predictions.pickle')
