In [None]:
import cv2
import os
import pickle
import numpy as np
import pandas as pd
import src.preprocess as preprocess
from src.preprocess import ImagePreprocessor
import src.calculation as calculation
import src.deepvectorizer as dp
import matplotlib.pyplot as plt
import seaborn as sns
import json

In [None]:
# Sample images for the three-way comparison at the end of the notebook: the first
# two file names carry the label "Abdullah_Gul", the third "Adrien_Brody".
image1_path, image2_path, image3_path = (
    './data/raw/test/Abdullah_Gul_0006.jpg',
    './data/raw/test/Abdullah_Gul_0005.jpg',
    './data/raw/test/Adrien_Brody_0005.jpg',
)

In [None]:
# Build the vectorizer once from the pretrained weights file; the same object is
# reused by the batch vectorization loop and by the ImagePreprocessor cells below.
vectorizer = dp.get_deepface_vectorizer(
    './pretrained_models/VGGFace2_DeepFace_weights_val-0.9034.h5'
)

In [None]:
# Vectorize every image in `resized_path` and cache each vector as a pickle file in
# `dest_root`. Images whose pickle already exists are skipped, so the cell can be
# re-run after an interruption without recomputing finished vectors.
resized_path = './data/preprocessed/resized_for_original_deepface/all'
dest_root = './data/vectorized/padded_152'

# exist_ok=True replaces the separate exists-then-create check.
os.makedirs(dest_root, exist_ok=True)

for i, image_name in enumerate(os.listdir(resized_path)):
    image_path = os.path.join(resized_path, image_name)
    print(f'Vectorizing image {i + 1} from {image_path}')
    # splitext removes only the final extension, so names that contain extra dots
    # no longer collapse onto the same pickle file.
    vector_path = os.path.join(dest_root, os.path.splitext(image_name)[0] + '-pad_152.pickle')

    if os.path.exists(vector_path):
        print(f'Vector {i + 1} already exists as {vector_path}')
        continue

    # cv2.imread signals failure by returning None instead of raising, so check
    # before indexing; otherwise a stray non-image file aborts the whole run.
    image = cv2.imread(image_path)
    if image is None:
        print(f'Skipping {image_path}: could not be read as an image')
        continue

    # Reverse the channel axis (OpenCV decodes colour images as BGR).
    # NOTE(review): presumably the vectorizer expects RGB input - confirm.
    image = image[:, :, ::-1]
    vector = dp.get_image_vector(image, vectorizer)

    with open(vector_path, 'wb') as f:
        print(f'Pickling vector {i + 1} at {vector_path}')
        pickle.dump(vector, f)
    

In [None]:
# Directory containing the pickled vectors; this is the same location the
# vectorization cell writes to (`dest_root`).
padded_152_vector_path = './data/vectorized/padded_152'

In [None]:
# Distinct person labels in sorted order. A label is the vector file name with its
# last two underscore-separated fields dropped
# (e.g. 'Abdullah_Gul_0006-pad_152.pickle' -> 'Abdullah_Gul').
unique_names = sorted({
    '_'.join(leaf.split('_')[0:-2])
    for leaf in os.listdir(padded_152_vector_path)
})

In [None]:
# Start every label with empty result lists; the intra-label loop fills them in.
intra_distances = {
    name: {'l2_distances': [], 'cosine_distances': []}
    for name in unique_names
}

In [None]:
# For every person, compute the L2 and cosine distance between each unordered pair
# of that person's vectors and store the results in `intra_distances[name]` as
# (first_file, second_file, distance) tuples.

# The directory listing does not change inside the loop, so read it once.
all_vector_leaves = sorted(os.listdir(padded_152_vector_path))

for name in unique_names:
    print(f'Calculating intra-label distances for {name}')
    # Compare the parsed label exactly (same parsing as `unique_names`). A substring
    # test such as `name in x` would also pick up files of other people whose label
    # merely contains this one, contaminating the intra-label distances.
    vector_paths = [x for x in all_vector_leaves if '_'.join(x.split('_')[0:-2]) == name]
    name_l2_distances = []
    name_cos_distances = []

    # Load each vector once per person instead of once per pair. Nothing is loaded
    # for people with a single image, because they have no pairs to compare.
    vectors = {}
    if len(vector_paths) > 1:
        for vector_leaf in vector_paths:
            leaf_path = os.path.join(padded_152_vector_path, vector_leaf)
            # NOTE(review): pickle.load can execute arbitrary code; only use it on
            # pickles produced by this project.
            with open(leaf_path, 'rb') as f:
                print(f'Loading {leaf_path}')
                vectors[vector_leaf] = pickle.load(f)

    # The last file has no later partner, so it never needs to be a first element.
    for j, vector1_leaf in enumerate(vector_paths[:-1]):
        vector_1 = vectors[vector1_leaf]
        for vector2_leaf in vector_paths[j + 1:]:
            vector_2 = vectors[vector2_leaf]
            print(f'Calculating distances between {vector1_leaf} and {vector2_leaf}')
            name_l2_distances.append((f'{vector1_leaf}', f'{vector2_leaf}',
                                      calculation.l2_distance(vector_1, vector_2).astype(float)))
            name_cos_distances.append((f'{vector1_leaf}', f'{vector2_leaf}',
                                       calculation.cosine_distance(vector_1, vector_2).astype(float)))
    print(f'Appending intra-label distances for {name} to dictionary')
    intra_distances[name]['l2_distances'] = name_l2_distances
    intra_distances[name]['cosine_distances'] = name_cos_distances

In [None]:
# Persist the intra-label distances as tab-indented JSON.
intra_json_path = './data/vectorized/padded_152_intra_distances.json'
with open(intra_json_path, 'w') as f:
    json.dump(intra_distances, f, indent='\t')

In [None]:
# One row per label; reset_index() moves the label out of the index into an
# 'index' column next to the two distance-list columns.
intra_df = (
    pd.DataFrame
    .from_dict(intra_distances, orient='index')
    .reset_index()
)

In [None]:
# Keep only the labels that have at least one distance entry.
has_pairs = intra_df['l2_distances'].apply(len) > 0
intra_df = intra_df.loc[has_pairs]

In [None]:
# Long-format L2 table: one row per stored entry, keeping only the last element of
# each (file, file, distance) tuple, i.e. the distance itself. Saved as CSV.
intra_l2_df = (
    intra_df[['index', 'l2_distances']]
    .explode('l2_distances')
    .assign(l2_distances=lambda frame: frame['l2_distances'].apply(lambda entry: entry[-1]))
)
intra_l2_df.to_csv('./data/vectorized/padded_152_intra_distances_l2.csv')

In [None]:
# Long-format cosine table: one row per stored entry, keeping only the last element
# of each (file, file, distance) tuple, i.e. the distance itself. Saved as CSV.
intra_cos_df = (
    intra_df[['index', 'cosine_distances']]
    .explode('cosine_distances')
    .assign(cosine_distances=lambda frame: frame['cosine_distances'].apply(lambda entry: entry[-1]))
)
intra_cos_df.to_csv('./data/vectorized/padded_152_intra_distances_cos.csv')

In [None]:
# Display summary statistics for the intra-label L2 distance table.
intra_l2_df.describe()

In [None]:
# Histogram of the intra-label L2 distances (50 bins, raw counts).
sns.histplot(
    data=intra_l2_df,
    x='l2_distances',
    stat='count',
    bins=50,
);

In [None]:
# Histogram of the intra-label cosine distances (50 bins, raw counts).
sns.histplot(
    data=intra_cos_df,
    x='cosine_distances',
    stat='count',
    bins=50,
);

In [None]:
# Start every label with empty result lists for the inter-label comparison.
inter_distances = {
    name: {'l2_distances': [], 'cosine_distances': []}
    for name in unique_names
}

In [None]:
# For every person, compute the L2 and cosine distance between each of that person's
# vectors and every vector belonging to a different label, storing
# (own_file, other_file, distance) tuples in `inter_distances[name]`.

# The directory content is the same for every person, so list it and parse each
# file's label once (same parsing as `unique_names`).
all_vector_leaves = sorted(os.listdir(padded_152_vector_path))
leaf_labels = {leaf: '_'.join(leaf.split('_')[0:-2]) for leaf in all_vector_leaves}

# Load every vector exactly once instead of re-reading each "other" file for every
# own vector of every person.
# NOTE(review): pickle.load can execute arbitrary code; only use it on pickles
# produced by this project.
all_vectors = {}
for vector_leaf in all_vector_leaves:
    leaf_path = os.path.join(padded_152_vector_path, vector_leaf)
    with open(leaf_path, 'rb') as f:
        print(f'Loading {leaf_path}')
        all_vectors[vector_leaf] = pickle.load(f)

for name in unique_names:
    print(f'Calculating inter-label distances for {name}')
    # Exact label comparison: a substring test (`name in x`) would misclassify files
    # of people whose label merely contains this one as belonging to this person.
    vector1_paths = [x for x in all_vector_leaves if leaf_labels[x] == name]
    vector2_paths = [x for x in all_vector_leaves if leaf_labels[x] != name]
    name_l2_distances = []
    name_cos_distances = []

    for vector1_leaf in vector1_paths:
        vector_1 = all_vectors[vector1_leaf]
        for vector2_leaf in vector2_paths:
            vector_2 = all_vectors[vector2_leaf]
            print(f'Calculating distances between {vector1_leaf} and {vector2_leaf}')
            name_l2_distances.append((f'{vector1_leaf}', f'{vector2_leaf}',
                                      calculation.l2_distance(vector_1, vector_2).astype(float)))
            name_cos_distances.append((f'{vector1_leaf}', f'{vector2_leaf}',
                                       calculation.cosine_distance(vector_1, vector_2).astype(float)))
    print(f'Appending inter-label distances for {name} to dictionary')
    # Write to `inter_distances`; storing these under `intra_distances` would
    # overwrite the intra-label results and leave `inter_distances` empty.
    inter_distances[name]['l2_distances'] = name_l2_distances
    inter_distances[name]['cosine_distances'] = name_cos_distances

In [None]:
# One ImagePreprocessor per sample image, created in the same order as the paths.
prep1, prep2, prep3 = (
    ImagePreprocessor(path)
    for path in (image1_path, image2_path, image3_path)
)

In [None]:
# Hand the shared vectorizer to each preprocessor.
for prep in (prep1, prep2, prep3):
    prep.set_vectorizer(vectorizer)

In [None]:
# Run preprocessing with padding enabled on all three images.
for prep in (prep1, prep2, prep3):
    prep.preprocess_image(pad=True)

In [None]:
# Compute the vector for each preprocessed image.
for prep in (prep1, prep2, prep3):
    prep.vectorize()

In [None]:
# Pull the computed vectors out of the preprocessors.
vector1, vector2, vector3 = (
    prep.get_vector()
    for prep in (prep1, prep2, prep3)
)

In [None]:
# L2 distance for each of the three vector pairs: (1, 2), (1, 3), (2, 3).
dist_v1v2, dist_v1v3, dist_v2v3 = (
    calculation.l2_distance(left, right)
    for left, right in ((vector1, vector2), (vector1, vector3), (vector2, vector3))
)

In [None]:
# Display the three L2 distances; images 1 and 2 share a label, image 3 does not.
dist_v1v2, dist_v1v3, dist_v2v3

In [None]:
# Cosine distance for each of the three vector pairs: (1, 2), (1, 3), (2, 3).
distcos_v1v2, distcos_v1v3, distcos_v2v3 = (
    calculation.cosine_distance(left, right)
    for left, right in ((vector1, vector2), (vector1, vector3), (vector2, vector3))
)

In [None]:
# Display the three cosine distances; images 1 and 2 share a label, image 3 does not.
distcos_v1v2, distcos_v1v3, distcos_v2v3

In [None]:
# Show the image returned by get_image() for each preprocessor, side by side.
fig, ax = plt.subplots(1, 3, figsize=(15, 10))

for axis, prep in zip(ax, (prep1, prep2, prep3)):
    axis.imshow(prep.get_image())

In [None]:
# Show the image returned by get_resized_image() for each preprocessor, side by side.
fig, ax = plt.subplots(1, 3, figsize=(15, 10))

for axis, prep in zip(ax, (prep1, prep2, prep3)):
    axis.imshow(prep.get_resized_image())