In [None]:
# Author Markus Laubenthal

In [None]:
# Create the directory the download cell writes into; -p makes this a no-op
# when the directory already exists, so the cell can be re-run safely.
!mkdir -p input

In [None]:
# Download the two HDF5 inputs into input/. Note that the remote duplicates.h5
# is stored locally as duplicates_horse.h5. --no-verbose keeps wget's progress
# output out of the cell.
!wget https://storage.googleapis.com/laubenthal_spatiolab/duplicates.h5 -O input/duplicates_horse.h5 --no-verbose
!wget https://storage.googleapis.com/laubenthal_spatiolab/a2d2.h5 -O input/a2d2.h5 --no-verbose


In [None]:
# Remove any previous checkout and clone the helper repository into `functions`,
# the package name the import cell loads its preprocessing helpers from.
# NOTE(review): the clone is not pinned to a commit or tag, so results depend on
# whatever the default branch contains at run time.
!rm -rf functions
!git clone https://github.com/Lennard-Alms/lab_bd.git functions

In [None]:
import tensorflow as tf
import numpy as np
import glob
import cv2
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.metrics import jaccard_score
import matplotlib.pyplot as plt
import math
import h5py
import keras
from keras.layers import Input
from keras import backend as K
from keras import layers
import gc
from scipy.spatial import distance_matrix
import seaborn as sns
from operator import itemgetter 
from google.colab.patches import cv2_imshow
from functions.preprocessing.BatchToFile import BatchProcessToFile
from functions.preprocessing.FeatureExtractor import VGGFeatureExtractorMax
from functions.preprocessing.FeatureExtracorMaxNoPatches import VGGFeatureExtractorMaxNoPatches
from functions.preprocessing.ImageMutation import PatchMutation
from functions.preprocessing.HelperFunctions import get_patches_from_image


import pandas as pd

In [None]:
def get_angle_matrix(feature_vectors, feature_vectors_b = None):
  """Return every pairwise angle between two sets of row vectors, flattened and sorted.

  Args:
    feature_vectors: 2-D array with one vector per row.
    feature_vectors_b: optional second 2-D array with the same number of
      columns. When omitted, `feature_vectors` is compared against itself.

  Returns:
    1-D array with ``len(feature_vectors) * len(feature_vectors_b)`` angles in
    radians (0 to pi), in ascending order. Despite the function name, the
    pairwise (row, column) layout is not preserved.

  Note:
    A row with zero norm produces NaN angles (0 / 0); NaNs sort to the end.
  """
  if feature_vectors_b is None:
    # Nothing below writes to the inputs, so the array is reused directly
    # instead of duplicating the whole feature matrix in memory.
    feature_vectors_b = feature_vectors
  norms = np.linalg.norm(feature_vectors, axis=1)
  norms_b = np.linalg.norm(feature_vectors_b, axis=1)
  # Cosine similarity: dot products divided by the product of the vector norms.
  cosines = np.dot(feature_vectors, feature_vectors_b.T) / np.outer(norms, norms_b)
  # Rounding can push a cosine slightly outside [-1, 1], where arccos is undefined.
  angles = np.arccos(cosines.flatten().clip(-1, 1))
  angles.sort()
  return angles

In [None]:
h5_filename = "input/duplicates_horse.h5"
# Print the names of the top-level entries stored in the file. The context
# manager closes the handle when the listing is done, so no open handle is left
# behind when the file is opened again for loading.
with h5py.File(h5_filename, 'r') as f:
  for key in f.keys():
    print(key)

In [None]:
# Read every array needed below fully into memory ([:] reads the whole dataset).
# The context manager closes the file even if one of the lookups raises,
# which a trailing f.close() would not.
with h5py.File(h5_filename, 'r') as f:
  white_background_horses = f['white_background_horses'][:]
  white_background_horses_label = f['white_background_horses_label'][:]

  a2d2_background_horses_20_cover = f['a2d2_background_horses_20_cover'][:]
  a2d2_background_horses_20_cover_label = f['a2d2_background_horses_20_cover_label'][:]

  a2d2_background_horses_40_cover = f['a2d2_background_horses_40_cover'][:]
  a2d2_background_horses_40_cover_label = f['a2d2_background_horses_40_cover_label'][:]

  a2d2_background_horses_50_cover = f['a2d2_background_horses_50_cover'][:]
  a2d2_background_horses_50_cover_label = f['a2d2_background_horses_50_cover_label'][:]

  queries_100 = f['query(100, 100)'][:]
  queries_150 = f['query(150, 150)'][:]
  queries_200 = f['query(200, 200)'][:]
  queries_300 = f['query(300, 300)'][:]
  queries_400 = f['query(400, 400)'][:]

# The file stores no query labels; row i of each query array is given label i + 1.
# NOTE(review): assumes all query arrays have as many rows as queries_100 - confirm.
query_labels = np.arange(0,queries_100.shape[0]) + 1


In [None]:
# Read the complete `a2d2` dataset into memory; the context manager closes the
# file even if the lookup raises.
with h5py.File('input/a2d2.h5', 'r') as f:
  a2d2_dataset = f['a2d2'][:]

In [None]:
def get_images_with_label(images, labels, label):
  """Select the rows of `images` whose (flattened) label equals `label`."""
  matches = labels.reshape(-1) == label
  return images[matches]

def get_images_with_not_label(images, labels, label):
  """Select the rows of `images` whose (flattened) label differs from `label`."""
  others = labels.reshape(-1) != label
  return images[others]

def get_images_with_greater_zero(images, labels):
  """Select the rows of `images` whose (flattened) label is strictly positive."""
  positive = labels.reshape(-1) > 0
  return images[positive]

def get_same_label_angle_matrix(key, query, key_labels, query_labels, key_not_label=False, greater_zero_label=False):
  """Collect the angles between each labelled query and a label-dependent subset of `key`.

  For every label in `query_labels`, the query rows carrying that label are
  compared (via `get_angle_matrix`) with one of these key subsets:
    * the keys whose label differs from it, if `key_not_label` is set;
    * otherwise every key with a label > 0, if `greater_zero_label` is set;
    * otherwise the keys with the same label.

  Args:
    key: array of key vectors, one per row.
    query: array of query vectors, one per row.
    key_labels: labels of the key rows (flattened before comparison).
    query_labels: labels of the query rows; also the labels iterated over.
    key_not_label: compare against keys with a different label.
    greater_zero_label: compare against all keys with a positive label
      (ignored when `key_not_label` is set).

  Returns:
    1-D array concatenating the per-label angle arrays in the order of
    `query_labels`; an empty array when `query_labels` is empty.
  """
  # The "label > 0" subset does not depend on the loop label, so select it once.
  positive_keys = None
  if greater_zero_label and not key_not_label:
    positive_keys = get_images_with_greater_zero(key, key_labels)

  angle_list = []
  for label in query_labels:
    if key_not_label:
      k = get_images_with_not_label(key, key_labels, label)
    elif greater_zero_label:
      k = positive_keys
    else:
      k = get_images_with_label(key, key_labels, label)
    q = get_images_with_label(query, query_labels, label)
    angle_list.append(get_angle_matrix(k, q))

  if not angle_list:
    # np.concatenate raises ValueError on an empty sequence.
    return np.array([])
  return np.concatenate(angle_list)

# Data generated by GeM with parameter 3
# Exploratory analysis for near duplicates

# Same-label angles between the white-background keys and each of the five query sets.
(
  scale_100_200_duplicate_angles,
  scale_150_200_duplicate_angles,
  scale_200_200_duplicate_angles,
  scale_300_200_duplicate_angles,
  scale_400_200_duplicate_angles,
) = [
  get_same_label_angle_matrix(white_background_horses, scaled_queries, white_background_horses_label, query_labels)
  for scaled_queries in (queries_100, queries_150, queries_200, queries_300, queries_400)
]

# Baseline: queries_200 against the white-background keys carrying a different label.
scale_200_200_not_same_class = get_same_label_angle_matrix(
  white_background_horses,
  queries_200,
  white_background_horses_label,
  query_labels,
  key_not_label=True,
)

# Same-label angles between queries_200 and each of the three cover-rate key sets.
cover_20_angles, cover_40_angles, cover_50_angles = [
  get_same_label_angle_matrix(cover_keys, queries_200, cover_labels, query_labels)
  for cover_keys, cover_labels in (
    (a2d2_background_horses_20_cover, a2d2_background_horses_20_cover_label),
    (a2d2_background_horses_40_cover, a2d2_background_horses_40_cover_label),
    (a2d2_background_horses_50_cover, a2d2_background_horses_50_cover_label),
  )
]

# Plain A2D2: every a2d2 row against every query in queries_200, no label filtering.
a2d2_angles = get_angle_matrix(a2d2_dataset, queries_200)

In [None]:
# Box plot of every angle distribution, including the different-label baseline.
# It gets its own explicit axes rather than pyplot's implicit current figure.
box_fig, box_ax = plt.subplots()
box_ax.boxplot(
  [
    scale_100_200_duplicate_angles,
    scale_150_200_duplicate_angles,
    scale_200_200_duplicate_angles,
    scale_300_200_duplicate_angles,
    scale_400_200_duplicate_angles,
    cover_20_angles,
    cover_40_angles,
    cover_50_angles,
    scale_200_200_not_same_class,
    a2d2_angles,
  ],
  showfliers=False,
)
# The plotted values come from np.arccos in get_angle_matrix: angles in radians.
box_ax.set_ylabel('Angular distance (rad)')

# Violin plot of the same distributions without the different-label baseline.
fig, ax = plt.subplots(figsize=(12,5))
ax.violinplot([
  scale_100_200_duplicate_angles,
  scale_150_200_duplicate_angles,
  scale_200_200_duplicate_angles,
  scale_300_200_duplicate_angles,
  scale_400_200_duplicate_angles,
  cover_20_angles,
  cover_40_angles,
  cover_50_angles,
  a2d2_angles,
])
ax.set_ylabel('Angular distance (rad)')
# violinplot places the nine violins at x = 1..9; put one tick under each
# instead of also creating an empty tick at 0.
ax.set_xticks(np.arange(1, 10))
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(7,5))
# Violin plot of the same-label angles per query set, with plain A2D2 as the dissimilar baseline.
ax.violinplot([scale_100_200_duplicate_angles, scale_150_200_duplicate_angles, scale_200_200_duplicate_angles, scale_300_200_duplicate_angles, scale_400_200_duplicate_angles, a2d2_angles])
# The plotted values come from np.arccos in get_angle_matrix: angles in radians.
ax.set_ylabel('Angular distance (rad)')
ax.set_xlabel("Query Scale")
# violinplot places the six violins at x = 1..6. Pin one tick to each position
# before labelling; without fixed ticks the label-to-violin mapping depends on
# wherever the automatic locator happens to put its ticks.
ax.set_xticks(np.arange(1, 7))
# NOTE(review): the query sets are named 100/150/200/300/400 - confirm that
# 0.25/0.5/1.0/1.5/2.0 are the matching scale factors.
ax.set_xticklabels([0.25, 0.5, 1.0, 1.5, 2.0, "Dissimilar images"])
plt.show()


In [None]:
fig, ax = plt.subplots(figsize=(7,5))
# Violin plot of the same-label angles per cover rate, with plain A2D2 as the dissimilar baseline.
ax.violinplot([cover_20_angles, cover_40_angles, cover_50_angles, a2d2_angles])
# The plotted values come from np.arccos in get_angle_matrix: angles in radians.
ax.set_ylabel('Angular distance (rad)')
ax.set_xlabel("Cover Rate (R)")
# violinplot places the four violins at x = 1..4; label exactly those positions
# rather than padding the label list with a dummy entry for x = 0.
ax.set_xticks(np.arange(1, 5))
ax.set_xticklabels(["20%", "40%", "50%", "Dissimilar images"])
plt.show()
