# Import libraries

In [10]:
import os
import time
import h5py
import re
import numpy as np
from torch.utils.data import DataLoader

from src.utils import *
from src.datasets import ExcavatorDataset
from src.metrics import VLAD, FisherVector
from src.evaluate import compute_and_save_ssim_matrices
from src.config import TRANSFORMER

In [11]:
root = f'/home/ais/Bachelorarbeit/similarity_metrics_of_images/'
dataset = ExcavatorDataset(return_type='image+mask+path', purpose='train')


# Load k-means and GMM models

In [16]:
k_means_models = [
    model for model in os.listdir(rf'{root}models/pickle_model_files') if 'k_means' in model
]
print("KMeans models:", k_means_models)
gmm_model = [
    model for model in os.listdir(rf'{root}models/pickle_model_files') if 'gmm' in model
]
print("GMM models:", gmm_model)

KMeans models: ['k_means_model_k16_root_sift.pkl', 'k_means_model_k16_sift.pkl', 'k_means_model_k24_root_sift.pkl', 'k_means_model_k24_sift.pkl']
GMM models: ['gmm_model_k16_root_sift.pkl', 'gmm_model_k24_sift.pkl', 'gmm_model_k24_root_sift.pkl', 'gmm_model_k16_sift.pkl']


# Compute and save VLAD vector matrix in `HD5` format

In [17]:
loader = DataLoader(dataset, batch_size=10, shuffle=False)

for model in k_means_models:
    num_clusters = int(re.findall(r'\d+', model)[0])
    vect_length = 128 * num_clusters if not 'pca' in model else 128 * num_clusters // 2
    print(f"Number of clusters: {num_clusters}, Vector length: {vect_length}")
    data = {}
    feature = 'root_sift' if 'root' in model else 'sift'
    for img, *_, path in dataset:

        vlad = VLAD(
            image=img,
            k_means=load_model(rf'{root}models/pickle_model_files/{model}'),
            flatten=True,
            feature=feature
        ).vector
        if len(vlad) != vect_length:
            raise ValueError(f"Expected {vect_length}, got {len(vlad)}")
        data[path] = vlad
    model_name = model.replace('.pkl', '')
    save_to_hdf5(rf'{root}res/vlad/{model_name}.h5', data)

Number of clusters: 16, Vector length: 2048
Number of clusters: 16, Vector length: 2048
Number of clusters: 24, Vector length: 3072
Number of clusters: 24, Vector length: 3072


# Compute and save Fisher vector matrix in `HD5` format

In [18]:
for model in gmm_model:
    num_clusters = int(re.findall(r'\d+', model)[0])
    vect_length = (2 * 128 * num_clusters + num_clusters) if not 'pca' in model else (2 * 128 * num_clusters + num_clusters) // 2
    print(f"Number of clusters: {num_clusters}, Vector length: {vect_length}")
    data = {}
    feature = 'root_sift' if 'root' in model else 'sift'

    for img, *_, path in dataset:

        fisher = FisherVector(
            image=img,
            gmm=load_model(rf'{root}models/pickle_model_files/{model}'),
            flatten=True,
            feature=feature
        ).vector
        if len(fisher) != vect_length:
            raise ValueError(f"Expected {vect_length}, got {len(fisher)}")
        path = [pth.replace('/', '|').replace('\\', '|') for pth in path]
        data[path] = fisher


    model_name = model.replace('.pkl', '')
    save_to_hdf5(rf'{root}res/fisher/{model_name}.h5', data)

Number of clusters: 16, Vector length: 4112


TypeError: unhashable type: 'list'

# SSIM

1. Choosing kernel size for `gaussian_blur` function

Using the empirical rule, the kernel radius should span 3 times the standard deviation. Which means:

```python
kernel_radius = int(3 * sigma)
kernel_size = 2 * kernel_radius + 1 # In order that the kernel is centered around the central pixel
```

In [6]:
dataset = ExcavatorDataset(return_type='image+mask+path', purpose='train', transform=TRANSFORMER)

In [9]:
output_dir = f'{root}res/ssim'
os.makedirs(output_dir, exist_ok=True)

batch_size = 40
gaussian_sigmas = [2 * i for i in range(1, 10, 2)]  # [2, 6, 10, 14, 18]

for sigma in gaussian_sigmas:
    compute_and_save_ssim_matrices(dataset=dataset,
                                   output_dir=output_dir,                     batch_size=batch_size,
                                   gaussian_sigma=sigma)


Kernel size used for sigma=2: 13


Computing SSIM/MS-SSIM:: 100%|██████████| 39672/39672 [34:55<00:00, 18.93it/s]


Saved SSIM and MS-SSIM matrices using: 
sigma=2, kernel_size=13, compression_quality=None
Kernel size used for sigma=6: 37


Computing SSIM/MS-SSIM:: 100%|██████████| 39672/39672 [34:47<00:00, 19.00it/s]


Saved SSIM and MS-SSIM matrices using: 
sigma=6, kernel_size=37, compression_quality=None
Kernel size used for sigma=10: 61


Computing SSIM/MS-SSIM:: 100%|██████████| 39672/39672 [34:43<00:00, 19.04it/s]


Saved SSIM and MS-SSIM matrices using: 
sigma=10, kernel_size=61, compression_quality=None
Kernel size used for sigma=14: 85


Computing SSIM/MS-SSIM:: 100%|██████████| 39672/39672 [34:39<00:00, 19.08it/s]


Saved SSIM and MS-SSIM matrices using: 
sigma=14, kernel_size=85, compression_quality=None
Kernel size used for sigma=18: 109


Computing SSIM/MS-SSIM:: 100%|██████████| 39672/39672 [34:37<00:00, 19.09it/s]


Saved SSIM and MS-SSIM matrices using: 
sigma=18, kernel_size=109, compression_quality=None


### Optional: Compute SSIM and MS-SSIM matrices for different compression qualities

In [None]:
compression_qualities = [i for i in range(10, 101, 10)]
output_dir = f'{root}res/ssim/compression'
os.makedirs(output_dir, exist_ok=True)

for quality in compression_qualities:
    compute_and_save_ssim_matrices(dataset=dataset,
                                   output_dir=output_dir,
                                   batch_size=batch_size,
                                   compression_quality=quality)