# Import libraries

In [1]:

import re

from src.utils import *
from src.datasets import ExcavatorDataset
from src.metrics import VLAD, FisherVector
from scripts.evaluate import compute_and_save_ssim_matrices, compute_and_save_ssim_matrices_train_val
from src.config import TRANSFORMER, ROOT

  warn(
  from .autonotebook import tqdm as notebook_tqdm


Device used: cuda


In [2]:
root = ROOT

In [3]:

train_dataset = ExcavatorDataset(return_type='image+mask+path', purpose='train')
val_dataset = ExcavatorDataset(return_type='image+mask+path', purpose='test')


  key: torch.tensor(value / 255.0, dtype=torch.float32)


# Load k-means and GMM models

In [5]:
k_means_models = [
    model for model in os.listdir(rf'{root}/models/pickle_model_files') if 'k_means' in model
]
print("KMeans models:", k_means_models)
gmm_model = [
    model for model in os.listdir(rf'{root}/models/pickle_model_files') if 'gmm' in model
]
print("GMM models:", gmm_model)

KMeans models: ['k_means_model_k256_root_sift.pkl', 'k_means_model_k64_sift.pkl', 'k_means_model_k16_root_sift.pkl', 'k_means_model_k32_sift.pkl', 'k_means_model_k16_sift.pkl', 'k_means_model_k32_root_sift.pkl', 'k_means_model_k256_sift.pkl', 'k_means_model_k64_root_sift.pkl', 'k_means_model_k128_sift.pkl', 'k_means_model_k24_root_sift.pkl', 'k_means_model_k128_root_sift.pkl', 'k_means_model_k24_sift.pkl']
GMM models: ['gmm_model_k32_sift.pkl', 'gmm_model_k256_sift.pkl', 'gmm_model_k256_root_sift.pkl', 'gmm_model_k64_root_sift.pkl', 'gmm_model_k16_root_sift.pkl', 'gmm_model_k24_sift.pkl', 'gmm_model_k24_root_sift.pkl', 'gmm_model_k64_sift.pkl', 'gmm_model_k32_root_sift.pkl', 'gmm_model_k16_sift.pkl', 'gmm_model_k128_root_sift.pkl', 'gmm_model_k128_sift.pkl']


# Compute and save VLAD vector matrix in `HD5` format

In [8]:
if not os.path.exists(rf'{root}/res/vlad/train'):
    os.makedirs(rf'{root}/res/vlad/train')

if not os.path.exists(rf'{root}/res/vlad/validation'):
    os.makedirs(rf'{root}/res/vlad/validation')

for model in k_means_models:
    num_clusters = int(re.findall(r'\d+', model)[0])
    vect_length = 128 * num_clusters if not 'pca' in model else 128 * num_clusters // 2
    print(f"Number of clusters: {num_clusters}, Vector length: {vect_length}")
    train_data = {}
    val_data = {}
    feature = 'root_sift' if 'root' in model else 'sift'
    for img, *_, path in train_dataset:

        vlad = VLAD(
            image=img,
            k_means=load_model(rf'{root}/models/pickle_model_files/{model}'),
            flatten=True,
            feature=feature
        ).vector
        if len(vlad) != vect_length:
            raise ValueError(f"Expected {vect_length}, got {len(vlad)}")
        path = os.path.basename(path)
        train_data[path] = vlad

    for img, *_, path in val_dataset:

        vlad = VLAD(
            image=img,
            k_means=load_model(rf'{root}/models/pickle_model_files/{model}'),
            flatten=True,
            feature=feature
        ).vector
        if len(vlad) != vect_length:
            raise ValueError(f"Expected {vect_length}, got {len(vlad)}")
        path = os.path.basename(path)
        val_data[path] = vlad

    model_name = model.replace('.pkl', '')
    save_to_hdf5(rf'{root}/res/vlad/train/{model_name}.h5', train_data)
    save_to_hdf5(rf'{root}/res/vlad/validation/{model_name}.h5', val_data)

Number of clusters: 256, Vector length: 32768
Number of clusters: 64, Vector length: 8192
Number of clusters: 16, Vector length: 2048
Number of clusters: 32, Vector length: 4096
Number of clusters: 16, Vector length: 2048
Number of clusters: 32, Vector length: 4096
Number of clusters: 256, Vector length: 32768
Number of clusters: 64, Vector length: 8192
Number of clusters: 128, Vector length: 16384
Number of clusters: 24, Vector length: 3072
Number of clusters: 128, Vector length: 16384
Number of clusters: 24, Vector length: 3072


# Compute and save Fisher vector matrix in `HD5` format

In [11]:
if not os.path.exists(rf'{root}/res/fisher/train'):
    os.makedirs(rf'{root}/res/fisher/train')

if not os.path.exists(rf'{root}/res/fisher/validation'):
    os.makedirs(rf'{root}/res/fisher/validation')

for model in gmm_model:
    num_clusters = int(re.findall(r'\d+', model)[0])
    vect_length = (2 * 128 * num_clusters + num_clusters) if not 'pca' in model else (
                                                                                                 2 * 128 * num_clusters + num_clusters) // 2
    print(f"Number of clusters: {num_clusters}, Vector length: {vect_length}")
    train_data = {}
    val_data = {}
    feature = 'root_sift' if 'root' in model else 'sift'

    for img, *_, path in train_dataset:

        fisher = FisherVector(
            image=img,
            gmm=load_model(rf'{root}/models/pickle_model_files/{model}'),
            flatten=True,
            feature=feature
        ).vector
        if len(fisher) != vect_length:
            raise ValueError(f"Expected {vect_length}, got {len(fisher)}")
        path = os.path.basename(path)
        train_data[path] = fisher

    for img, *_, path in val_dataset:

        fisher = FisherVector(
            image=img,
            gmm=load_model(rf'{root}/models/pickle_model_files/{model}'),
            flatten=True,
            feature=feature
        ).vector
        if len(fisher) != vect_length:
            raise ValueError(f"Expected {vect_length}, got {len(fisher)}")
        path = os.path.basename(path)
        val_data[path] = fisher

    model_name = model.replace('.pkl', '')
    save_to_hdf5(rf'{root}/res/fisher/train/{model_name}.h5', train_data)
    save_to_hdf5(rf'{root}/res/fisher/validation/{model_name}.h5', val_data)

Number of clusters: 32, Vector length: 8224
Number of clusters: 256, Vector length: 65792
Number of clusters: 256, Vector length: 65792
Number of clusters: 64, Vector length: 16448
Number of clusters: 16, Vector length: 4112
Number of clusters: 24, Vector length: 6168
Number of clusters: 24, Vector length: 6168
Number of clusters: 64, Vector length: 16448
Number of clusters: 32, Vector length: 8224
Number of clusters: 16, Vector length: 4112
Number of clusters: 128, Vector length: 32896
Number of clusters: 128, Vector length: 32896


# SSIM

1. Choosing kernel size for `gaussian_blur` function

Using the empirical rule, the kernel radius should span 3 times the standard deviation. Which means:

```python
kernel_radius = int(3 * sigma)
kernel_size = 2 * kernel_radius + 1 # In order that the kernel is centered around the central pixel
```

In [3]:
train_dataset = ExcavatorDataset(return_type='image+mask+path', purpose='train', transform=TRANSFORMER)
val_dataset = ExcavatorDataset(return_type='image+mask+path', purpose='test', transform=TRANSFORMER)

## A) Compute SSIM Matrix within the dataset

In the code below, the data is first saved as:

```python
{
    'image_paths': List[str],
    'ssim': np.ndarray,
    'ms_ssim': np.ndarray
}
```
because of computational constraints (could takr up to 16 hours/iteration).

In [5]:
output_dir = f'{root}/res/ssim/within_train'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

batch_size = 20
gaussian_sigmas = [i for i in range(0, 12, 2)]  # [0, 2, 4, 6, 8, 10]

for sigma in gaussian_sigmas:
    compute_and_save_ssim_matrices(dataset=train_dataset,
                                   output_dir=output_dir,
                                   batch_size=batch_size,
                                   sigma=sigma)

Kernel size used for sigma=4: 25




KeyboardInterrupt: 

## B) Compute SSIM Matrix between train and validation datasets

In [5]:
output_dir = f'{root}/res/ssim/train_vs_val'
if not os.path.exists(output_dir):
    os.makedirs(output_dir, exist_ok=True)

batch_size = 20
gaussian_sigmas = [i for i in range(0, 12, 2)]  # [0, 2, 4, 6, 8, 10]
for sigma in gaussian_sigmas:
    compute_and_save_ssim_matrices_train_val(train_dataset=train_dataset, # TODO: Fix this method (it currently creates too large files)
                                             val_dataset=val_dataset,
                                             output_dir=output_dir,
                                             batch_size=batch_size,
                                             sigma=sigma)



Computing ssim and ms_ssim with sigma=4 for all pairs (val vs train).
Kernel size used for sigma = 4: 25
Transformer used: Compose(
    ToTensor()
    Resize(size=(640, 640), interpolation=bilinear, max_size=None, antialias=warn)
    GaussianBlur(kernel_size=(25, 25), sigma=(4, 4))
)
All validation paths loaded.
All training paths loaded.


Computing SSIM/MS-SSIM (val vs train): 100%|██████████| 187/187 [3:09:52<00:00, 60.92s/it]  

Saved train-val SSIM and MS-SSIM matrices at C:\workspace\similarity_metrics_of_images/res/ssim/train_vs_val with sigma=4, kernel_size=25.





# C) B, but with grayscale images

In [6]:
output_dir = f'{root}/res/ssim/train_vs_val/grayscale'
if not os.path.exists(output_dir):
    os.makedirs(output_dir, exist_ok=True)

batch_size = 20
gaussian_sigmas = [i for i in range(0, 12, 2)]  # [0, 2, 4, 6, 8, 10]
for sigma in gaussian_sigmas:
    compute_and_save_ssim_matrices_train_val(train_dataset=train_dataset,
                                             val_dataset=val_dataset,
                                             output_dir=output_dir,
                                             grayscale=True,
                                             batch_size=batch_size,
                                             sigma=sigma)

2024-12-27 12:53:14,572 - root - INFO - Training dataset initialized.
2024-12-27 12:53:14,574 - root - INFO - Test dataset initialized.
Computing ssim and ms_ssim with sigma=0 for all pairs (val vs train).
2024-12-27 12:53:14,604 - ExcavatorDataset - INFO - RGB mask detected with shape: torch.Size([3, 640, 640]). Converting to class mask.
2024-12-27 12:53:14,619 - ExcavatorDataset - INFO - Mask converted with new shape: torch.Size([640, 640])
2024-12-27 12:53:14,641 - ExcavatorDataset - INFO - RGB mask detected with shape: torch.Size([3, 640, 640]). Converting to class mask.
2024-12-27 12:53:14,651 - ExcavatorDataset - INFO - Mask converted with new shape: torch.Size([640, 640])
2024-12-27 12:53:14,671 - ExcavatorDataset - INFO - RGB mask detected with shape: torch.Size([3, 640, 640]). Converting to class mask.
2024-12-27 12:53:14,681 - ExcavatorDataset - INFO - Mask converted with new shape: torch.Size([640, 640])
2024-12-27 12:53:14,698 - ExcavatorDataset - INFO - RGB mask detected w

KeyboardInterrupt: 