В этом ноутбуке посчитаем MMD и FID на основе эмбеддингов, полученных с помощью модели с весами RadImageNet

In [4]:
import sys
import pathlib

In [5]:
# Put the parent of the current working directory at the front of the import
# path (presumably the repository root that holds the project packages
# imported below — confirm). The membership check keeps the cell idempotent:
# re-running it no longer stacks duplicate entries on sys.path.
repo_root = str(pathlib.Path().resolve().parent)
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

In [6]:

import torch

from torch.utils.data import Dataset

from domain_adaptation.cyclegan.dataloaders.base_dataloader import UnpairedDataset, create_dataloader
from datasets_adapters.fetal_planes_db.fpd_dataset import FetalPlanesDBDataset
from datasets_adapters.fetal_head_circ.fhc_dataset import FetalHeadCircDataset
from quality.mmd import calculate_mmd_from_datasets

from domain_adaptation.cyclegan.train import CycleGANTrainer
from domain_adaptation.cyclegan.model import CycleGAN
from domain_adaptation.cyclegan.dataloaders.translated_dataset import CycleGANTranslatedDataset

In [7]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (more slowly) on machines without CUDA instead of failing later.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# Domain A: the FETAL_PLANES_DB dataset, with target_size (224, 224) and no transform.
# NOTE(review): train=None presumably means "do not filter by split" — confirm
# against FetalPlanesDBDataset.
dataset_a = FetalPlanesDBDataset(
    root='/home/jupyter/datasphere/project/ultrasound/datasetss/fetal_planes_db',
    csv_file='FETAL_PLANES_DB_data.csv',
    images_dir='Images',
    target_size=(224, 224),
    transform=None,
    train=None,
)

# Domain B: the fetal head circumference training set, with target_size
# (224, 224), no transform, and annotations loaded.
dataset_b = FetalHeadCircDataset(
    images_dir='/home/jupyter/datasphere/project/ultrasound/datasetss/fetal_head_circumference/training_set',
    csv_file='/home/jupyter/datasphere/project/ultrasound/datasetss/fetal_head_circumference/training_set_pixel_size_and_HC.csv',
    target_size=(224, 224),
    transform=None,
    load_annotations=True,
)

Loaded 12400 images from /home/jupyter/datasphere/project/ultrasound/datasetss/fetal_planes_db
Loaded 999 images from /home/jupyter/datasphere/project/ultrasound/datasetss/fetal_head_circumference/training_set
Found 999 annotation images


In [9]:
# Build the CycleGAN with one input channel per domain and 3 residual blocks.
# NOTE(review): these settings presumably have to match the ones the
# checkpoint was trained with — confirm.
model = CycleGAN(input_channels_a=1, input_channels_b=1, n_residual_blocks=3)
# Wrap the model in a CPU-side trainer; in this notebook the trainer is used
# only for its load_checkpoint call.
# NOTE(review): load_checkpoint presumably restores the saved weights into
# `model` — confirm in CycleGANTrainer.
trainer = CycleGANTrainer(
    model=model,
    device=torch.device('cpu'),
    lr_g=2e-4,
    lr_d=2e-4,
    lambda_cycle=10,
    lambda_identity=0.5,
)
checkpoint = trainer.load_checkpoint('./checkpoints/cyclegan/best_model.pt')

In [10]:
# Baseline: MMD between the two original (untranslated) datasets, using the
# 'radimagenet' feature extractor.
initial_mmd_radnet = calculate_mmd_from_datasets(
    dataset_a, dataset_b,
    device=device,
    image_size=224,
    model_name='radimagenet',
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating MMD between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [02:34<00:00,  2.51it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:10<00:00,  2.97it/s]

Extracted 999 features of dimension 2048

Calculating MMD score...



  dist_sq = sq + sq.T - 2.0 * (subset @ subset.T)
  dist_sq = x_sq + y_sq.T - 2.0 * (x @ y.T)    # [n, m]


In [11]:
# Show the baseline MMD between dataset_a and dataset_b.
print(f'initial_mmd_radnet: {initial_mmd_radnet}')

initial_mmd_radnet: 0.01559159228898821


In [15]:
# dataset_a passed through the loaded CycleGAN, running on `device`.
# NOTE(review): with b2a left at its default this is presumably the A→B
# direction — confirm in CycleGANTranslatedDataset.
dataset_a_to_b = CycleGANTranslatedDataset(
    source_dataset=dataset_a,
    cyclegan_model=model,
    device=device,
)

In [17]:
# MMD between translated dataset_a and the real dataset_b.
# NOTE(review): num_workers=0 presumably keeps the CycleGAN forward pass in
# the main process rather than in DataLoader workers — confirm.
a_to_b_mmd_score = calculate_mmd_from_datasets(
    dataset_a_to_b, dataset_b,
    device=device,
    image_size=224,
    model_name='radimagenet',
    num_workers=0,
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating MMD between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [04:12<00:00,  1.54it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:15<00:00,  2.08it/s]


Extracted 999 features of dimension 2048

Calculating MMD score...


In [18]:
# Show the MMD between translated dataset_a and dataset_b.
print(f'a_to_b_mmd_score: {a_to_b_mmd_score}')

a_to_b_mmd_score: 0.05774889886616075


In [19]:
# dataset_b passed through the loaded CycleGAN with b2a=True.
# Uses the shared `device`, exactly like dataset_a_to_b, instead of a
# hard-coded CPU: in the recorded run the CPU path took about 6.9 s per batch
# when features were extracted from this dataset.
dataset_b_to_a = CycleGANTranslatedDataset(
    cyclegan_model=model,
    source_dataset=dataset_b,
    device=device,
    b2a=True,
)

In [20]:
# MMD between the real dataset_a and translated dataset_b.
# NOTE(review): num_workers=0 presumably keeps the CycleGAN forward pass in
# the main process rather than in DataLoader workers — confirm.
b_to_a_mmd_score = calculate_mmd_from_datasets(
    dataset_a, dataset_b_to_a,
    device=device,
    image_size=224,
    model_name='radimagenet',
    num_workers=0,
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating MMD between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [02:27<00:00,  2.63it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [03:41<00:00,  6.93s/it]

Extracted 999 features of dimension 2048

Calculating MMD score...





In [21]:
# Show the MMD between dataset_a and translated dataset_b.
print(f'b_to_a_mmd_score: {b_to_a_mmd_score}')

b_to_a_mmd_score: 0.0063262735335456455


In [22]:
from datasets_adapters.dogs.dogs_dataset import DogsDataset
# Dog-image dataset; presumably an out-of-domain reference point for the
# metric values computed on the ultrasound data — confirm intent.
doggies = DogsDataset(root='/home/jupyter/datasphere/project/ultrasound/datasetss/dogs_dataset')

Loaded 494 images across 4 breeds from /home/jupyter/datasphere/project/ultrasound/datasetss/dogs_dataset


In [23]:
# MMD between dataset_a and the dog images.
a_to_dog_mmd = calculate_mmd_from_datasets(
    dataset_a, doggies,
    device=device,
    image_size=224,
    model_name='radimagenet',
    num_workers=0,
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating MMD between two datasets...
Dataset 1: 12400 samples
Dataset 2: 494 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [02:24<00:00,  2.69it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 16/16 [00:25<00:00,  1.62s/it]

Extracted 494 features of dimension 2048

Calculating MMD score...





In [24]:
# Show the MMD between dataset_a and the dog images.
print(f'a_to_dog_mmd: {a_to_dog_mmd}')

a_to_dog_mmd: 0.5024962468843472


In [25]:
# MMD between the dog images and dataset_b (the dog dataset is passed first).
b_to_dog_mmd = calculate_mmd_from_datasets(
    doggies, dataset_b,
    device=device,
    image_size=224,
    model_name='radimagenet',
    num_workers=0,
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating MMD between two datasets...
Dataset 1: 494 samples
Dataset 2: 999 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 16/16 [00:21<00:00,  1.35s/it]


Extracted 494 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:14<00:00,  2.19it/s]

Extracted 999 features of dimension 2048

Calculating MMD score...





In [26]:
# Show the MMD between the dog images and dataset_b.
print(f'b_to_dog_mmd: {b_to_dog_mmd}')

b_to_dog_mmd: 0.578191125702202


In [27]:
from quality.fid import calculate_fid_from_datasets

In [28]:
# Baseline: FID between the two original (untranslated) datasets, using the
# 'radimagenet' feature extractor.
initial_fid = calculate_fid_from_datasets(
    dataset_a, dataset_b,
    device=device,
    image_size=224,
    model_name='radimagenet',
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating FID between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [00:52<00:00,  7.33it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:06<00:00,  5.29it/s]

Extracted 999 features of dimension 2048

Calculating FID score...





In [30]:
# Show the baseline FID between dataset_a and dataset_b.
print(f'initial_fid: {initial_fid}')

initial_fid: 372.19697013961377


In [31]:
# FID between translated dataset_a and the real dataset_b.
# NOTE(review): num_workers=0 presumably keeps the CycleGAN forward pass in
# the main process rather than in DataLoader workers — confirm.
a_to_b_fid_score = calculate_fid_from_datasets(
    dataset_a_to_b, dataset_b,
    device=device,
    image_size=224,
    model_name='radimagenet',
    num_workers=0,
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating FID between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [04:05<00:00,  1.58it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:15<00:00,  2.06it/s]


Extracted 999 features of dimension 2048

Calculating FID score...


In [32]:
# Show the FID between translated dataset_a and dataset_b.
print(f'a_to_b_fid_score: {a_to_b_fid_score}')

a_to_b_fid_score: 1195.5379834110472


In [33]:
# FID between the real dataset_a and translated dataset_b.
# NOTE(review): num_workers=0 presumably keeps the CycleGAN forward pass in
# the main process rather than in DataLoader workers — confirm.
b_to_a_fid_score = calculate_fid_from_datasets(
    dataset_a, dataset_b_to_a,
    device=device,
    image_size=224,
    model_name='radimagenet',
    num_workers=0,
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating FID between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [02:32<00:00,  2.54it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [03:39<00:00,  6.86s/it]

Extracted 999 features of dimension 2048

Calculating FID score...





In [34]:
# Show the FID between dataset_a and translated dataset_b.
print(f'b_to_a_fid_score: {b_to_a_fid_score}')

b_to_a_fid_score: 340.51013811241137


In [35]:
# FID between dataset_a and the dog images.
a_to_dog_fid = calculate_fid_from_datasets(
    dataset_a, doggies,
    device=device,
    image_size=224,
    model_name='radimagenet',
    num_workers=0,
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating FID between two datasets...
Dataset 1: 12400 samples
Dataset 2: 494 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [02:30<00:00,  2.57it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 16/16 [00:23<00:00,  1.44s/it]

Extracted 494 features of dimension 2048

Calculating FID score...





In [36]:
# Show the FID between dataset_a and the dog images.
print(f'a_to_dog_fid: {a_to_dog_fid}')

a_to_dog_fid: 31024.44709772394


In [37]:
# FID between the dog images and dataset_b (the dog dataset is passed first).
b_to_dog_fid = calculate_fid_from_datasets(
    doggies, dataset_b,
    device=device,
    image_size=224,
    model_name='radimagenet',
    num_workers=0,
    weights_path='/home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop.h5',
)

Calculating FID between two datasets...
Dataset 1: 494 samples
Dataset 2: 999 samples
Feature extractor: radimagenet
Loading cached PyTorch weights from /home/jupyter/datasphere/project/ultrasound/models/RadImageNet-ResNet50_notop_pytorch.pt

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 16/16 [00:22<00:00,  1.40s/it]


Extracted 494 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:14<00:00,  2.15it/s]

Extracted 999 features of dimension 2048

Calculating FID score...





In [38]:
# Show the FID between the dog images and dataset_b.
print(f'b_to_dog_fid: {b_to_dog_fid}')

b_to_dog_fid: 33466.152814135596
