In [1]:
import sys
import pathlib
import torch

from torch.utils.data import Dataset

In [2]:
# Put the parent of the current working directory at the front of the import
# search path so the project packages imported below can be resolved.
parent_dir = pathlib.Path().resolve().parent
sys.path.insert(0, str(parent_dir))

In [7]:
from domain_adaptation.cyclegan.dataloaders.base_dataloader import UnpairedDataset, create_dataloader
from datasets_adapters.fetal_planes_db.fpd_dataset import FetalPlanesDBDataset
from datasets_adapters.fetal_head_circ.fhc_dataset import FetalHeadCircDataset
from quality.mmd import calculate_mmd_from_datasets

In [4]:
# Device used for feature extraction in the MMD computations below.
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA instead of failing at the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Directory under which the datasets live. Kept in one place so the notebook
# can be pointed at another machine by editing a single constant.
DATASETS_ROOT = pathlib.Path('/home/jupyter/datasphere/project/ultrasound/datasetss')
FHC_ROOT = DATASETS_ROOT / 'fetal_head_circumference'

# Domain A: the fetal planes dataset, images resized to 224x224.
# train=None presumably disables train/test filtering -- TODO confirm against
# FetalPlanesDBDataset.
dataset_a = FetalPlanesDBDataset(
    root=str(DATASETS_ROOT / 'fetal_planes_db'),
    transform=None,
    target_size=(224, 224),
    csv_file='FETAL_PLANES_DB_data.csv',
    images_dir='Images',
    train=None,
)

# Domain B: the fetal head circumference training set, same target size,
# with annotations loaded.
dataset_b = FetalHeadCircDataset(
    images_dir=str(FHC_ROOT / 'training_set'),
    csv_file=str(FHC_ROOT / 'training_set_pixel_size_and_HC.csv'),
    transform=None,
    target_size=(224, 224),
    load_annotations=True,
)

Loaded 12400 images from /home/jupyter/datasphere/project/ultrasound/datasetss/fetal_planes_db
Loaded 999 images from /home/jupyter/datasphere/project/ultrasound/datasetss/fetal_head_circumference/training_set
Found 999 annotation images


In [8]:
# Baseline: MMD between the two original (untranslated) datasets.
initial_mmd = calculate_mmd_from_datasets(
    dataset_a,
    dataset_b,
    device=device,
    image_size=224,
)

Calculating MMD between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples
Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /tmp/xdg_cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 115MB/s] 



Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [02:36<00:00,  2.47it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:10<00:00,  2.97it/s]

Extracted 999 features of dimension 2048

Calculating MMD score...



  dist_sq = sq + sq.T - 2.0 * (subset @ subset.T)
  dist_sq = x_sq + y_sq.T - 2.0 * (x @ y.T)    # [n, m]


In [9]:
# Report the baseline MMD between the two original datasets.
print(f'initial MMD: {initial_mmd}')

initial MMD: 0.07260793098959151


In [10]:
from domain_adaptation.cyclegan.train import CycleGANTrainer
from domain_adaptation.cyclegan.model import CycleGAN
from domain_adaptation.cyclegan.dataloaders.translated_dataset import CycleGANTranslatedDataset

In [11]:
# Build the CycleGAN (single-channel inputs on both sides, 3 residual blocks)
# and load the best saved checkpoint through the trainer, which presumably
# restores the model weights -- TODO confirm in CycleGANTrainer.load_checkpoint.
# NOTE(review): the model is kept on the CPU although `device` is CUDA; confirm
# this is intentional, since it likely dominates the runtime of the MMD passes
# over translated datasets.
model = CycleGAN(input_channels_a=1, input_channels_b=1, n_residual_blocks=3)

trainer = CycleGANTrainer(
    model=model,
    device=torch.device('cpu'),
    lambda_cycle=10,
    lambda_identity=0.5,
    lr_g=2e-4,
    lr_d=2e-4,
)
checkpoint = trainer.load_checkpoint('./checkpoints/cyclegan/best_model.pt')

In [12]:
# Wrap dataset A so its samples are passed through the CycleGAN on the CPU
# (default direction, presumably A -> B -- TODO confirm).
dataset_a_to_b = CycleGANTranslatedDataset(
    cyclegan_model=model,
    source_dataset=dataset_a,
    device=torch.device('cpu'),
)

In [13]:
# MMD between the CycleGAN-translated dataset A and the original dataset B.
a_to_b_mmd_score = calculate_mmd_from_datasets(
    dataset_a_to_b,
    dataset_b,
    device=device,
    image_size=224,
)

Calculating MMD between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [35:07<00:00,  5.43s/it]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:06<00:00,  4.87it/s]


Extracted 999 features of dimension 2048

Calculating MMD score...


In [15]:
# Report the MMD between translated dataset A and original dataset B.
print(f'a_to_b_mmd_score: {a_to_b_mmd_score}')

a_to_b_mmd_score: 0.05095152098127209


In [14]:
# Wrap dataset B so its samples are passed through the CycleGAN on the CPU,
# with b2a=True selecting the reverse (B -> A) direction.
dataset_b_to_a = CycleGANTranslatedDataset(
    cyclegan_model=model,
    source_dataset=dataset_b,
    device=torch.device('cpu'),
    b2a=True,
)

In [16]:
# MMD between the original dataset A and the CycleGAN-translated dataset B.
b_to_a_mmd_score = calculate_mmd_from_datasets(
    dataset_a,
    dataset_b_to_a,
    device=device,
    image_size=224,
)

Calculating MMD between two datasets...
Dataset 1: 12400 samples
Dataset 2: 999 samples

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [00:52<00:00,  7.43it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [02:55<00:00,  5.48s/it]

Extracted 999 features of dimension 2048

Calculating MMD score...





In [17]:
# Report the MMD between original dataset A and translated dataset B.
print(f'b_to_a_mmd_score: {b_to_a_mmd_score}')

b_to_a_mmd_score: 0.0673692363639522


In [18]:
from datasets_adapters.dogs.dogs_dataset import DogsDataset

In [19]:
# Dog image dataset, used below as a reference set for MMD comparisons
# against the two ultrasound datasets.
doggies = DogsDataset(root='/home/jupyter/datasphere/project/ultrasound/datasetss/dogs_dataset')

Loaded 494 images across 4 breeds from /home/jupyter/datasphere/project/ultrasound/datasetss/dogs_dataset


In [21]:
# MMD between dataset A and the dog images (presumably an out-of-domain
# reference point for interpreting the scores above).
a_to_dog_mmd = calculate_mmd_from_datasets(
    dataset_a,
    doggies,
    device=device,
    image_size=224,
)

Calculating MMD between two datasets...
Dataset 1: 12400 samples
Dataset 2: 494 samples

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 388/388 [00:51<00:00,  7.54it/s]


Extracted 12400 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 16/16 [00:13<00:00,  1.17it/s]


Extracted 494 features of dimension 2048

Calculating MMD score...


In [22]:
# Report the MMD between dataset A and the dog images.
print(f'a_to_dog_mmd: {a_to_dog_mmd}')

a_to_dog_mmd: 0.23284487807675114


In [23]:
# MMD between dataset B and the dog images (presumably an out-of-domain
# reference point for interpreting the scores above).
b_to_dog_mmd = calculate_mmd_from_datasets(
    dataset_b,
    doggies,
    device=device,
    image_size=224,
)

Calculating MMD between two datasets...
Dataset 1: 999 samples
Dataset 2: 494 samples

Extracting features from dataset 1...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 32/32 [00:05<00:00,  5.50it/s]


Extracted 999 features of dimension 2048

Extracting features from dataset 2...
Using device: cuda
Extracting features...


Processing batches: 100%|██████████| 16/16 [00:12<00:00,  1.26it/s]


Extracted 494 features of dimension 2048

Calculating MMD score...


In [24]:
# Report the MMD between dataset B and the dog images.
print(f'b_to_dog_mmd: {b_to_dog_mmd}')

b_to_dog_mmd: 0.3094104921466446


In [29]:
# IPython shell escape: list the contents of the models directory.
!ls -la /home/jupyter/datasphere/project/ultrasound/models

total 589856
drwxr-xr-x 1 jupyter jupyter       268 Feb 12 20:11 .
drwxr-xr-x 1 jupyter jupyter       126 Feb 12 19:54 ..
drwxr-xr-x 1 jupyter jupyter         0 Feb 12 20:10 .ipynb_checkpoints
-rw-r--r-- 1 jupyter jupyter 219404904 Feb 12 20:01 RadImageNet-IRV2_notop.h5
-rw-r--r-- 1 jupyter jupyter 202032207 Feb 12 20:00 RadImageNet-IRV2_notop.h5.zip
-rw-r--r-- 1 jupyter jupyter  94852768 Feb 12 20:11 RadImageNet-ResNet50_notop.h5
-rw-r--r-- 1 jupyter jupyter  87714602 Feb 12 20:10 RadImageNet-ResNet50_notop.h5.zip


Считаем MMD с использованием предобученных весов RadImageNet

TypeError: calculate_mmd_from_datasets() got an unexpected keyword argument 'model_name'