In [8]:
import torch
from torchvision.models import resnet18

from otdd.pytorch.datasets import load_torchvision_data
from otdd.pytorch.distance import DatasetDistance, FeatureCost

# Load MNIST/CIFAR in 3channels (needed by torchvision models)
loaders_src = load_torchvision_data('CIFAR10', resize=28, maxsize=20)[0]
loaders_tgt = load_torchvision_data('MNIST', resize=28, to3channels=True, maxsize=20)[0]

# Embed using a pretrained (+frozen) resnet
embedder = resnet18(pretrained=True).eval()
embedder.fc = torch.nn.Identity()
for p in embedder.parameters():
    p.requires_grad = False

# Here we use same embedder for both datasets
feature_cost = FeatureCost(src_embedding = embedder,
                           src_dim = (3,28,28),
                           tgt_embedding = embedder,
                           tgt_dim = (3,28,28),
                           p = 2,
                           device='cpu')

dist = DatasetDistance(loaders_src['train'], loaders_tgt['train'],
                          inner_ot_method = 'exact',
                          debiased_loss = True,
                          feature_cost = feature_cost,
                          sqrt_method = 'spectral',
                          sqrt_niters=10,
                          precision='single',
                          p = 2, entreg = 1e-1,
                          device='cpu')

d = dist.distance(maxsamples = 100)
print(f'Embedded OTDD(MNIST,USPS)={d:8.2f}')


Files already downloaded and verified
Files already downloaded and verified
Fold Sizes: 20/20/10000 (train/valid/test)
Fold Sizes: 20/20/10000 (train/valid/test)


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/49 [00:00<?, ?it/s]

Embedded OTDD(MNIST,USPS)=  301.58


: 

In [1]:
from otdd.pytorch.datasets import load_torchvision_data
from otdd.pytorch.distance import DatasetDistance

# Load data
loaders_src  = load_torchvision_data('MNIST', valid_size=0, resize = 28, maxsize=2000)[0]
loaders_tgt  = load_torchvision_data('USPS',  valid_size=0, resize = 28, maxsize=2000)[0]

# Instantiate distance
dist = DatasetDistance(loaders_src['train'], loaders_tgt['train'],
                          inner_ot_method = 'exact',
                          debiased_loss = True,
                          p = 2, entreg = 1e-1,
                          device='cuda:0')

d = dist.distance(maxsamples = 1000)
print(f'OTDD(MNIST,USPS)={d:8.2f}')


[KeOps] Compiling cuda jit compiler engine ... /usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status

[KeOps] Compiling cuda jit compiler engine ... /usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status

[KeOps] Compiling cuda jit compiler engine ... /usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status

[KeOps] Compiling cuda jit compiler engine ... /usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status



ot.gpu not found - coupling computation will be in cpu


Fold Sizes: 2000/10000 (train/test)
Downloading https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.bz2 to /home/brian/anaconda3/envs/otdd/lib/python3.8/site-packages/otdd-0.1.0-py3.8.egg/data/usps.bz2


  0%|          | 0/6579383 [00:00<?, ?it/s]

Downloading https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.t.bz2 to /home/brian/anaconda3/envs/otdd/lib/python3.8/site-packages/otdd-0.1.0-py3.8.egg/data/usps.t.bz2


  0%|          | 0/1831726 [00:00<?, ?it/s]

Fold Sizes: 2000/2007 (train/test)


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

OTDD(MNIST,USPS)=  915.93


: 

In [2]:
import geomloss
import torch

cost_routines = {
    1: (lambda x, y: geomloss.utils.distances(x, y)),
    2: (lambda x, y: geomloss.utils.squared_distances(x, y) / 2),
}

# x = torch.rand(10, 3, 28 * 28)
# y = torch.rand(10, 3, 28 * 28)
x = torch.rand(200, 512)
y = torch.rand(200, 512)
d1 = cost_routines[1](x, y)
d1.shape


[KeOps] Compiling cuda jit compiler engine ... /usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status

[KeOps] Compiling cuda jit compiler engine ... /usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status

[KeOps] Compiling cuda jit compiler engine ... /usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status

[KeOps] Compiling cuda jit compiler engine ... /usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status



torch.Size([200, 200])

In [22]:
import ot

d2 = ot.dist(x.numpy(), y.numpy(), metric='euclidean')
d2.shape

(200, 200)

In [16]:
geomloss.utils.distances(x, y).shape

torch.Size([200, 200])

In [18]:
cost_func = lambda x, y: torch.Tensor(ot.dist(x.numpy(), y.numpy()), metric='cosine')
distance = geomloss.SamplesLoss(
            loss='hausdorff', p=2,
            cost=geomloss.utils.distances,
            #debias=True,
            blur=0.05)
distance(x, y)


KeyError: None

In [None]:
d2

array([[9.0372095, 9.186621 , 9.289699 , ..., 8.88498  , 9.218829 ,
        8.857074 ],
       [8.895832 , 9.231877 , 9.3643265, ..., 9.310248 , 9.103726 ,
        9.045063 ],
       [8.706479 , 9.464011 , 9.407043 , ..., 9.213037 , 9.366762 ,
        9.10549  ],
       ...,
       [8.787475 , 9.429503 , 9.280655 , ..., 9.435707 , 9.077512 ,
        8.9355755],
       [9.393565 , 9.48104  , 9.532231 , ..., 9.417166 , 9.369076 ,
        9.140358 ],
       [9.34291  , 8.987768 , 9.081922 , ..., 8.796278 , 8.816949 ,
        9.25925  ]], dtype=float32)

In [7]:
import ot

M = ot.dist(x.numpy(), y.numpy(), metric='euclidean')
ot.emd2(ot.unif(M.shape[0]), ot.unif(M.shape[1]), M)

8.703844022750854