In [1]:
!pip install pytorch-adapt

Collecting pytorch-adapt
  Downloading pytorch_adapt-0.0.61-py3-none-any.whl (137 kB)
[?25l[K     |██▍                             | 10 kB 25.8 MB/s eta 0:00:01[K     |████▊                           | 20 kB 30.2 MB/s eta 0:00:01[K     |███████▏                        | 30 kB 34.5 MB/s eta 0:00:01[K     |█████████▌                      | 40 kB 38.8 MB/s eta 0:00:01[K     |████████████                    | 51 kB 26.9 MB/s eta 0:00:01[K     |██████████████▎                 | 61 kB 29.3 MB/s eta 0:00:01[K     |████████████████▊               | 71 kB 30.5 MB/s eta 0:00:01[K     |███████████████████             | 81 kB 31.1 MB/s eta 0:00:01[K     |█████████████████████▍          | 92 kB 33.5 MB/s eta 0:00:01[K     |███████████████████████▉        | 102 kB 32.0 MB/s eta 0:00:01[K     |██████████████████████████▏     | 112 kB 32.0 MB/s eta 0:00:01[K     |████████████████████████████▋   | 122 kB 32.0 MB/s eta 0:00:01[K     |███████████████████████████████ | 133 kB 32

### Setup

In [2]:
import copy
from pprint import pprint

import torch

# Models
G = torch.nn.Linear(1000, 100)
C = torch.nn.Linear(100, 10)
D = torch.nn.Sequential(torch.nn.Linear(100, 1), torch.nn.Flatten(start_dim=0))
G_opt = torch.optim.Adam(G.parameters())
C_opt = torch.optim.Adam(C.parameters())
D_opt = torch.optim.Adam(D.parameters())

dataset_size = 10000
# 1 batch of data
example_data = {
    "src_imgs": torch.randn(32, 1000),
    "target_imgs": torch.randn(32, 1000),
    "src_labels": torch.randint(0, 10, size=(32,)),
    "src_domain": torch.zeros(32),
    "target_domain": torch.zeros(32),
    "src_sample_idx": torch.randint(0, dataset_size, size=(32,)),
    "target_sample_idx": torch.randint(0, dataset_size, size=(32,)),
}


def get_data(keys):
    return {k: example_data[k] for k in keys}

### [Adversarial Discriminative Domain Adaptation](https://arxiv.org/abs/1702.05464) (ADDA)

In [3]:
from pytorch_adapt.hooks import ADDAHook

# make Target model
T = copy.deepcopy(G)
T_opt = torch.optim.Adam(T.parameters())
hook = ADDAHook(g_opts=[T_opt], d_opts=[D_opt])

models = {"G": G, "C": C, "D": D, "T": T}
data = get_data(["src_imgs", "target_imgs", "src_domain", "target_domain"])
_, losses = hook({**models, **data})
pprint(losses)

{'d_loss': {'d_src_domain_loss': 0.6439731121063232,
            'd_target_domain_loss': 0.6785831451416016,
            'total': 0.6612781286239624},
 'g_loss': {'g_target_domain_loss': 0.0, 'total': 0.0}}


### [Larger Norm More Transferable: An Adaptive Feature Norm Approach for Unsupervised Domain Adaptation](https://arxiv.org/abs/1811.07456) (AFN)

In [4]:
from pytorch_adapt.hooks import AFNHook, ClassifierHook

hook = ClassifierHook(opts=[G_opt, C_opt], post=[AFNHook()])

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'afn_loss': 2.0,
                'c_loss': 2.382333278656006,
                'total': 2.191166639328003}}


### [Domain Adaptation with Auxiliary Target Domain-Oriented Classifier](https://arxiv.org/abs/2007.04171) (ATDOC)

In [5]:
from pytorch_adapt.hooks import ATDOCHook, ClassifierHook

atdoc = ATDOCHook(dataset_size=10000, feature_dim=100, num_classes=10)
hook = ClassifierHook(opts=[G_opt, C_opt], post=[atdoc])

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels", "target_sample_idx"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 2.233757495880127,
                'pseudo_label_loss': 0.23822198808193207,
                'total': 1.2359896898269653}}


### [Towards Discriminability and Diversity: Batch Nuclear-norm Maximization under Label Insufficient Situations](https://arxiv.org/abs/2003.12237) (BNM)

In [6]:
from pytorch_adapt.hooks import BNMHook, ClassifierHook

hook = ClassifierHook(opts=[G_opt, C_opt], post=[BNMHook()])

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'bnm_loss': -0.12025494128465652,
                'c_loss': 1.9182664155960083,
                'total': 0.8990057110786438}}


### [Transferability vs. Discriminability: Batch Spectral Penalization for Adversarial Domain Adaptation](http://proceedings.mlr.press/v97/chen19i.html) (BSP)

In [7]:
from pytorch_adapt.hooks import BSPHook, ClassifierHook
from pytorch_adapt.weighters import MeanWeighter

weighter = MeanWeighter(weights={"bsp_loss": 1e-3})
hook = ClassifierHook(opts=[G_opt, C_opt], post=[BSPHook()], weighter=weighter)

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'bsp_loss': 0.32410016655921936,
                'c_loss': 1.565260648727417,
                'total': 0.944680392742157}}


### [Conditional Adversarial Domain Adaptation](https://arxiv.org/abs/1705.10667) (CDAN)

In [8]:
from pytorch_adapt.hooks import CDANHook
from pytorch_adapt.layers import RandomizedDotProduct

feature_combiner = RandomizedDotProduct(in_dims=[100, 10], out_dim=100)
hook = CDANHook(g_opts=[G_opt, C_opt], d_opts=[D_opt])

models = {"G": G, "C": C, "D": D}
misc = {"feature_combiner": feature_combiner}
data = get_data(
    ["src_imgs", "target_imgs", "src_labels", "src_domain", "target_domain"]
)
_, losses = hook({**models, **misc, **data})
pprint(losses)

{'d_loss': {'d_src_domain_loss': 0.660281777381897,
            'd_target_domain_loss': 0.6792236566543579,
            'total': 0.6697527170181274},
 'g_loss': {'c_loss': 1.2200168371200562,
            'g_src_domain_loss': 0.6587430238723755,
            'g_target_domain_loss': 0.6779651045799255,
            'total': 0.8522416949272156}}


### [Deep CORAL: Correlation Alignment for Deep Domain Adaptation](https://arxiv.org/abs/1607.01719) (CORAL)

In [9]:
from pytorch_adapt.hooks import AlignerPlusCHook
from pytorch_adapt.layers import CORALLoss

hook = AlignerPlusCHook(opts=[G_opt, C_opt], loss_fn=CORALLoss(), softmax=False)

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.9225510954856873,
                'features_confusion_loss': 0.009478135965764523,
                'logits_confusion_loss': 0.00622877012938261,
                'total': 0.3127526640892029}}


### [Domain-Adversarial Training of Neural Networks](https://arxiv.org/abs/1505.07818) (DANN)

In [10]:
from pytorch_adapt.hooks import DANNHook

hook = DANNHook(opts=[G_opt, C_opt, D_opt])

models = {"G": G, "C": C, "D": D}
data = get_data(
    ["src_imgs", "target_imgs", "src_labels", "src_domain", "target_domain"]
)
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.6768689751625061,
                'src_domain_loss': 0.6425599455833435,
                'target_domain_loss': 0.683060348033905,
                'total': 0.6674964427947998}}


### [Simultaneous Deep Transfer Across Domains and Tasks](https://arxiv.org/abs/1510.02192) (Domain Confusion)

In [11]:
from pytorch_adapt.hooks import DomainConfusionHook

# D has to output 2 values instead of the usual 1
D_ = torch.nn.Linear(100, 2)
D_opt_ = torch.optim.Adam(D_.parameters())

hook = DomainConfusionHook(g_opts=[G_opt, C_opt], d_opts=[D_opt_])

models = {"G": G, "C": C, "D": D_}
data = get_data(
    ["src_imgs", "target_imgs", "src_labels", "src_domain", "target_domain"]
)
_, losses = hook({**models, **data})
pprint(losses)

{'d_loss': {'d_src_domain_loss': 0.8172264695167542,
            'd_target_domain_loss': 0.8290322422981262,
            'total': 0.8231293559074402},
 'g_loss': {'c_loss': 0.48606035113334656,
            'g_src_domain_loss': 0.7360879778862,
            'g_target_domain_loss': 0.7338228225708008,
            'total': 0.6519904136657715}}


### GAN

In [12]:
from pytorch_adapt.hooks import GANHook

hook = GANHook(g_opts=[G_opt, C_opt], d_opts=[D_opt])

models = {"G": G, "C": C, "D": D}
data = get_data(
    ["src_imgs", "target_imgs", "src_labels", "src_domain", "target_domain"]
)
_, losses = hook({**models, **data})
pprint(losses)

{'d_loss': {'d_src_domain_loss': 0.6861247420310974,
            'd_target_domain_loss': 0.7300451993942261,
            'total': 0.7080849409103394},
 'g_loss': {'c_loss': 0.3431521952152252,
            'g_src_domain_loss': 0.681054413318634,
            'g_target_domain_loss': 0.725422739982605,
            'total': 0.5832098126411438}}


### [Gradually Vanishing Bridge for Adversarial Domain Adaptation](https://arxiv.org/abs/2003.13183) (GVB)

In [13]:
from pytorch_adapt.hooks import GVBHook
from pytorch_adapt.layers import ModelWithBridge

# Discriminator comes after classifier,
# so the input shape is num_classes instead of feature size
D_ = torch.nn.Sequential(torch.nn.Linear(10, 1), torch.nn.Flatten(start_dim=0))

# Add bridges
C_ = ModelWithBridge(C)
D_ = ModelWithBridge(D_)
C_opt_ = torch.optim.Adam(C_.parameters())
D_opt_ = torch.optim.Adam(D_.parameters())

hook = GVBHook(opts=[G_opt, C_opt_, D_opt_])

models = {"G": G, "C": C_, "D": D_}
data = get_data(
    ["src_imgs", "target_imgs", "src_labels", "src_domain", "target_domain"]
)
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.33657750487327576,
                'd_src_bridge_loss': 0.04263206571340561,
                'd_target_bridge_loss': 0.024829374626278877,
                'g_src_bridge_loss': 0.49391111731529236,
                'g_target_bridge_loss': 0.4551009237766266,
                'src_domain_loss': 0.5293594002723694,
                'target_domain_loss': 0.5325483083724976,
                'total': 0.34499409794807434}}


### Information Maximization (IM)

In [14]:
from pytorch_adapt.hooks import ClassifierHook, TargetDiversityHook, TargetEntropyHook

hook = ClassifierHook(
    opts=[G_opt, C_opt], post=[TargetEntropyHook(), TargetDiversityHook()]
)

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.16920050978660583,
                'diversity_loss': -2.2921385765075684,
                'entropy_loss': 2.122807025909424,
                'total': -4.3710071622626856e-05}}


### [Information-Theoretical Learning of Discriminative Clusters for Unsupervised Domain Adaptation](https://icml.cc/2012/papers/566.pdf) (ITL)

In [15]:
from pytorch_adapt.hooks import (
    ClassifierHook,
    ISTLossHook,
    TargetDiversityHook,
    TargetEntropyHook,
)

hook = ClassifierHook(
    opts=[G_opt, C_opt],
    post=[ISTLossHook(), TargetEntropyHook(), TargetDiversityHook()],
)

models = {"G": G, "C": C}
data = get_data(
    ["src_imgs", "target_imgs", "src_labels", "src_domain", "target_domain"]
)
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.1202351301908493,
                'diversity_loss': -2.2917556762695312,
                'entropy_loss': 2.1037609577178955,
                'ist_loss': -1.8626447051417472e-09,
                'total': -0.016939878463745117}}


### [Deep Transfer Learning with Joint Adaptation Networks](https://arxiv.org/abs/1605.06636) (JMMD)

In [16]:
from pytorch_adapt.hooks import AlignerPlusCHook, JointAlignerHook
from pytorch_adapt.layers import MMDLoss
from pytorch_adapt.layers.utils import get_kernel_scales

kernel_scales = get_kernel_scales(low=-3, high=3, num_kernels=10)
loss_fn = MMDLoss(kernel_scales=kernel_scales)
aligner_hook = JointAlignerHook(loss_fn=loss_fn)
hook = AlignerPlusCHook(opts=[G_opt, C_opt], aligner_hook=aligner_hook)

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.08655861020088196,
                'joint_confusion_loss': 0.12583012878894806,
                'total': 0.10619436949491501}}


### [Minimum Class Confusion for Versatile Domain Adaptation](https://arxiv.org/abs/1912.03699) (MCC)

In [17]:
from pytorch_adapt.hooks import ClassifierHook, MCCHook

hook = ClassifierHook(opts=[G_opt, C_opt], post=[MCCHook()])

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.06311061233282089,
                'mcc_loss': 0.8397989273071289,
                'total': 0.451454758644104}}


### [Maximum Classifier Discrepancy for Unsupervised Domain Adaptation](https://arxiv.org/abs/1712.02560) (MCD)

In [18]:
from pytorch_adapt.hooks import MCDHook
from pytorch_adapt.layers import MultipleModels
from pytorch_adapt.utils import common_functions as c_f

# MCD needs 2 classifiers
C_ = MultipleModels(C, c_f.reinit(copy.deepcopy(C)))
C_opt_ = torch.optim.Adam(C_.parameters())

hook = MCDHook(g_opts=[G_opt], c_opts=[C_opt_])

models = {"G": G, "C": C_}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'x_loss': {'c_loss0': 0.04653286188840866,
            'c_loss1': 2.8394877910614014,
            'total': 1.4430103302001953},
 'y_loss': {'c_loss0': 0.03436530381441116,
            'c_loss1': 2.650433301925659,
            'discrepancy_loss': -0.07384142279624939,
            'total': 0.870319128036499},
 'z_loss': {'discrepancy_loss': 0.075227752327919, 'total': 0.075227752327919}}


### [Learning Transferable Features with Deep Adaptation Networks](https://arxiv.org/abs/1502.02791) (MMD)

In [19]:
from pytorch_adapt.hooks import AlignerPlusCHook
from pytorch_adapt.layers import MMDLoss
from pytorch_adapt.layers.utils import get_kernel_scales

kernel_scales = get_kernel_scales(low=-3, high=3, num_kernels=10)
loss_fn = MMDLoss(kernel_scales=kernel_scales)
hook = AlignerPlusCHook(opts=[G_opt, C_opt], loss_fn=loss_fn)

models = {"G": G, "C": C}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.015743529424071312,
                'features_confusion_loss': 0.005411505699157715,
                'logits_confusion_loss': 0.1700156331062317,
                'total': 0.06372355669736862}}


### [Unsupervised Domain Adaptation with Residual Transfer Networks](https://arxiv.org/abs/1602.04433) (RTN)

In [20]:
from pytorch_adapt.hooks import RTNHook
from pytorch_adapt.layers import PlusResidual, RandomizedDotProduct

residual_model = PlusResidual(torch.nn.Linear(10, 10))
feature_combiner = RandomizedDotProduct(in_dims=[100, 10], out_dim=100)

kernel_scales = get_kernel_scales(low=-3, high=3, num_kernels=10)
loss_fn = MMDLoss(kernel_scales=kernel_scales)
hook = RTNHook(opts=[G_opt, C_opt], aligner_loss_fn=loss_fn)

models = {
    "G": G,
    "C": C,
    "residual_model": residual_model,
    "feature_combiner": feature_combiner,
}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'total_loss': {'c_loss': 0.04441383108496666,
                'entropy_loss': 1.8927198648452759,
                'features_confusion_loss': 0.17413365840911865,
                'total': 0.703755795955658}}


### [Stochastic Classifiers for Unsupervised Domain Adaptation](https://xiatian-zhu.github.io/papers/LuEtAl_CVPR2020.pdf) (STAR)

In [21]:
from pytorch_adapt.hooks import MCDHook
from pytorch_adapt.layers import MultipleModels, StochasticLinear

# Use same model twice because the multiple models
# is actually modeled by the distribution learned
# in StochasticLinear
C_ = StochasticLinear(100, 10)
C_ = MultipleModels(C_, C_)
C_opt_ = torch.optim.Adam(C_.parameters())

hook = MCDHook(g_opts=[G_opt], c_opts=[C_opt_])

models = {"G": G, "C": C_}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'x_loss': {'c_loss0': 6.110048294067383,
            'c_loss1': 7.3721818923950195,
            'total': 6.741115093231201},
 'y_loss': {'c_loss0': 5.647284507751465,
            'c_loss1': 5.274874687194824,
            'discrepancy_loss': -0.12871317565441132,
            'total': 3.5978152751922607},
 'z_loss': {'discrepancy_loss': 0.11849485337734222,
            'total': 0.11849485337734222}}


### [Sliced Wasserstein Discrepancy for Unsupervised Domain Adaptation](https://arxiv.org/abs/1903.04064) (SWD)

In [22]:
from pytorch_adapt.hooks import MCDHook
from pytorch_adapt.layers import MultipleModels, SlicedWasserstein
from pytorch_adapt.utils import common_functions as c_f

# MCD needs 2 classifiers
C_ = MultipleModels(C, c_f.reinit(copy.deepcopy(C)))
C_opt_ = torch.optim.Adam(C_.parameters())
loss_fn = SlicedWasserstein(m=128)

hook = MCDHook(g_opts=[G_opt], c_opts=[C_opt_], discrepancy_loss_fn=loss_fn)

models = {"G": G, "C": C_}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'x_loss': {'c_loss0': 0.009857219643890858,
            'c_loss1': 2.4092624187469482,
            'total': 1.2095597982406616},
 'y_loss': {'c_loss0': 0.00883272010833025,
            'c_loss1': 2.2350099086761475,
            'discrepancy_loss': -0.2507260739803314,
            'total': 0.6643721461296082},
 'z_loss': {'discrepancy_loss': 0.1385493129491806,
            'total': 0.1385493129491806}}


### [Domain-Symmetric Networks for Adversarial Domain Adaptation](https://arxiv.org/abs/1904.04663) (SymNets)

In [23]:
from pytorch_adapt.hooks import SymNetsHook
from pytorch_adapt.layers import MultipleModels
from pytorch_adapt.utils import common_functions as c_f

# SymNets needs 2 classifiers
C_ = MultipleModels(C, c_f.reinit(copy.deepcopy(C)))
C_opt_ = torch.optim.Adam(C_.parameters())

hook = SymNetsHook(g_opts=[G_opt], c_opts=[C_opt_])

models = {"G": G, "C": C_}
data = get_data(["src_imgs", "target_imgs", "src_labels"])
_, losses = hook({**models, **data})
pprint(losses)

{'c_loss': {'c_loss0': 0.008523832075297832,
            'c_loss1': 2.7464451789855957,
            'c_symnets_src_domain_loss_0': 0.021570373326539993,
            'c_symnets_target_domain_loss_1': 0.8368582725524902,
            'total': 0.9033494591712952},
 'g_loss': {'g_symnets_target_domain_loss_0': 0.6074486374855042,
            'g_symnets_target_domain_loss_1': 0.8207646608352661,
            'symnets_category_loss': 3.787899971008301,
            'symnets_entropy_loss': 2.089550018310547,
            'total': 1.826415777206421}}


### [A DIRT-T Approach to Unsupervised Domain Adaptation](https://arxiv.org/abs/1802.08735) (VADA)

In [24]:
from pytorch_adapt.hooks import VADAHook

combined_model = torch.nn.Sequential(G, C)
hook = VADAHook(g_opts=[G_opt, C_opt], d_opts=[D_opt])

models = {"G": G, "C": C, "D": D}
misc = {"combined_model": combined_model}
data = get_data(
    ["src_imgs", "target_imgs", "src_labels", "src_domain", "target_domain"]
)
_, losses = hook({**models, **misc, **data})
pprint(losses)

{'d_loss': {'d_src_domain_loss': 0.6249565482139587,
            'd_target_domain_loss': 0.6639358401298523,
            'total': 0.6444461941719055},
 'g_loss': {'c_loss': 0.008804281242191792,
            'entropy_loss': 1.896783471107483,
            'g_src_domain_loss': 0.6186193227767944,
            'g_target_domain_loss': 0.6593835353851318,
            'src_vat_loss': 0.3772188723087311,
            'target_vat_loss': 1.4838342666625977,
            'total': 0.8407739996910095}}
