In [1]:
# Mount Google Drive if necessary (optional)
from google.colab import drive
drive.mount('/content/drive')

# Clone the DANCE repository
!git clone https://github.com/VisionLearningGroup/DANCE.git


Mounted at /content/drive
Cloning into 'DANCE'...
remote: Enumerating objects: 75, done.[K
remote: Total 75 (delta 0), reused 0 (delta 0), pack-reused 75 (from 1)[K
Receiving objects: 100% (75/75), 4.08 MiB | 17.13 MiB/s, done.
Resolving deltas: 100% (30/30), done.


In [2]:
# Navigate to the DANCE directory
%cd /content/DANCE

/content/DANCE


In [3]:
# Create a new pip-compatible requirements file
with open("requirements_pip.txt", "w") as f:
    dependencies = [
        "absl-py",
        "argon2-cffi==20.1.0",
        "astor==0.8.0",
        "astunparse==1.6.3",
        "attrs==19.3.0",
        "bleach==3.2.0",
        "bravado==10.6.2",
        "bravado-core==5.17.0",
        "cachetools==3.1.1",
        "click==7.1.2",
        "easydict==1.9",
        "efficientnet-pytorch==0.7.0",
        "h5py>=3.6.0",
        "joblib==0.13.2",
        "jsonschema==3.2.0",
        "matplotlib>=3.5.0",
        "neptune-client==0.4.120",
        "numpy>=1.22.0",
        "pandas>=1.3.0",
        "pyyaml>=6.0",
        "scikit-learn>=1.0,<1.2",
        "scipy>=1.7.0",
        "seaborn==0.11.0",
        "tensorboard>=2.10,<2.12",
        "tensorboardx==1.8",
        "tensorflow>=2.10,<2.12",
        "torch>=2.0.0",
        "torchvision>=0.15.0",
        "tqdm==4.32.1",
        "xmltodict==0.12.0",
    ]
    f.write("\n".join(dependencies))


In [4]:
!pip install -r requirements_pip.txt

Collecting argon2-cffi==20.1.0 (from -r requirements_pip.txt (line 2))
  Downloading argon2_cffi-20.1.0-cp35-abi3-manylinux1_x86_64.whl.metadata (7.9 kB)
Collecting astor==0.8.0 (from -r requirements_pip.txt (line 3))
  Downloading astor-0.8.0-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting attrs==19.3.0 (from -r requirements_pip.txt (line 5))
  Downloading attrs-19.3.0-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting bleach==3.2.0 (from -r requirements_pip.txt (line 6))
  Downloading bleach-3.2.0-py2.py3-none-any.whl.metadata (22 kB)
Collecting bravado==10.6.2 (from -r requirements_pip.txt (line 7))
  Downloading bravado-10.6.2-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting bravado-core==5.17.0 (from -r requirements_pip.txt (line 8))
  Downloading bravado_core-5.17.0-py2.py3-none-any.whl.metadata (3.7 kB)
Collecting cachetools==3.1.1 (from -r requirements_pip.txt (line 9))
  Downloading cachetools-3.1.1-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting click==7.1.2 (from -r requ

In [5]:
import torch
print("GPU available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None")

GPU available: True
GPU Name: NVIDIA A100-SXM4-40GB


In [6]:
# Create the data directory
!mkdir data

# Unzip the dataset into the data directory
!unzip /content/drive/MyDrive/OFFICE31.zip -d ./data

# Move contents from OFFICE31 to the data directory and remove the OFFICE31 folder
!mv ./data/OFFICE31/* ./data
!rm -r ./data/OFFICE31

# Create an "images" folder within each domain directory (Amazon, dslr, webcam)
!mkdir -p ./data/amazon/images
!mkdir -p ./data/dslr/images
!mkdir -p ./data/webcam/images

# Move category folders into the "images" folder for each domain
!mv ./data/amazon/* ./data/amazon/images/ 2>/dev/null
!mv ./data/dslr/* ./data/dslr/images/ 2>/dev/null
!mv ./data/webcam/* ./data/webcam/images/ 2>/dev/null

# Remove any unwanted files or empty directories that may have been left behind
!find ./data -type d -empty -delete


Archive:  /content/drive/MyDrive/OFFICE31.zip
   creating: ./data/OFFICE31/
   creating: ./data/OFFICE31/amazon/
   creating: ./data/OFFICE31/amazon/mobile_phone/
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0028.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0014.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0001.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0015.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0029.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0003.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0017.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0016.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0002.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0006.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0012.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_phone/frame_0013.jpg  
  inflating: ./data/OFFICE31/amazon/mobile_ph

In [7]:
!unzip txt.zip

Archive:  txt.zip
   creating: txt/
  inflating: __MACOSX/._txt          
  inflating: txt/source_amazon_obda.txt  
  inflating: txt/target_webcam_obda.txt  
  inflating: txt/target_dslr_cls.txt  
  inflating: txt/target_Art_cls.txt  
  inflating: txt/source_visda_obda.txt  
  inflating: __MACOSX/txt/._source_visda_obda.txt  
  inflating: txt/source_Clipart_cls.txt  
  inflating: txt/target_visda_univ.txt  
  inflating: __MACOSX/txt/._target_visda_univ.txt  
  inflating: txt/.DS_Store           
  inflating: __MACOSX/txt/._.DS_Store  
  inflating: txt/source_amazon_unive.txt  
  inflating: txt/target_list_cls.txt  
  inflating: __MACOSX/txt/._target_list_cls.txt  
  inflating: txt/source_Clipart_pada.txt  
  inflating: txt/source_dslr_pada.txt  
  inflating: txt/source_Real_pada.txt  
  inflating: txt/source_visda_cls.txt  
  inflating: __MACOSX/txt/._source_visda_cls.txt  
  inflating: txt/source_dslr_opda.txt  
  inflating: txt/target_Product_pada.txt  
  inflating: txt/source_webcam

In [8]:
# Define the file path
file_path = "/content/DANCE/train_dance.py"

# Define the new content to write
new_content = """from __future__ import print_function
import yaml
import easydict
import os
import numpy as np
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.transforms as transforms
from data_loader.get_loader import get_loader
from utils.utils import *
from utils.lr_schedule import inv_lr_scheduler
from utils.loss import *
from models.LinearAverage import LinearAverage
from eval import test
import time
import matplotlib.pyplot as plt

# Training settings

import argparse

parser = argparse.ArgumentParser(description='Pytorch DA',
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--config', type=str, default='config.yaml', help='/path/to/config/file')

parser.add_argument('--source_path', type=str, default='./utils/source_list.txt', metavar='B',
                    help='path to source list')
parser.add_argument('--target_path', type=str, default='./utils/target_list.txt', metavar='B',
                    help='path to target list')
parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--exp_name', type=str, default='office_close', help='/path/to/config/file')
parser.add_argument("--gpu_devices", type=int, nargs='+', default=None, help="")

args = parser.parse_args()
config_file = args.config
conf = yaml.load(open(config_file), Loader=yaml.FullLoader)
save_config = yaml.load(open(config_file), Loader=yaml.FullLoader)
conf = easydict.EasyDict(conf)
gpu_devices = ','.join([str(id) for id in args.gpu_devices])
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_devices

args.cuda = torch.cuda.is_available()
source_data = args.source_path
target_data = args.target_path
evaluation_data = args.target_path

batch_size = conf.data.dataloader.batch_size
filename = source_data.split("_")[1] + "2" + target_data.split("_")[1]
filename = os.path.join("record", args.exp_name,
                        config_file.replace(".yaml", ""), filename)
if not os.path.exists(os.path.dirname(filename)):
    os.makedirs(os.path.dirname(filename))
print("record in %s " % filename)

data_transforms = {
    source_data: transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    target_data: transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    evaluation_data: transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

use_gpu = torch.cuda.is_available()
source_loader, target_loader, \\
test_loader, target_folder = get_loader(source_data, target_data,
                                        evaluation_data, data_transforms,
                                        batch_size=batch_size, return_id=True,
                                        balanced=conf.data.dataloader.class_balance)
dataset_test = test_loader
n_share = conf.data.dataset.n_share
n_source_private = conf.data.dataset.n_source_private
num_class = n_share + n_source_private

G, C1 = get_model_mme(conf.model.base_model, num_class=num_class,
                      temp=conf.model.temp)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
G.to(device)
C1.to(device)
ndata = target_folder.__len__()

## Memory
lemniscate = LinearAverage(2048, ndata, conf.model.temp, conf.train.momentum).to(device)
params = []
for key, value in dict(G.named_parameters()).items():
    if value.requires_grad and "features" in key:
        if 'bias' in key:
            params += [{'params': [value], 'lr': conf.train.multi,
                        'weight_decay': conf.train.weight_decay}]
        else:
            params += [{'params': [value], 'lr': conf.train.multi,
                        'weight_decay': conf.train.weight_decay}]
    else:
        if 'bias' in key:
            params += [{'params': [value], 'lr': 1.0,
                        'weight_decay': conf.train.weight_decay}]
        else:
            params += [{'params': [value], 'lr': 1.0,
                        'weight_decay': conf.train.weight_decay}]
criterion = torch.nn.CrossEntropyLoss().to(device)

opt_g = optim.SGD(params, momentum=conf.train.sgd_momentum,
                  weight_decay=0.0005, nesterov=True)
opt_c1 = optim.SGD(list(C1.parameters()), lr=1.0,
                   momentum=conf.train.sgd_momentum, weight_decay=0.0005,
                   nesterov=True)
param_lr_g = []
for param_group in opt_g.param_groups:
    param_lr_g.append(param_group["lr"])
param_lr_f = []
for param_group in opt_c1.param_groups:
    param_lr_f.append(param_group["lr"])

def train():
    print('train start!')
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)

    inference_times = []  # List to store inference times
    steps = []  # List to store step numbers
    loss_source_list = []  # List to store Loss Source values
    loss_nc_list = []  # List to store Loss NC values
    loss_ens_list = []  # List to store Loss ENS values

    for step in range(conf.train.min_step + 1):
        G.train()
        C1.train()
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_s = next(data_iter_s)
        inv_lr_scheduler(param_lr_g, opt_g, step,
                         init_lr=conf.train.lr,
                         max_iter=conf.train.min_step)
        inv_lr_scheduler(param_lr_f, opt_c1, step,
                         init_lr=conf.train.lr,
                         max_iter=conf.train.min_step)
        img_s = data_s[0].to(device)
        label_s = data_s[1].to(device)
        img_t = data_t[0].to(device)
        index_t = data_t[2].to(device)
        if len(img_t) < batch_size:
            break
        if len(img_s) < batch_size:
            break

        # Measure inference time
        start_time = time.time()
        feat_t = G(img_t)
        out_t = C1(feat_t)
        inference_time = time.time() - start_time
        inference_times.append(inference_time)

        opt_g.zero_grad()
        opt_c1.zero_grad()
        ## Weight normalization
        C1.weight_norm()
        ## Source loss calculation
        feat = G(img_s)
        out_s = C1(feat)
        loss_s = criterion(out_s, label_s)

        feat_t = G(img_t)
        out_t = C1(feat_t)
        feat_t = F.normalize(feat_t)
        ### Calculate mini-batch x memory similarity
        feat_mat = lemniscate(feat_t, index_t)
        ### We do not use memory features present in mini-batch
        feat_mat[:, index_t] = -1 / conf.model.temp
        ### Calculate mini-batch x mini-batch similarity
        feat_mat2 = torch.matmul(feat_t,
                                 feat_t.t()) / conf.model.temp
        mask = torch.eye(feat_mat2.size(0),
                         feat_mat2.size(0)).bool().to(device)
        feat_mat2.masked_fill_(mask, -1 / conf.model.temp)
        loss_nc = conf.train.eta * entropy(torch.cat([out_t, feat_mat,
                                                      feat_mat2], 1))
        loss_ent = conf.train.eta * entropy_margin(out_t, conf.train.thr,
                                                   conf.train.margin)
        all_loss = loss_nc + loss_s + loss_ent
        all_loss.backward()
        opt_g.step()
        opt_c1.step()
        opt_g.zero_grad()
        opt_c1.zero_grad()
        lemniscate.update_weight(feat_t, index_t)

        # Log losses and steps
        steps.append(step)
        loss_source_list.append(loss_s.item())
        loss_nc_list.append(loss_nc.item())
        loss_ens_list.append(loss_ent.item())

        if step % conf.train.log_interval == 0:
            print('Train [{}/{} ({:.2f}%)]\\tLoss Source: {:.6f} '
                  'Loss NC: {:.6f} Loss ENS: {:.6f}\\t'.format(
                step, conf.train.min_step,
                100 * float(step / conf.train.min_step),
                loss_s.item(), loss_nc.item(), loss_ent.item()))
            print(f'Inference Time for Step {step}: {inference_time:.6f} seconds')

        if step > 0 and step % conf.test.test_interval == 0:
            test(step, dataset_test, filename, n_share, num_class, G, C1,
                 conf.train.thr)
            G.train()
            C1.train()

    # Calculate and log the average inference time
    avg_time = sum(inference_times) / len(inference_times)
    print(f'Average Inference Time per Batch: {avg_time:.6f} seconds')

    # Plot losses
    plt.figure(figsize=(10, 6))
    plt.plot(steps, loss_source_list, label="Loss Source")
    plt.plot(steps, loss_nc_list, label="Loss NC")
    plt.plot(steps, loss_ens_list, label="Loss ENS")
    plt.xlabel("Training Steps")
    plt.ylabel("Loss Values")
    plt.title("Losses During Training")
    plt.legend()
    plt.grid()
    plt.show()

train()
"""

# Write the content to the file
with open(file_path, "w") as file:
    file.write(new_content)

print(f"File {file_path} has been successfully overwritten!")


File /content/DANCE/train_dance.py has been successfully overwritten!


In [9]:
# Define the file path for txt_edit.py
file_path = "/content/DANCE/utils/txt_edit.py"

# The content to be written to the file
new_content = """
import sys

# Open the input file (list of image paths) and output file
file = open(sys.argv[1], 'r')
file_new = open(sys.argv[2], 'w')

# Read all lines from the input file
files = [line.strip() for line in file.readlines()]

# Process each line
for line in files:
    name = line.split(' ')[0]  # Extract the file path
    ind = line.split(' ')[1]   # Extract the index or label
    # Skip lines containing the 'images' folder
    if '/images' in name:
        continue
    # Replace the dataset path
    name = name.replace('/research/masaito', '/content/DANCE/data')
    file_new.write(f'{name} {ind}\\n')  # Write the corrected line to the output file

# Close the files
file.close()
file_new.close()
"""

# Overwrite the txt_edit.py file
with open(file_path, "w") as file:
    file.write(new_content)

print("txt_edit.py has been updated successfully!")


txt_edit.py has been updated successfully!


In [10]:
# Define the path to the file
file_path = "/content/DANCE/data_loader/get_loader.py"

# Define the new content for the file
new_content = """from .mydataset import ImageFolder
from .unaligned_data_loader import UnalignedDataLoader
from collections import Counter
import os
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler

def sanitize_path(path):
    parts = path.split('/')
    sanitized_parts = [part for part in parts if part != 'images']
    return '/'.join(sanitized_parts)

def get_loader(source_path, target_path, evaluation_path, transforms,
               batch_size=32, return_id=False, balanced=False):
    source_path = sanitize_path(source_path)
    target_path = sanitize_path(target_path)
    evaluation_path = sanitize_path(evaluation_path)

    source_folder = ImageFolder(os.path.join(source_path),
                                transforms[source_path],
                                return_id=return_id)
    target_folder_train = ImageFolder(os.path.join(target_path),
                                      transform=transforms[target_path],
                                      return_paths=False, return_id=return_id)
    eval_folder_test = ImageFolder(os.path.join(evaluation_path),
                                   transform=transforms[evaluation_path],
                                   return_paths=True)
    if balanced:
        freq = Counter(source_folder.labels)
        class_weight = {x: 1.0 / freq[x] for x in freq}
        source_weights = [class_weight[x] for x in source_folder.labels]
        sampler = WeightedRandomSampler(source_weights,
                                        len(source_folder.labels))
        print("use balanced loader")
        source_loader = torch.utils.data.DataLoader(
            source_folder,
            batch_size=batch_size,
            sampler=sampler,
            drop_last=True,
            num_workers=4)
    else:
        source_loader = torch.utils.data.DataLoader(
            source_folder,
            batch_size=batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=4)

    target_loader = torch.utils.data.DataLoader(
        target_folder_train,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=4)
    test_loader = torch.utils.data.DataLoader(
        eval_folder_test,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4)

    return source_loader, target_loader, test_loader, target_folder_train

def get_loader_class_inc(source_path, target_path, target_labeled_path, evaluation_path, transforms,
                         batch_size=32, return_id=False, balanced=False):
    source_path = sanitize_path(source_path)
    target_path = sanitize_path(target_path)
    target_labeled_path = sanitize_path(target_labeled_path)
    evaluation_path = sanitize_path(evaluation_path)

    source_folder = ImageFolder(os.path.join(source_path),
                                transforms[source_path],
                                return_id=return_id)
    target_folder_train = ImageFolder(os.path.join(target_path),
                                      transform=transforms[target_path],
                                      return_paths=False, return_id=return_id)
    target_folder_labeled = ImageFolder(os.path.join(target_labeled_path),
                                        transform=transforms[target_labeled_path],
                                        return_paths=False, return_id=return_id)
    eval_folder_test = ImageFolder(os.path.join(evaluation_path),
                                   transform=transforms[evaluation_path],
                                   return_paths=True)
    if balanced:
        freq = Counter(source_folder.labels)
        class_weight = {x: 1.0 / freq[x] for x in freq}
        source_weights = [class_weight[x] for x in source_folder.labels]
        sampler = WeightedRandomSampler(source_weights,
                                        len(source_folder.labels))
        print("use balanced loader")
        source_loader = torch.utils.data.DataLoader(
            source_folder,
            batch_size=batch_size,
            sampler=sampler,
            drop_last=True,
            num_workers=4)
    else:
        source_loader = torch.utils.data.DataLoader(
            source_folder,
            batch_size=batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=4)

    target_loader = torch.utils.data.DataLoader(
        target_folder_train,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=4)
    target_labeled_loader = torch.utils.data.DataLoader(
        target_folder_labeled,
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=1)
    test_loader = torch.utils.data.DataLoader(
        eval_folder_test,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4)

    return source_loader, target_loader, target_labeled_loader, \
           test_loader, target_folder_train

def get_loader_balanced(source_path, target_path, evaluation_path, transforms, batch_size=32):
    source_path = sanitize_path(source_path)
    target_path = sanitize_path(target_path)
    evaluation_path = sanitize_path(evaluation_path)

    source_folder = ImageFolder(os.path.join(source_path),
                                transforms[source_path])
    target_folder_train = ImageFolder(os.path.join(target_path),
                                      transform=transforms[target_path],
                                      return_paths=False)
    eval_folder_test = ImageFolder(os.path.join(evaluation_path),
                                   transform=transforms[evaluation_path],
                                   return_paths=True)
    freq = Counter(source_folder.labels)
    class_weight = {x: 1.0 / freq[x] for x in freq}
    source_weights = [class_weight[x] for x in source_folder.labels]
    sampler = WeightedRandomSampler(source_weights, len(source_folder.labels))

    train_loader = UnalignedDataLoader()
    train_loader.initialize(source_folder, target_folder_train, batch_size, sampler=sampler)

    test_loader = torch.utils.data.DataLoader(
        eval_folder_test,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4)

    return train_loader, test_loader
"""

# Overwrite the file with the new content
with open(file_path, "w") as file:
    file.write(new_content)

print("File overwritten successfully.")


File overwritten successfully.


In [11]:
import os

def rewrite_mydataset(file_path):
    new_code = """import torch.utils.data as data
from PIL import Image
import os
import os.path
import numpy as np

IMG_EXTENSIONS = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]

def find_classes(dir):
    classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
    classes.sort()
    class_to_idx = {classes[i]: i for i in range(len(classes))}
    return classes, class_to_idx

def is_image_file(filename):
    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)

def make_dataset(dir, class_to_idx):
    images = []
    dir = os.path.expanduser(dir)
    for target in os.listdir(dir):
        d = os.path.join(dir, target)
        if not os.path.isdir(d):
            continue

        for root, _, fnames in sorted(os.walk(d)):
            for fname in fnames:
                if is_image_file(fname):
                    path = os.path.join(root, fname)
                    item = (path, class_to_idx[target])
                    images.append(item)

    return images

def default_flist_reader(flist):
    imlist = []
    with open(flist, 'r') as rf:
        for line in rf.readlines():
            impath, imlabel = line.strip().split()
            imlist.append((impath, int(imlabel)))

    return imlist

def default_loader(path):
    return Image.open(path).convert('RGB')

def make_dataset_nolist(image_list):
    with open(image_list) as f:
        image_index = [x.split(' ')[0] for x in f.readlines()]
    with open(image_list) as f:
        label_list = []
        selected_list = []
        for ind, x in enumerate(f.readlines()):
            label = x.split(' ')[1].strip()
            label_list.append(int(label))
            selected_list.append(ind)
        image_index = np.array(image_index)
        label_list = np.array(label_list)
    image_index = image_index[selected_list]
    return image_index, label_list

class ImageFolder(data.Dataset):
    def __init__(self, image_list, transform=None, target_transform=None, return_paths=False,
                 loader=default_loader, train=False, return_id=False):
        imgs, labels = make_dataset_nolist(image_list)
        self.imgs = imgs
        self.labels = labels
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader
        self.return_paths = return_paths
        self.return_id = return_id
        self.train = train

    def __getitem__(self, index):
        path = self.imgs[index]
        target = self.labels[index]
        img = self.loader(path)
        img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        if self.return_paths:
            return img, target, path
        elif self.return_id:
            return img, target, index
        else:
            return img, target

    def __len__(self):
        return len(self.imgs)
"""

    # Write the new code to the file
    with open(file_path, "w") as file:
        file.write(new_code)

    print(f"Rewritten {file_path} successfully.")

# Specify the path to `mydataset.py` and rewrite it
file_path = "/content/DANCE/data_loader/mydataset.py"
rewrite_mydataset(file_path)


Rewritten /content/DANCE/data_loader/mydataset.py successfully.


In [12]:
import os

# Define the directory containing the text files
txt_dir = "./txt"

# New root path for the dataset
new_root_path = "/content/DANCE/data"

# Iterate over all files in the txt directory
for txt_file in os.listdir(txt_dir):
    txt_file_path = os.path.join(txt_dir, txt_file)
    if os.path.isfile(txt_file_path):
        try:
            # Read the content of the text file using latin-1 encoding
            with open(txt_file_path, 'r', encoding='latin-1') as file:
                lines = file.readlines()

            # Update each line with the new path
            updated_lines = []
            for line in lines:
                if ' ' in line:  # Ensure the line has the expected format
                    old_path, label = line.strip().split(' ', 1)
                    # Replace the old root path with the new root path
                    new_path = old_path.replace('/research/masaito/office/', f'{new_root_path}/')
                    updated_lines.append(f"{new_path} {label}\n")

            # Write the updated lines back to the text file
            with open(txt_file_path, 'w', encoding='latin-1') as file:
                file.writelines(updated_lines)
            print(f"Updated {txt_file_path} successfully.")
        except UnicodeDecodeError:
            print(f"Skipping {txt_file_path}: Unable to decode.")
        except ValueError:
            print(f"Skipping {txt_file_path}: Unexpected file format.")


Updated ./txt/source_amazon_obda.txt successfully.
Updated ./txt/source_Clipart_univ.txt successfully.
Updated ./txt/source_dslr_unive.txt successfully.
Updated ./txt/.DS_Store successfully.
Updated ./txt/target_webcam_opda.txt successfully.
Updated ./txt/target_Real_cls.txt successfully.
Updated ./txt/source_Real_cls.txt successfully.
Updated ./txt/target_visda_cls.txt successfully.
Updated ./txt/target_list.txt successfully.
Updated ./txt/source_webcam_cls.txt successfully.
Updated ./txt/target_Art_pada.txt successfully.
Updated ./txt/source_visda_pada.txt successfully.
Updated ./txt/target_amazon_obda.txt successfully.
Updated ./txt/target_visda_obda.txt successfully.
Updated ./txt/target_Clipart_cls.txt successfully.
Updated ./txt/target_dslr_cls.txt successfully.
Updated ./txt/source_dslr_pada.txt successfully.
Updated ./txt/target_dslr_opda.txt successfully.
Updated ./txt/source_webcam_opda.txt successfully.
Updated ./txt/source_amazon_opda.txt successfully.
Updated ./txt/target_

# ODA/OBDA (Open-Set Domain Adaptation)

In [13]:
import yaml

# Load the YAML configuration file
config_file_path = "/content/DANCE/configs/office-train-config_ODA.yaml"  # Update this path to where your config file is located

# Read the YAML file
with open(config_file_path, "r") as file:
    config = yaml.safe_load(file)

# Modify the 'min_step' value
config['train']['min_step'] = 2000
# config['data']['dataloader']['data_workers'] = 3

# Write the updated configuration back to the file
with open(config_file_path, "w") as file:
    yaml.dump(config, file)

print(f"Updated 'min_step' to {config['train']['min_step']} in {config_file_path}.")

Updated 'min_step' to 2000 in /content/DANCE/configs/office-train-config_ODA.yaml.


In [14]:
!sh script/run_office_obda.sh 0 /content/DANCE/configs/office-train-config_ODA.yaml


record in /content/DANCE/configs/office-train-config_ODA/amazon2dslr 
use balanced loader
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100% 97.8M/97.8M [00:00<00:00, 226MB/s]
train start!
  p = F.softmax(p)
  p = F.softmax(p)
Inference Time for Step 0: 1.061805 seconds
Inference Time for Step 100: 0.012239 seconds
Inference Time for Step 200: 0.011829 seconds
Inference Time for Step 300: 0.017755 seconds
Inference Time for Step 400: 0.011502 seconds
Inference Time for Step 500: 0.011523 seconds
  out_t = F.softmax(out_t)

Test set including unknown classes:  Accuracy: 275.0/332 (83%)  (0.9405%)

[500, [1.0, 1.0, 1.0, 1.0, 1.0, 0.75, 1.0, 1.0, 1.0, 0.8695652173913043, 0.7257142857142858], 'per class mean acc 0.940479954827781', 0.8283132530120482, 'closed acc 0.4578313253012048']
Inference Time for Step 600: 0.011387 seconds
Inference Time for Step 700: 0.010595 seconds
Inference Time for Step 800: 0

# OPDA (Open-Partial Domain Adaptation)

In [15]:
import yaml

# Load the YAML configuration file
config_file_path = "/content/DANCE/configs/office-train-config_OPDA.yaml"  # Update this path to where your config file is located

# Read the YAML file
with open(config_file_path, "r") as file:
    config = yaml.safe_load(file)

# Modify the 'min_step' value
config['train']['min_step'] = 2000
# config['data']['dataloader']['data_workers'] = 3

# Write the updated configuration back to the file
with open(config_file_path, "w") as file:
    yaml.dump(config, file)

print(f"Updated 'min_step' to {config['train']['min_step']} in {config_file_path}.")

Updated 'min_step' to 2000 in /content/DANCE/configs/office-train-config_OPDA.yaml.


In [16]:
!sh script/run_office_opda.sh 0 /content/DANCE/configs/office-train-config_OPDA.yaml

record in /content/DANCE/configs/office-train-config_OPDA/amazon2dslr 
use balanced loader
train start!
  p = F.softmax(p)
  p = F.softmax(p)
Inference Time for Step 0: 0.414084 seconds
Inference Time for Step 100: 0.021133 seconds
Inference Time for Step 200: 0.026561 seconds
Inference Time for Step 300: 0.026958 seconds
Inference Time for Step 400: 0.019155 seconds
Inference Time for Step 500: 0.023826 seconds
  out_t = F.softmax(out_t)

Test set including unknown classes:  Accuracy: 274.0/332 (83%)  (0.8943%)

[500, [1.0, 1.0, 1.0, 1.0, 0.8, 0.75, 0.7272727272727273, 1.0, 1.0, 0.782608695652174, 0.7771428571428571], 'per class mean acc 0.8942749345516144', 0.8253012048192772, 'closed acc 0.4246987951807229']
Inference Time for Step 600: 0.019602 seconds
Inference Time for Step 700: 0.020230 seconds
Inference Time for Step 800: 0.027355 seconds
Inference Time for Step 900: 0.028386 seconds
Inference Time for Step 1000: 0.020203 seconds

Test set including unknown classes:  Accuracy: 

# CDA (Closed-Set Domain Adaptation)

In [19]:
  # Define the file path
  file_path_eval = "/content/DANCE/eval.py"

  # Define the new content for eval.py
  new_content_eval = """import torch
  import torch.nn.functional as F
  import numpy as np
  import logging

  def test(step, dataset_test, filename, n_share, unk_class, G, C1, adaptive_threshold):
      G.eval()
      C1.eval()
      correct = 0
      correct_close = 0
      size = 0
      class_list = [i for i in range(n_share)]
      class_list.append(unk_class)
      per_class_num = np.zeros((n_share + 1))
      per_class_correct = np.zeros((n_share + 1)).astype(np.float32)
      per_class_correct_cls = np.zeros((n_share + 1)).astype(np.float32)
      all_pred = []
      all_gt = []

      # Ensure adaptive_threshold is converted to a scalar
      if isinstance(adaptive_threshold, torch.Tensor):
          adaptive_threshold = adaptive_threshold.detach().cpu().item()
      elif isinstance(adaptive_threshold, np.ndarray):
          adaptive_threshold = adaptive_threshold.item()

      for batch_idx, data in enumerate(dataset_test):
          with torch.no_grad():
              img_t, label_t, path_t = data[0], data[1], data[2]
              img_t, label_t = img_t.cuda(), label_t.cuda()
              feat = G(img_t)
              out_t = C1(feat)
              out_t = F.softmax(out_t, dim=1)
              eps = 1e-8
              entr = -torch.sum(out_t * torch.log(out_t + eps), dim=1).data.cpu().numpy()
              pred = out_t.data.max(1)[1]
              k = label_t.data.size()[0]
              pred_cls = pred.cpu().numpy()
              pred = pred.cpu().numpy()

              # Use the adaptive threshold
              pred_unk = np.where(entr > adaptive_threshold)
              pred[pred_unk[0]] = unk_class
              all_gt += list(label_t.data.cpu().numpy())
              all_pred += list(pred)
              for i, t in enumerate(class_list):
                  t_ind = np.where(label_t.data.cpu().numpy() == t)
                  correct_ind = np.where(pred[t_ind[0]] == t)
                  correct_ind_close = np.where(pred_cls[t_ind[0]] == i)
                  per_class_correct[i] += float(len(correct_ind[0]))
                  per_class_correct_cls[i] += float(len(correct_ind_close[0]))
                  per_class_num[i] += float(len(t_ind[0]))
                  correct += float(len(correct_ind[0]))
                  correct_close += float(len(correct_ind_close[0]))
              size += k
      # Exclude the last entry (unk_class) for per_class_acc calculation
      per_class_acc = per_class_correct[:-1] / per_class_num[:-1]  # Exclude last element
      close_p = float(per_class_correct_cls[:-1].sum() / per_class_num[:-1].sum())  # Exclude last element
      overall_acc = float(correct / size)

      print(
          '\\nTest set including unknown classes:  Accuracy: {}/{} ({:.0f}%)  '
          '({:.4f}%)\\n'.format(
              correct, size,
              100. * overall_acc, float(per_class_acc.mean())))
      output = [step, list(per_class_acc), 'per class mean acc %s' % float(per_class_acc.mean()),
                overall_acc, 'closed acc %s' % float(close_p)]
      logger = logging.getLogger(__name__)
      logging.basicConfig(filename=filename, format="%(message)s")
      logger.setLevel(logging.INFO)
      print(output)
      logger.info(output)
  """

  # Write the content to the file
  with open(file_path_eval, "w") as file:
      file.write(new_content_eval)

  print(f"File {file_path_eval} has been successfully overwritten!")


File /content/DANCE/eval.py has been successfully overwritten!


In [20]:
import yaml

# Load the YAML configuration file
config_file_path = "/content/DANCE/configs/office-train-config_CDA.yaml"  # Update this path to where your config file is located

# Read the YAML file
with open(config_file_path, "r") as file:
    config = yaml.safe_load(file)

# Modify the 'min_step' value
config['train']['min_step'] = 2000
# config['data']['dataloader']['data_workers'] = 3

# Write the updated configuration back to the file
with open(config_file_path, "w") as file:
    yaml.dump(config, file)

print(f"Updated 'min_step' to {config['train']['min_step']} in {config_file_path}.")

Updated 'min_step' to 2000 in /content/DANCE/configs/office-train-config_CDA.yaml.


In [21]:
!sh script/run_office_cls.sh 0 /content/DANCE/configs/office-train-config_CDA.yaml

record in /content/DANCE/configs/office-train-config_CDA/amazon2dslr 
use balanced loader
train start!
  p = F.softmax(p)
  p = F.softmax(p)
Inference Time for Step 0: 0.424017 seconds
Inference Time for Step 100: 0.011848 seconds
Inference Time for Step 200: 0.011511 seconds
Inference Time for Step 300: 0.011565 seconds
Inference Time for Step 400: 0.011749 seconds
Inference Time for Step 500: 0.011912 seconds

Test set including unknown classes:  Accuracy: 382.0/498 (77%)  (0.8028%)

[500, [1.0, 1.0, 1.0, 0.75, 1.0, 1.0, 1.0, 0.7857142857142857, 0.6, 1.0, 1.0, 0.8, 0.7083333333333334, 0.625, 0.25806451612903225, 0.7272727272727273, 0.9166666666666666, 0.875, 1.0, 1.0, 0.9230769230769231, 1.0, 0.6086956521739131, 0.1111111111111111, 0.3, 1.0, 1.0, 0.6153846153846154, 0.6666666666666666, 0.6818181818181818, 0.9333333333333333], 'per class mean acc 0.8027786455703483', 0.7670682730923695, 'closed acc 0.8654618473895582']
Inference Time for Step 600: 0.016324 seconds
Inference Time for S

# PDA (Partial Domain Adaptation)

In [28]:
  # Define the file path
  file_path_eval = "/content/DANCE/eval.py"

  # Define the new content for eval.py
  new_content_eval = """import torch
  import torch.nn.functional as F
  import numpy as np
  import logging

  def test(step, dataset_test, filename, n_share, unk_class, G, C1, adaptive_threshold):
      G.eval()
      C1.eval()
      correct = 0
      correct_close = 0
      size = 0
      class_list = [i for i in range(n_share)]
      class_list.append(unk_class)
      per_class_num = np.zeros((n_share + 1))
      per_class_correct = np.zeros((n_share + 1)).astype(np.float32)
      per_class_correct_cls = np.zeros((n_share + 1)).astype(np.float32)
      all_pred = []
      all_gt = []

      # Ensure adaptive_threshold is converted to a scalar
      if isinstance(adaptive_threshold, torch.Tensor):
          adaptive_threshold = adaptive_threshold.detach().cpu().item()
      elif isinstance(adaptive_threshold, np.ndarray):
          adaptive_threshold = adaptive_threshold.item()

      for batch_idx, data in enumerate(dataset_test):
          with torch.no_grad():
              img_t, label_t, path_t = data[0], data[1], data[2]
              img_t, label_t = img_t.cuda(), label_t.cuda()
              feat = G(img_t)
              out_t = C1(feat)
              out_t = F.softmax(out_t, dim=1)
              eps = 1e-8
              entr = -torch.sum(out_t * torch.log(out_t + eps), dim=1).data.cpu().numpy()
              pred = out_t.data.max(1)[1]
              k = label_t.data.size()[0]
              pred_cls = pred.cpu().numpy()
              pred = pred.cpu().numpy()

              # Use the adaptive threshold
              pred_unk = np.where(entr > adaptive_threshold)
              pred[pred_unk[0]] = unk_class
              all_gt += list(label_t.data.cpu().numpy())
              all_pred += list(pred)
              for i, t in enumerate(class_list):
                  t_ind = np.where(label_t.data.cpu().numpy() == t)
                  correct_ind = np.where(pred[t_ind[0]] == t)
                  correct_ind_close = np.where(pred_cls[t_ind[0]] == i)
                  per_class_correct[i] += float(len(correct_ind[0]))
                  per_class_correct_cls[i] += float(len(correct_ind_close[0]))
                  per_class_num[i] += float(len(t_ind[0]))
                  correct += float(len(correct_ind[0]))
                  correct_close += float(len(correct_ind_close[0]))
              size += k
      # Exclude the last entry (unk_class) for per_class_acc calculation
      per_class_acc = per_class_correct[:-1] / per_class_num[:-1]  # Exclude last element
      close_p = float(per_class_correct_cls[:-1].sum() / per_class_num[:-1].sum())  # Exclude last element
      overall_acc = float(correct / size)

      print(
          '\\nTest set including unknown classes:  Accuracy: {}/{} ({:.0f}%)  '
          '({:.4f}%)\\n'.format(
              correct, size,
              100. * overall_acc, float(per_class_acc.mean())))
      output = [step, list(per_class_acc), 'per class mean acc %s' % float(per_class_acc.mean()),
                overall_acc, 'closed acc %s' % float(close_p)]
      logger = logging.getLogger(__name__)
      logging.basicConfig(filename=filename, format="%(message)s")
      logger.setLevel(logging.INFO)
      print(output)
      logger.info(output)
  """

  # Write the content to the file
  with open(file_path_eval, "w") as file:
      file.write(new_content_eval)

  print(f"File {file_path_eval} has been successfully overwritten!")


File /content/DANCE/eval.py has been successfully overwritten!


In [29]:
import yaml

# Load the YAML configuration file
config_file_path = "/content/DANCE/configs/office-train-config_PDA.yaml"  # Update this path to where your config file is located

# Read the YAML file
with open(config_file_path, "r") as file:
    config = yaml.safe_load(file)

# Modify the 'min_step' value
config['train']['min_step'] = 2000
# config['data']['dataloader']['data_workers'] = 3

# Write the updated configuration back to the file
with open(config_file_path, "w") as file:
    yaml.dump(config, file)

print(f"Updated 'min_step' to {config['train']['min_step']} in {config_file_path}.")

Updated 'min_step' to 2000 in /content/DANCE/configs/office-train-config_PDA.yaml.


In [30]:
!sh script/run_office_cls.sh 0 /content/DANCE/configs/office-train-config_PDA.yaml

record in /content/DANCE/configs/office-train-config_PDA/amazon2dslr 
use balanced loader
train start!
  p = F.softmax(p)
  p = F.softmax(p)
Inference Time for Step 0: 0.371845 seconds
Inference Time for Step 100: 0.011080 seconds
Inference Time for Step 200: 0.015028 seconds
Inference Time for Step 300: 0.012104 seconds
Inference Time for Step 400: 0.011713 seconds
Inference Time for Step 500: 0.018039 seconds

Test set including unknown classes:  Accuracy: 140.0/498 (28%)  (0.8981%)

[500, [1.0, 1.0, 1.0, 0.75, 1.0, 0.9166666666666666, 1.0, 0.7142857142857143, 0.6, 1.0], 'per class mean acc 0.8980952380952381', 0.28112449799196787, 'closed acc 0.9675324675324676']
Inference Time for Step 600: 0.018531 seconds
Inference Time for Step 700: 0.013242 seconds
Inference Time for Step 800: 0.011812 seconds
Inference Time for Step 900: 0.012080 seconds
Inference Time for Step 1000: 0.011615 seconds

Test set including unknown classes:  Accuracy: 139.0/498 (28%)  (0.8886%)

[1000, [1.0, 1.0, 

In [None]:
pwd

'/content/DANCE'

In [None]:
cd ..

/content/DANCE
