In [9]:
# Clear the interactive namespace without asking for confirmation (-f) so the
# cells below do not depend on variables left over from earlier runs.
%reset -f

In [10]:
import pandas as pd
import numpy as np
import os
import yaml
from easydict import EasyDict

import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import normalized_mutual_info_score as NMI

from z_common import config_to_execute
from data_aug.gaussian_blur import GaussianBlur
from clustering import run_clustering
from dataset import get_datasets
from simclr import SimCLR
from log_functions import *

from models.baseline_encoder import Encoder
from models.alexnet_simclr import AlexSimCLR
from models.resnet_simclr import ResNetSimCLR
from loss.nt_xent import NTXentLoss
from functions import *

In [11]:
# Settings handed to config_to_execute for this run.
N_CUDA = 0
PARENT = 'Office31'
DATA_DOMAIN = 'dslr_webcam'
log_dir_opt = ''  # empty log-directory option

config, CONFIG_FILE, log_dir = config_to_execute(
    DATA_DOMAIN,
    PARENT,
    N_CUDA,
    log_dir_opt,
)

# Keep the config file path and log directory on the config object itself,
# together with the lap counters.
config.config_path, config.log_dir = CONFIG_FILE, log_dir
config.lap, config.num_lap = 1, 2



In [12]:
# Load the per-row domain labels written to cluster_pca_gmm.csv in the log dir.
edl_csv_path = os.path.join(config.log_dir, 'cluster_pca_gmm.csv')
edl_frame = pd.read_csv(edl_csv_path, names=['domain_label'], dtype=int)
config.edls = edl_frame['domain_label'].values

# Training dataset and a shuffled loader that drops the last incomplete batch.
dataset = get_datasets(config, 'train')
train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=config.batch_size,
    shuffle=True,
    num_workers=2,
    drop_last=True,
)

# Encoder and contrastive loss, both on the GPU.
model = Encoder(config.model.ssl, input_dim=3, out_dim=config.model.out_dim).cuda()
nt_xent_criterion = NTXentLoss(config, 'cuda', config.batch_size, **config.loss)

# Pull one batch: two views per sample plus the labels yielded with them.
first_batch = next(iter(train_loader))
xis, xjs, edls = first_batch
xis = xis.cuda()
xjs = xjs.cuda()

# Forward both views; the model returns (representation, projection), each [N, C].
ris, zis = model(xis)
rjs, zjs = model(xjs)

# L2-normalise the projection vectors along the feature dimension.
zis, zjs = (F.normalize(proj, dim=1) for proj in (zis, zjs))

# Loss for this single batch; displayed as the cell output.
loss = nt_xent_criterion(zis, zjs, edls)
loss

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    4.4s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:   10.6s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:   21.0s
[Parallel(n_jobs=4)]: Done 498 out of 498 | elapsed:   23.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  76 tasks      | elapsed:    2.4s
[Parallel(n_jobs=4)]: Done 376 tasks      | elapsed:   12.2s
[Parallel(n_jobs=4)]: Done 788 out of 795 | elapsed:   24.5s remaining:    0.2s
[Parallel(n_jobs=4)]: Done 795 out of 795 | elapsed:   24.6s finished


tensor(5.1084, device='cuda:0', grad_fn=<DivBackward0>)

In [13]:
# Pairwise cosine similarity between every projection of both views.
cosine_similarity = nn.CosineSimilarity(dim=-1)

# Stack the two views row-wise: [2*batch_size, C].
representations = torch.cat([zjs, zis], dim=0)

# Broadcast [2N, 1, C] against [1, 2N, C] so that every row is compared with
# every other row, producing the full [2N, 2N] similarity matrix.
# (Unsqueezing both operands on dim 1 only compares each row with itself and
# returns a [2N, 1] tensor of self-similarities.)
similarity_matrix = cosine_similarity(representations.unsqueeze(1), representations.unsqueeze(0))  # [2*batch_size, 2*batch_size]

# Guard against a silent shape regression of the matrix.
n_rows = representations.shape[0]
assert similarity_matrix.shape == (n_rows, n_rows), similarity_matrix.shape

In [14]:
import itertools

# Re-read the domain labels (one per CSV row) from the log directory.
edl_csv = os.path.join(config.log_dir, 'cluster_pca_gmm.csv')
config.edls = pd.read_csv(edl_csv, names=['domain_label'], dtype=int)['domain_label'].values

# Row positions whose label equals the first (smallest) unique domain label.
first_domain = np.unique(config.edls)[0]
edl_index = [pos for pos, label in enumerate(config.edls) if label == first_domain]

# Show the first pair of positions drawn from that domain.
next(itertools.combinations(edl_index, 2))

(0, 1)