In [3]:
import collections
import itertools
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from scipy.spatial import distance
from torch.utils.data import Dataset, DataLoader

from otmtd.utils.distance import WTE


class EmbeddingDataset(Dataset):
    """Dataset of per-protein embeddings paired with integer-like task labels.

    The text file (CSV) must provide a ``uniprot_id`` column and a ``label``
    column (for ``task == 'pre_train'`` the column named ``pt_label`` is used as
    the label instead).  The embedding pickles must unpickle to DataFrames with
    ``pro_id`` and ``pro_emb`` columns.

    Attributes:
        unique_labels: distinct labels in order of first appearance in the text file.
        embs_and_labels: DataFrame with ``pro_emb`` and ``label`` columns.
        classes: ``unique_labels`` rendered as strings.
        targets: tensor holding every label, in text-file order.
    """

    def __init__(self, task, text_path, train_embs_path, valid_embs_path=None, pt_label='domain'):
        """Load embeddings/labels for ``task`` and print a short label summary.

        Args:
            task: task name; ``'pre_train'`` and ``'kinase'`` only read ``train_embs_path``.
            text_path: CSV file with ``uniprot_id`` and the label column.
            train_embs_path: pickle with the training-split embeddings.
            valid_embs_path: optional pickle with a second split to append.
            pt_label: for ``'pre_train'`` only, the CSV column to use as label.
        """
        embs, labels, unique_labels = self.load_embs_and_labels(
            task, text_path, train_embs_path, valid_embs_path, pt_label)
        if task != 'pre_train':
            print("{} unique labels: {}".format(task.capitalize(), len(unique_labels)))
        else:
            print("PRE-TRAIN {} unique labels: {}".format(pt_label, len(unique_labels)))
        # Print the class histogram, most frequent label first.
        counter = collections.Counter(labels)
        sorted_counter = dict(sorted(counter.items(), key=lambda kv: kv[1], reverse=True))
        print(sorted_counter)
        self.unique_labels = unique_labels

        # Column-wise concat aligns on the index, so both Series must share 0..n-1.
        self.embs_and_labels = pd.concat([embs, labels], axis=1)
        # Create the Dataset attributes required by otdd.
        self.classes = [str(k) for k in unique_labels]  # list of unique labels as strings
        self.targets = torch.tensor(list(labels))

    def __len__(self):
        """Number of labelled samples."""
        return len(self.targets)

    def __getitem__(self, idx):
        """Return ``(embedding reshaped to (1, -1, 1), label)`` for row ``idx``."""
        row = self.embs_and_labels.iloc[idx]  # fetch the row once instead of twice
        return torch.tensor(row['pro_emb']).reshape(1, -1, 1), torch.tensor(row['label'])

    def load_embs_and_labels(self, task, text_path, train_embs_path, valid_embs_path, pt_label):
        """Read the label file and the embedding pickle(s) and pair them up.

        Embeddings are restricted to the proteins listed in the text file.  When
        the ids on both sides are unique and match one-to-one, the embeddings are
        reordered to the text-file order so that row ``k`` of the embeddings
        belongs to label ``k``.  Otherwise the pickle order is kept and a warning
        is emitted, because labels may then be attached to the wrong embedding.

        Returns:
            ``(embs, labels, unique_labels)`` where ``embs`` is the ``pro_emb``
            Series, ``labels`` the ``label`` Series and ``unique_labels`` the
            array returned by ``labels.unique()``.
        """
        import warnings

        texts = pd.read_csv(text_path)
        if task == 'pre_train':
            texts = texts.rename(columns={pt_label: 'label'})
        labels = texts['label']
        unique_labels = labels.unique()

        # NOTE: unpickling can execute arbitrary code -- only load trusted embedding files.
        if task in ('pre_train', 'kinase') or valid_embs_path is None:
            # pre_train / kinase only have a `train` set; the same holds when no second split is given.
            embs = pd.read_pickle(train_embs_path)
        else:
            embs = pd.concat(
                [pd.read_pickle(train_embs_path), pd.read_pickle(valid_embs_path)], axis=0)

        text_ids = texts['uniprot_id']
        # Vectorised membership test (the per-row `x in list` scan was quadratic).
        embs = embs[embs['pro_id'].isin(text_ids)].reset_index(drop=True)

        emb_ids = embs['pro_id']
        if emb_ids.tolist() != text_ids.tolist():
            one_to_one = (emb_ids.is_unique and text_ids.is_unique
                          and len(emb_ids) == len(text_ids))
            if one_to_one:
                # Same id set on both sides: put the embeddings in text-file order.
                embs = embs.set_index('pro_id').loc[text_ids.tolist()].reset_index(drop=True)
            else:
                warnings.warn(
                    "Embeddings and labels for task '{}' cannot be matched one-to-one by "
                    "protein id ({} embeddings vs {} text rows); keeping positional pairing."
                    .format(task, len(emb_ids), len(text_ids)))
        embs = embs['pro_emb']

        return embs, labels, unique_labels
    
def set_random_seed(SEED):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all GPUs) with ``SEED``.

    Args:
        SEED: integer seed applied to every generator.
    """
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    # Covers every GPU rather than only the current device.
    torch.cuda.manual_seed_all(SEED)

def cal_combs_distances_mean(all_pt_tasks, t2t_distances_list):
    """Average each pre-training task's distance row over the frames that contain it.

    Args:
        all_pt_tasks: iterable of task names; one output row is produced per name,
            in this order.
        t2t_distances_list: list of DataFrames, each with a ``pt_task`` column and
            otherwise only numeric distance columns.

    Returns:
        ``np.ndarray`` of dtype float32 with one row per entry of ``all_pt_tasks``.

    Raises:
        ValueError: if a task does not appear in any of the frames.
    """
    mean_t2t_distances = []
    for pt_task in all_pt_tasks:
        task_rows = []
        for t2t_distances in t2t_distances_list:
            mask = t2t_distances['pt_task'] == pt_task
            if mask.any():
                # Drop the name column by label so its position in the frame does not matter.
                values = t2t_distances.loc[mask].drop(columns='pt_task').to_numpy()
                task_rows.append(values.reshape(1, -1))
        if not task_rows:
            raise ValueError(
                "pre-training task '{}' is not present in any distance table".format(pt_task))
        task_mean = np.concatenate(task_rows, axis=0).astype(np.float32).mean(axis=0)
        mean_t2t_distances.append(task_mean)
    return np.stack(mean_t2t_distances, axis=0)

In [5]:
# Compute Wasserstein task embeddings (WTE) for every pre-training task combination,
# measure their Euclidean distance to each fine-tuning task embedding, and save the results.
return_t2t = True  # also compute task-to-task distances (for PT and FT)
num_samples = 1000
TaskCombs = ['MLM+RMD', 'GO+RMD', 'MLM+GO+D', 'RMD']
pt_tasks_combs = [
    ['mlm', 'domain', 'motif', 'region'],
    ['domain', 'motif', 'region', 'go'],
    ['mlm', 'domain', 'go'],
    ['domain', 'motif', 'region']
]
if len(TaskCombs) != len(pt_tasks_combs):
    raise ValueError("TaskCombs and pt_tasks_combs must list the same number of combinations")
all_pt_tasks = ['mlm', 'domain', 'motif', 'region', 'go']  # row order of the saved t2t matrix

# One entry per fine-tuning task:
#   (task key, directory / file stem, suffix of the second-split pickle or None).
# NOTE: the keys 'fluoresecence' and 'secondary_structrue' are misspelled; they are kept
# unchanged because the capitalised key is the task name handed to WTE.
ft_task_specs = [
    ('stability', 'stability', 'valid'),
    ('fluoresecence', 'fluorescence', 'valid'),
    ('remote_homology', 'remote_homology', 'valid'),
    ('secondary_structrue', 'secondary_structure', 'valid'),
    ('pdbbind', 'pdbbind', 'dev'),
    ('kinase', 'kinase', None),  # kinase only has a train split
]

device = torch.device('cpu')
# generate reference
feat_dim, lbl_emb_dim, ref_size = 512, 10, 200
seed = 1145114
set_random_seed(seed)
# Sampled in float64 and then cast, exactly as before, so the reference stays the same for this seed.
reference = torch.randn(ref_size, feat_dim + lbl_emb_dim, dtype=float, device=device).float()

wte_distance, wte_t2t_distances = [], []
# Machine-specific locations; adjust when running elsewhere.
texts_base_dir = "/home/brian/work/OTMTD_GH/processed_data"
embs_base_dir = "/home/brian/work/OTMTD_GH/protein_embeddings_MultiTasks"
results_dir = "./results(all)"
# Create the output directory up front so the long computation below cannot be lost at save time.
os.makedirs(results_dir, exist_ok=True)

for comb, pt_tasks in zip(TaskCombs, pt_tasks_combs):
    print("Start computing for Pretrain-MultiTask: {}".format(comb))
    # Build the text/embedding path hub for this multi-task combination.
    tasks_texts_embs_hub = {
        'pre_train': ["{}/pre_train/sampling_set.txt".format(texts_base_dir),
                      "{}/pre_train_combs/pre_train_{}_pro_embs_pt.pkl".format(embs_base_dir, comb)],
    }
    for task, stem, second_split in ft_task_specs:
        task_paths = [
            "{}/{}/sequence_go_label_{}.txt".format(texts_base_dir, stem, num_samples),
            "{}/{}/{}->{}_pretrain_pro_embs_train.pkl".format(embs_base_dir, stem, comb, stem),
        ]
        if second_split is not None:
            task_paths.append(
                "{}/{}/{}->{}_pretrain_pro_embs_{}.pkl".format(embs_base_dir, stem, comb, stem, second_split))
        tasks_texts_embs_hub[task] = task_paths

    # One pre-training dataset per label type in this combination (same embeddings, different labels).
    pt_datasets, pt_class_nums, pt_names = [], [], []
    for label_task in pt_tasks:
        dataset = EmbeddingDataset('pre_train', *tasks_texts_embs_hub['pre_train'], pt_label=label_task)
        pt_datasets.append(dataset)
        pt_class_nums.append(len(dataset.unique_labels))
        pt_names.append(label_task)

    ft_datasets, ft_class_nums, ft_names = [], [], []
    for task, _, _ in ft_task_specs:
        dataset = EmbeddingDataset(task, *tasks_texts_embs_hub[task])
        ft_datasets.append(dataset)
        ft_class_nums.append(len(dataset.unique_labels))
        ft_names.append(task.capitalize())

    pt_tasks_dict = {name: position for position, name in enumerate(pt_tasks)}
    MultiTask_WTE = WTE(lbl_emb_dim, device, pt_class_nums=np.array(pt_class_nums),
                        ft_class_nums=np.array(ft_class_nums), pt_names=pt_names, ft_names=ft_names,
                        pt_tasks_combs=[pt_tasks], pt_tasks_dict=pt_tasks_dict, gaussian_assumption=True)
    pt_task_comb_embs, ft_task_embs, pt_task_sole_embs = MultiTask_WTE.cwte(
        pt_datasets, ft_datasets, reference, return_t2t=return_t2t)
    pt_task_comb_vecs = pt_task_comb_embs.reshape(1, -1)  # one combination per iteration
    pt_task_sole_vecs = pt_task_sole_embs.reshape(len(pt_datasets), -1)  # one flattened row per PT dataset
    ft_task_vecs = ft_task_embs.reshape(ft_task_embs.shape[0], -1)  # flatten everything after the first axis

    # Combination-to-FT distances: shape (1, ft_task_vecs.shape[0]).
    wte_distance_row = distance.cdist(pt_task_comb_vecs, ft_task_vecs, 'euclidean')
    wte_distance.append(wte_distance_row)
    # Single-PT-task-to-FT distances, tagged with the PT task name of each row.
    wte_sole_distance = distance.cdist(pt_task_sole_vecs, ft_task_vecs, 'euclidean')
    wte_sole_distance = pd.DataFrame(wte_sole_distance, columns=ft_names)
    wte_sole_distance['pt_task'] = pt_names
    wte_t2t_distances.append(wte_sole_distance)
    print(wte_distance_row.shape, '\n', '==== x ==== ' * 5)

wte_distance = np.concatenate(wte_distance, axis=0)
wte_t2t_distances_ = cal_combs_distances_mean(all_pt_tasks, wte_t2t_distances)
np.save("{}/wte_distance_20bins_{}_mlm.npy".format(results_dir, num_samples), wte_distance)
np.save("{}/wte_t2t_distances.npy".format(results_dir), wte_t2t_distances_)

Start computing for Pretrain-MultiTask: MLM+RMD
PRE-TRAIN mlm unique labels: 15
{2: 148, 10: 117, 6: 116, 8: 101, 9: 98, 4: 95, 5: 59, 14: 54, 1: 49, 3: 47, 7: 46, 12: 45, 0: 45, 11: 13, 13: 12}
PRE-TRAIN domain unique labels: 13
{0: 173, 1: 165, 2: 121, 3: 89, 4: 88, 5: 64, 6: 59, 7: 53, 8: 50, 9: 47, 10: 46, 11: 45, 12: 45}
PRE-TRAIN motif unique labels: 5
{0: 843, 1: 59, 2: 50, 3: 47, 4: 46}
PRE-TRAIN region unique labels: 10
{0: 202, 1: 173, 2: 165, 3: 121, 4: 89, 5: 88, 6: 64, 7: 53, 8: 45, 9: 45}
Stability unique labels: 12
{0: 101, 1: 87, 2: 83, 3: 57, 4: 53, 5: 34, 6: 27, 7: 20, 8: 14, 9: 11, 10: 7, 11: 6}
Fluoresecence unique labels: 12
{0: 142, 1: 83, 2: 80, 3: 48, 4: 38, 5: 35, 6: 20, 7: 20, 8: 14, 9: 9, 10: 6, 11: 5}
Remote_homology unique labels: 16
{0: 136, 1: 126, 2: 96, 3: 26, 4: 20, 5: 14, 6: 11, 7: 11, 8: 9, 9: 9, 10: 8, 11: 8, 12: 7, 13: 7, 14: 6, 15: 6}
Secondary_structrue unique labels: 15
{9: 55, 3: 50, 1: 46, 14: 45, 8: 42, 7: 38, 13: 38, 0: 32, 6: 30, 11: 28, 2:

13it [00:01, 10.57it/s]
Computing label-to-label distances: 100%|██████████| 195/195 [00:21<00:00,  8.98it/s]


Computing inter label-to-label distance for mlm & motif


5it [00:00, 17.21it/s]
Computing label-to-label distances: 100%|██████████| 75/75 [00:06<00:00, 11.26it/s]


Computing inter label-to-label distance for mlm & region


10it [00:01,  6.43it/s]
Computing label-to-label distances: 100%|██████████| 150/150 [00:15<00:00,  9.72it/s]


Computing inter label-to-label distance for mlm & Stability


12it [00:02,  5.85it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:24<00:00,  7.40it/s]


Computing inter label-to-label distance for mlm & Fluoresecence


12it [00:00, 19.26it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:17<00:00, 10.29it/s]


Computing inter label-to-label distance for mlm & Remote_homology


15it [00:01, 12.74it/s]
Computing label-to-label distances: 100%|██████████| 240/240 [00:20<00:00, 11.55it/s]


Computing inter label-to-label distance for mlm & Secondary_structrue


15it [00:02,  5.52it/s]
Computing label-to-label distances: 100%|██████████| 225/225 [00:16<00:00, 13.45it/s]


Computing inter label-to-label distance for mlm & Pdbbind


11it [00:02,  5.48it/s]
Computing label-to-label distances: 100%|██████████| 165/165 [00:14<00:00, 11.67it/s]


Computing inter label-to-label distance for mlm & Kinase


2it [00:00, 14.44it/s]
Computing label-to-label distances: 100%|██████████| 30/30 [00:04<00:00,  7.35it/s]


Computing inter label-to-label distance for domain & motif


5it [00:00, 20.09it/s]
Computing label-to-label distances: 100%|██████████| 65/65 [00:07<00:00,  8.60it/s]


Computing inter label-to-label distance for domain & region


10it [00:01,  6.01it/s]
Computing label-to-label distances: 100%|██████████| 130/130 [00:11<00:00, 10.99it/s]


Computing inter label-to-label distance for domain & Stability


12it [00:01,  7.26it/s]
Computing label-to-label distances: 100%|██████████| 156/156 [00:13<00:00, 11.67it/s]


Computing inter label-to-label distance for domain & Fluoresecence


12it [00:01,  7.38it/s]
Computing label-to-label distances: 100%|██████████| 156/156 [00:15<00:00,  9.98it/s]


Computing inter label-to-label distance for domain & Remote_homology


13it [00:01,  9.18it/s]
Computing label-to-label distances: 100%|██████████| 208/208 [00:20<00:00, 10.00it/s]


Computing inter label-to-label distance for domain & Secondary_structrue


13it [00:01,  7.88it/s]
Computing label-to-label distances: 100%|██████████| 195/195 [00:18<00:00, 10.74it/s]


Computing inter label-to-label distance for domain & Pdbbind


11it [00:00, 18.74it/s]
Computing label-to-label distances: 100%|██████████| 143/143 [00:11<00:00, 11.92it/s]


Computing inter label-to-label distance for domain & Kinase


2it [00:00, 15.48it/s]
Computing label-to-label distances: 100%|██████████| 26/26 [00:01<00:00, 13.35it/s]


Computing inter label-to-label distance for motif & region


5it [00:00, 11.36it/s]
Computing label-to-label distances: 100%|██████████| 50/50 [00:10<00:00,  4.63it/s]


Computing inter label-to-label distance for motif & Stability


5it [00:00,  8.91it/s]
Computing label-to-label distances: 100%|██████████| 60/60 [00:05<00:00, 10.38it/s]


Computing inter label-to-label distance for motif & Fluoresecence


5it [00:00,  8.47it/s]
Computing label-to-label distances: 100%|██████████| 60/60 [00:03<00:00, 15.57it/s]


Computing inter label-to-label distance for motif & Remote_homology


5it [00:00,  5.84it/s]
Computing label-to-label distances: 100%|██████████| 80/80 [00:09<00:00,  8.59it/s]


Computing inter label-to-label distance for motif & Secondary_structrue


5it [00:00, 18.27it/s]
Computing label-to-label distances: 100%|██████████| 75/75 [00:07<00:00, 10.25it/s]


Computing inter label-to-label distance for motif & Pdbbind


5it [00:00, 20.08it/s]
Computing label-to-label distances: 100%|██████████| 55/55 [00:05<00:00, 10.17it/s]


Computing inter label-to-label distance for motif & Kinase


2it [00:00,  5.75it/s]
Computing label-to-label distances: 100%|██████████| 10/10 [00:00<00:00, 18.94it/s]


Computing inter label-to-label distance for region & Stability


10it [00:00, 13.37it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:09<00:00, 12.30it/s]


Computing inter label-to-label distance for region & Fluoresecence


10it [00:00, 20.25it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:12<00:00,  9.57it/s]


Computing inter label-to-label distance for region & Remote_homology


10it [00:01,  7.96it/s]
Computing label-to-label distances: 100%|██████████| 160/160 [00:14<00:00, 10.97it/s]


Computing inter label-to-label distance for region & Secondary_structrue


10it [00:01,  5.60it/s]
Computing label-to-label distances: 100%|██████████| 150/150 [00:14<00:00, 10.36it/s]


Computing inter label-to-label distance for region & Pdbbind


10it [00:00, 11.79it/s]
Computing label-to-label distances: 100%|██████████| 110/110 [00:11<00:00,  9.66it/s]


Computing inter label-to-label distance for region & Kinase


2it [00:00, 12.26it/s]
Computing label-to-label distances: 100%|██████████| 20/20 [00:01<00:00, 10.91it/s]


Computing inter label-to-label distance for Stability & Fluoresecence


12it [00:01, 11.07it/s]
Computing label-to-label distances: 100%|██████████| 144/144 [00:13<00:00, 10.33it/s]


Computing inter label-to-label distance for Stability & Remote_homology


12it [00:00, 14.07it/s]
Computing label-to-label distances: 100%|██████████| 192/192 [00:18<00:00, 10.23it/s]


Computing inter label-to-label distance for Stability & Secondary_structrue


12it [00:00, 16.80it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:18<00:00,  9.54it/s]


Computing inter label-to-label distance for Stability & Pdbbind


11it [00:00, 13.96it/s]
Computing label-to-label distances: 100%|██████████| 132/132 [00:13<00:00,  9.92it/s]


Computing inter label-to-label distance for Stability & Kinase


2it [00:00, 21.41it/s]
Computing label-to-label distances: 100%|██████████| 24/24 [00:02<00:00, 11.75it/s]


Computing inter label-to-label distance for Fluoresecence & Remote_homology


12it [00:01,  7.01it/s]
Computing label-to-label distances: 100%|██████████| 192/192 [00:21<00:00,  8.99it/s]


Computing inter label-to-label distance for Fluoresecence & Secondary_structrue


12it [00:01,  8.87it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:17<00:00, 10.16it/s]


Computing inter label-to-label distance for Fluoresecence & Pdbbind


11it [00:00, 15.29it/s]
Computing label-to-label distances: 100%|██████████| 132/132 [00:10<00:00, 12.08it/s]


Computing inter label-to-label distance for Fluoresecence & Kinase


2it [00:00,  3.84it/s]
Computing label-to-label distances: 100%|██████████| 24/24 [00:01<00:00, 13.56it/s]


Computing inter label-to-label distance for Remote_homology & Secondary_structrue


15it [00:01, 14.49it/s]
Computing label-to-label distances: 100%|██████████| 240/240 [00:22<00:00, 10.69it/s]


Computing inter label-to-label distance for Remote_homology & Pdbbind


11it [00:01, 10.67it/s]
Computing label-to-label distances: 100%|██████████| 176/176 [00:18<00:00,  9.41it/s]


Computing inter label-to-label distance for Remote_homology & Kinase


2it [00:00, 17.01it/s]
Computing label-to-label distances: 100%|██████████| 32/32 [00:04<00:00,  7.54it/s]


Computing inter label-to-label distance for Secondary_structrue & Pdbbind


11it [00:00, 17.03it/s]
Computing label-to-label distances: 100%|██████████| 165/165 [00:19<00:00,  8.61it/s]


Computing inter label-to-label distance for Secondary_structrue & Kinase


2it [00:00,  6.57it/s]
Computing label-to-label distances: 100%|██████████| 30/30 [00:02<00:00, 14.86it/s]


Computing inter label-to-label distance for Pdbbind & Kinase


2it [00:00, 16.13it/s]
Computing label-to-label distances: 100%|██████████| 22/22 [00:01<00:00, 11.21it/s]


Computing intra label-to-label distance for mlm


15it [00:00, 16.79it/s]
Computing label-to-label distances: 100%|██████████| 105/105 [00:10<00:00,  9.55it/s]


Computing intra label-to-label distance for domain


13it [00:00, 19.79it/s]
Computing label-to-label distances: 100%|██████████| 78/78 [00:06<00:00, 11.35it/s]


Computing intra label-to-label distance for motif


5it [00:01,  2.97it/s]
Computing label-to-label distances: 100%|██████████| 10/10 [00:00<00:00, 10.30it/s]


Computing intra label-to-label distance for region


10it [00:01,  7.30it/s]
Computing label-to-label distances: 100%|██████████| 45/45 [00:03<00:00, 13.43it/s]


Computing intra label-to-label distance for Stability


12it [00:00, 17.72it/s]
Computing label-to-label distances: 100%|██████████| 66/66 [00:05<00:00, 12.90it/s]


Computing intra label-to-label distance for Fluoresecence


12it [00:01,  8.24it/s]
Computing label-to-label distances: 100%|██████████| 66/66 [00:08<00:00,  8.12it/s]


Computing intra label-to-label distance for Remote_homology


16it [00:00, 17.89it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:13<00:00,  8.94it/s]


Computing intra label-to-label distance for Secondary_structrue


15it [00:00, 17.10it/s]
Computing label-to-label distances: 100%|██████████| 105/105 [00:11<00:00,  9.19it/s]


Computing intra label-to-label distance for Pdbbind


11it [00:01, 10.73it/s]
Computing label-to-label distances: 100%|██████████| 55/55 [00:03<00:00, 14.31it/s]


Computing intra label-to-label distance for Kinase


2it [00:00, 17.60it/s]
Computing label-to-label distances: 100%|██████████| 1/1 [00:00<00:00,  4.24it/s]


Finish label embedding in 11.6 mins
Wasserstein embedding...
Finish WTE in 11.6 mins
(1, 6) 
 ==== x ==== ==== x ==== ==== x ==== ==== x ==== ==== x ==== 
Start computing for Pretrain-MultiTask: GO+RMD
PRE-TRAIN domain unique labels: 13
{0: 173, 1: 165, 2: 121, 3: 89, 4: 88, 5: 64, 6: 59, 7: 53, 8: 50, 9: 47, 10: 46, 11: 45, 12: 45}
PRE-TRAIN motif unique labels: 5
{0: 843, 1: 59, 2: 50, 3: 47, 4: 46}
PRE-TRAIN region unique labels: 10
{0: 202, 1: 173, 2: 165, 3: 121, 4: 89, 5: 88, 6: 64, 7: 53, 8: 45, 9: 45}
PRE-TRAIN go unique labels: 9
{0: 347, 1: 229, 2: 133, 3: 89, 4: 59, 5: 50, 6: 47, 7: 46, 8: 45}
Stability unique labels: 12
{0: 101, 1: 87, 2: 83, 3: 57, 4: 53, 5: 34, 6: 27, 7: 20, 8: 14, 9: 11, 10: 7, 11: 6}
Fluoresecence unique labels: 12
{0: 142, 1: 83, 2: 80, 3: 48, 4: 38, 5: 35, 6: 20, 7: 20, 8: 14, 9: 9, 10: 6, 11: 5}
Remote_homology unique labels: 16
{0: 136, 1: 126, 2: 96, 3: 26, 4: 20, 5: 14, 6: 11, 7: 11, 8: 9, 9: 9, 10: 8, 11: 8, 12: 7, 13: 7, 14: 6, 15: 6}
Secondary_

5it [00:01,  4.69it/s]
Computing label-to-label distances: 100%|██████████| 65/65 [00:04<00:00, 13.93it/s]


Computing inter label-to-label distance for domain & region


10it [00:02,  4.43it/s]
Computing label-to-label distances: 100%|██████████| 130/130 [00:11<00:00, 10.90it/s]


Computing inter label-to-label distance for domain & go


9it [00:00, 10.95it/s]
Computing label-to-label distances: 100%|██████████| 117/117 [00:10<00:00, 10.73it/s]


Computing inter label-to-label distance for domain & Stability


12it [00:01,  7.76it/s]
Computing label-to-label distances: 100%|██████████| 156/156 [00:16<00:00,  9.21it/s]


Computing inter label-to-label distance for domain & Fluoresecence


12it [00:01, 10.18it/s]
Computing label-to-label distances: 100%|██████████| 156/156 [00:14<00:00, 11.01it/s]


Computing inter label-to-label distance for domain & Remote_homology


13it [00:01, 12.85it/s]
Computing label-to-label distances: 100%|██████████| 208/208 [00:22<00:00,  9.33it/s]


Computing inter label-to-label distance for domain & Secondary_structrue


13it [00:00, 15.05it/s]
Computing label-to-label distances: 100%|██████████| 195/195 [00:18<00:00, 10.40it/s]


Computing inter label-to-label distance for domain & Pdbbind


11it [00:01, 10.06it/s]
Computing label-to-label distances: 100%|██████████| 143/143 [00:15<00:00,  9.48it/s]


Computing inter label-to-label distance for domain & Kinase


2it [00:00,  7.22it/s]
Computing label-to-label distances: 100%|██████████| 26/26 [00:02<00:00, 10.05it/s]


Computing inter label-to-label distance for motif & region


5it [00:00,  6.89it/s]
Computing label-to-label distances: 100%|██████████| 50/50 [00:03<00:00, 13.50it/s]


Computing inter label-to-label distance for motif & go


5it [00:00,  6.60it/s]
Computing label-to-label distances: 100%|██████████| 45/45 [00:04<00:00, 10.61it/s]


Computing inter label-to-label distance for motif & Stability


5it [00:00, 11.07it/s]
Computing label-to-label distances: 100%|██████████| 60/60 [00:06<00:00,  8.57it/s]


Computing inter label-to-label distance for motif & Fluoresecence


5it [00:00, 19.50it/s]
Computing label-to-label distances: 100%|██████████| 60/60 [00:04<00:00, 13.44it/s]


Computing inter label-to-label distance for motif & Remote_homology


5it [00:00, 14.06it/s]
Computing label-to-label distances: 100%|██████████| 80/80 [00:06<00:00, 12.37it/s]


Computing inter label-to-label distance for motif & Secondary_structrue


5it [00:00,  9.27it/s]
Computing label-to-label distances: 100%|██████████| 75/75 [00:06<00:00, 11.91it/s]


Computing inter label-to-label distance for motif & Pdbbind


5it [00:00, 14.71it/s]
Computing label-to-label distances: 100%|██████████| 55/55 [00:06<00:00,  7.97it/s]


Computing inter label-to-label distance for motif & Kinase


2it [00:01,  1.89it/s]
Computing label-to-label distances: 100%|██████████| 10/10 [00:01<00:00,  7.53it/s]


Computing inter label-to-label distance for region & go


9it [00:00, 13.76it/s]
Computing label-to-label distances: 100%|██████████| 90/90 [00:08<00:00, 10.03it/s]


Computing inter label-to-label distance for region & Stability


10it [00:00, 16.34it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:11<00:00, 10.76it/s]


Computing inter label-to-label distance for region & Fluoresecence


10it [00:01,  5.88it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:09<00:00, 12.40it/s]


Computing inter label-to-label distance for region & Remote_homology


10it [00:02,  4.46it/s]
Computing label-to-label distances: 100%|██████████| 160/160 [00:12<00:00, 12.50it/s]


Computing inter label-to-label distance for region & Secondary_structrue


10it [00:00, 14.24it/s]
Computing label-to-label distances: 100%|██████████| 150/150 [00:15<00:00,  9.43it/s]


Computing inter label-to-label distance for region & Pdbbind


10it [00:01,  5.47it/s]
Computing label-to-label distances: 100%|██████████| 110/110 [00:18<00:00,  6.07it/s]


Computing inter label-to-label distance for region & Kinase


2it [00:00,  2.88it/s]
Computing label-to-label distances: 100%|██████████| 20/20 [00:08<00:00,  2.43it/s]


Computing inter label-to-label distance for go & Stability


9it [00:02,  3.51it/s]
Computing label-to-label distances: 100%|██████████| 108/108 [00:38<00:00,  2.78it/s]


Computing inter label-to-label distance for go & Fluoresecence


9it [00:04,  1.82it/s]
Computing label-to-label distances: 100%|██████████| 108/108 [00:45<00:00,  2.36it/s]


Computing inter label-to-label distance for go & Remote_homology


9it [00:02,  3.90it/s]
Computing label-to-label distances: 100%|██████████| 144/144 [00:55<00:00,  2.59it/s]


Computing inter label-to-label distance for go & Secondary_structrue


9it [00:04,  1.84it/s]
Computing label-to-label distances: 100%|██████████| 135/135 [00:35<00:00,  3.77it/s]


Computing inter label-to-label distance for go & Pdbbind


9it [00:05,  1.69it/s]
Computing label-to-label distances: 100%|██████████| 99/99 [00:17<00:00,  5.55it/s]


Computing inter label-to-label distance for go & Kinase


2it [00:00, 11.29it/s]
Computing label-to-label distances: 100%|██████████| 18/18 [00:01<00:00, 17.60it/s]


Computing inter label-to-label distance for Stability & Fluoresecence


12it [00:00, 19.19it/s]
Computing label-to-label distances: 100%|██████████| 144/144 [00:07<00:00, 19.92it/s]


Computing inter label-to-label distance for Stability & Remote_homology


12it [00:00, 20.46it/s]
Computing label-to-label distances: 100%|██████████| 192/192 [00:09<00:00, 19.29it/s]


Computing inter label-to-label distance for Stability & Secondary_structrue


12it [00:00, 19.09it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:08<00:00, 20.02it/s]


Computing inter label-to-label distance for Stability & Pdbbind


11it [00:00, 21.33it/s]
Computing label-to-label distances: 100%|██████████| 132/132 [00:06<00:00, 19.98it/s]


Computing inter label-to-label distance for Stability & Kinase


2it [00:00, 16.79it/s]
Computing label-to-label distances: 100%|██████████| 24/24 [00:01<00:00, 19.63it/s]


Computing inter label-to-label distance for Fluoresecence & Remote_homology


12it [00:00, 21.02it/s]
Computing label-to-label distances: 100%|██████████| 192/192 [00:09<00:00, 19.97it/s]


Computing inter label-to-label distance for Fluoresecence & Secondary_structrue


12it [00:00, 19.88it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:09<00:00, 19.70it/s]


Computing inter label-to-label distance for Fluoresecence & Pdbbind


11it [00:00, 18.13it/s]
Computing label-to-label distances: 100%|██████████| 132/132 [00:06<00:00, 19.84it/s]


Computing inter label-to-label distance for Fluoresecence & Kinase


2it [00:00, 18.08it/s]
Computing label-to-label distances: 100%|██████████| 24/24 [00:01<00:00, 19.62it/s]


Computing inter label-to-label distance for Remote_homology & Secondary_structrue


15it [00:00, 20.01it/s]
Computing label-to-label distances: 100%|██████████| 240/240 [00:11<00:00, 20.49it/s]


Computing inter label-to-label distance for Remote_homology & Pdbbind


11it [00:00, 21.14it/s]
Computing label-to-label distances: 100%|██████████| 176/176 [00:08<00:00, 20.24it/s]


Computing inter label-to-label distance for Remote_homology & Kinase


2it [00:00, 17.75it/s]
Computing label-to-label distances: 100%|██████████| 32/32 [00:01<00:00, 20.49it/s]


Computing inter label-to-label distance for Secondary_structrue & Pdbbind


11it [00:00, 20.97it/s]
Computing label-to-label distances: 100%|██████████| 165/165 [00:08<00:00, 20.03it/s]


Computing inter label-to-label distance for Secondary_structrue & Kinase


2it [00:00, 17.88it/s]
Computing label-to-label distances: 100%|██████████| 30/30 [00:01<00:00, 20.39it/s]


Computing inter label-to-label distance for Pdbbind & Kinase


2it [00:00, 17.21it/s]
Computing label-to-label distances: 100%|██████████| 22/22 [00:01<00:00, 19.86it/s]


Computing intra label-to-label distance for domain


13it [00:00, 20.72it/s]
Computing label-to-label distances: 100%|██████████| 78/78 [00:03<00:00, 20.70it/s]


Computing intra label-to-label distance for motif


5it [00:00, 19.40it/s]
Computing label-to-label distances: 100%|██████████| 10/10 [00:00<00:00, 20.81it/s]


Computing intra label-to-label distance for region


10it [00:00, 19.31it/s]
Computing label-to-label distances: 100%|██████████| 45/45 [00:02<00:00, 20.22it/s]


Computing intra label-to-label distance for go


9it [00:00, 20.37it/s]
Computing label-to-label distances: 100%|██████████| 36/36 [00:01<00:00, 20.75it/s]


Computing intra label-to-label distance for Stability


12it [00:00, 20.32it/s]
Computing label-to-label distances: 100%|██████████| 66/66 [00:03<00:00, 20.07it/s]


Computing intra label-to-label distance for Fluoresecence


12it [00:00, 20.05it/s]
Computing label-to-label distances: 100%|██████████| 66/66 [00:03<00:00, 19.38it/s]


Computing intra label-to-label distance for Remote_homology


16it [00:00, 21.15it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:05<00:00, 20.20it/s]


Computing intra label-to-label distance for Secondary_structrue


15it [00:00, 20.92it/s]
Computing label-to-label distances: 100%|██████████| 105/105 [00:05<00:00, 19.92it/s]


Computing intra label-to-label distance for Pdbbind


11it [00:00, 19.36it/s]
Computing label-to-label distances: 100%|██████████| 55/55 [00:02<00:00, 20.90it/s]


Computing intra label-to-label distance for Kinase


2it [00:00, 16.35it/s]
Computing label-to-label distances: 100%|██████████| 1/1 [00:00<00:00, 20.47it/s]


Finish label embedding in 10.9 mins
Wasserstein embedding...
Finish WTE in 10.9 mins
(1, 6) 
 ==== x ==== ==== x ==== ==== x ==== ==== x ==== ==== x ==== 
Start computing for Pretrain-MultiTask: MLM+GO+D
PRE-TRAIN mlm unique labels: 15
{2: 148, 10: 117, 6: 116, 8: 101, 9: 98, 4: 95, 5: 59, 14: 54, 1: 49, 3: 47, 7: 46, 12: 45, 0: 45, 11: 13, 13: 12}
PRE-TRAIN domain unique labels: 13
{0: 173, 1: 165, 2: 121, 3: 89, 4: 88, 5: 64, 6: 59, 7: 53, 8: 50, 9: 47, 10: 46, 11: 45, 12: 45}
PRE-TRAIN go unique labels: 9
{0: 347, 1: 229, 2: 133, 3: 89, 4: 59, 5: 50, 6: 47, 7: 46, 8: 45}
Stability unique labels: 12
{0: 101, 1: 87, 2: 83, 3: 57, 4: 53, 5: 34, 6: 27, 7: 20, 8: 14, 9: 11, 10: 7, 11: 6}
Fluoresecence unique labels: 12
{0: 142, 1: 83, 2: 80, 3: 48, 4: 38, 5: 35, 6: 20, 7: 20, 8: 14, 9: 9, 10: 6, 11: 5}
Remote_homology unique labels: 16
{0: 136, 1: 126, 2: 96, 3: 26, 4: 20, 5: 14, 6: 11, 7: 11, 8: 9, 9: 9, 10: 8, 11: 8, 12: 7, 13: 7, 14: 6, 15: 6}
Secondary_structrue unique labels: 15
{9:

13it [00:00, 19.51it/s]
Computing label-to-label distances: 100%|██████████| 195/195 [00:09<00:00, 20.33it/s]


Computing inter label-to-label distance for mlm & go


9it [00:00, 18.74it/s]
Computing label-to-label distances: 100%|██████████| 135/135 [00:06<00:00, 19.86it/s]


Computing inter label-to-label distance for mlm & Stability


12it [00:00, 18.70it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:09<00:00, 19.98it/s]


Computing inter label-to-label distance for mlm & Fluoresecence


12it [00:00, 19.96it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:08<00:00, 20.37it/s]


Computing inter label-to-label distance for mlm & Remote_homology


15it [00:00, 20.03it/s]
Computing label-to-label distances: 100%|██████████| 240/240 [00:12<00:00, 19.97it/s]


Computing inter label-to-label distance for mlm & Secondary_structrue


15it [00:00, 20.93it/s]
Computing label-to-label distances: 100%|██████████| 225/225 [00:11<00:00, 19.81it/s]


Computing inter label-to-label distance for mlm & Pdbbind


11it [00:00, 20.51it/s]
Computing label-to-label distances: 100%|██████████| 165/165 [00:08<00:00, 20.38it/s]


Computing inter label-to-label distance for mlm & Kinase


2it [00:00, 17.53it/s]
Computing label-to-label distances: 100%|██████████| 30/30 [00:01<00:00, 20.37it/s]


Computing inter label-to-label distance for domain & go


9it [00:00, 20.63it/s]
Computing label-to-label distances: 100%|██████████| 117/117 [00:05<00:00, 19.88it/s]


Computing inter label-to-label distance for domain & Stability


12it [00:00, 19.10it/s]
Computing label-to-label distances: 100%|██████████| 156/156 [00:07<00:00, 20.18it/s]


Computing inter label-to-label distance for domain & Fluoresecence


12it [00:00, 18.64it/s]
Computing label-to-label distances: 100%|██████████| 156/156 [00:07<00:00, 20.10it/s]


Computing inter label-to-label distance for domain & Remote_homology


13it [00:00, 20.91it/s]
Computing label-to-label distances: 100%|██████████| 208/208 [00:10<00:00, 19.97it/s]


Computing inter label-to-label distance for domain & Secondary_structrue


13it [00:00, 20.36it/s]
Computing label-to-label distances: 100%|██████████| 195/195 [00:09<00:00, 20.22it/s]


Computing inter label-to-label distance for domain & Pdbbind


11it [00:00, 17.47it/s]
Computing label-to-label distances: 100%|██████████| 143/143 [00:07<00:00, 20.09it/s]


Computing inter label-to-label distance for domain & Kinase


2it [00:00, 15.58it/s]
Computing label-to-label distances: 100%|██████████| 26/26 [00:01<00:00, 19.19it/s]


Computing inter label-to-label distance for go & Stability


9it [00:00, 18.90it/s]
Computing label-to-label distances: 100%|██████████| 108/108 [00:05<00:00, 19.98it/s]


Computing inter label-to-label distance for go & Fluoresecence


9it [00:00, 20.04it/s]
Computing label-to-label distances: 100%|██████████| 108/108 [00:05<00:00, 19.89it/s]


Computing inter label-to-label distance for go & Remote_homology


9it [00:00, 19.24it/s]
Computing label-to-label distances: 100%|██████████| 144/144 [00:07<00:00, 20.21it/s]


Computing inter label-to-label distance for go & Secondary_structrue


9it [00:00, 19.00it/s]
Computing label-to-label distances: 100%|██████████| 135/135 [00:06<00:00, 20.20it/s]


Computing inter label-to-label distance for go & Pdbbind


9it [00:00, 19.54it/s]
Computing label-to-label distances: 100%|██████████| 99/99 [00:04<00:00, 20.26it/s]


Computing inter label-to-label distance for go & Kinase


2it [00:00, 18.00it/s]
Computing label-to-label distances: 100%|██████████| 18/18 [00:00<00:00, 20.48it/s]


Computing inter label-to-label distance for Stability & Fluoresecence


12it [00:00, 19.81it/s]
Computing label-to-label distances: 100%|██████████| 144/144 [00:07<00:00, 20.10it/s]


Computing inter label-to-label distance for Stability & Remote_homology


12it [00:00, 20.10it/s]
Computing label-to-label distances: 100%|██████████| 192/192 [00:09<00:00, 20.37it/s]


Computing inter label-to-label distance for Stability & Secondary_structrue


12it [00:00, 19.63it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:09<00:00, 19.87it/s]


Computing inter label-to-label distance for Stability & Pdbbind


11it [00:00, 20.55it/s]
Computing label-to-label distances: 100%|██████████| 132/132 [00:06<00:00, 20.25it/s]


Computing inter label-to-label distance for Stability & Kinase


2it [00:00, 20.06it/s]
Computing label-to-label distances: 100%|██████████| 24/24 [00:01<00:00, 19.64it/s]


Computing inter label-to-label distance for Fluoresecence & Remote_homology


12it [00:00, 20.55it/s]
Computing label-to-label distances: 100%|██████████| 192/192 [00:09<00:00, 20.37it/s]


Computing inter label-to-label distance for Fluoresecence & Secondary_structrue


12it [00:00, 19.75it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:08<00:00, 20.31it/s]


Computing inter label-to-label distance for Fluoresecence & Pdbbind


11it [00:00, 20.52it/s]
Computing label-to-label distances: 100%|██████████| 132/132 [00:06<00:00, 19.81it/s]


Computing inter label-to-label distance for Fluoresecence & Kinase


2it [00:00, 15.24it/s]
Computing label-to-label distances: 100%|██████████| 24/24 [00:01<00:00, 20.36it/s]


Computing inter label-to-label distance for Remote_homology & Secondary_structrue


15it [00:00, 19.62it/s]
Computing label-to-label distances: 100%|██████████| 240/240 [00:12<00:00, 19.78it/s]


Computing inter label-to-label distance for Remote_homology & Pdbbind


11it [00:00, 20.63it/s]
Computing label-to-label distances: 100%|██████████| 176/176 [00:08<00:00, 19.66it/s]


Computing inter label-to-label distance for Remote_homology & Kinase


2it [00:00, 17.54it/s]
Computing label-to-label distances: 100%|██████████| 32/32 [00:01<00:00, 19.64it/s]


Computing inter label-to-label distance for Secondary_structrue & Pdbbind


11it [00:00, 20.84it/s]
Computing label-to-label distances: 100%|██████████| 165/165 [00:08<00:00, 20.05it/s]


Computing inter label-to-label distance for Secondary_structrue & Kinase


2it [00:00, 11.85it/s]
Computing label-to-label distances: 100%|██████████| 30/30 [00:01<00:00, 20.11it/s]


Computing inter label-to-label distance for Pdbbind & Kinase


2it [00:00, 19.09it/s]
Computing label-to-label distances: 100%|██████████| 22/22 [00:01<00:00, 19.31it/s]


Computing intra label-to-label distance for mlm


15it [00:00, 20.29it/s]
Computing label-to-label distances: 100%|██████████| 105/105 [00:05<00:00, 19.56it/s]


Computing intra label-to-label distance for domain


13it [00:00, 17.87it/s]
Computing label-to-label distances: 100%|██████████| 78/78 [00:03<00:00, 20.47it/s]


Computing intra label-to-label distance for go


9it [00:00, 19.07it/s]
Computing label-to-label distances: 100%|██████████| 36/36 [00:01<00:00, 20.28it/s]


Computing intra label-to-label distance for Stability


12it [00:00, 18.98it/s]
Computing label-to-label distances: 100%|██████████| 66/66 [00:03<00:00, 20.43it/s]


Computing intra label-to-label distance for Fluoresecence


12it [00:00, 19.03it/s]
Computing label-to-label distances: 100%|██████████| 66/66 [00:03<00:00, 19.64it/s]


Computing intra label-to-label distance for Remote_homology


16it [00:00, 20.46it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:05<00:00, 20.09it/s]


Computing intra label-to-label distance for Secondary_structrue


15it [00:00, 19.63it/s]
Computing label-to-label distances: 100%|██████████| 105/105 [00:05<00:00, 19.71it/s]


Computing intra label-to-label distance for Pdbbind


11it [00:00, 19.86it/s]
Computing label-to-label distances: 100%|██████████| 55/55 [00:02<00:00, 20.28it/s]


Computing intra label-to-label distance for Kinase


2it [00:00, 13.43it/s]
Computing label-to-label distances: 100%|██████████| 1/1 [00:00<00:00, 19.55it/s]


Finish label embedding in 5.2 mins
Wasserstein embedding...
Finish WTE in 5.2 mins
(1, 6) 
 ==== x ==== ==== x ==== ==== x ==== ==== x ==== ==== x ==== 
Start computing for Pretrain-MultiTask: RMD
PRE-TRAIN domain unique labels: 13
{0: 173, 1: 165, 2: 121, 3: 89, 4: 88, 5: 64, 6: 59, 7: 53, 8: 50, 9: 47, 10: 46, 11: 45, 12: 45}
PRE-TRAIN motif unique labels: 5
{0: 843, 1: 59, 2: 50, 3: 47, 4: 46}
PRE-TRAIN region unique labels: 10
{0: 202, 1: 173, 2: 165, 3: 121, 4: 89, 5: 88, 6: 64, 7: 53, 8: 45, 9: 45}
Stability unique labels: 12
{0: 101, 1: 87, 2: 83, 3: 57, 4: 53, 5: 34, 6: 27, 7: 20, 8: 14, 9: 11, 10: 7, 11: 6}
Fluoresecence unique labels: 12
{0: 142, 1: 83, 2: 80, 3: 48, 4: 38, 5: 35, 6: 20, 7: 20, 8: 14, 9: 9, 10: 6, 11: 5}
Remote_homology unique labels: 16
{0: 136, 1: 126, 2: 96, 3: 26, 4: 20, 5: 14, 6: 11, 7: 11, 8: 9, 9: 9, 10: 8, 11: 8, 12: 7, 13: 7, 14: 6, 15: 6}
Secondary_structrue unique labels: 15
{9: 55, 3: 50, 1: 46, 14: 45, 8: 42, 7: 38, 13: 38, 0: 32, 6: 30, 11: 28, 

5it [00:00, 18.58it/s]
Computing label-to-label distances: 100%|██████████| 65/65 [00:03<00:00, 19.47it/s]


Computing inter label-to-label distance for domain & region


10it [00:00, 19.15it/s]
Computing label-to-label distances: 100%|██████████| 130/130 [00:06<00:00, 19.86it/s]


Computing inter label-to-label distance for domain & Stability


12it [00:00, 20.23it/s]
Computing label-to-label distances: 100%|██████████| 156/156 [00:08<00:00, 19.46it/s]


Computing inter label-to-label distance for domain & Fluoresecence


12it [00:00, 19.19it/s]
Computing label-to-label distances: 100%|██████████| 156/156 [00:07<00:00, 19.82it/s]


Computing inter label-to-label distance for domain & Remote_homology


13it [00:00, 21.02it/s]
Computing label-to-label distances: 100%|██████████| 208/208 [00:10<00:00, 20.28it/s]


Computing inter label-to-label distance for domain & Secondary_structrue


13it [00:00, 21.54it/s]
Computing label-to-label distances: 100%|██████████| 195/195 [00:10<00:00, 18.65it/s]


Computing inter label-to-label distance for domain & Pdbbind


11it [00:00, 20.10it/s]
Computing label-to-label distances: 100%|██████████| 143/143 [00:07<00:00, 19.91it/s]


Computing inter label-to-label distance for domain & Kinase


2it [00:00, 17.35it/s]
Computing label-to-label distances: 100%|██████████| 26/26 [00:01<00:00, 19.95it/s]


Computing inter label-to-label distance for motif & region


5it [00:00, 19.38it/s]
Computing label-to-label distances: 100%|██████████| 50/50 [00:02<00:00, 19.58it/s]


Computing inter label-to-label distance for motif & Stability


5it [00:00, 18.43it/s]
Computing label-to-label distances: 100%|██████████| 60/60 [00:03<00:00, 18.86it/s]


Computing inter label-to-label distance for motif & Fluoresecence


5it [00:00, 19.41it/s]
Computing label-to-label distances: 100%|██████████| 60/60 [00:02<00:00, 20.39it/s]


Computing inter label-to-label distance for motif & Remote_homology


5it [00:00, 17.09it/s]
Computing label-to-label distances: 100%|██████████| 80/80 [00:03<00:00, 20.38it/s]


Computing inter label-to-label distance for motif & Secondary_structrue


5it [00:00, 18.57it/s]
Computing label-to-label distances: 100%|██████████| 75/75 [00:03<00:00, 20.59it/s]


Computing inter label-to-label distance for motif & Pdbbind


5it [00:00, 20.79it/s]
Computing label-to-label distances: 100%|██████████| 55/55 [00:02<00:00, 20.67it/s]


Computing inter label-to-label distance for motif & Kinase


2it [00:00, 17.26it/s]
Computing label-to-label distances: 100%|██████████| 10/10 [00:00<00:00, 17.44it/s]


Computing inter label-to-label distance for region & Stability


10it [00:00, 19.99it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:05<00:00, 20.47it/s]


Computing inter label-to-label distance for region & Fluoresecence


10it [00:00, 18.89it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:05<00:00, 20.21it/s]


Computing inter label-to-label distance for region & Remote_homology


10it [00:00, 20.23it/s]
Computing label-to-label distances: 100%|██████████| 160/160 [00:07<00:00, 20.60it/s]


Computing inter label-to-label distance for region & Secondary_structrue


10it [00:00, 19.46it/s]
Computing label-to-label distances: 100%|██████████| 150/150 [00:07<00:00, 20.62it/s]


Computing inter label-to-label distance for region & Pdbbind


10it [00:00, 19.26it/s]
Computing label-to-label distances: 100%|██████████| 110/110 [00:05<00:00, 20.35it/s]


Computing inter label-to-label distance for region & Kinase


2it [00:00, 18.09it/s]
Computing label-to-label distances: 100%|██████████| 20/20 [00:01<00:00, 19.51it/s]


Computing inter label-to-label distance for Stability & Fluoresecence


12it [00:00, 21.26it/s]
Computing label-to-label distances: 100%|██████████| 144/144 [00:07<00:00, 19.94it/s]


Computing inter label-to-label distance for Stability & Remote_homology


12it [00:00, 19.62it/s]
Computing label-to-label distances: 100%|██████████| 192/192 [00:09<00:00, 20.39it/s]


Computing inter label-to-label distance for Stability & Secondary_structrue


12it [00:00, 20.11it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:09<00:00, 19.59it/s]


Computing inter label-to-label distance for Stability & Pdbbind


11it [00:00, 20.38it/s]
Computing label-to-label distances: 100%|██████████| 132/132 [00:06<00:00, 20.23it/s]


Computing inter label-to-label distance for Stability & Kinase


2it [00:00,  3.73it/s]
Computing label-to-label distances: 100%|██████████| 24/24 [00:01<00:00, 18.75it/s]


Computing inter label-to-label distance for Fluoresecence & Remote_homology


12it [00:00, 20.59it/s]
Computing label-to-label distances: 100%|██████████| 192/192 [00:09<00:00, 19.67it/s]


Computing inter label-to-label distance for Fluoresecence & Secondary_structrue


12it [00:00, 19.78it/s]
Computing label-to-label distances: 100%|██████████| 180/180 [00:09<00:00, 19.86it/s]


Computing inter label-to-label distance for Fluoresecence & Pdbbind


11it [00:00, 20.06it/s]
Computing label-to-label distances: 100%|██████████| 132/132 [00:06<00:00, 20.20it/s]


Computing inter label-to-label distance for Fluoresecence & Kinase


2it [00:00, 16.18it/s]
Computing label-to-label distances: 100%|██████████| 24/24 [00:01<00:00, 20.45it/s]


Computing inter label-to-label distance for Remote_homology & Secondary_structrue


15it [00:00, 19.43it/s]
Computing label-to-label distances: 100%|██████████| 240/240 [00:11<00:00, 20.34it/s]


Computing inter label-to-label distance for Remote_homology & Pdbbind


11it [00:00, 20.56it/s]
Computing label-to-label distances: 100%|██████████| 176/176 [00:09<00:00, 19.35it/s]


Computing inter label-to-label distance for Remote_homology & Kinase


2it [00:00, 15.44it/s]
Computing label-to-label distances: 100%|██████████| 32/32 [00:01<00:00, 19.87it/s]


Computing inter label-to-label distance for Secondary_structrue & Pdbbind


11it [00:00, 21.32it/s]
Computing label-to-label distances: 100%|██████████| 165/165 [00:08<00:00, 20.42it/s]


Computing inter label-to-label distance for Secondary_structrue & Kinase


2it [00:00, 17.07it/s]
Computing label-to-label distances: 100%|██████████| 30/30 [00:01<00:00, 19.65it/s]


Computing inter label-to-label distance for Pdbbind & Kinase


2it [00:00, 17.63it/s]
Computing label-to-label distances: 100%|██████████| 22/22 [00:01<00:00, 20.27it/s]


Computing intra label-to-label distance for domain


13it [00:00, 20.69it/s]
Computing label-to-label distances: 100%|██████████| 78/78 [00:03<00:00, 19.61it/s]


Computing intra label-to-label distance for motif


5it [00:00, 19.11it/s]
Computing label-to-label distances: 100%|██████████| 10/10 [00:00<00:00, 18.61it/s]


Computing intra label-to-label distance for region


10it [00:00, 20.13it/s]
Computing label-to-label distances: 100%|██████████| 45/45 [00:02<00:00, 20.43it/s]


Computing intra label-to-label distance for Stability


12it [00:00, 20.34it/s]
Computing label-to-label distances: 100%|██████████| 66/66 [00:03<00:00, 19.21it/s]


Computing intra label-to-label distance for Fluoresecence


12it [00:00, 18.21it/s]
Computing label-to-label distances: 100%|██████████| 66/66 [00:03<00:00, 18.95it/s]


Computing intra label-to-label distance for Remote_homology


16it [00:00, 20.57it/s]
Computing label-to-label distances: 100%|██████████| 120/120 [00:05<00:00, 20.18it/s]


Computing intra label-to-label distance for Secondary_structrue


15it [00:00, 20.46it/s]
Computing label-to-label distances: 100%|██████████| 105/105 [00:05<00:00, 20.78it/s]


Computing intra label-to-label distance for Pdbbind


11it [00:00, 20.92it/s]
Computing label-to-label distances: 100%|██████████| 55/55 [00:02<00:00, 18.55it/s]


Computing intra label-to-label distance for Kinase


2it [00:00, 16.27it/s]
Computing label-to-label distances: 100%|██████████| 1/1 [00:00<00:00, 18.33it/s]


Finish label embedding in 4.5 mins
Wasserstein embedding...
Finish WTE in 4.5 mins
(1, 6) 
 ==== x ==== ==== x ==== ==== x ==== ==== x ==== ==== x ==== 


In [6]:
# Show the `wte_distance` array built by an earlier cell (bare last expression,
# so the notebook renders its repr). The saved output is a 4 x 6 float array.
# NOTE(review): each "Pretrain-MultiTask" run in the log prints a (1, 6) shape,
# so rows are presumably one per pre-training configuration and columns one per
# downstream task (Stability ... Kinase) -- confirm against the cell that fills it.
wte_distance

array([[38.40540346, 44.79597617, 33.85493267, 28.63922014, 24.94117958,
        32.10071586],
       [46.82312473, 48.46425588, 40.7069211 , 36.05075889, 31.85468649,
        37.19391109],
       [46.34556328, 49.16766106, 41.38405414, 33.45248207, 30.8719547 ,
        36.89193618],
       [45.22778648, 48.96813173, 41.61017046, 36.56553857, 32.70094148,
        36.49767181]])

In [7]:
# Show the `wte_t2t_distances_` array built by an earlier cell (bare last
# expression, so the notebook renders its repr). The saved output is a
# 5 x 6 float32 array.
# NOTE(review): "t2t" presumably means task-to-task, with the 6 columns matching
# the six downstream tasks; what the 5 rows index is not visible here -- confirm
# against the cell that computes it.
wte_t2t_distances_

array([[61.670288, 63.41589 , 56.41153 , 50.012352, 45.240467, 49.394222],
       [56.66255 , 57.94836 , 52.118855, 46.64529 , 42.66111 , 46.740273],
       [47.307526, 52.377655, 44.003674, 41.161655, 38.047565, 42.479202],
       [44.199158, 48.80211 , 40.563343, 36.305386, 35.674587, 40.402863],
       [40.889996, 47.152973, 37.526886, 34.33467 , 35.356403, 40.64837 ]],
      dtype=float32)