In [None]:
# k-means聚类过程中，L2归一化的顺序研究
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import os

# 设置随机数种子
def set_random_seed(seed: int) -> None:
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU and CUDA generators with the same value, exports ``PYTHONHASHSEED``,
    and makes a best-effort attempt to switch cuDNN to deterministic mode.

    Args:
        seed: Value applied to every generator.
    """
    # NOTE: the interpreter reads PYTHONHASHSEED at start-up, so this export
    # is only picked up by processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)                 # Python's built-in generator
    np.random.seed(seed)              # NumPy's global generator
    torch.manual_seed(seed)           # PyTorch generator
    torch.cuda.manual_seed(seed)      # current GPU
    torch.cuda.manual_seed_all(seed)  # every GPU
    try:
        # Turn off cuDNN autotuning and request deterministic kernels.
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
    except (AttributeError, RuntimeError):
        # Best effort only: the backend may be missing or its flags frozen.
        # Unlike a bare ``except``, this no longer swallows
        # KeyboardInterrupt / SystemExit.
        pass

# set_random_seed(0)  # uncomment for a reproducible run

# Problem size: batch_size samples, each with L feature vectors of width dim.
batch_size, L, dim = 1, 4, 8

# Feature extractor under study: a lone GELU activation. A bias-free
# nn.Linear(dim, dim) can be placed in front of it to vary the experiment.
net = nn.Sequential(nn.GELU())

# Random features pushed through the network.
data = net(torch.randn(batch_size, L, dim))
print('data: \n', data)

# 1. Normalize the features first, then build the cluster center from the
#    unit vectors and normalize that center.
data_norm = F.normalize(data, p=2.0, dim=-1)            # [B, L, dim]
print('data_norm: \n', data_norm)

center_prenorm = F.normalize(
    torch.sum(data_norm, dim=-2, keepdim=True),         # [B, 1, dim]
    p=2.0,
    dim=-1,
)
print('pre-normal center: \n', center_prenorm)

# 2. Build the cluster center from the raw features first, then normalize
#    the center.
center_postnorm = F.normalize(
    torch.sum(data, dim=-2, keepdim=True),              # [B, 1, dim]
    p=2.0,
    dim=-1,
)
print('post-normal center: \n', center_postnorm)

# Cosine similarity between the two unit-length centers.
cosim = torch.einsum('bld,bsd->bls', center_prenorm, center_postnorm)
print('cosim: \n', cosim)

In [33]:
# 特征单位化的元素相加，其归一化和与原始元素的余弦相似度研究
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import os

# 设置随机数种子
def set_random_seed(seed: int) -> None:
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU and CUDA generators with the same value, exports ``PYTHONHASHSEED``,
    and makes a best-effort attempt to switch cuDNN to deterministic mode.

    Args:
        seed: Value applied to every generator.
    """
    # NOTE: the interpreter reads PYTHONHASHSEED at start-up, so this export
    # is only picked up by processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)                 # Python's built-in generator
    np.random.seed(seed)              # NumPy's global generator
    torch.manual_seed(seed)           # PyTorch generator
    torch.cuda.manual_seed(seed)      # current GPU
    torch.cuda.manual_seed_all(seed)  # every GPU
    try:
        # Turn off cuDNN autotuning and request deterministic kernels.
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
    except (AttributeError, RuntimeError):
        # Best effort only: the backend may be missing or its flags frozen.
        # Unlike a bare ``except``, this no longer swallows
        # KeyboardInterrupt / SystemExit.
        pass

# set_random_seed(0)  # uncomment for a reproducible run

# Experiment size. Only L and num_class are used below; batch_size and dim
# are kept so the notebook namespace stays the same.
batch_size, L, dim, num_class = 1, 4, 8, 10

# Class id of each of the L samples.
# Random alternative: torch.randint(0, num_class, (L, ))
data_index = torch.tensor([0, 0, 9, 1])
print('data_index: \n', data_index)

# One-hot rows picked from the identity matrix, perturbed with small Gaussian
# noise (use torch.zeros_like(data_onehot) for a noise-free run), then scaled
# to unit length.
eyes = torch.eye(num_class)
data_onehot = eyes.index_select(0, data_index)          # [L, num_class]
data_noise = 0.1 * torch.randn_like(data_onehot)
data = F.normalize(torch.add(data_onehot, data_noise), dim=-1)
# print('data: \n', data)

# Unit-length center of all samples, and the cosine similarity of every
# sample to that center.
center = F.normalize(torch.sum(data, dim=0, keepdim=True), dim=-1)  # [1, num_class]
affinity = torch.einsum('ld,sd->sl', data, center)      # [1, L]
print('affinity: \n', affinity)


data_index: 
 tensor([0, 0, 9, 1])
affinity: 
 tensor([[0.7410, 0.8091, 0.5262, 0.3495]])
