## 1.多头注意力


In [None]:
import torch
import math 
class multiheadattention(torch.nn.Module):
    """Multi-head scaled dot-product self-attention with a fused Q/K/V projection.

    Args:
        d: Model (embedding) dimension of both the input and the output.
        h: Number of attention heads. Must be positive and divide ``d`` evenly.

    Raises:
        ValueError: If ``h`` is not positive or ``d`` is not divisible by ``h``.
    """

    def __init__(self, d, h):
        super().__init__()
        # Fail early: otherwise ``d // h`` silently truncates and the problem
        # only surfaces later as an opaque ``view`` size error in ``forward``.
        if h <= 0 or d % h != 0:
            raise ValueError(
                f"d ({d}) must be divisible by a positive number of heads h ({h})"
            )
        self.d = d
        self.h = h
        self.k = d // h  # per-head dimension

        # One linear layer produces Q, K and V side by side along the last axis.
        self.wqkv = torch.nn.Linear(d, d * 3)
        self.wo = torch.nn.Linear(d, d)

    def forward(self, x, mask=None):
        """Apply self-attention to ``x``.

        Args:
            x: Tensor of shape ``(B, L, d)``.
            mask: Optional boolean tensor broadcastable to ``(B, h, L, L)``.
                Positions where the mask is ``True`` are excluded from
                attention.

        Returns:
            A tuple ``(output, attention_weight)`` where ``output`` has shape
            ``(B, L, d)`` and ``attention_weight`` has shape ``(B, h, L, L)``.
        """
        B, L, D = x.shape
        qkv = self.wqkv(x)
        # Split the fused projection and move the head axis forward:
        # (B, L, d) -> (B, L, h, k) -> (B, h, L, k).
        q, k, v = (
            part.view(B, L, self.h, self.k).transpose(1, 2)
            for part in torch.chunk(qkv, 3, dim=-1)
        )

        attention_score = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(self.k)
        if mask is not None:
            # Use the most negative finite value of the score dtype so the
            # fill is also representable in reduced precision (e.g. float16),
            # where a hard-coded -1e9 would overflow.
            fill_value = torch.finfo(attention_score.dtype).min
            attention_score = torch.masked_fill(
                attention_score, mask=mask, value=fill_value
            )
        attention_weight = torch.softmax(attention_score, dim=-1)

        # Merge heads back: (B, h, L, k) -> (B, L, h, k) -> (B, L, d).
        context = (
            torch.matmul(attention_weight, v)
            .transpose(1, 2)
            .contiguous()
            .view(B, L, D)
        )
        output = self.wo(context)
        return output, attention_weight
# Smoke test: push one random batch through the attention module and report
# the shapes of the two tensors it returns.
batch_size, max_seq_len = 5, 10
d_model, head = 64, 4

x = torch.randn(batch_size, max_seq_len, d_model)

attention_model = multiheadattention(d=d_model, h=head)
output, attention = attention_model(x)

print("代码运行成功！")
print("输出张量的形状:", output.shape)
print("注意力权重的形状:", attention.shape)

代码运行成功！
输出张量的形状: torch.Size([5, 10, 64])
注意力权重的形状: torch.Size([5, 4, 10, 10])


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class MHA(nn.Module):
    """Multi-head scaled dot-product self-attention with a fused Q/K/V projection.

    Args:
        d_model: Model (embedding) dimension of both the input and the output.
        num_heads: Number of attention heads. Must be positive and divide
            ``d_model`` evenly.

    Raises:
        ValueError: If ``num_heads`` is not positive or ``d_model`` is not
            divisible by ``num_heads``.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        # Fail early instead of letting ``view`` raise a confusing size error
        # during the first forward pass.
        if num_heads <= 0 or d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive "
                f"num_heads ({num_heads})"
            )
        self.dim = d_model
        self.num_heads = num_heads
        self.d_k = d_model // num_heads  # per-head dimension

        # One linear layer produces Q, K and V side by side along the last axis.
        self.wqkv = nn.Linear(d_model, d_model * 3)
        self.wo = nn.Linear(d_model, d_model)

    def forward(self, x, mask=None):
        """Apply self-attention to ``x``.

        Args:
            x: Tensor of shape ``(B, L, d_model)``.
            mask: Optional boolean tensor broadcastable to
                ``(B, num_heads, L, L)``. Positions where the mask is ``True``
                are excluded from attention.

        Returns:
            A tuple ``(atte_weight, output)`` -- attention weights first --
            where ``atte_weight`` has shape ``(B, num_heads, L, L)`` and
            ``output`` has shape ``(B, L, d_model)``.
        """
        B, L, D = x.shape
        qkv = self.wqkv(x)
        # Split the fused projection and move the head axis forward:
        # (B, L, d_model) -> (B, L, heads, d_k) -> (B, heads, L, d_k).
        q, k, v = (
            part.view(B, L, self.num_heads, self.d_k).transpose(1, 2)
            for part in torch.chunk(qkv, 3, dim=-1)
        )

        scores = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(self.d_k)

        if mask is not None:
            # Masked logits must be hugely negative so softmax drives their
            # weight to zero. (Filling with a tiny positive number such as
            # 1e-9 leaves them at roughly zero logit, i.e. effectively
            # unmasked.) The dtype's most negative finite value also stays
            # representable in reduced precision.
            fill_value = torch.finfo(scores.dtype).min
            scores = torch.masked_fill(scores, mask, fill_value)

        atte_weight = F.softmax(scores, dim=-1)

        # Merge heads back: (B, heads, L, d_k) -> (B, L, heads, d_k) -> (B, L, d_model).
        context = (
            torch.matmul(atte_weight, v)
            .transpose(1, 2)
            .contiguous()
            .view(B, L, D)
        )
        output = self.wo(context)

        return atte_weight, output
# --- Smoke test ---
batch_size = 2
d_model = 10
head = 2
max_seq_len = 5
x = torch.randn(batch_size, max_seq_len, d_model)

attention_model = MHA(d_model, head)
# MHA.forward returns (attention_weights, output) in that order, so unpack
# accordingly; otherwise the two shapes printed below are swapped.
attention, output = attention_model(x)

print("代码运行成功！")
print("输出张量的形状:", output.shape)
print("注意力权重的形状:", attention.shape)

## 2.AUC

In [None]:
import numpy as np
 
def auc_rank(labels, scores):
    """Compute ROC AUC with the rank-based (Mann-Whitney U) method.

    Samples are sorted by score in ascending order and scanned once. Each
    positive contributes the number of negatives with a strictly lower score
    (correctly ranked pairs) plus one half for every negative with exactly the
    same score (tied pairs). The total is divided by the number of
    positive/negative pairs.

    Args:
        labels: List[int] or 1-D numpy array of ground-truth labels in {0, 1}.
        scores: List[float] or 1-D numpy array of predicted scores; a larger
            score means "more likely positive". Must have the same shape as
            ``labels``.

    Returns:
        The AUC as a float in [0, 1], or NaN when the input contains no
        positive or no negative sample (AUC is undefined in that case).

    Raises:
        ValueError: If ``labels`` and ``scores`` have different shapes.

    Note:
        Intermediate values are printed to stdout for inspection.
    """
    # Convert to numpy arrays for sorting and vectorised comparisons.
    labels = np.asarray(labels)
    scores = np.asarray(scores)
    if labels.shape != scores.shape:
        raise ValueError(
            f"labels and scores must have the same shape, "
            f"got {labels.shape} and {scores.shape}"
        )
    print(f"labels:{labels}")
    print(f"scores:{scores}")

    # Indices that sort the scores ascending; a stable sort keeps the result
    # deterministic when scores are tied.
    order = np.argsort(scores, kind="stable")
    print(f"order:{order}")

    # Labels (and scores) rearranged into ascending-score order.
    labels_sorted = labels[order]
    print(f"labels_sorted:{labels_sorted}")
    scores_sorted = scores[order]

    # Number of positives |P|.
    n_pos = np.sum(labels_sorted == 1)
    print(f"n_pos:{n_pos}")

    # Number of negatives |N|.
    n_neg = np.sum(labels_sorted == 0)
    print(f"n_neg:{n_neg}")

    # Without at least one positive and one negative there are no pairs to
    # rank; return NaN explicitly instead of dividing by zero.
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    # Negatives seen so far, i.e. with a strictly lower score than the
    # current tie group.
    neg_count = 0

    # Correctly ordered (positive, negative) pairs; tied pairs count 0.5.
    correct = 0.0

    # Scan from low to high score, one group of equal scores at a time, so
    # the outcome does not depend on how ties happen to be ordered.
    total = len(labels_sorted)
    start = 0
    while start < total:
        end = start + 1
        while end < total and scores_sorted[end] == scores_sorted[start]:
            end += 1

        group = labels_sorted[start:end]
        pos_in_group = int(np.sum(group == 1))
        neg_in_group = int(np.sum(group == 0))

        # Every positive in the group beats all earlier negatives and ties
        # with the negatives that share its score.
        correct += pos_in_group * (neg_count + 0.5 * neg_in_group)
        neg_count += neg_in_group
        start = end

    # AUC = correctly ordered pairs / all positive-negative pairs.
    # Python ints avoid fixed-width overflow in the pair count.
    return correct / (int(n_pos) * int(n_neg))

# Worked example: eight samples (three positives, five negatives) scored by a
# hypothetical model, then evaluated with the rank-based AUC.
label = [0, 1, 0, 0, 1, 0, 0, 1]
q = [0.1, 0.9, 0.2, 0.8, 1, 0.2, 0.3, 0.8]

auc = auc_rank(labels=label, scores=q)
print(f"auc:{auc}")