In [1]:
import os
import sys

local_path = os.getcwd()
# 将项目主目录路径添加到 Python 路径
os.chdir("../../")  # 使用相对路径将工作目录切换到 project 文件夹
project_path = os.path.abspath(os.path.join(local_path, "../../"))
sys.path.append(project_path)   #将模块查找路径切换

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from models import modelset
from train.train import train_FBM
from train.train import DFBM
from utils import *

In [2]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
set_seed(42)

Using device: cuda:1


In [3]:
class_counts = [100]*10
datatype = 'KMNIST'

train_loader = get_dataloader(datatype, batch_size=64, train=True, class_counts=class_counts)
#train_loader = get_dataloader(datatype, batch_size=64, train=True)
test_loader = get_dataloader(datatype, batch_size=64, train=False)

data_iter = iter(train_loader)
images, labels = next(data_iter)
batch, channel, large, _ = images.shape

input_size = channel*large**2

# 标签修改
将标签进行修改，只考虑其中选中的标签，将其它标签置0

In [4]:
import torch
import torch.nn.functional as F

def efface_label(selected_labels, labels, num_classes):
    """
    根据指定的标签列表对输入标签进行处理，将指定的标签转换为 one-hot 编码，
    其余未指定的标签对应的 one-hot 编码保持全零。

    参数:
        selected_labels (list or set): 一个包含指定标签的列表或集合。只有这些标签会被转换为 one-hot 编码，其余标签的 one-hot 编码保持全零。
        labels (torch.Tensor): 输入的标签张量，形状为 (batch_size,)，每个值是一个整数，表示标签类别。
        num_classes (int): 标签的类别总数，用于生成 one-hot 编码的长度。

    返回:
        torch.Tensor: 处理后的 one-hot 编码张量，形状为 (batch_size, num_classes)。其中，
                      - 如果标签在 `selected_labels` 中，则生成对应的 one-hot 编码。
                      - 如果标签不在 `selected_labels` 中，则对应的行全为 0。
    """
    # 创建一个全零的 one-hot 编码张量，形状为 (batch_size, num_classes)
    one_hot_labels = torch.zeros((labels.size(0), num_classes))

    # 遍历每个输入标签
    for i, label in enumerate(labels):
        # 如果标签在 selected_labels 中，将其转换为 one-hot 编码
        if label.item() in selected_labels:
            one_hot_labels[i] = F.one_hot(label, num_classes=num_classes).float()

    return one_hot_labels

label = efface_label([1, 2], labels, 10)

# FBM树模型
构建5个FBM，神经网络分类两组，并且将其它的类别分为一类。然后将这5个网络的输出结果构建为一个整体的向量，将这个向量通过一个全连接层进行分类。

In [5]:
'''
    将所有可能的两两组合情况进行枚举
'''

# 构造所有可能的两两排列组合
import itertools

# 生成 0-9 的所有两两组合（不考虑顺序）
all_combinations = list(itertools.combinations(range(10), 2))

# 函数：检查是否覆盖所有数字 0-9
def covers_all_digits(groups):
    # 获取所有组合中的数字
    digits = set()
    for group in groups:
        digits.update(group)
    # 检查是否覆盖 0-9
    return digits == set(range(10))

# 从所有组合中选择 5 组
valid_groups = []
for groups in itertools.combinations(all_combinations, 5):
    if covers_all_digits(groups):
        valid_groups.append(groups)

# 输出结果
print(f"Total valid combinations: {len(valid_groups)}")
for i, groups in enumerate(valid_groups[:5]):  # 示例输出前 5 种可能情况
    print(f"Valid set {i + 1}: {groups}")


Total valid combinations: 945
Valid set 1: ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9))
Valid set 2: ((0, 1), (2, 3), (4, 5), (6, 8), (7, 9))
Valid set 3: ((0, 1), (2, 3), (4, 5), (6, 9), (7, 8))
Valid set 4: ((0, 1), (2, 3), (4, 6), (5, 7), (8, 9))
Valid set 5: ((0, 1), (2, 3), (4, 6), (5, 8), (7, 9))


In [10]:
'''
    一次完整的训练测试过程
'''


NN_outsize_list = [100, 100, 100, 100, 100]
df_list = [0.45, 0.45, 0.45, 0.45, 0.45] 
alpha_list = [1.0, 1.0, 1.0, 1.0, 1.0]
sel_label = valid_groups[34]

from models import modelset as models
from loss.loss import FBMLoss
from loss.loss import test_accuracy

par_model = []
for NN_outsize, df, alpha, sel_label in zip(NN_outsize_list, df_list, alpha_list, sel_label):
    model = modelset.FBMLayer(input_size, NN_outsize).to(device)
    criterion = FBMLoss(NN_outsize, 0.01, df=df, alpha=alpha, if_onehot=True)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    model.train()
    # 训练模型
    for epoch in range(30):
        for images, labels in train_loader:
            # 将图像和标签移动到 GPU 上
            images = images.view(-1, input_size).to(device)  # 展平图像并转移到 GPU
            labels = efface_label([1, 2], labels, 10)
            labels = labels.to(device)  # 标签移动到 GPU
            #labels_one_hot = F.one_hot(labels, num_classes=num_classes).float()
            
            # 前向传播
            outputs = model(images)
            #loss = criterion(outputs, labels_one_hot, model.linear.weight)
            loss = criterion(outputs, labels, model.linear.weight)
            
            # 反向传播和优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
    par_model.append(model)

par_model = models.ParallelNetworks(par_model).eval()

from models import MLP
# 定义损失函数和优化器
modelout = MLP(sum(NN_outsize_list), 10).to(device)
criterion2 = nn.CrossEntropyLoss()  # 使用交叉熵损失
optimizer = optim.Adam(modelout.parameters(), lr=0.01)  # 使用随机梯度下降优化器

modelout.train()
# 训练模型
max_accury = 0.0
epochs = 50
for epoch in range(epochs):
    for images, labels in train_loader:
        # 将图像展平为一维向量，并将标签进行 one-hot 编码
        images = images.view(-1, input_size).to(device)  # 展平图像
        labels_one_hot = F.one_hot(labels, num_classes=10).float().to(device)  # 将标签转换为 one-hot 编码

        # 前向传播
        with torch.no_grad():
            deal_images = par_model(images)
            #deal_images = model6(deal_images)

        outputs = modelout(deal_images)

        # 计算损失
        loss = criterion2(outputs, labels_one_hot)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if epoch % 10 == 0:
        total_model = models.ModelPipeline()
        total_model.add_model(par_model)
        total_model.add_model(modelout)
        max_accury = max(test_accuracy(total_model, test_loader, device), max_accury)

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {max_accury:.4f}")

Epoch [1/50], Loss: 0.5664
Epoch [2/50], Loss: 0.5664
Epoch [3/50], Loss: 0.5664
Epoch [4/50], Loss: 0.5664
Epoch [5/50], Loss: 0.5664
Epoch [6/50], Loss: 0.5664
Epoch [7/50], Loss: 0.5664
Epoch [8/50], Loss: 0.5664
Epoch [9/50], Loss: 0.5664
Epoch [10/50], Loss: 0.5664
Epoch [11/50], Loss: 0.6076
Epoch [12/50], Loss: 0.6076
Epoch [13/50], Loss: 0.6076
Epoch [14/50], Loss: 0.6076
Epoch [15/50], Loss: 0.6076
Epoch [16/50], Loss: 0.6076
Epoch [17/50], Loss: 0.6076
Epoch [18/50], Loss: 0.6076
Epoch [19/50], Loss: 0.6076
Epoch [20/50], Loss: 0.6076
Epoch [21/50], Loss: 0.6269
Epoch [22/50], Loss: 0.6269
Epoch [23/50], Loss: 0.6269
Epoch [24/50], Loss: 0.6269
Epoch [25/50], Loss: 0.6269
Epoch [26/50], Loss: 0.6269
Epoch [27/50], Loss: 0.6269
Epoch [28/50], Loss: 0.6269
Epoch [29/50], Loss: 0.6269
Epoch [30/50], Loss: 0.6269
Epoch [31/50], Loss: 0.6282
Epoch [32/50], Loss: 0.6282
Epoch [33/50], Loss: 0.6282
Epoch [34/50], Loss: 0.6282
Epoch [35/50], Loss: 0.6282
Epoch [36/50], Loss: 0.6282
E

In [None]:
class ParallelNetworks(nn.Module):
    def __init__(self, networks):
        """
        初始化多网络模块。

        参数:
            networks (list): 一个包含多个神经网络的列表，每个网络将接收相同的输入。
        """
        super(ParallelNetworks, self).__init__()
        self.networks = nn.ModuleList(networks)  # 将网络列表包装成 nn.ModuleList

    def forward(self, x):
        """
        前向传播，同时运行所有网络，并将结果拼接。

        参数:
            x (torch.Tensor): 输入张量，形状为 [batch_size, ...]。

        返回:
            torch.Tensor: 拼接后的输出张量。
        """
        # 同时运行多个网络，并将它们的输出收集到一个列表中
        outputs = [network(x) for network in self.networks]
        # 沿着最后一维 (feature 维度) 拼接所有网络的输出
        return torch.cat(outputs, dim=1)

model = ParallelNetworks([model1, model2, model3, model4, model5])

In [None]:
'''用于测试正确性的函数'''

def test_accuracy(model, test_loader):
    # 准确率计数
    correct = 0
    total = 0

    # 得到inout_size
    data_iter = iter(train_loader)
    images, labels = next(data_iter)
    batch, channel, large, _ = images.shape
    input_size = channel*large**2

    # 禁用梯度计算，加速测试过程
    with torch.no_grad():
        for images, labels in test_loader:
            # 将数据加载到 GPU
            images = images.view(-1, input_size).to(device)
            labels = labels.to(device)

            # 前向传播
            outputs = model(images)
            
            # 获取预测结果
            _, predicted = torch.max(outputs, 1)
            
            # 更新计数
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # 计算准确率
    accuracy = 1.0 * correct / total
    #print(f'Accuracy on the test dataset: {accuracy:.2f}%') 

    return accuracy

from models import modelset as models

total_model = models.ModelPipeline()
total_model.add_model(models.ParallelNetworks([model1, model2, model3, model4, model5]))
total_model.add_model(modelout)
test_accuracy(total_model, test_loader)

0.6441