## 测试不同深度神经网络模型在不同数据上的性能
既对比模型在相同数据上的性能，也从侧面反映数据划分是否存在问题

In [None]:
import os
import sys

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Put the project root (the parent of the current working directory) on the
# module search path so that project-local modules can be imported below.
PROJECT_DIR = os.path.split(os.getcwd())[0]
sys.path.extend([PROJECT_DIR])
print(PROJECT_DIR)

from utils import read_options

In [None]:
# Load the experiment setup through the project-local `read_options` helper.
# As used further down in this notebook: `config` is indexed by string keys,
# `cluster_partitioner` supplies the dataloaders for the partitioned-data run,
# and `model` is called with no arguments to create a new network instance.
config, cluster_partitioner, model = read_options()

In [None]:
# Experiment sizes taken from the loaded configuration.
num_cluster = config['num_cluster']
num_client = config['num_client']
batch_size = config['local_bs']
num_client_per_cluster = int(num_client / num_cluster)

# Prefer the first CUDA device and fall back to the CPU when CUDA is absent.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# One client index per cluster, spaced `num_client_per_cluster` apart
# (0, n, 2n, ...). NOTE(review): presumably the first client of each cluster;
# confirm against the partitioner's client numbering.
client_list = [num_client_per_cluster * cluster_idx for cluster_idx in range(num_cluster)]

# Directory that holds the checkpoints written and read by this notebook.
model_file_dir = os.path.join(
    os.path.join(PROJECT_DIR, 'result', 'models'),
    'notebook',
    'model_test',
)

In [None]:
def train_model_init(model_name: str):
    """Create a model with its loss and optimizer, restoring saved weights if present.

    The checkpoint is looked up in the module-level ``model_file_dir`` (the
    same directory ``save_model`` writes to) under the file name
    ``"<dataset>_<model>_<model_name>.pth"``, where the first two parts come
    from ``config``.

    Args:
        model_name: Suffix that distinguishes this model's checkpoint file.

    Returns:
        A tuple ``(train_model, train_criteria, train_optimizer, model_existed)``.
        ``model_existed`` is True when a checkpoint file was found and loaded.
    """
    train_model = model()
    model_file_path = os.path.join(
        model_file_dir,
        "{}_{}_{}.pth".format(config['dataset'], config['model'], model_name),
    )

    model_existed = os.path.exists(model_file_path)
    if model_existed:
        print("Loading model from {}".format(model_file_path))
        # A checkpoint saved after GPU training stores CUDA tensors. Mapping
        # them to the CPU lets the file load on hosts without CUDA; train()
        # and test() move the model to the active device afterwards.
        state_dict = torch.load(model_file_path, map_location='cpu')
        train_model.load_state_dict(state_dict)

    train_criteria = torch.nn.CrossEntropyLoss()
    train_optimizer = torch.optim.SGD(train_model.parameters(), lr=0.001)
    return train_model, train_criteria, train_optimizer, model_existed

In [None]:
def origin_data_init():
    """Build train and test dataloaders over the unpartitioned CIFAR-100 data.

    The dataset root is ``<PROJECT_DIR>/data/<config['dataset']>/raw`` and both
    splits are requested with ``download=True``. The same ToTensor + Normalize
    preprocessing is applied to the train and the test split.

    Returns:
        A tuple ``(trainloader, testloader)``. Both use the module-level
        ``batch_size`` and two worker processes; only the train loader
        shuffles.
    """
    data_root = os.path.join(PROJECT_DIR, 'data', config['dataset'], 'raw')

    # Per-channel mean and standard deviation passed to Normalize.
    channel_mean = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
    channel_std = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    def _make_loader(is_train):
        # Shuffle only the training split; the test split keeps dataset order.
        split = torchvision.datasets.CIFAR100(
            root=data_root, train=is_train, download=True, transform=preprocess
        )
        return DataLoader(split, batch_size=batch_size, shuffle=is_train, num_workers=2)

    # Tuple elements are evaluated left to right: train split first, then test.
    return _make_loader(True), _make_loader(False)

In [None]:
def train(train_model: torch.nn.Module, train_criteria, train_optimizer, trainloader, epoch):
    """Train ``train_model`` on ``trainloader`` for ``epoch`` epochs.

    The model is switched to training mode and moved to the module-level
    ``device``. Every 10 epochs the mean per-batch loss accumulated over those
    10 epochs is printed and the accumulator is reset.

    Args:
        train_model: Network to optimise (modified in place).
        train_criteria: Loss callable applied as ``train_criteria(outputs, targets)``.
        train_optimizer: Optimizer that updates the parameters of ``train_model``.
        trainloader: Iterable yielding ``(inputs, targets)`` batches.
        epoch: Number of passes to make over ``trainloader``.
    """
    log_every = 10  # number of epochs between two loss reports

    train_model.train()
    train_model.to(device)
    running_loss = 0.0
    # A dedicated loop variable keeps the `epoch` argument (the total number
    # of epochs) from being overwritten while iterating.
    for epoch_idx in range(epoch):
        for inputs, targets in trainloader:
            inputs, targets = inputs.to(device), targets.to(device)
            train_optimizer.zero_grad()
            outputs = train_model(inputs)
            loss = train_criteria(outputs, targets)
            loss.backward()
            train_optimizer.step()

            running_loss += loss.item()

        if (epoch_idx + 1) % log_every == 0:
            # Clamp the batch count so an empty loader reports a loss of 0
            # instead of raising ZeroDivisionError.
            batches_seen = log_every * max(len(trainloader), 1)
            print('Epoch: %d, Loss: %.3f' % (epoch_idx + 1, running_loss / batches_seen))
            running_loss = 0.0
    print('Finished Training\n')

In [None]:
def test(train_model: torch.nn.Module, testloader: DataLoader):
    """Evaluate ``train_model`` on ``testloader`` and print top-1 / top-5 accuracy.

    The model is moved to the module-level ``device`` and put in eval mode.
    For each batch the five highest-scoring classes are compared with the
    labels; the hit counts are divided by ``len(testloader.dataset)`` and
    printed as ``correct_1`` and ``correct_5``.

    Args:
        train_model: Network to evaluate.
        testloader: Loader yielding ``(images, labels)`` batches.
    """
    top1_hits = 0
    top5_hits = 0
    # Inference only, so gradient tracking is switched off for the whole pass.
    with torch.no_grad():
        train_model.to(device)
        train_model.eval()
        for images, labels in testloader:
            images = images.to(device)
            labels = labels.to(device)
            logits = train_model(images)
            top5_pred = torch.topk(logits, 5, dim=1, largest=True, sorted=True).indices

            # Repeat each label across the five prediction columns so the
            # comparison below is element-wise.
            label_grid = labels.view(labels.size(0), -1).expand_as(top5_pred)
            hits = top5_pred.eq(label_grid).float()

            # A sample counts for top-5 when any of the five columns matches.
            top5_hits += hits[:, :5].sum()

            # Only the first (highest-scoring) column counts for top-1.
            top1_hits += hits[:, :1].sum()

    num_samples = len(testloader.dataset)
    print("correct_1: ", top1_hits / num_samples)
    print("correct_5: ", top5_hits / num_samples)

In [None]:
def save_model(model_name, train_model: torch.nn.Module):
    """Write the weights of ``train_model`` to the notebook's checkpoint directory.

    The file is stored in the module-level ``model_file_dir`` under the name
    ``"<dataset>_<model>_<model_name>.pth"``, where the first two parts come
    from ``config``. The directory is created if it does not exist yet.

    Args:
        model_name: Suffix that distinguishes this model's checkpoint file.
        train_model: Network whose ``state_dict()`` is saved.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(model_file_dir, exist_ok=True)
    model_file_path = os.path.join(
        model_file_dir,
        "{}_{}_{}.pth".format(config['dataset'], config['model'], model_name),
    )
    torch.save(train_model.state_dict(), model_file_path)

### 原始数据训练与测试

In [None]:
# Baseline run on the unpartitioned dataset: reuse the 'model_1' checkpoint
# when one exists, otherwise train for 100 epochs first.
train_model_1, train_criteria_1, train_optimizer_1, model_existed = train_model_init('model_1')
train_loader_1, test_loader_1 = origin_data_init()
if not model_existed:
    train(train_model_1, train_criteria_1, train_optimizer_1, train_loader_1, 100)
else:
    print("Model existed, skip training")
# Evaluation runs whether the weights were restored or freshly trained.
test(train_model_1, test_loader_1)
if not model_existed:
    # Only freshly trained weights need to be written to disk.
    save_model('model_1', train_model_1)

### 划分数据训练与测试

In [None]:
# Run on the partitioned data: reuse the 'model_2' checkpoint when one exists,
# otherwise train for 100 epochs on the loader built for `client_list`.
train_model_2, train_criteria_2, train_optimizer_2, model_existed = train_model_init('model_2')
test_loader_2 = cluster_partitioner.get_cluster_dataloader(client_list, config['local_bs'], type='test')
if not model_existed:
    # The training loader is only requested when training actually happens.
    train_loader_2 = cluster_partitioner.get_cluster_dataloader(client_list, config['local_bs'])
    train(train_model_2, train_criteria_2, train_optimizer_2, train_loader_2, 100)
else:
    print("Model existed, skip training")
# Evaluation runs whether the weights were restored or freshly trained.
test(train_model_2, test_loader_2)
if not model_existed:
    # Only freshly trained weights need to be written to disk.
    save_model('model_2', train_model_2)