In [1]:
import torch
import random
import torchvision
import torchvision.transforms as transforms
from torch.nn import init
import torch.optim as optim
from numpy import *
import torch.nn as nn
device = torch.device('cpu')

In [2]:
mnist_train = torchvision.datasets.MNIST(root='./', train=True,
download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.MNIST(root='./', train=False,
download=True, transform=transforms.ToTensor())

In [14]:
loss=torch.nn.CrossEntropyLoss()

In [3]:
num_inputs = 784
num_outputs = 10
num_hiddens = 512
class FlattenLayer(torch.nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):
        return x.view(x.shape[0], -1)
Net = nn.Sequential(
    FlattenLayer(),
    nn.Linear(num_inputs,num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens,num_outputs),
)

In [4]:
for params in Net.parameters():
    init.normal_(params,mean=0,std=0.01)

In [7]:
lr = 0.0001
wd = 0.05
optimizer = optim.SGD(Net.parameters(), lr=lr, weight_decay=wd)

In [8]:
def evaluate(data_iter, net):
    test_loss,acc_sum, n = 0.0, 0.0, 0
    for x, y in data_iter:
        x = x.unsqueeze(1).type(torch.FloatTensor)
        y_hat = net(x)
        acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
        l = loss(y_hat, y).sum()
        test_loss += l.item()
        n += y.shape[0]
    return acc_sum / n, test_loss / n

In [9]:
def get_kfold_data(k, i, X, y):
    fold_size = X.shape[0] // k
    val_start = i * fold_size
    if i != k - 1:
        val_end = (i + 1) * fold_size
        X_valid, y_valid = X[val_start:val_end], y[val_start:val_end]
        X_train = torch.cat((X[0:val_start], X[val_end:]), dim = 0)
        y_train = torch.cat((y[0:val_start], y[val_end:]), dim = 0)
    else:
        X_valid, y_valid = X[val_start:], y[val_start:]
        X_train = X[0:val_start]
        y_train = y[0:val_start]
    return X_train, y_train, X_valid, y_valid

In [10]:
def k_fold(k, X_train, y_train):
    train_loss_sum, valid_loss_sum = 0, 0
    train_acc_sum, valid_acc_sum = 0, 0
    for i in range(k):
        print('第', i + 1, '折验证结果')
        data = get_kfold_data(k, i, X_train, y_train)
        net = Net
        for params in Net.parameters():
            init.normal_(params, mean=0, std=0.01)
        train_loss, val_loss, train_acc, val_acc = train(net,*data,loss,num_epochs,batch_size,params,lr,optimizer)
        print('train loss:{:.4f}, train accuracy:{:.3f}'.format(train_loss, train_acc))
        print('valid loss:{:.4f}, valid accuracy:{:.3f}'.format(val_loss, val_acc))
        train_loss_sum += train_loss
        valid_loss_sum += val_loss
        train_acc_sum += train_acc
        valid_acc_sum += val_acc
    print('\n', '最终10折交叉验证结果：')
    print('ave_train_loss:{:.4f}, ave_train_acc:{:.3f}'.format(train_loss_sum/k, train_acc_sum/k))
    print('ave_valid_loss:{:.4f}, ave_valid_acc:{:.3f}'.format(valid_loss_sum/k, valid_acc_sum/k))
    return

In [11]:
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)

In [12]:
num_epochs = 5
batch_size = 256
xzhou = []
y_train_loss = []
y_test_loss = []
y_train_acc = []
y_test_acc = []
def train(net, train_features, train_labels, test_features, test_labels, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None):
    y_train_loss, y_test_loss, y_train_acc, y_test_acc = [], [], [], []
    for epoch in range(num_epochs):
        train_l_num, train_acc_num, n = 0.0, 0.0, 0
        for x, y in data_iter(batch_size, train_features, train_labels):
            x = x.unsqueeze(1).type(torch.FloatTensor)
            y_hat = net(x)
            l = loss(y_hat, y).sum()
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            optimizer.step()
            train_l_num += l.item()
            train_acc_num += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc, test_loss = evaluate(data_iter(batch_size, test_features, test_labels), net)
        y_train_loss.append(train_l_num / n)
        y_test_loss.append(test_loss)
        y_train_acc.append(train_acc_num / n)
        y_test_acc.append(test_acc)
    return y_train_loss[num_epochs-1], y_test_loss[num_epochs-1], y_train_acc[num_epochs-1], y_test_acc[num_epochs-1]

In [15]:
X_train, Y_train = mnist_train.data, mnist_train.targets
X_test, Y_test = mnist_test.data,mnist_test.targets
k_fold(10, X_train, Y_train)

第 1 折验证结果
train loss:0.0011, train accuracy:0.917
valid loss:0.0011, valid accuracy:0.924
第 2 折验证结果
train loss:0.0011, train accuracy:0.915
valid loss:0.0012, valid accuracy:0.910
第 3 折验证结果
train loss:0.0011, train accuracy:0.918
valid loss:0.0013, valid accuracy:0.903
第 4 折验证结果
train loss:0.0011, train accuracy:0.916
valid loss:0.0011, valid accuracy:0.922
第 5 折验证结果
train loss:0.0011, train accuracy:0.917
valid loss:0.0012, valid accuracy:0.914
第 6 折验证结果
train loss:0.0011, train accuracy:0.916
valid loss:0.0012, valid accuracy:0.904
第 7 折验证结果
train loss:0.0011, train accuracy:0.916
valid loss:0.0012, valid accuracy:0.911
第 8 折验证结果
train loss:0.0011, train accuracy:0.916
valid loss:0.0013, valid accuracy:0.904
第 9 折验证结果
train loss:0.0011, train accuracy:0.917
valid loss:0.0013, valid accuracy:0.907
第 10 折验证结果
train loss:0.0011, train accuracy:0.914
valid loss:0.0009, valid accuracy:0.932

 最终10折交叉验证结果：
ave_train_loss:0.0011, ave_train_acc:0.916
ave_valid_loss:0.0012, ave_valid_acc:0.91