In [1]:
import torch
import torch.nn as nn
import torch.utils.data
import torch.nn.init
import numpy as np

n_train = 7000
n_test = 3000
num_inputs = 500
true_w = torch.ones(num_inputs, 1) * 0.0056
true_b = 0.028

features = torch.randn(n_train + n_test, num_inputs)
labels = torch.matmul(features, true_w) + true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype= torch.float)

batch_size = 200
num_epochs = 20
lr = 0.01


def squared_loss(y_hat, y):
    return (y_hat - y.view(y_hat.size())) ** 2 / 2
loss = squared_loss

# 训练模型
def train(x_train, y_train, x_valid, y_valid):
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=0.1)
    nn.init.normal_(net.bias, mean=0, std=1)
    optimizer_w = torch.optim.SGD(params=[net.weight], lr=lr)
    optimizer_b = torch.optim.SGD(params=[net.bias], lr=lr)

    train_dataset = torch.utils.data.TensorDataset(x_train,y_train)
    train_dataiter = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    valid_dataset = torch.utils.data.TensorDataset(x_valid,y_valid)
    valid_dataiter = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    train_ls = []
    test_ls = []
    n, train_loss_sum = 0, 0
    n_test, test_loss_sum = 0, 0

    for epoch in range(num_epochs):
        for X, y in train_dataiter:
            l = loss(net(X), y).mean()
            optimizer_b.zero_grad()
            optimizer_w.zero_grad()
            l.backward()
            optimizer_w.step()
            optimizer_b.step()
            n += y.shape[0]
            train_loss_sum +=l.item()
        train_ls.append(train_loss_sum / n)

        for X, y in valid_dataiter:
            l = loss(net(X), y).mean()
            n_test += y.shape[0]
            test_loss_sum += l.item()
        test_ls.append(test_loss_sum / n)

        # print('epoch %d, w= %.4f, b= %.3f' % (epoch + 1, net.weight.mean().item(), net.bias.mean().item()))
    return train_ls, test_ls


def get_k_fold_data(k, i, X, y):
    # 返回第i折交叉验证时所需要的训练和验证数据，分开放，X_train为训练数据，X_valid为验证数据
    assert k > 1
    fold_size = X.shape[0] // k  
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)  #slice(start,end,step)切片函数
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat((X_train, X_part), dim=0)
            y_train = torch.cat((y_train, y_part), dim=0)
    return X_train, y_train, X_valid, y_valid


def k_fold(k, X_train, y_train, num_epochs, batch_size):
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train) # 获取k折交叉验证的训练和验证数据
        train_ls, valid_ls = train(*data)
        train_l_sum, valid_l_sum, train_acc_sum, valid_acc_sum = 0.0, 0.0, 0.0, 0.0
        for ii in train_ls:
            train_l_sum += ii
        for jj in valid_ls:
            valid_l_sum += jj

        print('fold %d, train loss %.6f, valid loss %.6f' % (i + 1, train_l_sum / num_epochs, valid_l_sum / num_epochs))


k_fold(10, features, labels, num_epochs, batch_size)


fold 1, train loss 0.002749, valid loss 0.000194
fold 2, train loss 0.003578, valid loss 0.000232
fold 3, train loss 0.003604, valid loss 0.000236
fold 4, train loss 0.002041, valid loss 0.000147
fold 5, train loss 0.002485, valid loss 0.000180
fold 6, train loss 0.002277, valid loss 0.000169
fold 7, train loss 0.004013, valid loss 0.000252
fold 8, train loss 0.002703, valid loss 0.000194
fold 9, train loss 0.002221, valid loss 0.000156
fold 10, train loss 0.002759, valid loss 0.000204


In [2]:
import torch
import matplotlib.pyplot as plt
import random

#数据集
train1 = torch.normal(-1, 1, size=(7000, 200))
test1 = torch.normal(-1, 1, size=(3000, 200))
train2 = torch.normal(1, 1, size=(7000, 200))
test2 = torch.normal(1, 1, size=(3000, 200))
label_train1 = torch.zeros(7000)
label_train2 = torch.ones(7000)
label_test1 = torch.zeros(3000)
label_test2 = torch.ones(3000)

train_features = torch.cat((train1, train2), 0).type(torch.FloatTensor)
train_labels = torch.cat((label_train1, label_train2), 0)
test_features = torch.cat((test1, test2), 0).type(torch.FloatTensor)
test_labels = torch.cat((label_test1, label_test2), 0)


def data_iter(batch_size,features,labels,shuffle):
    num_examples = len(features)
    indices = list(range(num_examples))
    if shuffle == True:
        random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i:min(i+batch_size,num_examples)])
        yield features.index_select(0,j),labels.index_select(0,j)

def relu(x):
    return torch.max(input = x, other=torch.tensor(0.0))

def sigmoid(x):
    return 1.0/(1+torch.exp(-x))

def crossloss(output, label):
    output=output.squeeze(-1)
    return -(label*torch.log(output)+(1-label)*torch.log(1-output))

def sgd(params,lr,batch_size):
    for param in params:
        param.data-=lr*param.grad/batch_size
        param.grad.zero_()

num_inputs = 200
num_hidden = 100
num_outputs = 1
def train(batch_size, num_epochs, lr, train_features, train_labels, test_features, test_labels):
    train_ls, test_ls, x_epoch, list_train_acc, list_test_acc=[], [], [], [], []
    w1 = torch.normal(0, 0.01, size=(num_inputs, num_hidden), requires_grad=True)
    b1 = torch.zeros(num_hidden, requires_grad=True)
    w2 = torch.normal(0, 0.01, size=(num_hidden, num_outputs), requires_grad=True)
    b2 = torch.zeros(num_outputs, requires_grad=True)

    for epoch in range(num_epochs):
        train_loss_sum, train_num, train_acc = 0, 0, 0
        test_loss_sum,test_num,test_acc=0, 0, 0

        for x,y in data_iter(batch_size, train_features, train_labels, True):
            hidden = relu(torch.mm(x, w1) + b1)
            output = sigmoid(torch.mm(hidden, w2) + b2)
            loss = crossloss(output, y).sum()
            train_loss_sum += loss.item()
            output_temp = output.squeeze(-1)     
            train_acc += (output_temp.gt(0.5) == y).sum().item()
            train_num += x.shape[0]
            loss.backward()
            sgd([w1, b1, w2, b2], lr, batch_size)
        train_ls.append(train_loss_sum/train_num)
        x_epoch.append(epoch + 1)

        for x,y in data_iter(batch_size, test_features, test_labels, False):
            hidden = relu(torch.mm(x, w1) + b1)
            output = sigmoid(torch.mm(hidden, w2) + b2)
            loss = crossloss(output, y).sum()
            test_loss_sum += loss.item()
            output_temp = output.squeeze(-1)
            test_acc += (output_temp.gt(0.5) == y).sum().item()
            test_num += x.shape[0]
        test_ls.append(test_loss_sum/test_num)
        list_train_acc.append(train_acc/train_num)
        list_test_acc.append(test_acc/test_num)
        # print("epoch %d,train loss %f,train acc %f,test loss %f,test acc %f"%(epoch + 1, train_loss_sum/train_num, train_acc/train_num, test_loss_sum/test_num, test_acc/test_num))

    return train_ls, test_ls, list_train_acc, list_test_acc
# train(batch_size, num_epochs, lr, train_features, train_labels, test_features, test_labels)

def get_k_fold_data(k, i, X, y):
    # 返回第i折交叉验证时所需要的训练和验证数据，分开放，X_train为训练数据，X_valid为验证数据
    assert k > 1
    fold_size = X.shape[0] // k  # 双斜杠表示除完后再向下取整
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)  #slice(start,end,step)切片函数
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat((X_train, X_part), dim=0)
            y_train = torch.cat((y_train, y_part), dim=0)
    return X_train, y_train, X_valid, y_valid

def k_fold(k, X_train, y_train, num_epochs, batch_size):
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train) # 获取k折交叉验证的训练和验证数据
        train_ls, valid_ls, train_acc, valid_acc = train(batch_size, num_epochs, lr, *data)
        train_l_sum, valid_l_sum, train_acc_sum, valid_acc_sum = 0.0, 0.0, 0.0, 0.0
        for ii in train_ls:
            train_l_sum += ii
        for jj in valid_ls:
            valid_l_sum += jj
        for ii in train_acc:
            train_acc_sum +=ii
        for jj in valid_acc:
            valid_acc_sum +=jj
        print('fold %d, train loss %.4f, valid loss %.4f, train acc %.4f, test acc %.4f' % (i + 1, train_l_sum / num_epochs, valid_l_sum / num_epochs, train_acc_sum / num_epochs, valid_acc_sum / num_epochs))

batch_size = 256
num_epochs = 10
lr = 0.1
fold = 10
k_fold(fold, train_features, train_labels, num_epochs, batch_size)

fold 1, train loss 0.0106, valid loss 0.0010, train acc 0.9993, test acc 1.0000
fold 2, train loss 0.0106, valid loss 0.0010, train acc 0.9992, test acc 1.0000
fold 3, train loss 0.0106, valid loss 0.0010, train acc 0.9996, test acc 1.0000
fold 4, train loss 0.0106, valid loss 0.0010, train acc 0.9994, test acc 1.0000
fold 5, train loss 0.0109, valid loss 0.0010, train acc 0.9990, test acc 1.0000
fold 6, train loss 0.0107, valid loss 0.0010, train acc 0.9990, test acc 1.0000
fold 7, train loss 0.0107, valid loss 0.0010, train acc 0.9983, test acc 1.0000
fold 8, train loss 0.0112, valid loss 0.0010, train acc 0.9981, test acc 1.0000
fold 9, train loss 0.0105, valid loss 0.0010, train acc 0.9997, test acc 1.0000
fold 10, train loss 0.0107, valid loss 0.0010, train acc 0.9989, test acc 1.0000


In [3]:
import torch.nn as nn
from torch.nn import init
import torch.utils.data
import torchvision
import torchvision.transforms as transforms

mnist_train = torchvision.datasets.MNIST(root='./Datasets/MNIST', train=True, download=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.MNIST(root='./Datasets/MNIST', train=False, download=True, transform=transforms.ToTensor())

batch_size = 200
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False,num_workers=0)

a, b =[], []
for x, y in mnist_train:
    a.append(x)
    b.append(y)

x = torch.stack(a, dim=0)
y = torch.tensor(b)

num_inputs, num_outputs, num_hiddens = 784, 10, 256


def evaluate_accuracy(test_iter, net):
    acc_sum, n = 0, 0
    for X, y in test_iter:
        y_hat = net(X)
        acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    return acc_sum / n


num_epochs = 12


def train(x_train, y_train, x_valid, y_valid, num_epochs, batch_size):
    train_ls, test_ls, x_epoch, list_train_acc, list_test_acc = [], [], [], [], []
    train_dataset = torch.utils.data.TensorDataset(x_train,y_train)
    train_dataiter = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    valid_dataset = torch.utils.data.TensorDataset(x_valid,y_valid)
    valid_dataiter = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

    class FlattenLayer(nn.Module):
        def __init__(self):
            super(FlattenLayer, self).__init__()

        def forward(self, x):
            return x.view(x.shape[0], -1)

    net = nn.Sequential(
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),  
        nn.Linear(num_hiddens, num_outputs),
    )

    for params in net.parameters():
        init.normal_(params, mean=0, std=0.1)

    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.2)

    for epoch in range(num_epochs):
        train_1_sum, train_acc_sum, n = 0.0, 0.0, 0
        train_1_test_sum, n_test = 0.0, 0
        for X, y in train_dataiter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_1_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        train_ls.append(train_1_sum / n)
        x_epoch.append(epoch + 1)
        test_acc = evaluate_accuracy(test_iter, net)
        list_train_acc.append(train_acc_sum / n)
        list_test_acc.append(test_acc)
        # print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_1_sum / n, train_acc_sum / n, test_acc))

        for X_test, y_test in valid_dataiter:
            y_hat = net(X_test)
            l = loss(y_hat, y_test).sum()
            train_1_test_sum += l.item()
            n_test += y_test.shape[0]
        test_ls.append(train_1_test_sum / n_test)

    return train_ls, test_ls, list_train_acc, list_test_acc


def get_k_fold_data(k, i, X, y):
    # 返回第i折交叉验证时所需要的训练和验证数据，分开放，X_train为训练数据，X_valid为验证数据
    assert k > 1
    fold_size = X.shape[0] // k  
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)  #slice(start,end,step)切片函数
        X_part, y_part = X[idx, :], y[idx]
        if j == i:
            X_valid, y_valid = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat((X_train, X_part), dim=0)
            y_train = torch.cat((y_train, y_part), dim=0)
    return X_train, y_train, X_valid, y_valid


def k_fold(k, X_train, y_train, num_epochs, batch_size):
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train) # 获取k折交叉验证的训练和验证数据
        train_ls, valid_ls, train_acc, valid_acc = train(*data, num_epochs, batch_size)
        train_l_sum, valid_l_sum, train_acc_sum, valid_acc_sum = 0.0, 0.0, 0.0, 0.0
        for ii in train_ls:
            train_l_sum += ii
        for jj in valid_ls:
            valid_l_sum += jj
        for ii in train_acc:
            train_acc_sum += ii
        for jj in valid_acc:
            valid_acc_sum += jj
        print('fold %d, train loss %.4f, valid loss %.4f, train acc %.4f, test acc %.4f' % (i + 1, train_l_sum / num_epochs, valid_l_sum / num_epochs, train_acc_sum / num_epochs, valid_acc_sum / num_epochs))


k_fold(10, x, y, num_epochs, batch_size)


fold 1, train loss 0.0007, valid loss 0.0007, train acc 0.9598, test acc 0.9612
fold 2, train loss 0.0007, valid loss 0.0007, train acc 0.9602, test acc 0.9617
fold 3, train loss 0.0007, valid loss 0.0008, train acc 0.9593, test acc 0.9585
fold 4, train loss 0.0007, valid loss 0.0007, train acc 0.9596, test acc 0.9602
fold 5, train loss 0.0007, valid loss 0.0008, train acc 0.9596, test acc 0.9596
fold 6, train loss 0.0007, valid loss 0.0008, train acc 0.9594, test acc 0.9602
fold 7, train loss 0.0007, valid loss 0.0008, train acc 0.9605, test acc 0.9613
fold 8, train loss 0.0007, valid loss 0.0008, train acc 0.9613, test acc 0.9616
fold 9, train loss 0.0007, valid loss 0.0008, train acc 0.9605, test acc 0.9609
fold 10, train loss 0.0007, valid loss 0.0006, train acc 0.9592, test acc 0.9608
