In [1]:
import torch
import random
import numpy as np
import torch.nn as nn
from torch.nn import init
import torch.optim as optim

In [2]:
n_train, n_test, num_inputs,num_hiddens, num_outputs= 7000, 3000, 500, 256, 1
true_w, true_b = torch.ones(num_inputs, 1) * 0.0056, 0.028
features = torch.randn((n_train + n_test, num_inputs))
labels = torch.matmul(features, true_w)+true_b
labels += torch.tensor(np.random.normal(0, 0.001, size=labels.size()), dtype=torch.float)
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]

In [3]:
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)

In [4]:
loss = nn.MSELoss()

In [6]:
class FlattenLayer(torch.nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):
        return x.view(x.shape[0], -1)
net = nn.Sequential(
    FlattenLayer(),
    nn.Linear(500,256),
    nn.ReLU(),
    nn.Linear(256,1),
)

In [7]:
for params in net.parameters():
    init.normal_(params,mean=0,std=0.01)

In [8]:
lr = 0.01
optimizer = optim.SGD(net.parameters(), lr=lr)

In [9]:
#K折
def get_kfold_data(k, i, X, y):
    fold_size = X.shape[0] // k
    val_start = i * fold_size
    if i != k - 1:
        val_end = (i + 1) * fold_size
        X_valid, y_valid = X[val_start:val_end], y[val_start:val_end]
        X_train = torch.cat((X[0:val_start], X[val_end:]), dim = 0)
        y_train = torch.cat((y[0:val_start], y[val_end:]), dim = 0)
    else:
        X_valid, y_valid = X[val_start:], y[val_start:]
        X_train = X[0:val_start]
        y_train = y[0:val_start]
    return X_train, y_train, X_valid, y_valid

In [10]:
def k_fold(k, X_train, y_train):
    train_loss_sum, valid_loss_sum = 0, 0
    for i in range(k):
        print('第', i + 1, '折验证结果')
        data = get_kfold_data(k, i, X_train, y_train)
        for params in net.parameters():
            init.normal_(params, mean=0, std=0.01)
        train_loss, val_loss= train(*data)
        print('train loss:{:.4f}'.format(train_loss))
        print('valid loss:{:.4f}'.format(val_loss))
        train_loss_sum += train_loss
        valid_loss_sum += val_loss
    print('\n', '最终10折交叉验证结果：')
    print('ave_train_loss:{:.4f}'.format(train_loss_sum/k))
    print('ave_valid_loss:{:.4f}'.format(valid_loss_sum/k))
    return

In [11]:
batch_size = 128
num_epochs = 5

xzhou = []
y_train_loss = []
y_test_loss = []
def train(train_features, train_labels, test_features, test_labels):
    for epoch in range(num_epochs):
        for x, y in data_iter(batch_size, train_features, train_labels):
            l = loss(net(x), y).sum()
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            optimizer.step()
        train_l = loss(net(train_features), train_labels)
        test_l = loss(net(test_features), test_labels)
        xzhou.append(epoch + 1)
        y_train_loss.append(train_l.mean().item())
        y_test_loss.append(test_l.mean().item())
    return y_train_loss[num_epochs-1], y_test_loss[num_epochs-1]

In [12]:
k_fold(10, train_features, train_labels)

第 1 折验证结果
train loss:0.0141
valid loss:0.0133
第 2 折验证结果
train loss:0.0141
valid loss:0.0133
第 3 折验证结果
train loss:0.0141
valid loss:0.0133
第 4 折验证结果
train loss:0.0141
valid loss:0.0133
第 5 折验证结果
train loss:0.0141
valid loss:0.0133
第 6 折验证结果
train loss:0.0141
valid loss:0.0133
第 7 折验证结果
train loss:0.0141
valid loss:0.0133
第 8 折验证结果
train loss:0.0141
valid loss:0.0133
第 9 折验证结果
train loss:0.0141
valid loss:0.0133
第 10 折验证结果
train loss:0.0141
valid loss:0.0133

 最终10折交叉验证结果：
ave_train_loss:0.0141
ave_valid_loss:0.0133
