In [1]:
import random
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True' # prevent the kernel from crashing when plotting

In [17]:
# Build a synthetic regression dataset
def tensorGenReg(num_example=1000, w=[2, -1, 1], bias=True, delta=0.01, deg=1):
    """Generate a synthetic regression dataset ``y = (x ** deg) @ w + b + noise``.

    Args:
        num_example: Number of samples (rows) to draw.
        w: Coefficients. When ``bias`` is true the last entry is the intercept
            and the preceding entries are the feature weights; otherwise every
            entry is a feature weight. The sequence is only read, never mutated.
        bias: Whether the generating relationship has an intercept term.
        delta: Scale applied to the standard-normal noise added to the labels.
        deg: Power applied element-wise to the features before weighting.

    Returns:
        ``(features, labels)``. ``features`` has ``len(w)`` columns: with
        ``bias`` the last column is all ones, so for ``deg == 1`` the product
        ``features @ w`` reproduces the noiseless labels. ``labels`` has shape
        ``(num_example, 1)``.
    """
    # Split the coefficient list into feature weights and (optional) intercept.
    if bias:
        weights, intercept = w[:-1], w[-1]
    else:
        weights, intercept = w, None

    num_input = len(weights)
    features_true = torch.randn(num_example, num_input)
    w_true = torch.tensor(weights).reshape(-1, 1).float()

    # A (n, k) @ (k, 1) product also covers the single-feature case, so no
    # separate element-wise branch is needed.
    labels_true = torch.mm(torch.pow(features_true, deg), w_true)

    if bias:
        b_true = torch.tensor(intercept).float()
        labels_true = labels_true + b_true
        # Append a column of ones so the intercept can be learned as a weight.
        features = torch.cat((features_true, torch.ones(len(features_true), 1)), 1)
    else:
        features = features_true

    # Noise is drawn after the features, keeping the random-number order stable.
    labels = labels_true + torch.randn(size=labels_true.shape) * delta
    return features, labels

In [18]:
# Generate the dataset with tensorGenReg's defaults (1000 samples, w=[2, -1, 1], bias=True),
# so `features` carries two random columns plus a trailing column of ones.
torch.manual_seed(428)
features, labels = tensorGenReg()

In [4]:
# 1. Choose the model
def linreg(X, w):
    """Linear model: matrix product of the 2-D inputs ``X`` and the weights ``w``."""
    prediction = torch.mm(X, w)
    return prediction

In [5]:
# 2. Define the objective function
def squared_loss(yhat, y):
    """Mean squared error between predictions ``yhat`` and targets ``y``.

    Both arguments are flattened to column vectors first, so their original
    shapes only need to hold the same number of elements.
    """
    residual = yhat.reshape(-1, 1) - y.reshape(-1, 1)
    return torch.sum(residual ** 2) / y.numel()

In [6]:
# 3. Define the optimization algorithm
def sgd(params, lr):
    """Apply one plain gradient-descent step in place and reset the gradients.

    Args:
        params: A single tensor with a populated ``.grad``, or a list/tuple of
            such tensors.
        lr: Learning rate that scales the gradient.
    """
    # A tensor is itself iterable (over rows), so test for it explicitly.
    tensors = [params] if isinstance(params, torch.Tensor) else list(params)
    # Update under no_grad instead of through `.data`, so the step is not
    # recorded by autograd.
    with torch.no_grad():
        for p in tensors:
            p -= lr * p.grad
            p.grad.zero_()

In [7]:
# Split the data into shuffled mini-batches
def data_iter(batch_size, features, labels):
    """Return a list of ``[features_batch, labels_batch]`` pairs in random order.

    The row order is shuffled with Python's ``random`` module, then cut into
    consecutive chunks of ``batch_size`` rows; the last chunk may be shorter.
    """
    n_rows = len(features)
    order = list(range(n_rows))
    random.shuffle(order)  # randomize the row order
    chunks = (order[lo:lo + batch_size] for lo in range(0, n_rows, batch_size))
    batches = []
    for chunk in chunks:
        picked = torch.tensor(chunk)
        batch = [torch.index_select(features, 0, picked),
                 torch.index_select(labels, 0, picked)]
        batches.append(batch)
    return batches

In [22]:
# TensorBoard writer; it is created with log_dir='reg_loss' and used below to log the training loss.
writer = SummaryWriter(log_dir='reg_loss')

In [24]:
# 4. Train the model
# data_iter shuffles with Python's `random` module, which torch.manual_seed
# does not cover, so seed both generators to make the run reproducible.
torch.manual_seed(428)
random.seed(428)

batch_size = 10 # mini-batch size
lr = 0.03       # learning rate
num_epochs = 3  # number of passes over the dataset
w = torch.zeros(3, 1, requires_grad = True) # initial weights (two feature weights + bias)

net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w), y)
        l.backward() # back-propagation
        sgd(w, lr)
    # Evaluation only: no autograd graph is needed for the full-dataset loss.
    with torch.no_grad():
        train_l = loss(net(features, w), labels)
    writer.add_scalar('mul', train_l, epoch)
    print('epoch %d, loss %f' % (epoch + 1, train_l))

epoch 1, loss 0.000125
epoch 2, loss 0.000096
epoch 3, loss 0.000096


In [21]:
# Inspect the learned weights; the dataset was generated with w=[2, -1, 1].
w

tensor([[ 2.0003],
        [-1.0007],
        [ 0.9992]], requires_grad=True)

In [27]:
# Quick linear-regression implementation: hyper-parameters
batch_size = 10
lr = 0.03
num_epochs = 3

In [29]:
# Generate the dataset
torch.manual_seed(428)
features, labels = tensorGenReg()
features = features[:, :-1] # drop the last (all-ones) column, because nn creates the bias itself
data = TensorDataset(features, labels) # wrap the tensors as a dataset
batchData = DataLoader(data, batch_size=batch_size, shuffle=True) # shuffled mini-batch loader

In [30]:
# Inspect the feature matrix after the trailing ones column was dropped.
features

tensor([[ 1.0109,  0.3103],
        [ 1.3131,  0.1687],
        [-0.4335,  0.4715],
        ...,
        [ 1.4493,  1.4995],
        [-1.6545, -0.2729],
        [ 0.2654, -0.5429]])

In [39]:
class Model(nn.Module):
    """Linear-regression network consisting of a single ``nn.Linear`` layer."""

    def __init__(self, in_features=2, out_feature=1):
        super().__init__()
        self.linear = nn.Linear(in_features, out_feature)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        return self.linear(x)

In [40]:
# 1. Choose the model
net = Model()
# 2. Define the objective function
criterion = nn.MSELoss()
# 3. Choose the optimization method
optimizer = optim.SGD(net.parameters(), lr=lr)

In [41]:
# 4. Train the model
torch.manual_seed(428)
for epoch in range(num_epochs):
    for X, y in batchData:
        yhat = net(X)  # call the module rather than .forward() directly
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Compute this model's full-dataset loss for logging. Previously the logged
    # `train_l` was never assigned in this loop, so it was a stale value left
    # over from the manual training cell.
    with torch.no_grad():
        train_l = criterion(net(features), labels).item()
    # NOTE(review): same tag 'mul' as the manual training loop, so both runs
    # write to one TensorBoard series — confirm that is intended.
    writer.add_scalar('mul', train_l, epoch)
    # The printed value is the loss of the last mini-batch of the epoch.
    print('epoch %d, loss %f' % (epoch + 1, loss))

epoch 1, loss 0.000100
epoch 2, loss 0.000096
epoch 3, loss 0.000066


In [42]:
# Inspect the learned parameters of the network's linear layer.
list(net.parameters())

[Parameter containing:
 tensor([[ 1.9997, -1.0002]], requires_grad=True),
 Parameter containing:
 tensor([0.9998], requires_grad=True)]

In [43]:
# MSE loss (criterion is nn.MSELoss) of the trained network on the full dataset.
criterion(net(features), labels)

tensor(9.5491e-05, grad_fn=<MseLossBackward>)