In [17]:
import torch
import torch.nn as nn

In [11]:
x = torch.tensor(1., requires_grad = True)
w = torch.tensor(2., requires_grad = True)
b = torch.tensor(3., requires_grad = True)

In [13]:
y = w*x + b
y.backward()

In [37]:
print(x.grad)
print(w.grad)
print(b.grad)

None
tensor(1.)
tensor(1.)


In [42]:
x = torch.randn(10, 3)
y = torch.randn(10, 2)
# 实例化模型
linear = nn.Linear(3, 2)
print(linear.weight)
print(linear.bias)
# 实例化目标函数、优化器
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)

Parameter containing:
tensor([[ 0.3153, -0.0369,  0.5598],
        [ 0.0326,  0.5248, -0.0057]], requires_grad=True)
Parameter containing:
tensor([0.5059, 0.0343], requires_grad=True)


In [43]:
y_pre = linear(x)
loss = criterion(y, y_pre)

print('before train .loss:', loss.item())

before train .loss: 32.376705169677734


In [44]:
# 开始训练
loss.backward()
print('dw:', linear.weight.grad)
print('db:', linear.bias.grad)

print(linear.weight)
print(linear.bias)

dw: tensor([[ 3.3697, -0.6791,  8.8961],
        [ 0.1031, 14.8054, 11.4115]])
db: tensor([13.3412,  3.5310])
Parameter containing:
tensor([[ 0.3153, -0.0369,  0.5598],
        [ 0.0326,  0.5248, -0.0057]], requires_grad=True)
Parameter containing:
tensor([0.5059, 0.0343], requires_grad=True)


In [45]:
optimizer.step()
print(linear.weight)
print(linear.bias)

Parameter containing:
tensor([[ 0.2816, -0.0302,  0.4708],
        [ 0.0316,  0.3768, -0.1199]], requires_grad=True)
Parameter containing:
tensor([ 0.3725, -0.0010], requires_grad=True)


In [46]:
y_pre = linear(x)
loss = criterion(y, y_pre)
print('after 1 train .loss:', loss.item())

after 1 train .loss: 26.88092803955078


#2层前馈神经网络 预测MNIST

In [27]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transfrorms
import torch.utils.data as data
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# 超参数
input_size = 784
hidden_size = 500
num_classes = 10
num_epoch = 5
batch_size = 100
learning_rate = 1e-4

# 数据加载
# dataset
train_data = torchvision.datasets.MNIST(
    root='pytorch-Learning/data',
    train=True,
    transform=transfrorms.ToTensor(),
    download=True
)
test_data = torchvision.datasets.MNIST(
    root='pytorch-Learning/data',
    train=False,
    transform=transfrorms.ToTensor()
)
# dataloader
train_loader = data.DataLoader(
    dataset=train_data,
    batch_size=batch_size,  # 批大小，用于每次训练的样本数量
    shuffle=True  # 每步迭代时，要打乱dataset的顺序来取出batch_size个样本
)
test_loader = data.DataLoader(
    dataset=test_data,
    batch_size=batch_size,  # 批大小，用于每次训练的样本数量
    shuffle=False  # 每步迭代时，不要打乱dataset的顺序来取出batch_size个样本
)


# 定义2层全连接模型
class twoLayerNeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, ouput_size):
        super(twoLayerNeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, ouput_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


model = twoLayerNeuralNet(input_size, hidden_size, num_classes).to(device)

# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 开始训练:
# 训练集有60000，每个训练的批大小为100，于是每一个epoch的迭代（step）次数为600次：
# 即data_loader的大小(total_step)
total_step = len(train_loader)
for epoch in range(num_epoch):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, 28 * 28).to(device)  # 把图片flat化
        labels = labels.to(device)
        # 前向传播，计算损失
        outputs = model(images)
        loss = criterion(outputs, labels)
        # 后向传播，更新权值
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epoch, i + 1, total_step, loss.item()))

# 开始测试 不计算梯度（内存效率）
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


Epoch [1/5], Step [100/600], Loss: 1.2049


Epoch [1/5], Step [200/600], Loss: 0.8092


Epoch [1/5], Step [300/600], Loss: 0.6841


Epoch [1/5], Step [400/600], Loss: 0.4100


Epoch [1/5], Step [500/600], Loss: 0.4778


Epoch [1/5], Step [600/600], Loss: 0.3230


Epoch [2/5], Step [100/600], Loss: 0.3616


Epoch [2/5], Step [200/600], Loss: 0.3663


Epoch [2/5], Step [300/600], Loss: 0.3533


Epoch [2/5], Step [400/600], Loss: 0.2344


Epoch [2/5], Step [500/600], Loss: 0.3189


Epoch [2/5], Step [600/600], Loss: 0.2223


Epoch [3/5], Step [100/600], Loss: 0.2618


Epoch [3/5], Step [200/600], Loss: 0.2053


Epoch [3/5], Step [300/600], Loss: 0.2804


Epoch [3/5], Step [400/600], Loss: 0.3787


Epoch [3/5], Step [500/600], Loss: 0.2056


Epoch [3/5], Step [600/600], Loss: 0.3005


Epoch [4/5], Step [100/600], Loss: 0.2674


Epoch [4/5], Step [200/600], Loss: 0.2138


Epoch [4/5], Step [300/600], Loss: 0.2815


Epoch [4/5], Step [400/600], Loss: 0.1672


Epoch [4/5], Step [500/600], Loss: 0.3036


Epoch [4/5], Step [600/600], Loss: 0.1338


Epoch [5/5], Step [100/600], Loss: 0.2124


Epoch [5/5], Step [200/600], Loss: 0.1869


Epoch [5/5], Step [300/600], Loss: 0.1388


Epoch [5/5], Step [400/600], Loss: 0.2160


Epoch [5/5], Step [500/600], Loss: 0.1042


Epoch [5/5], Step [600/600], Loss: 0.2553


Accuracy of the network on the 10000 test images: 94.68 %


# 2层的CNN网络


In [34]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transfrorms
import torch.utils.data as data
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# 超参数
num_classes = 10
num_epoch = 5
batch_size = 100
learning_rate = 1e-4

# 数据加载
# dataset
train_data = torchvision.datasets.MNIST(
    root='pytorch-Learning/data',
    train=True,
    transform=transfrorms.ToTensor(),
    download=True
)
test_data = torchvision.datasets.MNIST(
    root='pytorch-Learning/data',
    train=False,
    transform=transfrorms.ToTensor()
)
# dataloader
train_loader = data.DataLoader(
    dataset=train_data,
    batch_size=batch_size,  # 批大小，用于每次训练的样本数量
    shuffle=True  # 每步迭代时，要打乱dataset的顺序来取出batch_size个样本
)
test_loader = data.DataLoader(
    dataset=test_data,
    batch_size=batch_size,  # 批大小，用于每次训练的样本数量
    shuffle=False  # 每步迭代时，不要打乱dataset的顺序来取出batch_size个样本
)


# 定义2层CNN模型
class twoLayerConvNet(nn.Module):
    def __init__(self, input_size, hidden_size, ouput_size):
        super(twoLayerConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(7 * 7 * 32, num_classes)
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out


model = twoLayerConvNet(input_size, hidden_size, num_classes).to(device)

# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 开始训练:
# 训练集有60000，每个训练的批大小为100，于是每一个epoch的迭代（step）次数为600次：
# 即data_loader的大小(total_step)
total_step = len(train_loader)
for epoch in range(num_epoch):
    for i, (images, labels) in enumerate(train_loader):
        # 不需要把图片flat化
        images = images.to(device)
        labels = labels.to(device)
        # 前向传播，计算损失
        outputs = model(images)
        loss = criterion(outputs, labels)
        # 后向传播，更新权值
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epoch, i + 1, total_step, loss.item()))

# 开始测试 不计算梯度（内存效率）
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


Epoch [1/5], Step [100/600], Loss: 1.0864


Epoch [1/5], Step [200/600], Loss: 0.5048


Epoch [1/5], Step [300/600], Loss: 0.3769


Epoch [1/5], Step [400/600], Loss: 0.2544


Epoch [1/5], Step [500/600], Loss: 0.3016


Epoch [1/5], Step [600/600], Loss: 0.1826


Epoch [2/5], Step [100/600], Loss: 0.1998


Epoch [2/5], Step [200/600], Loss: 0.1712


Epoch [2/5], Step [300/600], Loss: 0.1463


Epoch [2/5], Step [400/600], Loss: 0.1016


Epoch [2/5], Step [500/600], Loss: 0.0897


Epoch [2/5], Step [600/600], Loss: 0.1271


Epoch [3/5], Step [100/600], Loss: 0.1420


Epoch [3/5], Step [200/600], Loss: 0.1158


Epoch [3/5], Step [300/600], Loss: 0.0671


Epoch [3/5], Step [400/600], Loss: 0.0524


Epoch [3/5], Step [500/600], Loss: 0.1103


Epoch [3/5], Step [600/600], Loss: 0.0763


Epoch [4/5], Step [100/600], Loss: 0.0378


Epoch [4/5], Step [200/600], Loss: 0.1394


Epoch [4/5], Step [300/600], Loss: 0.0486


Epoch [4/5], Step [400/600], Loss: 0.0916


Epoch [4/5], Step [500/600], Loss: 0.0929


Epoch [4/5], Step [600/600], Loss: 0.1012


Epoch [5/5], Step [100/600], Loss: 0.0216


Epoch [5/5], Step [200/600], Loss: 0.0264


Epoch [5/5], Step [300/600], Loss: 0.0490


Epoch [5/5], Step [400/600], Loss: 0.0960


Epoch [5/5], Step [500/600], Loss: 0.0345


Epoch [5/5], Step [600/600], Loss: 0.0597


Accuracy of the network on the 10000 test images: 98.48 %


True