In [31]:
import torch

#### 1，我们一般只叶子节点的导数保存，如果想对非叶子节点的导数保存，使用retain_grad()

In [34]:
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
# y.retain_grad()
z = y * y * 3
out = z.mean()
out.backward()
print(y.grad)

None


  import sys


#### 2,定义神经网络

In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        
        # 定义神经网络中每层的结构
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # 构建模型
    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [None]:
##You just have to define the forward function, and the backward function 
# (where gradients are computed) is automatically defined for you using autograd. 
# You can use any of the Tensor operations in the forward function.

##The learnable parameters of a model are returned by net.parameters()

In [37]:
net.parameters()

<generator object Module.parameters at 0x000001E222C50848>

In [38]:
params = list(net.parameters())
print(len(params))
print(params[0].size())  

10
torch.Size([6, 1, 5, 5])


In [40]:
# print(params)

#### 使用模型预测结果

In [41]:
input = torch.randn(1, 1, 32, 32)

In [42]:
input

tensor([[[[ 0.3711, -0.2958,  0.8775,  ...,  0.8098,  0.5758,  0.9222],
          [ 0.5273, -0.4217, -1.1892,  ..., -0.5869,  1.9505, -0.2103],
          [ 1.6812, -0.7292, -1.2361,  ..., -0.4509, -0.2713,  0.1097],
          ...,
          [-1.4513, -0.6343, -1.9639,  ...,  2.2364, -0.2389,  0.3598],
          [-0.0505,  0.4481, -0.7992,  ...,  0.4705,  2.4979, -1.0948],
          [-1.7410,  0.6912, -0.0222,  ...,  0.7940,  0.4846,  0.1252]]]])

In [43]:
out = net(input)
print(out)

tensor([[ 0.1120,  0.0761, -0.0765, -0.0766,  0.0298,  0.0522, -0.0017,  0.0768,
         -0.1018, -0.1242]], grad_fn=<AddmmBackward>)


In [46]:
# out = net(input)
# print(out)

In [47]:
# print(out)

#### 梯度清零

In [53]:
net.zero_grad()
out.backward(torch.randn(1, 10))

#### note：torch.nn only supports mini-batches. The entire torch.nn package only supports inputs that are a mini-batch of samples, 
#### and not a single sample.
#### For example, nn.Conv2d will take in a 4D Tensor of nSamples x nChannels x Height x Width.
#### If you have a single sample, just use input.unsqueeze(0) to add a fake batch dimension.

#### 3，定义LOSS FUNCTION

In [54]:
output = net(input)
target = torch.randn(10)  # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(1.6975, grad_fn=<MseLossBackward>)


In [55]:
# input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
#       -> view -> linear -> relu -> linear -> relu -> linear
#       -> MSELoss
#       -> loss

In [56]:
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU

<MseLossBackward object at 0x000001E224424308>
<AddmmBackward object at 0x000001E224424AC8>
<AccumulateGrad object at 0x000001E224424308>


In [57]:
#  Indeed. Internally, PyTorch loves to connect all relevant inputs to the graph, 
# and if those don’t require gradients, you get None.

#### 反向传播

In [58]:
#### To backpropagate the error all we have to do is to loss.backward(). 
#### You need to clear the existing gradients though, 
#### else gradients will be accumulated to existing gradients.

In [59]:
net.zero_grad()     # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0197, -0.0051, -0.0149,  0.0156, -0.0051, -0.0036])


#### weight = weight - learning_rate * gradient

In [60]:
## 更新梯度
# learning_rate = 0.01
# for f in net.parameters():
#     f.data.sub_(f.grad.data * learning_rate)

#### However, as you use neural networks, you want to use various different update rules such as SGD, Nesterov-SGD, Adam, RMSProp, etc. To enable this, we built a small package: torch.optim that implements all these methods. Using it is very simple:

In [61]:
import torch.optim as optim

In [62]:
# optim.

In [64]:
import torch.optim as optim

# create your optimizer
optimizer = optim.Adadelta(net.parameters(), lr=0.01)

# in your training loop:
output = net(input)
loss = criterion(output, target)
optimizer.zero_grad()   # zero the gradient buffers
# net.zero_grad() 

loss.backward()
optimizer.step()    # Does the update

#### Observe how gradient buffers had to be manually set to zero using optimizer.zero_grad(). This is because gradients are accumulated as explained in Backprop section.

In [15]:
#### 定义类似线性回归模型

In [72]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [73]:
boston = load_boston()
X = boston.data
Y = boston.target
print(np.shape(X))
print(np.shape(Y))

(506, 13)
(506,)


In [74]:
std = StandardScaler()
X = std.fit_transform(X)

In [75]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=66)

In [79]:
import torch.nn.functional as f

In [92]:
## 定义模型
class RegressionModel(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(RegressionModel,self).__init__()
        self.linear = nn.Linear(input_dim, 100)
        self.linear2 = nn.Linear(100, output_dim)
        
    def forward(self, x):
        x = self.linear(x)
        x = f.relu(x)
        out = self.linear2(x)
        return out
# 模型实例化
input_dim = 13
output_dim = 1
model = RegressionModel(input_dim, output_dim)
print(model)
## 定义损失函数和优化器
learning_rate = 0.01
criterion = torch.nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr = learning_rate)

RegressionModel(
  (linear): Linear(in_features=13, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=1, bias=True)
)


In [93]:
# help(torch.nn.MSELoss)

In [94]:

#数据转为tensor类型
# inputs = torch.as_tensor(torch.from_numpy(X_train),dtype=torch.float32)
# labels = torch.as_tensor(torch.from_numpy(Y_train),dtype=torch.float32)
inputs = torch.tensor(X_train,dtype=torch.float32)
labels = torch.tensor(Y_train,dtype=torch.float32)

# inputs = torch.tensor(X_train)
# labels = torch.tensor(Y_train)
epochs = 5000
for epoch in range(epochs):

    
    #前向传播
    outputs = model(inputs)
    
    #计算loss
    loss = criterion(outputs, labels.view(-1,1))
    
    #梯度清零
    optimizer.zero_grad()

    #反向传播
    loss.backward()

    #更新参数
    optimizer.step()
    
    if epoch % 100 == 0:
        print('epoch {}, loss {}'.format(epoch, loss.item()))
        
    
    

epoch 0, loss 585.7290649414062
epoch 100, loss 14.637965202331543
epoch 200, loss 9.695610046386719
epoch 300, loss 7.963910102844238
epoch 400, loss 6.784719467163086
epoch 500, loss 5.989674091339111
epoch 600, loss 5.308857440948486
epoch 700, loss 4.740423679351807
epoch 800, loss 4.2938127517700195
epoch 900, loss 3.983525037765503
epoch 1000, loss 3.7086453437805176
epoch 1100, loss 3.4333131313323975
epoch 1200, loss 3.166347026824951
epoch 1300, loss 2.9186582565307617
epoch 1400, loss 2.683389902114868
epoch 1500, loss 2.464951515197754
epoch 1600, loss 2.241133213043213
epoch 1700, loss 2.0261950492858887
epoch 1800, loss 1.8405084609985352
epoch 1900, loss 1.6862565279006958
epoch 2000, loss 1.5421223640441895
epoch 2100, loss 1.4378657341003418
epoch 2200, loss 1.3430838584899902
epoch 2300, loss 1.2618528604507446
epoch 2400, loss 1.1689767837524414
epoch 2500, loss 1.1014906167984009
epoch 2600, loss 1.0422548055648804
epoch 2700, loss 0.9792011976242065
epoch 2800, loss

In [95]:
#### 梯度累计
# inputs = torch.tensor(X_train,dtype=torch.float32)
# labels = torch.tensor(Y_train,dtype=torch.float32)
# model.zero_grad()
# epochs = 10000
# for epoch in range(epochs):

    
#     #前向传播
#     outputs = model(inputs),在正常情况下，nimi-batch数据需要更新，这里用了全量数据
    
#     #计算loss
#     loss = criterion(outputs, labels.view(-1,1))
    


#     #反向传播
#     loss.backward()

    
#     if epoch % 100 == 0:
#             #更新参数
#         optimizer.step()
#             #梯度清零
#         optimizer.zero_grad()
#         print('epoch {}, loss {}'.format(epoch, loss.item()))
        
    

In [96]:
input_ = torch.tensor(X_test[5],dtype=torch.float32).view(1,-1)
print(model(input_))
print(Y_test[5])

tensor([[21.9926]], grad_fn=<AddmmBackward>)
19.9


In [27]:
input_ = torch.tensor(X_test,dtype=torch.float32)
predicts = model(input_)


In [28]:
# predicts.numpy()

In [97]:
from sklearn.metrics import r2_score

In [98]:
r2_score(predicts.detach().numpy(),Y_test)

0.8898898827531612