# 动手去学习深度学习_pytorch

## 简单实现线性回归

In [152]:
import torch
import torch.nn as nn
import numpy as np

In [153]:
# 真实参数 w,b
w0 = torch.tensor([2.5,3])
b0 = 1
n_input = 2
n_examples = 1000

In [154]:
# build X and y
X = torch.tensor(np.random.normal(0,1,(n_examples,n_input)))
labels = X[:,0]*w0[0]+X[:,1]*w0[1]+b0
labels = labels+torch.tensor(np.random.normal(0,0.01,labels.size()))

In [155]:
# build data loader
dataset = torch.utils.data.TensorDataset(X,labels)
dataloader = torch.utils.data.DataLoader(
            dataset = dataset,
            batch_size = 10,
            shuffle = True)

In [156]:
linear_model = torch.nn.Sequential(
    nn.Linear(n_input,1))

In [157]:
# init
from torch.nn import init

init.normal_(linear_model[0].weight,mean = 0,std = 0.01)
init.constant_(linear_model[0].bias,val = 0)

Parameter containing:
tensor([0.], requires_grad=True)

In [158]:
for param in linear_model.parameters():
    print(param)

Parameter containing:
tensor([[-0.0171, -0.0097]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)


In [159]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(linear_model.parameters(),lr=0.03)

In [161]:
# train
num_epochs = 5
for i in range(num_epochs):
    for inputs,labels in dataloader:
        inputs = inputs.float()
        outputs = linear_model(inputs)
        labels = labels.float()
        print(labels)
        print(labels.view(-1,1))
        loss = criterion(outputs,labels.view(-1,1))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        break
    print("epoch %d, loss: %f" %(i+1,loss.item()))

tensor([ 5.8189,  0.1676, -3.2272, -5.5785, -4.2469,  1.2893,  5.3571,  3.6089,
         0.4966,  1.9021])
tensor([[ 5.8189],
        [ 0.1676],
        [-3.2272],
        [-5.5785],
        [-4.2469],
        [ 1.2893],
        [ 5.3571],
        [ 3.6089],
        [ 0.4966],
        [ 1.9021]])
epoch 1, loss: 0.000065
tensor([ 1.3533,  0.6045,  0.9706,  5.0472, -2.1584,  0.0159, -0.9849,  7.5362,
        -1.8615, -1.5185])
tensor([[ 1.3533],
        [ 0.6045],
        [ 0.9706],
        [ 5.0472],
        [-2.1584],
        [ 0.0159],
        [-0.9849],
        [ 7.5362],
        [-1.8615],
        [-1.5185]])
epoch 2, loss: 0.000092
tensor([-6.1374, -0.9583,  0.9985, -8.3147, -2.1810,  2.9151,  0.0398,  6.2411,
         4.3393,  5.0756])
tensor([[-6.1374],
        [-0.9583],
        [ 0.9985],
        [-8.3147],
        [-2.1810],
        [ 2.9151],
        [ 0.0398],
        [ 6.2411],
        [ 4.3393],
        [ 5.0756]])
epoch 3, loss: 0.000103
tensor([ 3.8316,  4.7838, -1.6300,

In [149]:
linear_model[0].weight

Parameter containing:
tensor([[2.4961, 2.9972]], requires_grad=True)

In [150]:
linear_model[0].bias

Parameter containing:
tensor([1.0081], requires_grad=True)

## 模拟梯度爆炸和弥散

In [199]:
import torch
import torch.nn
import numpy as np

In [200]:
# parameters
input_size = 100
hidden_size = 100

In [211]:
class net(nn.Module):
    
    def __init__(self,hidden_size):
        super(net,self).__init__()
        self.linears = nn.ModuleList([nn.Linear(hidden_size,hidden_size,bias = False) for i in range(100)])
        
    def forward(self,x):
        for linear in self.linears:
            x = linear(x)
        return x 
    
    def initialize(self):
        for m in self.modules():
            if isinstance(m,nn.Linear):
                nn.init.normal_(m.weight.data,std = np.sqrt(hidden_size) )

In [212]:
x = torch.rand((16,256))
model = net(256)
model.initialize()

out = model(x)
print(out)

tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], grad_fn=<MmBackward>)


## 简单实现softmax

In [22]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.nn import init

In [23]:
# hyperparameter
input_size = 784 # 28*28
out_size = 10 # 10 classes
learning_rate = 0.1
num_epochs = 5

In [24]:
# get data
train_data = torchvision.datasets.FashionMNIST("input/train",train=True,download=True,transform=transforms.ToTensor())
test_data = torchvision.datasets.FashionMNIST("input/test",train=False,download=True,transform=transforms.ToTensor())

In [25]:
# build dataloader
train_loader = torch.utils.data.DataLoader(dataset=train_data,shuffle=True,batch_size=256)
test_loader = torch.utils.data.DataLoader(dataset=test_data,shuffle=False,batch_size = 256)

In [26]:
# build the net
class net(nn.Module):
    def __init__(self,input_size,out_size):
        super(net,self).__init__()
        self.input_size = input_size
        self.out_size = out_size
        self.fc = nn.Linear(input_size,out_size,bias=True)
    def forward(self,X):
        out = X.view(-1,input_size)
        out = self.fc(out)
        return out
    
    def initialize(self):
        for m in self.modules():
            if isinstance(m,nn.Linear):
                init.normal_(m.weight.data,mean=0,std=0.01)
                init.constant_(m.bias.data, val = 0)

In [27]:
model = net(input_size,out_size)

In [28]:
# net init
model.initialize()

In [29]:
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 0.0034,  0.0021,  0.0015,  ..., -0.0029, -0.0004,  0.0123],
        [-0.0127,  0.0049,  0.0074,  ..., -0.0005, -0.0025,  0.0114],
        [-0.0012,  0.0088,  0.0120,  ...,  0.0076, -0.0146, -0.0200],
        ...,
        [ 0.0009,  0.0022, -0.0154,  ...,  0.0014, -0.0040,  0.0034],
        [-0.0011, -0.0027,  0.0058,  ...,  0.0093, -0.0009, -0.0037],
        [-0.0066,  0.0066, -0.0053,  ..., -0.0147,  0.0143, -0.0083]],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)


In [30]:
# cost function and optimzer
criterion = nn.CrossEntropyLoss()
optimzer = torch.optim.SGD(model.parameters(),lr= learning_rate)

In [31]:
# train model
for epoch in range(num_epochs):
    for inputs,labels in train_loader:
        
        outputs = model(inputs)
        #print(outputs.size())
        #print(labels.size())  # 应当只具有batch这一个维度 or 和outputsize一样
        loss = criterion(outputs,labels)
        
        optimzer.zero_grad()
        loss.backward()
        optimzer.step()
    print("epoch:{}, loss为:{}".format(epoch+1,loss.item()))

epoch:1, loss为:0.7835202217102051
epoch:2, loss为:0.5758370757102966
epoch:3, loss为:0.45879819989204407
epoch:4, loss为:0.5836923122406006
epoch:5, loss为:0.34031832218170166


## 多层感知机

In [4]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.nn import init

In [5]:
batch_size = 256
learning_rate = 0.1
input_size = 784
hidden_size = 100
output_size = 10

In [6]:
# get data
train_data = torchvision.datasets.FashionMNIST("input/train/",train = True,download=True,transform=transforms.ToTensor())
test_data = torchvision.datasets.FashionMNIST("input/train/",train = False,download=True,transform=transforms.ToTensor())

In [7]:
# build dataloader
train_loader = torch.utils.data.DataLoader(dataset=train_data,shuffle=True,batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=test_data,shuffle=False,batch_size=batch_size)

In [10]:
#build network
class net(nn.Module):
    def __init__(self,input_size,hidden_size,output_size):
        super(net,self).__init__()
        self.fc1 = nn.Linear(input_size,hidden_size,bias=True)
        self.fc2 = nn.Linear(hidden_size,output_size)
    def forward(self,X):
        output = X.view(-1,input_size)
        output = self.fc1(output)
        output = self.fc2(output)
        return output

In [13]:
model = net(input_size,hidden_size,output_size)

In [16]:
model.parameters

<bound method Module.parameters of net(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)>

In [17]:
# cost function and optimizer
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)