In [1]:
import numpy as np
import torch 


"""numpy 和 pytorch 之间的数据转化"""

mat = np.arange(6).reshape((2, 3))
# array to tensor
torch_mat = torch.from_numpy(mat)
# tensor to array
torch_np = torch_mat.numpy()
print(mat,torch_mat,torch_np,mat.T,sep='\n\n')

[[0 1 2]
 [3 4 5]]

tensor([[0, 1, 2],
        [3, 4, 5]], dtype=torch.int32)

[[0 1 2]
 [3 4 5]]

[[0 3]
 [1 4]
 [2 5]]


In [2]:
"""
torch中的任意维度数据类型都是tensor，
而numpy中一维度叫array，二维叫ndarray,

在torch中矩阵的运算很方便

创建tensor时候有个属性.requires_grad默认为False，如果选为True可以追踪计算历史
然后在变量计算后生成的新变量可以用 新变量.grad_fn 来追踪其计算历史
"""
x = torch.ones(2,2,requires_grad=True)
print('\nx',x)
y = x + 2
print('\ny:',y,y.grad_fn)
z = y * y * 3
out = z.mean()
print('\nz:',z,
    '\nout:',z.mean())


x tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

y: tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>) <AddBackward0 object at 0x00000265154B3AC8>

z: tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) 
out: tensor(27., grad_fn=<MeanBackward1>)


In [18]:
"""用numpy实现网络"""
# 输入的批大小N，输入的维度D_in，隐藏层维度H，输出维度D_out
# 64个样本，每个样本1000个特征，中间层100个特征，输出层10个特征
N,D_in,H,D_out = 64, 1000, 100, 10

# 随机产生随机数
x = np.random.randn(N, D_in) # 64行，1000列
y = np.random.randn(N, D_out) # 64行，10列

# 随机初始化权重
w1 = np.random.randn(D_in, H) # 1000行，100列
w2 = np.random.randn(H, D_out) # 100行，10列

learning_rate = 1e-6
for t in range(500): # 500次迭代
    # 向前传播，预测y的值
    h = x.dot(w1) # 中间层的z值  64x1000 dot 1000x100 = 64x100
    h_relu = np.maximum(h,0) # 中间层的a值（激活值） 64x100
    y_pred = h_relu.dot(w2) # 输出层的值 64x10
    
    # 计算并显示loss,这里采用的是取方差
    loss = np.square(y_pred-y).sum()
    print(t, loss)
    
    # 反向传播，计算w1,w2对loss的梯度，利用导数的链式法则
    # 先对平方求导  64x10
    grad_y_pred = 2.0 * (y_pred - y) 
    # 把参数按权重反向传播，算出w2的梯度
    grad_w2 = h_relu.T.dot(grad_y_pred) 
    # 计算梯度
    grad_h_relu = grad_y_pred.dot(w2.T) 
    # 
    grad_h = grad_h_relu.copy()
    grad_h[h<0] = 0
    grad_w1 = x.T.dot(grad_h)
    
    # 更新权重
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 27275346.989853866
1 21175848.431020647
2 20940355.309204634
3 23316748.071630508
4 25832739.266779028
5 25839688.64673529
6 22066968.241072647
7 15695288.747608311
8 9555024.612931557
9 5313496.392971631
10 2918037.594058994
11 1694393.3367524447
12 1080415.791046992
13 761388.259470461
14 581926.7076798596
15 470493.50565662875
16 394014.06617179955
17 337225.0154756553
18 292431.6295350407
19 255765.9943350961
20 225093.87124263187
21 198977.92309881657
22 176548.147471287
23 157153.94168475753
24 140295.85979749827
25 125553.10919031143
26 112635.67593339214
27 101259.2160791789
28 91218.39369776208
29 82327.93054734654
30 74430.93763566963
31 67401.35626086469
32 61136.419079536776
33 55532.37253042846
34 50508.4362019657
35 45998.37064222544
36 41940.21972642483
37 38284.9129958189
38 34988.04465088323
39 32008.729149069157
40 29312.128200055584
41 26869.27269731776
42 24654.505898183164
43 22641.700397771332
44 20806.212112634552
45 19135.907210116653
46 17613.766088584878
47 

407 0.0001146434357300437
408 0.00010948499917930045
409 0.00010456040993488657
410 9.985712411421443e-05
411 9.536570429629401e-05
412 9.107796628303336e-05
413 8.698274679917755e-05
414 8.307190482248955e-05
415 7.9337937061244e-05
416 7.577310742148206e-05
417 7.23681482883362e-05
418 6.911691710653945e-05
419 6.601171645368303e-05
420 6.304641307755195e-05
421 6.0214529173295775e-05
422 5.751032535573901e-05
423 5.492826785608632e-05
424 5.246296327828158e-05
425 5.0108358730413554e-05
426 4.785901148523994e-05
427 4.571108562597938e-05
428 4.366005171021715e-05
429 4.170116186186632e-05
430 3.983054847847403e-05
431 3.804417665303538e-05
432 3.633794475946574e-05
433 3.47085043228311e-05
434 3.31521943512635e-05
435 3.166588047618108e-05
436 3.0246412047580553e-05
437 2.8890731109288462e-05
438 2.7596247668322298e-05
439 2.6359567476730547e-05
440 2.517854581470041e-05
441 2.4050584137987057e-05
442 2.297329413084175e-05
443 2.19442975879289e-05
444 2.096153264134652e-05
445 2.002

In [20]:
"""用pytorch实现网络"""
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 

# N是批大小； D_in 是输入维度；
# H 是隐藏层维度； D_out 是输出维度
N, D_in, H, D_out = 64, 1000, 100, 10

# 产生随机输入和输出数据
x = torch.randn(N, D_in, device=device)
y = torch.randn(N, D_out, device=device)

# 随机初始化权重
w1 = torch.randn(D_in, H, device=device)
w2 = torch.randn(H, D_out, device=device)

learning_rate = 1e-6
for t in range(500):
    # 前向传播：计算预测值y
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    # 计算并输出loss；loss是存储在PyTorch的tensor中的标量，维度是()（零维标量）；
    # 我们使用loss.item()得到tensor中的纯python数值。
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())

    # 反向传播，计算w1、w2对loss的梯度
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # 使用梯度下降更新权重
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 45886568.0
1 46342152.0
2 41720284.0
3 28739322.0
4 15301122.0
5 7181843.5
6 3744319.5
7 2369647.75
8 1748471.0
9 1397237.5
10 1158885.75
11 979093.125
12 836441.625
13 720158.0
14 624050.25
15 543797.8125
16 476243.5625
17 418890.96875
18 369969.78125
19 327979.28125
20 291726.46875
21 260294.734375
22 232906.984375
23 208958.328125
24 187943.8125
25 169423.140625
26 153036.53125
27 138511.5625
28 125591.8125
29 114063.21875
30 103755.46875
31 94533.828125
32 86266.4453125
33 78812.375
34 72096.28125
35 66030.9296875
36 60545.3046875
37 55578.3046875
38 51074.109375
39 46981.3125
40 43256.41015625
41 39863.296875
42 36770.625
43 33944.3359375
44 31359.095703125
45 28992.8984375
46 26823.669921875
47 24832.693359375
48 23006.6171875
49 21327.8125
50 19784.52734375
51 18363.767578125
52 17055.4453125
53 15849.50390625
54 14737.306640625
55 13710.15625
56 12761.1005859375
57 11883.603515625
58 11072.1943359375
59 10320.650390625
60 9624.349609375
61 8978.6552734375
62 8379.80078125
63 

394 0.0008938453393056989
395 0.0008666227804496884
396 0.0008395709446631372
397 0.0008116571698337793
398 0.0007874571601860225
399 0.0007630696636624634
400 0.0007412589038722217
401 0.0007185806753113866
402 0.0006988886161707342
403 0.0006767279119230807
404 0.0006575488951057196
405 0.0006391741917468607
406 0.0006194733432494104
407 0.0006013453239575028
408 0.00058400520356372
409 0.0005683242343366146
410 0.0005514936055988073
411 0.0005373413441702724
412 0.0005229495582170784
413 0.000507258519064635
414 0.0004943185485899448
415 0.0004818532906938344
416 0.00046842856681905687
417 0.0004549766017589718
418 0.000442762509919703
419 0.0004309524374548346
420 0.0004199397808406502
421 0.0004088123678229749
422 0.0003981615591328591
423 0.0003873473615385592
424 0.0003775705408770591
425 0.00036835612263530493
426 0.000359603320248425
427 0.0003516644355840981
428 0.0003424056922085583
429 0.0003345429722685367
430 0.00032575681689195335
431 0.0003185480600222945
432 0.00031218

In [28]:
"""使用pytorch的封装模块nn以及自动求导"""
# 还是以上米的两层神经网络为例子
import torch

# 指定是否使用gpu
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

N,D_in,H,D_out = 64, 1000, 100, 10

# 随机产生随机数
x = torch.randn(N, D_in) # 64行，1000列
y = torch.randn(N, D_out) # 64行，10列

# 不必再随机初始化权重
# w1 = torch.randn(D_in, H) # 1000行，100列
# w2 = torch.randn(H, D_out) # 100行，10列

# 使用nn包来将模型定义为一系列的层
# nn.Sequential类似sklearn中的管道,是包含其他模块的模块
# 每个线性模块Linear使用线性的函数从输入计算输出，并保存期内部的权重w和偏差张量div_tensor
# 在构造模型后
model = torch.nn.Sequential(
            torch.nn.Linear(D_in, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, D_out)
            ).to(device)

"""
nn包还有其他损失函数的定义
这里我们使用的是mse平均平方误差的和而不是平均值，保持与上面的实验一致
但是在实际应用中，我们可以使用reduction = 'elementwise_mean'作为损失函数
"""
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
for i in range(500):
    # 1.前向传播
    y_pred = model(x)
    
    # 2.计算并打印损失。传递y和y_pred的张量，损失函数返回包含损失的张量
    loss = loss_fn(y_pred, y)
    print(t, loss.item())
    
    # 3.反向传播之前清零梯度
    model.zero_grad()
    
    # 4.反向传播，计算模型的损失队友对所有科学系参数的导数（梯度）
    # 在内部，每个模块的参数存储在require_grad=True的张量中
    # 因此这个调用将计算模型中所有可学习参数的梯度
    loss.backward()
    
    # 5.使用梯度下降法跟新权重w
    # 每个参数都是张量，但是这里不必要计算x更新的梯度，可以省计算量
    with torch.no_grad():
        for param in model.parameters():
            param.data = param.data - learning_rate*param.grad

    

499 652.4908447265625
499 604.4564208984375
499 562.19384765625
499 525.0216064453125
499 492.122314453125
499 462.45733642578125
499 435.4586486816406
499 410.9239501953125
499 388.46148681640625
499 367.59722900390625
499 348.0813293457031
499 329.853271484375
499 312.6656188964844
499 296.3749694824219
499 280.9075927734375
499 266.1230163574219
499 252.01254272460938
499 238.53399658203125
499 225.77374267578125
499 213.6545867919922
499 202.123779296875
499 191.19961547851562
499 180.77960205078125
499 170.8846893310547
499 161.48512268066406
499 152.5156707763672
499 143.99342346191406
499 135.89967346191406
499 128.22825622558594
499 120.94721221923828
499 114.03925323486328
499 107.49867248535156
499 101.29328155517578
499 95.40764617919922
499 89.85326385498047
499 84.60803985595703
499 79.64813995361328
499 74.96629333496094
499 70.54258728027344
499 66.36783599853516
499 62.43629837036133
499 58.73366928100586
499 55.24304962158203
499 51.957305908203125
499 48.8564720153808

499 0.00026784755755215883
499 0.0002600181906018406
499 0.0002524350129533559
499 0.00024507383932359517
499 0.00023793352011125535
499 0.00023101511760614812
499 0.0002242886257590726
499 0.000217777953366749
499 0.00021144869970157743
499 0.00020530857727862895
499 0.00019934578449465334
499 0.0001935665204655379
499 0.0001879590708995238
499 0.00018251586880069226
499 0.0001772277755662799
499 0.00017210749501828104
499 0.00016713111835997552
499 0.00016230794426519424
499 0.00015761997201479971
499 0.00015306880231946707
499 0.00014865427510812879
499 0.00014436822675634176
499 0.00014020927483215928
499 0.0001361783070024103
499 0.00013225905422586948
499 0.0001284553436562419
499 0.0001247642794623971
499 0.00012118175800424069
499 0.0001176991208922118
499 0.00011432322935434058
499 0.00011104904115200043
499 0.00010786931670736521
499 0.00010477749310666695
499 0.00010177692456636578
499 9.886353655019775e-05
499 9.603731450624764e-05
499 9.329942986369133e-05
499 9.0631612692