# torch.tensor与numpy.array、python list之间的相互转化

In [2]:
import torch
import numpy as np
ts = torch.tensor([1,2,3])  #构造tensor
arr = np.array([1,2,3]) #构造array
lst = [1,2,3] #构造list

# tensor->list
ts.tolist()
# array->list
arr.tolist()
# list->tensor
torch.tensor(lst)
# array->tensor
torch.from_numpy(arr)
# tensor->array
ts.numpy()
"""
注:tensor通常会被我们放到GPU上进行高效运算，在转换时通常先转到cpu上，同时转成array之后希望做的操作通常是非模型操作，所以使用detach从计算图中拿出
使用：ts.cpu().detach().numpy()
"""
#list->array
np.array(lst)

array([1, 2, 3])

# 模型的输入构造pipeline

In [46]:
import torch
from torch.utils.data import Dataset,DataLoader
import random
"""
假设我现在有100条数据，每条数据是一个8维向量，表示输入特征，根据这些特征来判断label
假设我现在的任务是根据一个8维向量来预测输出是0还是1
torch.randn（[shape]）用来生成随机数字的tensor，这些随机数字满足标准正态分布N（0, 1）。
"""
myData_feature = torch.randn([100,8])
myData_label = torch.tensor([random.randint(0,1) for _ in range(100)],dtype = torch.float32)
class MyDataset(Dataset): # 需要继承Dataset
    """
    以下三个函数为自己构造Dataset类时必须定义的三个函数
    """
    def __init__(self,feature,label):
        self.myData_feature = feature
        self.myData_label = label
    def __getitem__(self,index):
        """
        输入一个索引index，能获取到这个数据集中第index条数据，若是训练集，通常也返回对应的label
        """
        return self.myData_feature[index],self.myData_label[index]
    def __len__(self):   #返回数据集总大小，DataLoader读取长度与其保持一致
        return len(self.myData_label)   # 通常，我们的数据集是一条数据对应一个label，因此数据集的大小和label的大小一样
    
myDataset = MyDataset(myData_feature,myData_label)
print("第一条数据的样子：{}".format(myDataset[0]))  # 自动调用__getitem__函数
print("我的数据集长度为：{}".format(len(myDataset)))  # 自动调用__len__函数

# 构建Dataloader
myDataloader = DataLoader(myDataset,batch_size = 4,shuffle = True)   # batch_size为批大小，就是我们通常说的minibatch，梯度下降法更新参数时，
                                                                      #  用的梯度是一批数据的平均梯度
for batch in myDataloader:
    print("一批数据：{}".format(batch))
    print("一批数据的特征形状：{}".format(batch[0].shape))
    print("一批数据的标签形状：{}".format(batch[1].shape))
    break

第一条数据的样子：(tensor([ 2.2422,  0.6942, -0.1510,  0.3743,  0.8162, -0.5271,  1.7544, -0.7739]), tensor(0.))
我的数据集长度为：100
一批数据：[tensor([[-1.3681,  2.2397, -0.9045, -1.0270, -0.7246, -0.5111,  1.0600, -0.1699],
        [ 0.7327, -1.2467,  0.3584,  0.2602,  0.7247, -0.3379,  0.7422, -0.0888],
        [-0.2866, -1.0838, -2.1790, -0.2982, -0.4854, -0.1753, -1.0616, -0.2699],
        [ 1.0127, -0.7313,  0.2761, -0.5202, -0.3447,  1.2014, -0.5099,  1.0688]]), tensor([1., 0., 1., 1.])]
一批数据的特征形状：torch.Size([4, 8])
一批数据的标签形状：torch.Size([4])


# 模型构建pipeline

In [47]:
import torch.nn as nn

# 构建一个模型，将“模型的输入构造pipeline”这部分的可执行代码生成的数据作为训练数据，由于是二分类任务，采用逻辑回归即可
class myModel(nn.Module):
    def __init__(self,input_dim,output_dim):  
        super(myModel,self).__init__()
        self.linear = nn.Linear(input_dim,output_dim)
        self.acti = nn.Sigmoid()
    def forward(self,X):
        output = self.acti(self.linear(X))
        return output
model = myModel(8,1)  # 目的是根据8维的特征，生成一维的标签
# model(myData_feature[0])
print("第一条数据的特征输出模型，运算结果为：{}".format(model(myData_feature[0])))
for batch in myDataloader:
    print("第一批数据的特征输出模型，运算结果为：{}".format(model(batch[0])))   # batch实际上是一个tuple，第一个元素是feature，第二个元素是label，
                                                                                # 这与我们定义getitem函数的返回值有关
    break

第一条数据的特征输出模型，运算结果为：tensor([0.3255], grad_fn=<SigmoidBackward0>)
第一批数据的特征输出模型，运算结果为：tensor([[0.5972],
        [0.2588],
        [0.6374],
        [0.3190]], grad_fn=<SigmoidBackward0>)


# 模型训练pipeline

In [103]:
import torch.optim as optim  # pytorch的优化器模块
import torch.nn as nn

loss_fn = nn.MSELoss()  # 使用 L2loss
optimizer = optim.SGD(model.parameters(),lr = 1e-2)   # 定义优化器，传入模型参数，学习率lr，常用优化器为optim.SGD, optim.Adam
num_epochs = 1000
model.train()
for epoch in range(num_epochs): # 一个epoch表示一轮训练，完整的使用一遍参数，几个epoch就扫几次数据
    epoch_loss = 0.0  # 统计一个epoch总的loss，输出方便看模型效果，也可以统计其他信息，如每个step的loss放在一个list中，方便画图看训练过程
    
    for batch in myDataloader:
        output = model(batch[0])  #首先进行forward过程，算出当前模型的输出
        loss = loss_fn(output,batch[1])  #利用当前模型的输出和数据的label计算损失
        epoch_loss+=loss.item()  # 将损失加到我想看的变量上
        loss.backward()   # 进行backward计算，计算梯度
        optimizer.step() # 利用计算出的梯度更新模型参数
        optimizer.zero_grad()  # pytorch的optimizer梯度会累积，所以每个step需要将梯度清空，不影响下一次的操作
    print("Epoch:{}, loss:{}".format(epoch+1,epoch_loss))  # 一定程度输出一次训练相关信息，也可以每10个或每100个step输出一次，根据训练速度来
    

Epoch:1, loss:6.191679671406746
Epoch:2, loss:6.259651482105255
Epoch:3, loss:6.23063451051712
Epoch:4, loss:6.150608032941818
Epoch:5, loss:6.150680527091026
Epoch:6, loss:6.31143981218338
Epoch:7, loss:6.1301189959049225
Epoch:8, loss:6.230892017483711
Epoch:9, loss:6.183418199419975
Epoch:10, loss:6.226795494556427
Epoch:11, loss:6.180679336190224
Epoch:12, loss:6.1967426389455795
Epoch:13, loss:6.217332407832146
Epoch:14, loss:6.1811216324567795
Epoch:15, loss:6.20612071454525
Epoch:16, loss:6.160012513399124
Epoch:17, loss:6.224044904112816
Epoch:18, loss:6.1859928369522095
Epoch:19, loss:6.215324357151985
Epoch:20, loss:6.258609846234322
Epoch:21, loss:6.242331355810165
Epoch:22, loss:6.2004929929971695
Epoch:23, loss:6.252199470996857
Epoch:24, loss:6.135031193494797
Epoch:25, loss:6.298722118139267
Epoch:26, loss:6.189524248242378
Epoch:27, loss:6.182799771428108
Epoch:28, loss:6.258231684565544
Epoch:29, loss:6.14731852710247
Epoch:30, loss:6.151639103889465
Epoch:31, loss:6.2

Epoch:248, loss:6.222890853881836
Epoch:249, loss:6.197977274656296
Epoch:250, loss:6.158413603901863
Epoch:251, loss:6.302483975887299
Epoch:252, loss:6.233510404825211
Epoch:253, loss:6.180692076683044
Epoch:254, loss:6.168416514992714
Epoch:255, loss:6.23874306678772
Epoch:256, loss:6.269084200263023
Epoch:257, loss:6.227123245596886
Epoch:258, loss:6.19400680065155
Epoch:259, loss:6.174692541360855
Epoch:260, loss:6.105771094560623
Epoch:261, loss:6.2879489958286285
Epoch:262, loss:6.1129355281591415
Epoch:263, loss:6.247320860624313
Epoch:264, loss:6.201350763440132
Epoch:265, loss:6.24216715991497
Epoch:266, loss:6.20621033012867
Epoch:267, loss:6.17166505753994
Epoch:268, loss:6.237898766994476
Epoch:269, loss:6.2078996151685715
Epoch:270, loss:6.204673260450363
Epoch:271, loss:6.249122902750969
Epoch:272, loss:6.1955112516880035
Epoch:273, loss:6.185748353600502
Epoch:274, loss:6.282337874174118
Epoch:275, loss:6.1543926149606705
Epoch:276, loss:6.227121993899345
Epoch:277, los

Epoch:498, loss:6.216079398989677
Epoch:499, loss:6.289544895291328
Epoch:500, loss:6.227234393358231
Epoch:501, loss:6.230939507484436
Epoch:502, loss:6.2090741991996765
Epoch:503, loss:6.183747470378876
Epoch:504, loss:6.1790996342897415
Epoch:505, loss:6.217067375779152
Epoch:506, loss:6.190540045499802
Epoch:507, loss:6.103445902466774
Epoch:508, loss:6.285639509558678
Epoch:509, loss:6.160823673009872
Epoch:510, loss:6.237912356853485
Epoch:511, loss:6.209683895111084
Epoch:512, loss:6.204653695225716
Epoch:513, loss:6.253494217991829
Epoch:514, loss:6.1895791888237
Epoch:515, loss:6.209912985563278
Epoch:516, loss:6.2390627562999725
Epoch:517, loss:6.1432215720415115
Epoch:518, loss:6.185603320598602
Epoch:519, loss:6.174490720033646
Epoch:520, loss:6.262667596340179
Epoch:521, loss:6.189312368631363
Epoch:522, loss:6.261126533150673
Epoch:523, loss:6.260770425200462
Epoch:524, loss:6.2207818031311035
Epoch:525, loss:6.200330227613449
Epoch:526, loss:6.159565851092339
Epoch:527, 

Epoch:745, loss:6.230482116341591
Epoch:746, loss:6.202311769127846
Epoch:747, loss:6.260981097817421
Epoch:748, loss:6.173250138759613
Epoch:749, loss:6.190250188112259
Epoch:750, loss:6.286184579133987
Epoch:751, loss:6.235432997345924
Epoch:752, loss:6.229233041405678
Epoch:753, loss:6.228306457400322
Epoch:754, loss:6.17940878868103
Epoch:755, loss:6.243790954351425
Epoch:756, loss:6.219336435198784
Epoch:757, loss:6.196612164378166
Epoch:758, loss:6.2233913242816925
Epoch:759, loss:6.209858030080795
Epoch:760, loss:6.2134106159210205
Epoch:761, loss:6.205882206559181
Epoch:762, loss:6.2162038534879684
Epoch:763, loss:6.2632865607738495
Epoch:764, loss:6.192164182662964
Epoch:765, loss:6.166012793779373
Epoch:766, loss:6.252973422408104
Epoch:767, loss:6.26440754532814
Epoch:768, loss:6.133538469672203
Epoch:769, loss:6.194201320409775
Epoch:770, loss:6.2100353091955185
Epoch:771, loss:6.177235171198845
Epoch:772, loss:6.286980003118515
Epoch:773, loss:6.144144997000694
Epoch:774, 

Epoch:993, loss:6.1833851635456085
Epoch:994, loss:6.1690171509981155
Epoch:995, loss:6.162021264433861
Epoch:996, loss:6.18581622838974
Epoch:997, loss:6.249984860420227
Epoch:998, loss:6.174445375800133
Epoch:999, loss:6.178466618061066
Epoch:1000, loss:6.232809603214264


In [104]:
myDataset[0]

(tensor([ 2.2422,  0.6942, -0.1510,  0.3743,  0.8162, -0.5271,  1.7544, -0.7739]),
 tensor(0.))

In [105]:
model(myDataset[0][0])

tensor([0.5006], grad_fn=<SigmoidBackward0>)