In [1]:
from torch.utils.data import Dataset
import torch
x=torch.linspace(-1,1,10)
y=x**2
print(x)



tensor([-1.0000, -0.7778, -0.5556, -0.3333, -0.1111,  0.1111,  0.3333,  0.5556,
         0.7778,  1.0000])


In [6]:
#Custom dataset
class Simpledataset(Dataset):
    def __init__(self,x,y):
        super().__init__()
        self.x=x
        self.y=y
    def __getitem__(self,index):
        return self.x[index],self.y[index]
    def __len__(self):
        return len(self.x)
    
dataset=Simpledataset(x,y)

In [6]:
#pytorch自带数据集
#如果需要transform需要导入torchvision.transforms并且自定义transform函数
#import torchvision.transforms as transforms


from torchvision import datasets
train_dataset=datasets.MNIST(root=r'C:\Users\sx119\OneDrive\桌面\kaggle playground\Pytorch practice\MNIST_train',train=True,transform=None,download=True)

In [7]:
#查看已有数据集
#数据集类型
print("type(train_dataset):{}".format(type(train_dataset)))
#每一个数据的样式
#(<PIL.Image.Image image mode=L size=28x28 at 0x169D273BA30>, 5)
#tuple: (image,label)
print("train_dataset[0]:{}".format(train_dataset[0]))



type(train_dataset):<class 'torchvision.datasets.mnist.MNIST'>
train_dataset[0]:(<PIL.Image.Image image mode=L size=28x28 at 0x1CD044D4E50>, 5)


In [41]:
#此处用来展示如何自定义transform
import torchvision.transforms as transforms
transform=transforms.Compose([
    #随机裁剪
    #transforms.RandomResizedCrop(size=(224,224)),
    #裁剪
    #transforms.Resize(size=(224,224)),
    #随机水平翻转
    #transforms.RandomHorizontalFlip(),
    #变为张量
    transforms.ToTensor(),
    #归一化,前面是均值，后面是方差，每一个通道对应一个均值和方差
    #在这个数据库中，图片是灰度图片，故只有一个通道，所以均值和方差都是一个数
    transforms.Normalize((0.5),(0.5))
])
train_data_transform=datasets.MNIST(root=r'C:\Users\sx119\OneDrive\桌面\kaggle playground\Pytorch practice\MNIST_train',train=False,transform=transform,download=True)
#可以看到，tuple中的image已经变成了tensor，且已经归一化了，tuple中的label没有变化，label是第二个元素
print("train_data_transform[0]:{}".format(train_data_transform[0]))


train_data_transform[0]:(tensor([[[-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
         [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
         [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
         [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
          -1.0000, -1.0

In [36]:
#Dataloader
from torch.utils.data import DataLoader
#在使用loader之前，需要将dataset中非tensor的数据（非结构化的数据）转换为tensor
train_loader = DataLoader(
    dataset=train_data_transform,
    batch_size=64,
    shuffle=True,#不同的epoch之间顺序会被打乱
)
#查看每一个batch的数据类型
#每一个batch都是一个长度为2的list，第一个元素是图像转变的tensor，第二个元素是label
#torch.Size([64, 1, 28, 28])，64是batch_size，1是通道数，28是图像的长和宽
#Batch里的label已经变为了tensor
for batch in train_loader:
    images,labels=batch
    print("type(batch):{}".format(type(batch)))
    print("len(batch):{}".format(len(batch)))
    #第一张图片的tensor
    #print(images[0])
    print("images.shape:{}".format(images.shape))
    print("labels.shape:{}".format(labels.shape))
    break
#Batch本身是iterable的，可以使用python自带的enumerate函数来查看每一个batch的index,也可以用这种方法查看每一个batch的信息
for index,(image,label) in enumerate(train_loader):
    print("index:{}".format(index))
    print("image.shape:{}".format(image[index].shape))
    print("label.shape:{}".format(label[index].shape))
    break#只查看第一个batch的信息


type(batch):<class 'list'>
len(batch):2
images.shape:torch.Size([64, 1, 28, 28])
labels.shape:torch.Size([64])
index:0
image.shape:torch.Size([1, 28, 28])
label.shape:torch.Size([])


In [58]:
#Model
from torch import nn
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()#继承父类的构造函数
        self.flatten = nn.Flatten()#将tensor展平
        #定义网络结构，选用激活函数，定义卷积层，全连接层，池化层等
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )#inplace=True表示直接在原来的内存上进行操作，不会开辟新的内存
        
    def forward(self,x):
        #定义前向传播
        x=self.flatten(x)
        logits=self.linear_relu_stack(x)
        

        return logits

In [62]:
#调用model,实现一次前向传播，并计算loss
model=Model()
x=train_data_transform[0][0]
y_hat=model.forward(x)
loss=nn.MSELoss()(y_hat,label[0])
loss.item()

35.66476058959961

In [51]:
#optimizer:优化器。优化器的作用是根据loss来更新网络的参数。常用的优化器有SGD，Adam等

from torch import optim
optimizer=optim.SGD(params=model.parameters(),lr=0.01,momentum=0.9)

In [67]:
#training
#结构化的训练，格式非常固定。
#1.清空梯度
#2.前向传播
#3.计算loss
#4.反向传播
#5.更新参数
#6.打印loss
#7.重复1-6步骤

from tqdm import tqdm#进度条
from torch.nn.functional import one_hot
epochs=10
for epoch in range(epochs):
    with tqdm(train_loader,desc="EPOCH: {}".format(epoch)) as train_bar:
        for (image,label) in train_bar:
            #我们需要将label转换为one-hot编码，因为我们的网络的输出是one-hot编码。否则会报错：
            # The size of tensor a (14) must match the size of tensor b (64) at non-singleton dimension 3
            y = one_hot(label, num_classes=10).type(torch.float)
            optimizer.zero_grad()
            output=model(image)
            loss=nn.CrossEntropyLoss()(output,y)
            loss.backward()
            optimizer.step()#更新参数
    print ("loss:{}".format(loss.item()))#loss.item()是将loss转换为python中的数值类型

#储存weights
torch.save(model.state_dict(),r'C:\Users\sx119\OneDrive\桌面\kaggle playground\Pytorch practice\weights.pth')

EPOCH: 0: 100%|██████████| 157/157 [00:01<00:00, 140.61it/s]


loss:2.3214809894561768


EPOCH: 1: 100%|██████████| 157/157 [00:01<00:00, 141.64it/s]


loss:2.2823400497436523


EPOCH: 2: 100%|██████████| 157/157 [00:01<00:00, 141.09it/s]


loss:2.2510695457458496


EPOCH: 3: 100%|██████████| 157/157 [00:01<00:00, 141.85it/s]


loss:2.258948802947998


EPOCH: 4: 100%|██████████| 157/157 [00:01<00:00, 147.50it/s]


loss:2.28664493560791


EPOCH: 5: 100%|██████████| 157/157 [00:01<00:00, 153.47it/s]


loss:2.2572824954986572


EPOCH: 6: 100%|██████████| 157/157 [00:01<00:00, 152.35it/s]


loss:2.3073363304138184


EPOCH: 7: 100%|██████████| 157/157 [00:01<00:00, 154.65it/s]


loss:2.3282628059387207


EPOCH: 8: 100%|██████████| 157/157 [00:01<00:00, 154.02it/s]


loss:2.3134541511535645


EPOCH: 9: 100%|██████████| 157/157 [00:01<00:00, 154.21it/s]

loss:2.2898576259613037



