In [2]:
# import requests

# url = "Your_URL_here"
# response = requests.get(url)
# with open("file_name.extension", "wb") as file:
#     file.write(response.content)

In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor,Lambda,Compose
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import numpy as np

## 数据读取与预处理

In [4]:
!unzip '/content/drive/MyDrive/深度学习100例/Pytorch版/01cat-dog.zip'

Archive:  /content/drive/MyDrive/深度学习100例/Pytorch版/01cat-dog.zip
   creating: 1-cat-dog/
   creating: 1-cat-dog/train/
   creating: 1-cat-dog/train/cat/
  inflating: 1-cat-dog/train/cat/flickr_cat_000002.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000004.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000005.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000006.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000007.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000009.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000010.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000013.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000014.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000015.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000017.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000018.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000019.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_000020.jpg  
  inflating: 1-cat-dog/train/cat/flickr_cat_00002

In [5]:
ls

[0m[01;34m1-cat-dog[0m/  [01;34mdrive[0m/  [01;34msample_data[0m/


In [8]:
train_datadir='/content/1-cat-dog'
test_datadir='/content/1-cat-dog'

train_transforms=transforms.Compose([
    transforms.Resize([224,224]),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225]
    )
])

test_transforms=transforms.Compose([
    transforms.Resize([224,224]),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225]
    )
])

train_data=datasets.ImageFolder(train_datadir,transform=train_transforms)
test_data=datasets.ImageFolder(test_datadir,transform=test_transforms)

train_loader=torch.utils.data.DataLoader(train_data,batch_size=4,shuffle=True,num_workers=1)
test_loader=torch.utils.data.DataLoader(test_data,batch_size=4,shuffle=True,num_workers=1)

总结：

步骤：

**处理数据-读取数据-包装数据**

1.  transforms.compose
2.  datasets.ImageFolder
3.  torch.utils.data.DataLoader

🤔问题：

**transforms.ToTensor()的作用？**

归一化，
torchvision.transforms.ToTensor，其作用是将数据归一化到[0,1]（是将数据除以255），transforms.ToTensor（）会把HWC会变成C *H *W（拓展：格式为(h,w,c)，像素顺序为RGB）

**transforms.Normalize()中的mean和std是如何确定的？**

[0.485, 0.456, 0.406]这一组平均值是从imagenet训练集中抽样算出来的。

**那么归一化后为什么还要接一个标准化呢?**

Normalize()是对数据按通道进行标准化，即减去均值，再除以方差

别人的解答：数据如果分布在(0,1)之间，可能实际的bias，就是神经网络的输入b会比较大，而模型初始化时b=0的，这样会导致神经网络收敛比较慢，经过Normalize后，可以加快模型的收敛速度。
因为对RGB图片而言，数据范围是[0-255]的，需要先经过ToTensor除以255归一化到[0,1]之后，再通过Normalize计算过后，将数据归一化到[-1,1]。
是否可以这样理解：
[0，1]只是范围改变了，并没有改变分布，mean和std处理后可以让数据正态分布😂
此题参考链接：https://cloud.tencent.com/developer/article/2102114

In [10]:
for X,y in test_loader:
  print("shape of X [N,C,H,W]:",X.shape)
  print("shape of y:",y.shape,y.dtype)
  break

shape of X [N,C,H,W]: torch.Size([4, 3, 224, 224])
shape of y: torch.Size([4]) torch.int64


数据经过DataLoader长啥样？DataLoader的作用？

DataLoader相当于给数据打包，将数据包装成一捆一捆（batch-size）的

## 定义模型

![剖析LeNet-5 网络结构_lenet5卷积神经网络-CSDN博客](https://img-blog.csdnimg.cn/img_convert/a82904732a0f8c3b0c2a2d248fb93b37.png)

![LeNet structure](https://www.datasciencecentral.com/wp-content/uploads/2021/10/1gNzz6vvWmF6tDN6pTRTd9g.jpeg)

In [21]:
import torch.nn.functional as F

#GPU
device="cuda" if torch.cuda.is_available() else "cpu"
print("using {} device".format(device))

#定义模型
class LeNet(nn.Module):
  #定义网络结构所需要的具体操作算子
  def __init__(self):
    super(LeNet,self).__init__()
    # Conv2d的第一个参数是输入的channel数量，第二个是输出的channel数量，第三个是kernel size
    self.conv1=nn.Conv2d(3,6,5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2=nn.Conv2d(6,16,5)
    self.fc1=nn.Linear(16*53*53,120)
    self.fc2=nn.Linear(120,84)
    self.fc3=nn.Linear(84,2)
  #网络结构
  def forward(self,x):
    x=F.relu(self.conv1(x))
    x=self.pool(x)
    x=F.relu(self.conv2(x))
    x=self.pool(x)
    x=x.view(-1,16*53*53) # 这步把二维特征图变为一维，这样全连接层才能处理
    x=F.relu(self.fc1(x))
    x=F.relu(self.fc2(x))
    x=self.fc3(x)
    return x

model=LeNet().to(device)
print(model)

using cpu device
LeNet(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=44944, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=2, bias=True)
)


为啥fc1是16*53*53？
16是通道数
卷积公式：
output_size=(input_size+2*padding-kernel_size)/stride +1


* conv1:(224+0-5)/1+1=220
* pool1:(220=0-5)/2+1=110
* conv2:(110+0-5)/1+1=106
* pool2:(106=0-5)/2+1=53

## 定义损失函数和优化器

In [13]:
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(model.parameters(),lr=1e-3)

## 定义训练函数

In [17]:
def train(dataloader,model,loss_fn,optimizer):
  size=len(dataloader.dataset)
  model.train()
  for batch,(X,y) in enumerate(dataloader):
    X,y=X.to(device),y.to(device)

    #计算预测误差
    pred=model(X)
    loss=loss_fn(pred,y)#parameter:input,target

    #反向传播
    optimizer.zero_grad()# 清空参数梯度
    loss.backward()   # 计算参数的梯度
    optimizer.step()  #  更新参数梯度

    if batch %100 ==0:
      loss,current=loss.item(),batch*len(X)
      print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

**训练函数需要哪些东西？**

1. 数据:dataloader
2. 模型:model
3. 损失函数:loss_fn
4. 优化器:optimizer

**数据的标签y是如何确定的？**

在之前用torch.util.data.DataLoader包装数据时就已确定了y


**训练步骤？**

计算损失-通过反向传播优化/更新参数

    optimizer.zero_grad()# 清空参数梯度
    loss.backward()   # 计算参数的梯度
    optimizer.step()  #  更新参数梯度



##定义测试函数

In [1]:
def test(dataloader,model,loss_fn):
  size=len(dataloader.dataset) #数据集中样本的总数
  num_batches=len(dataloader) #迭代器的批量大小
  model.eval()
  test_loss,correct=0,0
  with torch.no_grad():
    for X,y in dataloader:
      X,y=X.to(device),y.to(device)
      pred=model(X)
      test_loss+=loss_fn(pred,y).item()
      correct+=(pred.argmax(1)==y).type(torch.float).sum().item()#正确的个数
    test_loss/=num_batches #这一批量大小的损失
    correct/=size #整个数据集的平均正确率
    print(f"Test Error:\n Accuracy:{(100*correct):>0.1f}%, Avg loss:{test_loss:>8f}\n")

**测试函数需要哪些东西？**

1. 数据:dataloader
2. 模型:model
3. 损失函数:loss_fn

没有优化器，因为它不需要优化

**pred.argmax(1)==y是什么意思？**

argmax()
（1）针对一维数组：返回数组中最大值元素的索引位置，
这里指的是预测的种类与真实值种类数值一样，也就是预测值=真实值

**核心？**

计算损失和准确率


## 训练

In [23]:
epochs=20
for epoch in range(epochs):
  print(f"Epoch {epoch+1}\n----------------------------")
  train(train_loader,model,loss_fn,optimizer)
  test(test_loader,model,loss_fn)
print("Done!")

Epoch 1
----------------------------
loss: 0.658465  [    0/  600]
loss: 0.605311  [  400/  600]
Test Error:
 Accuracy:80.0%, Avg loss:0.644979

Epoch 2
----------------------------
loss: 0.652341  [    0/  600]
loss: 0.651562  [  400/  600]
Test Error:
 Accuracy:80.0%, Avg loss:0.644979

Epoch 3
----------------------------
loss: 0.616466  [    0/  600]
loss: 0.655541  [  400/  600]
Test Error:
 Accuracy:80.0%, Avg loss:0.644979

Epoch 4
----------------------------
loss: 0.651191  [    0/  600]
loss: 0.609980  [  400/  600]
Test Error:
 Accuracy:80.0%, Avg loss:0.644979

Epoch 5
----------------------------
loss: 0.618232  [    0/  600]
loss: 0.699593  [  400/  600]
Test Error:
 Accuracy:80.0%, Avg loss:0.644979

Epoch 6
----------------------------
loss: 0.606158  [    0/  600]
loss: 0.656435  [  400/  600]
Test Error:
 Accuracy:80.0%, Avg loss:0.644979

Epoch 7
----------------------------
loss: 0.607190  [    0/  600]
loss: 0.702996  [  400/  600]
Test Error:
 Accuracy:80.0%, Avg 