## CHAPTER 1 GET YOUR TORCH READY

In [50]:
import torch
from sklearn.externals.array_api_compat.torch import equal
from torch import dtype
from torch.onnx.symbolic_opset11 import chunk

# 1.torch installation test
print(torch.__version__) #输出版本
print(torch.cuda.is_available()) #输出是否使用cuda
print(torch.version.cuda) #输出CUDA版本
print(torch.cuda.get_device_name()) #输出torch使用的GPU设备

# deep learning with Pytorch 建议使用1070以上 8G显存的卡

2.8.0+cu129
True
12.9
NVIDIA GeForce RTX 4070 Laptop GPU


## CHAPTER 2 RUN SOME PRETRAINED MODELS

In [51]:
import torch
from torchvision import models
#models built in
dir(models)

['AlexNet',
 'AlexNet_Weights',
 'ConvNeXt',
 'ConvNeXt_Base_Weights',
 'ConvNeXt_Large_Weights',
 'ConvNeXt_Small_Weights',
 'ConvNeXt_Tiny_Weights',
 'DenseNet',
 'DenseNet121_Weights',
 'DenseNet161_Weights',
 'DenseNet169_Weights',
 'DenseNet201_Weights',
 'EfficientNet',
 'EfficientNet_B0_Weights',
 'EfficientNet_B1_Weights',
 'EfficientNet_B2_Weights',
 'EfficientNet_B3_Weights',
 'EfficientNet_B4_Weights',
 'EfficientNet_B5_Weights',
 'EfficientNet_B6_Weights',
 'EfficientNet_B7_Weights',
 'EfficientNet_V2_L_Weights',
 'EfficientNet_V2_M_Weights',
 'EfficientNet_V2_S_Weights',
 'GoogLeNet',
 'GoogLeNetOutputs',
 'GoogLeNet_Weights',
 'Inception3',
 'InceptionOutputs',
 'Inception_V3_Weights',
 'MNASNet',
 'MNASNet0_5_Weights',
 'MNASNet0_75_Weights',
 'MNASNet1_0_Weights',
 'MNASNet1_3_Weights',
 'MaxVit',
 'MaxVit_T_Weights',
 'MobileNetV2',
 'MobileNetV3',
 'MobileNet_V2_Weights',
 'MobileNet_V3_Large_Weights',
 'MobileNet_V3_Small_Weights',
 'RegNet',
 'RegNet_X_16GF_Weights'

In [52]:
############### use resnet to recognize pics
resnet=models.resnet101(pretrained=True)
print(resnet)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [53]:
# use transform to make your data input into usable form for the model
from torchvision import transforms
from PIL import Image

preprocess=transforms.Compose([
    transforms.Resize(256), #对图片缩放 256*256
    transforms.CenterCrop(224), #裁切取用中心 224*224
    transforms.ToTensor(), #图片转为张量 RGB
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]) #RGB网络的初始张量均值和标准差
])

test_image=Image.open("./images/border_collie.jpg")
test_image.show()
input=preprocess(test_image)
print(input.shape)

batch_t = torch.unsqueeze(input, 0)

resnet.eval() ##启用evaluate 模式

out = resnet(batch_t)
#print(out) #good! you got a possibility result for each class as a tensor

with open('text_files/imagenet_classes.txt') as f:
    labels = [line.strip() for line in f.readlines()]

print(torch.max(out,1))
ori_value, index = torch.max(out, 1) #取用结果中可能性最大的一个结果
#print(ori_value)
#print(index)  #这两个东西都是一个tensor(像list一样，取值请加[0])
percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100  #将输出层做softmax变换可得到可能性阵
print(labels[index[0]], percentage[index[0]].item())  #最佳预测

_, indices = torch.sort(out, descending=True)
print([(labels[idx], percentage[idx].item()) for idx in indices[0][:5]]) #前五名最佳预测

torch.Size([3, 224, 224])
torch.return_types.max(
values=tensor([17.8954], grad_fn=<MaxBackward0>),
indices=tensor([232]))
Border collie 75.03390502929688
[('Border collie', 75.03390502929688), ('collie', 24.824813842773438), ('kelpie', 0.04702140390872955), ('Shetland sheepdog, Shetland sheep dog, Shetland', 0.025132307782769203), ('Cardigan, Cardigan Welsh corgi', 0.020679719746112823)]


## CHAPTER3 TENSORS

In [54]:
#############
#create tensor from ones or zeros
a=torch.ones(3)
print(a)
print(a[2])
print(float(a[2]))
a[2]=3.0
print(a)

b=torch.zeros(3,2)
print(b)
#there are two ways to call the index of a tensor
b[0][1]=1.0
b[1,1]=3.0
print(b)


tensor([1., 1., 1.])
tensor(1.)
1.0
tensor([1., 1., 3.])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0., 1.],
        [0., 3.],
        [0., 0.]])


In [55]:
#the way to call the index is  just like using a Python list
print(b[2,1])
print(b[2][1])
print(b[1:,:])
print(b[:2,:1])
print(b[None]) ##add a dimension of size 1

tensor(0.)
tensor(0.)
tensor([[0., 3.],
        [0., 0.]])
tensor([[0.],
        [0.]])
tensor([[[0., 1.],
         [0., 3.],
         [0., 0.]]])


In [56]:
#create tensor from list-like objects
locs=torch.tensor([[1.0,2.0],[3.0,4.0],[5.0,1.0],[2.0,3.0]])
print(locs)
print(locs.shape)

tensor([[1., 2.],
        [3., 4.],
        [5., 1.],
        [2., 3.]])
torch.Size([4, 2])


In [57]:
#tensor broadcast
img_t = torch.randn(3, 5, 5) # shape [channels, rows, columns]
batched_img_t = torch.randn(2, 3, 5, 5) # shape [batch, channels, rows, columns]
img_gray_naive = img_t.mean(-3)
batch_gray_naive = batched_img_t.mean(-3)
print(img_gray_naive.shape, batch_gray_naive.shape)

weights = torch.tensor([0.2126, 0.7152, 0.0722])
unsqueezed_weights = weights.unsqueeze(-1).unsqueeze_(-1) #expand the ori-weight tensor into a 3,1,1 tensor
print(unsqueezed_weights.shape)
print(unsqueezed_weights)
"""
广播的核心规则
广播遵循从 最后一维向前比较 的规则，逐维度检查是否满足以下条件之一：
1.维度大小相等，或
2.其中一个维度大小为 1（可扩展），或
3.其中一个张量在该维度不存在（默认为1）

复制时从左向右，按照该tensor(此处可以理解为list)index下的格式，复制后扩增维目标维度
"""
img_weights = (img_t * unsqueezed_weights)   #注意！这里在做乘法的时候激活了广播机制，相当于原本的unsqueezed_weight被扩展成了3*5*5,而而这三个5*5矩阵分别是原始weight*I的矩阵
batched_img_weight = (batched_img_t * unsqueezed_weights) #这里与上面类似，被广播成了2，3,5,5
img_gray_weighted = img_weights.sum(-3)
batch_gray_weighted = batched_img_weight.sum(-3)

print(img_weights.shape, batched_img_weight.shape)
print(img_gray_weighted.shape, batch_gray_weighted.shape)

torch.Size([5, 5]) torch.Size([2, 5, 5])
torch.Size([3, 1, 1])
tensor([[[0.2126]],

        [[0.7152]],

        [[0.0722]]])
torch.Size([3, 5, 5]) torch.Size([2, 3, 5, 5])
torch.Size([5, 5]) torch.Size([2, 5, 5])


In [58]:
#give your tensor index a name?
weights_named = torch.tensor([0.2126, 0.7152, 0.0722], names=['channels'])
print(weights_named)
img_named =  img_t.refine_names(..., 'channels', 'rows', 'columns') #对dimension 进行重命名
batch_named = batched_img_t.refine_names(..., 'channels', 'rows', 'columns') #这里的... 相当于list中的 :
print("img named:", img_named.shape, img_named.names)
print("batch named:", batch_named.shape, batch_named.names)

tensor([0.2126, 0.7152, 0.0722], names=('channels',))
img named: torch.Size([3, 5, 5]) ('channels', 'rows', 'columns')
batch named: torch.Size([2, 3, 5, 5]) (None, 'channels', 'rows', 'columns')


In [59]:
#对 拥有维度命名的 Tensor 进行自动维度扩增 将 align as b
# 该过程也被称为广播机制
'''Requirements:
All dimensions of the input tensor (self) must be named.
All dimension names of self must exist in other.names. However, other can have additional dimension names not present in self, for which size-one dimensions are added in the output
'''
weights_aligned = weights_named.align_as(img_named)
weights_aligned.shape, weights_aligned.names

gray_named = (img_named * weights_aligned).sum('channels')
gray_named.shape, gray_named.names


(torch.Size([5, 5]), ('rows', 'columns'))

In [60]:
print(img_named[..., :3])
print(weights_named)
##当使用 基于维度名称的 方法调用时，两个tensor调用维度右边的维度结构和对应名称需要一致
gray_named = (img_named[..., :3] * weights_named).sum('channels')

tensor([[[ 0.0910,  0.8224, -0.1598],
         [-0.9540,  0.4426,  0.2627],
         [ 0.2740, -0.2844,  0.6031],
         [-0.1608,  1.5730, -0.4043],
         [ 0.1560, -0.5832,  0.0192]],

        [[-2.0772,  1.2196,  2.5565],
         [ 1.3061, -0.9751, -0.3076],
         [ 0.2361, -0.5522, -0.6863],
         [-0.7045,  0.5349,  1.9416],
         [ 0.0611,  0.2697,  1.1704]],

        [[-0.6427, -0.1154, -0.2301],
         [ 0.1354,  1.7750, -0.1301],
         [ 0.9476,  0.3809, -0.5073],
         [-0.2495, -1.5202, -0.3195],
         [-0.9610, -0.6555, -1.8018]]], names=('channels', 'rows', 'columns'))
tensor([0.2126, 0.7152, 0.0722], names=('channels',))


RuntimeError: Error when attempting to broadcast dims ['channels', 'rows', 'columns'] and dims ['channels']: dim 'columns' and dim 'channels' are at the same position from the right but do not match.

In [18]:
# 由于这玩意到底是个实验功能，最后还是让我们回到 未命名的tensor域
gray_plain = gray_named.rename(None) #使用 remane(None)来消除原始命名
gray_plain.shape, gray_plain.names

(torch.Size([5, 3]), (None, None))

In [22]:
# customize your dtype in the tensor
double_points = torch.ones(10, 2, dtype=torch.double)
short_points = torch.tensor([[1, 2], [3, 4]], dtype=torch.short)
print(double_points.dtype)
print(short_points.dtype)
double_points = torch.ones(10,2).double()
short_points=torch.tensor([[1,3],[2,4]]).short()
print(double_points.dtype)
print(short_points.dtype)
double_points=torch.ones(10,2).to(torch.double)
short_points=torch.tensor([[1,3],[2,4]]).to(torch.short)
print(double_points.dtype)
print(short_points.dtype)


torch.float64
torch.int16
torch.float64
torch.int16
torch.float64
torch.int16


In [None]:
# 张量转置/切换维度次序

x = torch.randn(2, 3, 4, 5)  # shaz, 3, 4, 5]
y = x.transpose(1, 2)   # 交换 dim1 和 dim2  #transpose适合两个维度进行交换
print(y.shape)  # 输出: [2, 4, 3, 5]

x = torch.randn(2, 3) #偷懒模式下可以使用 t()来进行转置， 但这一函数只能用在维度小于等于2的情况
y = x.t()
print(y.shape)

x = torch.randn(2, 3, 4, 5)  # shape: [2, 3, 4, 5]
y = x.permute(0, 2, 1, 3)    # 调整维度顺序为 [0, 2, 1, 3] permute适合同时多个维度调整
print(y.shape)  # 输出: [2, 4, 3, 5]

x = torch.randn(2, 3, 4, 5)  # shape: [2, 3, 4, 5]
y = x.mT                     # 转置最后两个维度，这个操作的逻辑多见于图像处理
print(y.shape)  # 输出: [2, 3, 5, 4]


In [None]:
#张量基本计算操作

x=torch.randn(2,2,2)
print(x)
#以下操作都是可以选定要执行操作的具体dim的
#1.point wise(element wise)
print("-----point wise----")
y=x.abs()
print(y)
y=x.cos()
print(y)

#2.reduction ops
print("----reduction----")
y=x.mean()
print(y)
y=x.std()
print(y)
y=x.norm()
print(y)

#3.comparison ops
print("----comparison----")
z=torch.randn(2,2,2)
print(x.max())
print(x.equal(z))



In [None]:
## contiguous tensor
## 当tensor的存储(storge)格式恰好是其要表示的tensor从左到右的元素排列（2D下逐行从左到右写在storge中)时 该Tensor是contiguous的
## torch的很多功能仅限于contiguous tensor

a=torch.randn(2,2,2)
print(a.is_contiguous())
print(a.storage())
print(a.stride())
#转置不会更改storge中等的元素存储 但会改变stride信息
b=a.transpose(1,2)
print(b.is_contiguous())
print(b.storage())
print(b.stride())


In [61]:
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
## hdf5 locking files turned down

In [None]:
## serialize a tensor
torch.save(a,"temp_storge/temp_tensor.t")
points = torch.load('temp_storge/temp_tensor.t')
print(points)

## serialize a tensor with h5
import h5py
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'

with h5py.File("temp_storge/ourpoints.hdf5", 'w') as f:
    dset = f.create_dataset('test_point', data=points.numpy())
    print("Dataset created successfully.")

f2 = h5py.File('temp_storge/ourpoints.hdf5', 'r')
dset_load = f2['test_point']
print(dset_load)
f2.close()


In [73]:
## exercise after book I
list_a=[i for i in range(9)]
print(list_a)
init_tesnor=torch.tensor(list_a,device=torch.device('cuda'))
print(init_tesnor.dtype)
print(init_tesnor.shape)
print(init_tesnor.stride())

b=init_tesnor.view(3,3)
print(init_tesnor.storage())
print(b.storage())
print(b)

c=b[1:,1:]
print(c)

[0, 1, 2, 3, 4, 5, 6, 7, 8]
torch.int64
torch.Size([9])
(1,)
 0
 1
 2
 3
 4
 5
 6
 7
 8
[torch.storage.TypedStorage(dtype=torch.int64, device=cuda:0) of size 9]
 0
 1
 2
 3
 4
 5
 6
 7
 8
[torch.storage.TypedStorage(dtype=torch.int64, device=cuda:0) of size 9]
tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]], device='cuda:0')
tensor([[4, 5],
        [7, 8]], device='cuda:0')


In [80]:
## exercise after book II
import math
b=init_tesnor.square()
c=init_tesnor**(1/2)
d=torch.sqrt(init_tesnor)
# e=math.sqrt(init_tesnor) #You can't use scala method on tensors more than 1 dim
print(d)

ValueError: only one element tensors can be converted to Python scalars