In [100]:
import torch
import torchvision
import numpy as np

# 交叉熵损失

## softmax
+ softmax 使输出的每一行之和为 1，即把本身不能直接解释为概率的原始输出转换为一个概率分布

In [3]:
def softmax(x):
    """Row-wise softmax: turn each row of ``x`` into a probability distribution.

    Args:
        x: 2-D tensor of raw scores; normalization runs along dimension 1.

    Returns:
        Tensor with the same shape as ``x`` whose rows each sum to 1.
    """
    # Subtracting the per-row maximum does not change the mathematical result,
    # but it keeps torch.exp from overflowing to inf (which would become nan
    # after the division) when the scores are large.
    shifted = x - x.max(dim=1, keepdim=True).values
    exp_x = torch.exp(shifted)
    return exp_x / exp_x.sum(dim=1, keepdim=True)

In [9]:
# Two rows of sample scores used to try out softmax
test_x = torch.tensor([[0.5,0.8,1],
                      [1.2,1.5,1.6]])
x_softmax = softmax(test_x)
# Display the result as the cell output
x_softmax

tensor([[0.2501, 0.3376, 0.4123],
        [0.2603, 0.3514, 0.3883]])

In [14]:
# Apply softmax a second time, to the already-normalized rows
softmax(x_softmax)

tensor([[0.3060, 0.3340, 0.3599],
        [0.3094, 0.3389, 0.3517]])

In [32]:
# Note: applying softmax repeatedly keeps changing the values
softmax(softmax(x_softmax))

tensor([[0.3243, 0.3335, 0.3422],
        [0.3254, 0.3351, 0.3394]])

## 交叉熵
+ 注意：最终的交叉熵损失是所有样本交叉熵的均值

In [59]:
# Method 1
def cross_loss_1(y_hat, y):
    """Per-sample cross-entropy, computed one sample at a time.

    Args:
        y_hat: predicted probabilities, indexed as ``y_hat[sample][class]``.
        y: true class index of each sample; only ``len()`` and indexing are used.

    Returns:
        A Python list with one float loss per sample.
    """
    # .item() turns each 0-d tensor into a plain Python float, so the result
    # is a list of floats rather than a list of tensors.
    return [-torch.log(y_hat[idx][y[idx]]).item() for idx in range(len(y))]

In [69]:
# Method 2
def cross_loss_2(y_hat, y):
    """Per-sample cross-entropy, vectorized with gather.

    Args:
        y_hat: 2-D tensor of predicted probabilities, one row per sample.
        y: integer tensor holding the true class index of each sample.

    Returns:
        Tensor of shape (n, 1) with the negative log of the probability
        selected for each sample.
    """
    # Reshape the labels into a column so gather picks one entry from each row.
    target_idx = y.view(-1, 1)
    picked = torch.gather(y_hat, 1, target_idx)
    return -torch.log(picked)

In [70]:
# Raw scores for two samples
y_hat = torch.tensor([[0.5,0.8,1],
                      [1.2,1.5,1.6]])
# True class index of each sample
y = torch.tensor([2,1])
# Replace the raw scores with their row-wise softmax probabilities
y_hat = softmax(y_hat)
y_hat

tensor([[0.2501, 0.3376, 0.4123],
        [0.2603, 0.3514, 0.3883]])

In [71]:
# Walk through how the cross-entropy loss is obtained.
# The loss is the negative log of the probability predicted for each sample's true class.
# From row 0 of y_hat the element at index 2 is taken,
# from row 1 of y_hat the element at index 1 is taken;
# 2 and 1 are the true labels of the samples, y = [2, 1].
# NOTE(review): the first label printed below says "-log", but torch.log(y_hat)
# is printed without negation, so the displayed values are negative.
print('对全部概率取-log：')
print(torch.log(y_hat))
print('\n')
print('对真实类别对应概率取-log：')
print('法1',cross_loss_1(y_hat,y))
print('法2',cross_loss_2(y_hat,y))

对全部概率取-log：
tensor([[-1.3859, -1.0859, -0.8859],
        [-1.3459, -1.0459, -0.9459]])


对真实类别对应概率取-log：
法1 [0.8859393000602722, 1.0459107160568237]
法2 tensor([[0.8859],
        [1.0459]])


In [98]:
# Sanity check
# Assume 4 samples with 3 raw output scores each
o_hat = torch.tensor([[1.5, 1.9, 2.1],
                     [5.6,6.1,4.9],
                     [3.5,3.9,1.2],
                     [9.8,9.0,7.8]])
print('o_hat:',o_hat,'\n')
# softmax turns the raw scores into per-row probabilities
y_hat = softmax(o_hat)
print('y_hat:',y_hat,'\n')

# True class index of each sample
y = [1,0,1,2]
y_target = torch.tensor(y)
print('y:',y,'\n')

# Negative log of every probability
print('log_all:',-torch.log(y_hat),'\n')

# Per-sample cross-entropy loss
loss = cross_loss_1(y_hat,y)
print('loss:',loss,'\n')
# Mean of the per-sample losses
loss_mean = torch.tensor(loss).mean()
print('loss_mean',loss_mean)

# Same computation with torch.nn.CrossEntropyLoss, which is fed the raw
# scores (o_hat) rather than the softmax probabilities.
crossentropyloss=torch.nn.CrossEntropyLoss()
# Reuse the label tensor built above instead of constructing it a second time.
crossentropyloss_output=crossentropyloss(o_hat,y_target)
print('crossentropyloss_output:',crossentropyloss_output)

# The hand-written pipeline must agree with the built-in loss.
assert torch.isclose(loss_mean, crossentropyloss_output)



o_hat: tensor([[1.5000, 1.9000, 2.1000],
        [5.6000, 6.1000, 4.9000],
        [3.5000, 3.9000, 1.2000],
        [9.8000, 9.0000, 7.8000]]) 

y_hat: tensor([[0.2318, 0.3458, 0.4224],
        [0.3179, 0.5242, 0.1579],
        [0.3858, 0.5755, 0.0387],
        [0.6310, 0.2835, 0.0854]]) 

y: [1, 0, 1, 2] 

log_all: tensor([[1.4619, 1.0619, 0.8619],
        [1.1459, 0.6459, 1.8459],
        [0.9525, 0.5525, 3.2525],
        [0.4604, 1.2604, 2.4604]]) 

loss: [1.0618523359298706, 1.1459113359451294, 0.5524619817733765, 2.4603724479675293] 

loss_mean tensor(1.3051)
crossentropyloss_output: tensor(1.3051)


# Fashion_MNIST数据集

In [102]:
# Load the dataset
def load_data_fashion_MNIST(batch_size, root='~/Datasets/FashionMnist', num_workers=0):
    """Fetch Fashion-MNIST through torchvision and wrap it in batched DataLoaders.

    Args:
        batch_size: number of samples per batch, used for both loaders.
        root: directory passed to torchvision as the dataset location; the
            files are downloaded there when they are not already present.
        num_workers: worker count handed to each DataLoader (0 keeps loading
            in the main process).

    Returns:
        A ``(train_iter, test_iter)`` tuple of DataLoaders over the training
        and test splits.
    """
    # One ToTensor transform instance is shared by both splits.
    to_tensor = torchvision.transforms.ToTensor()
    # train=True selects the training split, train=False the test split.
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, download=True, transform=to_tensor)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, download=True, transform=to_tensor)
    # Wrap the datasets in iterable batch loaders. Only the training split is
    # shuffled; the test split keeps a fixed order so evaluation runs are
    # repeatable.
    train_iter = torch.utils.data.DataLoader(
        mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(
        mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter

In [104]:
## Inspect the dataset
batch_size=256
train_iter,test_iter = load_data_fashion_MNIST(batch_size)

# NOTE: this loop rebinds the global name y that earlier cells used for labels.
for X,y in train_iter:
    print(X.shape,y.shape)
    break
# As the printed shapes show, each iteration yields one batch of batch_size samples.
# Without the break, the for loop would keep going until every sample has been visited.

torch.Size([256, 1, 28, 28]) torch.Size([256])
