In [61]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [62]:
#下载训练集
train_dataset = datasets.MNIST(root = './',
                              train = True,
                              transform = transforms.ToTensor(),
                              download=True)

#下载测试集
test_dataset = datasets.MNIST(root = './',
                              train = False,
                              transform = transforms.ToTensor(),
                              download=True)

In [63]:
#批次大小
batch_size = 64

#装载训练集
train_loader = DataLoader(dataset = train_dataset,
                         batch_size = batch_size,
                         shuffle=True)

#装载测试集
test_loader = DataLoader(dataset = test_dataset,
                         batch_size = batch_size,
                         shuffle=True)

In [89]:
#input_size输入特征的大小 
#hidden_size,LSTM模块的数量
# num_layer 表示隐藏层的层数
#LSTM默认input(seq_len, batch, feature)
# batch_first = Ture， 数据的格式变成input和output(batch, seq_len, feature)
class LSTM(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = torch.nn.LSTM(
        input_size = 28,
        hidden_size = 64,
        num_layers = 5,
        batch_first = True)
        self.out = torch.nn.Linear(in_features=64, out_features=10)
        self.softmax = torch.nn.Softmax(dim = 1)
    def forward(self, x):
        #（batch, seq_len, feature）
        #output:[batch,seq_len,hidden_size]
        #虽然LSTM的batch_first设置为True,但是h_n,c_n的第一个维度还是num_layers
        #h_n:[num_layers, batch, hidden_size]只包含最后一个输出序列的结果
        #c_n:[num_layers, batch, hidden_size]只包含最后一个输出序列的结果
        x = x.view(-1, 28, 28)
        output,(h_n, c_n) = self.lstm(x)
        output_in_last_timestep = h_n[-1,:,:]
        x = self.out(output_in_last_timestep)
        x = self.softmax(x)
        
        return x

In [90]:
LR = 0.0003
model = LSTM()
entroy_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), LR)

In [91]:
def train():
    model.train()
    for i, data in enumerate(train_loader):
        inputs, labels = data
        out = model(inputs)
        loss = entroy_loss(out, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def test():
    model.eval()
    correct = 0
    for i, data in enumerate(test_loader):
        inputs, labels = data
        out = model(inputs)
        _, predicted = torch.max(out, 1)
        correct += (predicted == labels).sum()
    print("Train acc:{0}".format(correct.item()/len(test_dataset)))

In [92]:
for epoch in range(0, 10):
    print('epoch:', epoch)
    train()
    test()

epoch: 0
Train acc:0.7504
epoch: 1
Train acc:0.9023
epoch: 2
Train acc:0.9338
epoch: 3
Train acc:0.9495
epoch: 4
Train acc:0.9552
epoch: 5
Train acc:0.9599
epoch: 6
Train acc:0.962
epoch: 7
Train acc:0.9679
epoch: 8
Train acc:0.964
epoch: 9
Train acc:0.9737


In [88]:
# 创建一个二维张量
x = torch.tensor([[1, 3, 2], [4, 100, 6], [7, 8, 9]])

# 在第一个维度（行）上查找最大值，并保留形状
max_value_row, max_index_row = torch.max(x, 0, keepdim=True)
print("Max values per row (keeping shape):", max_value_row)
print("Max indices per row (keeping shape):", max_index_row)

# 在第二个维度（列）上查找最大值，并保留形状
max_value_col, max_index_col = torch.max(x, 1, keepdim=True)
print("Max values per column (keeping shape):", max_value_col)
print("Max indices per column (keeping shape):", max_index_col)

Max values per row (keeping shape): tensor([[  7, 100,   9]])
Max indices per row (keeping shape): tensor([[2, 1, 2]])
Max values per column (keeping shape): tensor([[  3],
        [100],
        [  9]])
Max indices per column (keeping shape): tensor([[1],
        [1],
        [2]])


In [68]:
# 创建一个二维张量
x = torch.tensor([[1, 3, 2], [4, 5, 100], [7, 8, 9]])

# 查找并打印最大值
max_value, max_index_row = torch.max(x, dim = 0)
print("Max value:", max_value)

Max value: tensor([  7,   8, 100])
