# TensorBoard 的使用

In [1]:
# Uncomment to install TensorBoard into this kernel's environment:
# %pip install tensorboard

In [2]:
# Check which TensorBoard version the command-line tool reports.
!tensorboard --version

2.20.0


TensorFlow installation not found - running with reduced feature set.


In [3]:
# Confirm from inside the kernel that the tensorboard package is importable
# and show the version this environment resolved.
import tensorboard

installed_version = tensorboard.__version__
print("TensorBoard 版本:", installed_version)

TensorBoard 版本: 2.20.0


In [4]:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import numpy as np

# Usage
# Create the SummaryWriter. Event files go to the fixed directory given here;
# a timestamped run directory is only generated when no log_dir is passed.
writer = SummaryWriter(log_dir='runs/experment_1')


# Define the model
class SimpleModel(nn.Module):
    """Minimal single-layer network, used only to have something to log.

    Maps a 10-feature input to 5 outputs through one fully connected layer.
    """

    def __init__(self):
        super(SimpleModel, self).__init__()
        # The only layer: 10 input features -> 5 output features.
        self.fc = nn.Linear(in_features=10, out_features=5)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        projected = self.fc(x)
        return projected

In [5]:
# Log synthetic data of each kind so every TensorBoard dashboard has content.
model = SimpleModel()
for step in range(100):
    # Scalars: a sine and a cosine curve stand in for loss and accuracy.
    phase = step * 0.1
    writer.add_scalar('Loss/train', np.sin(phase), step)
    writer.add_scalar('Accuracy/train', np.cos(phase), step)

    # Histogram of the linear layer's weights. The model is never trained in
    # this cell, so the logged distribution is the same at every step.
    writer.add_histogram('Weight/fc', model.fc.weight, step)

    # Image (example): one random 3x64x64 picture on every 10th step.
    log_image = (step % 10 == 0)
    if log_image:
        dummy_img = torch.rand(3, 64, 64)  # random image
        writer.add_image('Sample_image', dummy_img, step, dataformats='CHW')

    # Model structure: traced with a dummy input, on the first step only.
    log_graph = (step == 0)
    if log_graph:
        dummy_input = torch.rand(1, 10)
        writer.add_graph(model, dummy_input)

# Make sure everything is written to disk, then release the writer.
writer.flush()
writer.close()  # close when finished
print("TensorBoard日志生成完成!")

TensorBoard日志生成完成!


In [7]:
# Load the TensorBoard notebook extension (reloads it if it is already loaded).
%reload_ext tensorboard

# Start TensorBoard from the notebook, reading the runs under ./runs, on port 6007.
%tensorboard --logdir runs --port 6007

# 一个完整实例（MLP）

In [7]:
# help(torch.optim.Adam)

In [8]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device :{device}')

# Dataset preparation: convert images to tensors, then normalize the single
# grayscale channel with mean 0.5 and std 0.5.
# The statistics are written as 1-tuples; a bare `(0.5)` is just the float 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# download=True fetches MNIST into ./data only when it is not already there,
# so this cell also runs on a machine that has never downloaded the dataset.
train_set = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

# Model: flatten each image to 784 values, then a 784 -> 128 -> 10 MLP.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
).to(device)

# TensorBoard writer for this experiment.
writer = SummaryWriter('runs/MNIST_experiment')

# Log the model graph, traced with one real batch taken from the loader.
image, _ = next(iter(train_loader))
writer.add_graph(model, image.to(device))

# Training setup: cross-entropy loss, Adam optimizer, and a global step counter
# used as the x-axis for everything logged to TensorBoard.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
total_step = 0

# try/finally so the writer is flushed and closed even if training raises or
# is interrupted from the notebook.
try:
    for epoch in range(5):
        for i, (images, labels) in enumerate(train_loader):
            # Move the batch to the selected device
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Every 100 batches: log scalars and a preview of the inputs
            if i % 100 == 0:
                # Accuracy is only needed when it is logged; computing it here
                # avoids a device-to-host sync (.item()) on every batch.
                predicted = outputs.detach().argmax(dim=1)
                accuracy = (predicted == labels).sum().item() / labels.size(0)

                writer.add_scalar('Loss/train', loss.item(), total_step)
                writer.add_scalar('Accuracy/train', accuracy, total_step)

                # The batch was normalized by the input transform, so its values
                # are not in the [0, 1] range that float images are displayed
                # in; normalize=True rescales the grid to [0, 1] before logging.
                img_grid = torchvision.utils.make_grid(images[:16].cpu(), normalize=True)
                writer.add_image('Input_images', img_grid, total_step)

            # Every 200 batches: log weight and gradient histograms
            if i % 200 == 0:
                for name, param in model.named_parameters():
                    writer.add_histogram(f'weights/{name}', param, total_step)
                    if param.grad is not None:
                        writer.add_histogram(f'grads/{name}', param.grad, total_step)

            total_step += 1
finally:
    # Close the writer (this also flushes pending events)
    writer.close()

Using device :cuda


In [8]:
%tensorboard --logdir runs/mnist_experiment --port 6007

Reusing TensorBoard on port 6007 (pid 12556), started 1 day, 9:51:03 ago. (Use '!kill 12556' to kill it.)

In [14]:
# Windows-only diagnostic: list connections on port 6007 together with the
# owning process ID (last column), to find a TensorBoard instance holding it.
!netstat -ano | findstr ":6007"

In [13]:
# Windows-only: force-terminate the process with the given PID.
# NOTE(review): the PID is hard-coded and the recorded output shows it no
# longer exists; replace it with the PID reported by the netstat cell.
!taskkill /PID 21400 /F

错误: 没有找到进程 "21400"。
