In [1]:
# 导入库
import torch, torchvision
import numpy as np
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

In [None]:
# 1、导入数据

# 定义路径
train_path = r"D:\BaiduNetdiskDownload\FLIR_ADAS_1_3\aligned\ImageFolder\train"
test_path = r"D:\BaiduNetdiskDownload\FLIR_ADAS_1_3\aligned\ImageFolder\test"

# 预处理步骤
transform_train = torchvision.transforms.Compose([       
        transforms.ToTensor(),  # 转变为(C,H,W)的Tensor格式
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),  # 归一化，Imagenet数据集的均值和标准差
        transforms.RandomHorizontalFlip(),  # 使用RandomHorizontalFlip转换以50%的概率随机水平翻转图像
        # transforms.Resize([56, 56]),  # 重新设置图片大小
])
transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        # transforms.Resize([56, 56]),  # 重新设置图片大小
])

# ImageFolder类的数据集
train_dataset = torchvision.datasets.ImageFolder(train_path, transform=transform_train)
test_dataset = torchvision.datasets.ImageFolder(test_path, transform=transform_test)
# dataloader
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True)    # drop_last 丢弃不满一个batch的数据
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=8)

In [None]:
print(test_dataset.classes)     # label是按照文件夹名顺序排序后存成字典，即{类名:类序号(从0开始)}
print(test_dataset.class_to_idx)    # 查看映射关系
for i, (inputs, labels) in enumerate(train_iter):
    if i == 2:
        break

    print(inputs.shape, labels) # inputs.shape: b, c, w, h  labels: 一个批次的label

In [None]:
# 2、构建模型

class IAN(nn.Module):
    def __init__(self) -> None:
        super(IAN, self).__init__()
        self.Conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, 1),
            nn.SiLU(),
            nn.MaxPool2d(2)
        )
        self.Conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 1, 1),
            nn.SiLU(),
            nn.MaxPool2d(2)
        )
        self.dropout = nn.Dropout(0.5)
        self.FC1 = nn.Linear(64*14*14, 512)    # 14*14 是卷积之后的特征图大小
        self.FC2 = nn.Linear(512, 256)
        self.FC3 = nn.Linear(256, 2)

        self.GAP = nn.AdaptiveAvgPool2d([56, 56])
        
    def forward(self, x):
        _ = self.GAP(x)                 # 直接用GAP当resize用
        _ = self.Conv2(self.Conv1(_))
        # print(_.shape)    # debug point
        _ = _.view(x.size(0), 64 * 14 * 14)   # 展开为1维向量，方便做全连接操作
        _ = self.FC1(_)
        _ = self.dropout(_)
        _ = self.FC2(_)
        _ = self.dropout(_)
        y = self.FC3(_)
        return y
    
model = IAN()

In [None]:
# 3、开始训练

### 超参定义区 ###
num_epochs = 10
#################

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# optimizer = torch.optim.SGD(model.parameters(), momentum=0.937, lr=0.01)


device = "cuda:0"
model = model.to(device)    # 统一在GPU上面训练

for epoch in range(num_epochs):
    # 用tqdm装饰训练迭代
    with tqdm(train_iter, desc=f'Epoch {epoch}/{num_epochs - 1}') as tepoch:
        for inputs, labels in tepoch:
            # 前向传播
            outputs = model(inputs.to(device))
            loss = criterion(outputs, labels.to(device))
            # 反向传播及优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # 更新tqdm显示的loss
            tepoch.set_postfix(loss=loss.item())

    print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {loss.item():.4f}')

In [None]:
# 4、预测
for inputs, labels in test_iter:
    output = model(inputs.to(device))
    output = F.softmax(output, dim=1)
    print(output)


In [None]:
# 保存模型（可选）

# torch.save(model, 'IAN.pt')  # 模型的图结构一并保存
torch.save(model.state_dict(), 'IAN.pth')  # 仅保存模型的参数
print(f'Model saved!')

In [None]:
# 预测其他数据集(独立于以上步骤)

# 加载模型
# loaded_model = IAN()    # 初始化模型结构,在加载仅保存参数的模型时需要
device = "cuda:0"
# loaded_model.load_state_dict(torch.load('IAN.pth'))  # 加载仅保存参数的模型
loaded_model = torch.load('IAN.pt')   # 加载完整图结构模型
loaded_model.eval()
loaded_model.to(device)

transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
test_path = r"D:\BaiduNetdiskDownload\FLIR_ADAS_1_3\aligned\ImageFolder\test"   # 文件夹必须按imageFolder格式，即要有class子文件夹，否则会报错。class子文件夹最好正确，以验证性能，若class子文件夹错误，只能自己对着结果分析了
test_dataset = torchvision.datasets.ImageFolder(test_path, transform=transform_test)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=32)

for inputs, labels in test_iter:
    output = loaded_model(inputs.to(device))     # 结果是模型预测的输出，取决于模型好坏，不取决于class子文件夹
    output = F.softmax(output, dim=1)
    print(output)

In [None]:
w = softmax_score[0,:]   # (b, 2)   只取一个试验
print(w.shape)
x = torch.Tensor(2, 128, 64, 64)
f = nn.Linear(2, 2, True)
f = f(w)    # 这样调用全连接层才合理，也可以叫线性映射
print(f.shape)
# print(x[0,:,:,0].shape) # 全取的部分用冒号替代，对齐的部分按序号取值
y = torch.add(x[0,:]*f[0], x[1,:]*f[1])

In [None]:
# 尝试给自己模型初始化

# 加载模型
loaded_model = IAN()    # 要运行一下class IAN 
model_path = 'IAN.pth'
device = "cuda:0"
loaded_model.load_state_dict(torch.load(model_path))
print(loaded_model)

In [8]:
from models.yolo_test import Model

model = Model("/project/multispectral-object-detection/models/my_test/yolov5l_illumination_FLIR_v5.yaml", ch=3, nc=3).to("cuda:7")  # 模型实例
# print(model)

state_dict = torch.load("IAN.pth")  # 预训练模型
print(state_dict.keys()) 
# odict_keys(['Conv1.0.weight', 'Conv1.0.bias', 'Conv2.0.weight', 'Conv2.0.bias', 'FC1.weight', 'FC1.bias', 'FC2.weight', 'FC2.bias', 'FC3.weight', 'FC3.bias'])
# 需要的格式
# "model.20.Conv1.0.weight", "model.20.Conv1.0.bias", "model.20.Conv2.0.weight", "model.20.Conv2.0.bias", "model.20.FC1.weight", "model.20.FC1.bias", "model.20.FC2.weight", "model.20.FC2.bias",

new_state_dict = {}     # 训练模型的权重表
# new_state_dict["model.20.illumination.Conv1.conv.weight"] = state_dict["illumination.Conv1.conv.weight"]    # 示例,通过指定键值匹配.关键是训练模型的权重表获取到正确的预训练模型的层
for key in state_dict.keys():   # 主要操作
  new_state_dict['model.20.' + key] = state_dict[key]
model.load_state_dict(new_state_dict, strict=False) # strict=True 只有model的全部键都迁移了才不会报错,不然会报错显示什么键未迁移.可用于检查什么键未迁移,是否符合想象
# 本质: load_state_dict 将 state_dict这种格式的数据 从 字典new_state_dict 加载到 训练模型model 中, 字典new_state_dict 先通过键值匹配把权重迁移过来

print('Transferred %g items' % len(new_state_dict))


odict_keys(['Conv1.0.weight', 'Conv1.0.bias', 'Conv2.0.weight', 'Conv2.0.bias', 'FC1.weight', 'FC1.bias', 'FC2.weight', 'FC2.bias', 'FC3.weight', 'FC3.bias'])
Transferred 10 items


In [10]:
from train import create_dataloader_rgb_ir
import cv2
import torch
import numpy as np

EPSILON = 1e-5

def gaus2d(xx, yy, mux=0, muy=0, sigx=1, sigy=1, A=1):
    return A*torch.exp(-((xx - mux)**2. / (2. * sigx**2.) + (yy - muy)**2. / (2. * sigy**2.)))

def create_elipsis_mask(bboxes, img_shape, mode=1):
    bs, c, h, w = img_shape
    mask = torch.zeros((bs, h, w), device='cuda')
    for i, batch_bboxes in enumerate(bboxes):
        n = len(batch_bboxes)
        mask_ = torch.zeros((n, h, w))
        xmin, ymin, xmax, ymax = batch_bboxes[:, 0], batch_bboxes[:, 1], batch_bboxes[:, 2], batch_bboxes[:, 3]
        a = (xmax - xmin)/2
        b = (ymax - ymin)/2
        cx = (xmax + xmin)/2
        cy = (ymax + ymin)/2

        x = torch.tensor(np.arange(0, w, 1), device='cuda:0')
        y = torch.tensor(np.arange(0, h, 1), device='cuda:0')
        xx, yy = torch.meshgrid(x, y, indexing='xy')

        for j in range(n):
            # binary mask
            if mode == 1:
                m_ = (xx-cx[j])**2/a[j]**2 + (yy-cy[j])**2/b[j]**2 < 1+EPSILON
                m_.to(dtype=torch.float32)
                mask_[j, :, :] = m_
            else:
                # gauss heatmap
                gauss_map = gaus2d(xx, yy, cx[j], cy[j], a[j], b[j])
                mask_[j, :, :] = gauss_map.clone().detach()


        mask[i, :, :] = mask_.sum(0).clamp_(min=0, max=1)
        # mask[i, :, :] = mask_.sum(0)

    return mask
train_path_rgb = "/project/datasets/FLIR_aligned/visible/test"
train_path_ir = "/project/datasets/FLIR_aligned/infrared/test"

class opt():
    single_cls = False

dataloader, dataset = create_dataloader_rgb_ir(train_path_rgb, train_path_ir, 640, 8, 32, opt)
for i, (imgs, targets, paths, _) in enumerate(dataloader):
    imgs_rgb = imgs[:, :3, :, :]
    imgs_ir = imgs[:, 3:, :, :]
    # print(targets.shape)
    # print(targets[0])
    gt_bboxes = targets[:, 2:]
    # print(gt_bboxes.shape)  # 应该输出 torch.Size([81, 4])
    # print(gt_bboxes[0])  
    m_y = create_elipsis_mask(gt_bboxes, imgs_rgb.shape, mode=1)
    cv2.imshow("mask", m_y)

Scanning RGB '/project/datasets/FLIR_aligned/visible/test.cache' images and labels... 1013 found, 0 missing, 0 empty, 0 corrupted: 100%|██████████| 1013/1013 [00:00<?, ?it/s]
Scanning IR '/project/datasets/FLIR_aligned/visible/test.cache' images and labels... 1013 found, 0 missing, 0 empty, 0 corrupted: 100%|██████████| 1013/1013 [00:00<?, ?it/s]


IndexError: too many indices for tensor of dimension 1

In [15]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# 定义 EPSILON 常量
EPSILON = 1e-5

# 定义 gaus2d 函数（假设这是高斯热图的实现）
def gaus2d(xx, yy, mux=0, muy=0, sigx=1, sigy=1, A=1):
    return A*torch.exp(-((xx - mux)**2. / (2. * sigx**2.) + (yy - muy)**2. / (2. * sigy**2.)))

# 示例数据
bs = 1  # 批量大小
c = 3   # 通道数
h = 100 # 图像高度
w = 100 # 图像宽度
img_shape = (bs, c, h, w)

# 生成一些边界框 (bboxes)
# 格式为 [xmin, ymin, xmax, ymax]
bboxes = [
    torch.tensor([[10, 10, 40, 40], [60, 60, 90, 90]], device='cuda:0')  # 两个边界框
]
# print(bboxes[0].shape)

# 调用 create_elipsis_mask 函数
mask = create_elipsis_mask(bboxes, img_shape, mode=1)

# 打印掩码的形状
print("Mask shape:", mask.shape)

# 显示掩码
plt.figure(figsize=(10, 5))

# 显示第一个边界框的掩码
plt.subplot(1, 2, 1)
plt.imshow(mask[0].cpu().numpy(), cmap='gray')
plt.title('Binary Mask (Mode 1)')

# 显示第二个边界框的掩码
mask_gauss = create_elipsis_mask(bboxes, img_shape, mode=2)
plt.subplot(1, 2, 2)
plt.imshow(mask_gauss[0].cpu().numpy(), cmap='gray')
plt.title('Gaussian Heatmap (Mode 2)')

plt.tight_layout()

# 保存图像到文件
output_filename = 'mask_images.png'
plt.savefig(output_filename)
print(f"Image saved to {output_filename}")

# 关闭图形以释放内存
plt.close()

torch.Size([2, 4])
Mask shape: torch.Size([1, 100, 100])
Image saved to mask_images.png
